fix: subtitle
This commit is contained in:
@@ -510,6 +510,7 @@ export const apiSubtitle = async ({
|
|||||||
apiSetting,
|
apiSetting,
|
||||||
}) => {
|
}) => {
|
||||||
const cacheOpts = {
|
const cacheOpts = {
|
||||||
|
apiSlug: apiSetting.apiSlug,
|
||||||
videoId,
|
videoId,
|
||||||
fromLang,
|
fromLang,
|
||||||
toLang,
|
toLang,
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ import { parseJsonObj, extractJson } from "../libs/utils";
|
|||||||
import { kissLog } from "../libs/log";
|
import { kissLog } from "../libs/log";
|
||||||
import { fetchData } from "../libs/fetch";
|
import { fetchData } from "../libs/fetch";
|
||||||
import { getMsgHistory } from "./history";
|
import { getMsgHistory } from "./history";
|
||||||
|
import { parseBilingualVtt } from "../subtitle/vtt";
|
||||||
|
|
||||||
const keyMap = new Map();
|
const keyMap = new Map();
|
||||||
const urlMap = new Map();
|
const urlMap = new Map();
|
||||||
@@ -118,8 +119,9 @@ const parseSTRes = (raw) => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const jsonString = extractJson(raw);
|
// const jsonString = extractJson(raw);
|
||||||
const data = JSON.parse(jsonString);
|
// const data = JSON.parse(jsonString);
|
||||||
|
const data = parseBilingualVtt(raw);
|
||||||
if (Array.isArray(data)) {
|
if (Array.isArray(data)) {
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -354,23 +354,49 @@ Output: {"translations":[{"id":1,"text":"一个<b>React</b>组件","sourceLangua
|
|||||||
|
|
||||||
Fail-safe: On any error, return {"translations":[]}.`;
|
Fail-safe: On any error, return {"translations":[]}.`;
|
||||||
|
|
||||||
const defaultSubtitlePrompt = `Goal: Convert raw subtitle event JSON into a clean, sentence-based JSON array.
|
// const defaultSubtitlePrompt = `Goal: Convert raw subtitle event JSON into a clean, sentence-based JSON array.
|
||||||
|
|
||||||
Output (valid JSON array, output ONLY this array):
|
// Output (valid JSON array, output ONLY this array):
|
||||||
[{
|
// [{
|
||||||
"text": "string", // Full sentence with correct punctuation
|
// "text": "string", // Full sentence with correct punctuation
|
||||||
"translation": "string", // Translation in ${INPUT_PLACE_TO}
|
// "translation": "string", // Translation in ${INPUT_PLACE_TO}
|
||||||
"start": int, // Start time (ms)
|
// "start": int, // Start time (ms)
|
||||||
"end": int, // End time (ms)
|
// "end": int, // End time (ms)
|
||||||
"duration": int // end - start
|
// }]
|
||||||
}]
|
|
||||||
|
|
||||||
Guidelines:
|
// Guidelines:
|
||||||
1. **Segmentation**: Merge sequential 'utf8' strings from 'segs' into full sentences, merging groups logically.
|
// 1. **Segmentation**: Merge sequential 'utf8' strings from 'segs' into full sentences, merging groups logically.
|
||||||
2. **Punctuation**: Ensure proper sentence-final punctuation (., ?, !); add if missing.
|
// 2. **Punctuation**: Ensure proper sentence-final punctuation (., ?, !); add if missing.
|
||||||
3. **Translation**: Translate 'text' into ${INPUT_PLACE_TO}, place result in 'translation'.
|
// 3. **Translation**: Translate 'text' into ${INPUT_PLACE_TO}, place result in 'translation'.
|
||||||
4. **Special Cases**: '[Music]' (and similar cues) are standalone entries. Translate appropriately (e.g., '[音乐]', '[Musique]').
|
// 4. **Special Cases**: '[Music]' (and similar cues) are standalone entries. Translate appropriately (e.g., '[音乐]', '[Musique]').
|
||||||
`;
|
// `;
|
||||||
|
|
||||||
|
const defaultSubtitlePrompt = `You are an expert AI for subtitle generation. Convert a JSON array of word-level timestamps into a bilingual VTT file.
|
||||||
|
|
||||||
|
**Workflow:**
|
||||||
|
1. Merge \`text\` fields into complete sentences; ignore empty text.
|
||||||
|
2. Split long sentences into smaller, manageable subtitle cues (one sentence per cue).
|
||||||
|
3. Translate each cue into ${INPUT_PLACE_TO}.
|
||||||
|
4. Format as VTT:
|
||||||
|
- Start with \`WEBVTT\`.
|
||||||
|
- Each cue: timestamps (\`start --> end\` in milliseconds), original text, translated text.
|
||||||
|
- Keep non-speech text (e.g., \`[Music]\`) untranslated.
|
||||||
|
- Separate cues with a blank line.
|
||||||
|
|
||||||
|
**Output:** Only the pure VTT content.
|
||||||
|
|
||||||
|
**Example:**
|
||||||
|
\`\`\`vtt
|
||||||
|
WEBVTT
|
||||||
|
|
||||||
|
1000 --> 3500
|
||||||
|
Hello world!
|
||||||
|
你好,世界!
|
||||||
|
|
||||||
|
4000 --> 6000
|
||||||
|
Good morning.
|
||||||
|
早上好。
|
||||||
|
\`\`\``;
|
||||||
|
|
||||||
const defaultRequestHook = `async (args, { url, body, headers, userMsg, method } = {}) => {
|
const defaultRequestHook = `async (args, { url, body, headers, userMsg, method } = {}) => {
|
||||||
console.log("request hook args:", args);
|
console.log("request hook args:", args);
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ export function useApiList() {
|
|||||||
);
|
);
|
||||||
|
|
||||||
const aiEnabledApis = useMemo(
|
const aiEnabledApis = useMemo(
|
||||||
() => enabledApis.filter((api) => API_SPE_TYPES.ai.has(api.apiSlug)),
|
() => enabledApis.filter((api) => API_SPE_TYPES.ai.has(api.apiType)),
|
||||||
[enabledApis]
|
[enabledApis]
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -124,6 +124,7 @@ export function useApiItem(apiSlug) {
|
|||||||
apiSlug: item.apiSlug,
|
apiSlug: item.apiSlug,
|
||||||
apiName: item.apiName,
|
apiName: item.apiName,
|
||||||
apiType: item.apiType,
|
apiType: item.apiType,
|
||||||
|
key: item.key,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
return item;
|
return item;
|
||||||
|
|||||||
@@ -111,6 +111,11 @@ class YouTubeCaptionProvider {
|
|||||||
kissControls.appendChild(toggleButton);
|
kissControls.appendChild(toggleButton);
|
||||||
|
|
||||||
toggleButton.onclick = () => {
|
toggleButton.onclick = () => {
|
||||||
|
if (this.#isBusy) {
|
||||||
|
logger.info(`Youtube Provider: It's budy now...`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (!this.#enabled) {
|
if (!this.#enabled) {
|
||||||
logger.info(`Youtube Provider: Feature toggled ON.`);
|
logger.info(`Youtube Provider: Feature toggled ON.`);
|
||||||
this.#startManager();
|
this.#startManager();
|
||||||
@@ -283,9 +288,10 @@ class YouTubeCaptionProvider {
|
|||||||
OPT_LANGS_TO_CODE[OPT_TRANS_MICROSOFT].get(lang.slice(0, 2)) ||
|
OPT_LANGS_TO_CODE[OPT_TRANS_MICROSOFT].get(lang.slice(0, 2)) ||
|
||||||
"auto";
|
"auto";
|
||||||
if (potUrl.searchParams.get("kind") === "asr" && segApiSetting) {
|
if (potUrl.searchParams.get("kind") === "asr" && segApiSetting) {
|
||||||
|
// todo: 切分多次发送接受以适应接口处理能力
|
||||||
subtitles = await this.#aiSegment({
|
subtitles = await this.#aiSegment({
|
||||||
videoId,
|
videoId,
|
||||||
events,
|
events: this.#flatEvents(events),
|
||||||
fromLang,
|
fromLang,
|
||||||
toLang,
|
toLang,
|
||||||
segApiSetting,
|
segApiSetting,
|
||||||
@@ -408,10 +414,7 @@ class YouTubeCaptionProvider {
|
|||||||
lines = this.#processSubtitles({ events, usePause: true });
|
lines = this.#processSubtitles({ events, usePause: true });
|
||||||
}
|
}
|
||||||
|
|
||||||
return lines.map((item) => ({
|
return lines;
|
||||||
...item,
|
|
||||||
duration: Math.max(0, item.end - item.start),
|
|
||||||
}));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#isQualityPoor(lines, lengthThreshold = 250, percentageThreshold = 0.1) {
|
#isQualityPoor(lines, lengthThreshold = 250, percentageThreshold = 0.1) {
|
||||||
@@ -580,6 +583,39 @@ class YouTubeCaptionProvider {
|
|||||||
|
|
||||||
return sentences;
|
return sentences;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#flatEvents(events = []) {
|
||||||
|
const segments = [];
|
||||||
|
let buffer = null;
|
||||||
|
|
||||||
|
events.forEach(({ segs = [], tStartMs = 0, dDurationMs = 0 }) => {
|
||||||
|
segs.forEach(({ utf8 = "", tOffsetMs = 0 }, j) => {
|
||||||
|
const text = utf8.trim().replace(/\s+/g, " ");
|
||||||
|
const start = tStartMs + tOffsetMs;
|
||||||
|
|
||||||
|
if (buffer) {
|
||||||
|
if (!buffer.end || buffer.end > start) {
|
||||||
|
buffer.end = start;
|
||||||
|
}
|
||||||
|
segments.push(buffer);
|
||||||
|
buffer = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
buffer = {
|
||||||
|
text,
|
||||||
|
start,
|
||||||
|
};
|
||||||
|
|
||||||
|
if (j === segs.length - 1) {
|
||||||
|
buffer.end = tStartMs + dDurationMs;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
segments.push(buffer);
|
||||||
|
|
||||||
|
return segments.filter((item) => item.text);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export const YouTubeInitializer = (() => {
|
export const YouTubeInitializer = (() => {
|
||||||
|
|||||||
44
src/subtitle/vtt.js
Normal file
44
src/subtitle/vtt.js
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
/**
 * Convert a cue timestamp string into integer milliseconds.
 *
 * Accepted forms (only the first whitespace-delimited token is read, so
 * anything following the timestamp on the same line is ignored):
 *   - a plain integer millisecond count, e.g. `"3500"`;
 *   - a clock timestamp `[HH:]MM:SS[.mmm]` (`,` is also accepted as the
 *     fraction separator), e.g. `"00:00:03.500"`.
 *
 * @param {string} msString - Raw timestamp text.
 * @returns {number} Milliseconds, or 0 when the text cannot be parsed.
 */
function millisecondsStringToNumber(msString) {
  const [token = ""] = String(msString ?? "")
    .trim()
    .split(/\s+/);

  // Clock form: without this branch parseInt would stop at the first ":"
  // and silently yield only the leading field (usually 0).
  const clock = /^(?:(\d+):)?(\d{1,2}):(\d{1,2})(?:[.,](\d{1,3}))?$/.exec(
    token
  );
  if (clock) {
    const [, hours = "0", minutes, seconds, fraction = ""] = clock;
    const wholeSeconds =
      (Number(hours) * 60 + Number(minutes)) * 60 + Number(seconds);
    // ".5" means 500 ms, so right-pad the fraction to three digits.
    return wholeSeconds * 1000 + Number(fraction.padEnd(3, "0"));
  }

  const milliseconds = Number.parseInt(token, 10);
  return Number.isNaN(milliseconds) ? 0 : milliseconds;
}
|
||||||
|
|
||||||
|
/**
 * Parse bilingual VTT-like text into subtitle records.
 *
 * Each cue is expected to be a timestamp line (`start --> end`) followed by
 * the original text on the first line and its translation on the second.
 * Blocks without a `-->` line (such as the `WEBVTT` header) are skipped, as
 * are cues with a missing start/end or no text line.
 *
 * Tolerated input variations:
 *   - a leading BOM and CRLF / CR line endings;
 *   - blank separator lines that contain spaces or tabs;
 *   - any amount of whitespace (including none) around `-->`;
 *   - Markdown code-fence lines (```), which are never treated as cue text.
 *
 * @param {string} vttText - Raw VTT text; must be a string.
 * @returns {Array<{start: number, end: number, text: string, translation: string}>}
 *   Parsed cues in input order; `translation` is `""` when the cue has only
 *   one text line.
 */
export function parseBilingualVtt(vttText) {
  const normalized = vttText
    .replace(/^\uFEFF/, "")
    .replace(/\r\n?/g, "\n") // unify line endings so blank-line splitting works
    .trim();

  const result = [];

  // A cue boundary is one or more lines that are empty or whitespace-only.
  for (const cue of normalized.split(/\n(?:[ \t]*\n)+/)) {
    const lines = cue.split("\n");

    const timestampLineIndex = lines.findIndex((line) => line.includes("-->"));
    if (timestampLineIndex === -1) continue;

    const [startTimeString, endTimeString] =
      lines[timestampLineIndex].split(/\s*-->\s*/);

    const textLines = lines
      .slice(timestampLineIndex + 1)
      .map((line) => line.trim())
      // A closing ``` fence glued to the last cue must not become its text.
      .filter((line) => line && !line.startsWith("```"));

    if (startTimeString && endTimeString && textLines.length > 0) {
      const [originalText, translatedText = ""] = textLines;

      result.push({
        start: millisecondsStringToNumber(startTimeString),
        end: millisecondsStringToNumber(endTimeString),
        text: originalText,
        translation: translatedText,
      });
    }
  }

  return result;
}
|
||||||
Reference in New Issue
Block a user