From 1afe9767773345762916d8030c1c0455e41ef249 Mon Sep 17 00:00:00 2001 From: Gabe Date: Sat, 11 Oct 2025 21:06:38 +0800 Subject: [PATCH] feat: support subtitle chunks for AI --- src/apis/index.js | 2 + src/config/i18n.js | 11 +- src/config/setting.js | 1 + src/subtitle/BilingualSubtitleManager.js | 19 ++ src/subtitle/YouTubeCaptionProvider.js | 336 +++++++++++++++++------ src/views/Options/Subtitle.js | 19 ++ 6 files changed, 304 insertions(+), 84 deletions(-) diff --git a/src/apis/index.js b/src/apis/index.js index 494daf8..f330386 100644 --- a/src/apis/index.js +++ b/src/apis/index.js @@ -517,6 +517,7 @@ export const apiTranslate = async ({ // 字幕处理/翻译 export const apiSubtitle = async ({ videoId, + chunkSign, fromLang = "auto", toLang, events = [], @@ -525,6 +526,7 @@ export const apiSubtitle = async ({ const cacheOpts = { apiSlug: apiSetting.apiSlug, videoId, + chunkSign, fromLang, toLang, }; diff --git a/src/config/i18n.js b/src/config/i18n.js index f3984af..248bc57 100644 --- a/src/config/i18n.js +++ b/src/config/i18n.js @@ -1554,9 +1554,9 @@ export const I18N = { zh_TW: `啟用字幕翻譯`, }, is_bilingual_view: { - zh: `启用双语显示`, - en: `DEnable bilingual display`, - zh_TW: `啟用雙語顯示`, + zh: `双语显示`, + en: `Enable bilingual display`, + zh_TW: `雙語顯示`, }, background_styles: { zh: `背景样式`, @@ -1578,6 +1578,11 @@ export const I18N = { en: `AI intelligent punctuation`, zh_TW: `AI智慧斷句`, }, + ai_chunk_length: { + zh: `AI处理切割长度`, + en: `AI processing chunk length`, + zh_TW: `AI處理切割長度`, + }, subtitle_helper_1: { zh: `1、目前仅支持Youtube,且仅支持浏览器扩展。`, en: `1. 
Currently only supports Youtube and browser extensions.`, diff --git a/src/config/setting.js b/src/config/setting.js index 434556f..d8cbc79 100644 --- a/src/config/setting.js +++ b/src/config/setting.js @@ -112,6 +112,7 @@ export const DEFAULT_SUBTITLE_SETTING = { enabled: true, // 是否开启 apiSlug: OPT_TRANS_MICROSOFT, segSlug: "-", // AI智能断句 + chunkLength: 1000, // AI处理切割长度 // fromLang: "en", toLang: "zh-CN", isBilingual: true, // 是否双语显示 diff --git a/src/subtitle/BilingualSubtitleManager.js b/src/subtitle/BilingualSubtitleManager.js index 2b3b55c..1d5912e 100644 --- a/src/subtitle/BilingualSubtitleManager.js +++ b/src/subtitle/BilingualSubtitleManager.js @@ -230,4 +230,23 @@ export class BilingualSubtitleManager { } } } + + /** + * 追加新的字幕 + * @param {Array} newSubtitlesChunk - 新的、要追加的字幕数据块。 + */ + appendSubtitles(newSubtitlesChunk) { + if (!newSubtitlesChunk || newSubtitlesChunk.length === 0) { + return; + } + + logger.info( + `Bilingual Subtitle Manager: Appending ${newSubtitlesChunk.length} new subtitles...` + ); + + this.#formattedSubtitles.push(...newSubtitlesChunk); + this.#formattedSubtitles.sort((a, b) => a.start - b.start); + this.#currentSubtitleIndex = -1; + this.onTimeUpdate(); + } } diff --git a/src/subtitle/YouTubeCaptionProvider.js b/src/subtitle/YouTubeCaptionProvider.js index 14e00ce..96546fa 100644 --- a/src/subtitle/YouTubeCaptionProvider.js +++ b/src/subtitle/YouTubeCaptionProvider.js @@ -218,10 +218,14 @@ class YouTubeCaptionProvider { return docUrl.searchParams.get("v"); } - async #aiSegment({ videoId, toLang, events, segApiSetting }) { + async #aiSegment({ videoId, fromLang, toLang, chunkEvents, segApiSetting }) { try { + const events = chunkEvents.filter((item) => item.text); + const chunkSign = `${events[0].start} --> ${events[events.length - 1].end}`; const subtitles = await apiSubtitle({ videoId, + chunkSign, + fromLang, toLang, events, apiSetting: segApiSetting, @@ -279,7 +283,8 @@ class YouTubeCaptionProvider { return; } - let subtitles = 
[]; + const flatEvents = this.#flatEvents(events); + if (!flatEvents.length) return; const { segApiSetting, toLang } = this.#setting; const lang = potUrl.searchParams.get("lang"); @@ -287,26 +292,77 @@ class YouTubeCaptionProvider { OPT_LANGS_TO_CODE[OPT_TRANS_MICROSOFT].get(lang) || OPT_LANGS_TO_CODE[OPT_TRANS_MICROSOFT].get(lang.slice(0, 2)) || "auto"; + if (potUrl.searchParams.get("kind") === "asr" && segApiSetting) { - // todo: 切分多次发送接受以适应接口处理能力 - subtitles = await this.#aiSegment({ + logger.info("Youtube Provider: Starting AI ..."); + + const eventChunks = this.#splitEventsIntoChunks( + flatEvents, + segApiSetting.chunkLength + ); + const subtitlesFallback = () => + this.#formatSubtitles(flatEvents, fromLang); + + if (eventChunks.length === 0) { + this.#onCaptionsReady({ + videoId, + subtitles: subtitlesFallback(), + fromLang, + isInitialLoad: true, + }); + return; + } + + const firstChunkEvents = eventChunks[0]; + const firstBatchSubtitles = await this.#aiSegment({ videoId, - events: this.#flatEvents(events), + chunkEvents: firstChunkEvents, fromLang, toLang, segApiSetting, }); - } - if (!subtitles?.length) { - subtitles = this.#formatSubtitles(events, fromLang); - } - if (!subtitles?.length) { - logger.info("Youtube Provider: No subtitles after format."); - return; - } + if (!firstBatchSubtitles?.length) { + this.#onCaptionsReady({ + videoId, + subtitles: subtitlesFallback(), + fromLang, + isInitialLoad: true, + }); + return; + } - this.#onCaptionsReady({ videoId, subtitles, fromLang }); + this.#onCaptionsReady({ + videoId, + subtitles: firstBatchSubtitles, + fromLang, + isInitialLoad: true, + }); + + if (eventChunks.length > 1) { + const remainingChunks = eventChunks.slice(1); + this.#processRemainingChunksAsync({ + chunks: remainingChunks, + videoId, + fromLang, + toLang, + segApiSetting, + }); + } + } else { + const subtitles = this.#formatSubtitles(flatEvents, fromLang); + if (!subtitles?.length) { + logger.info("Youtube Provider: No subtitles after 
format."); + return; + } + + this.#onCaptionsReady({ + videoId, + subtitles, + fromLang, + isInitialLoad: true, + }); + } } catch (error) { logger.warn("Youtube Provider: unknow error", error); } finally { @@ -382,8 +438,8 @@ class YouTubeCaptionProvider { } } - #formatSubtitles(events, lang) { - if (!events?.length) return []; + #formatSubtitles(flatEvents, lang) { + if (!flatEvents?.length) return []; const noSpaceLanguages = [ "zh", // 中文 @@ -396,25 +452,49 @@ class YouTubeCaptionProvider { ]; if (noSpaceLanguages.some((l) => lang?.startsWith(l))) { - return events - .map(({ segs = [], tStartMs = 0, dDurationMs = 0 }) => ({ - text: segs - .map(({ utf8 = "" }) => utf8) - .join("") - ?.trim(), - start: tStartMs, - end: tStartMs + dDurationMs, - })) - .filter((item) => item.text); + const subtitles = []; + let currentLine = null; + const MAX_LENGTH = 100; + + for (const segment of flatEvents) { + if (segment.text) { + if (!currentLine) { + currentLine = { + text: segment.text, + start: segment.start, + end: segment.end, + }; + } else { + currentLine.text += segment.text; + currentLine.end = segment.end; + } + + if (currentLine.text.length >= MAX_LENGTH) { + subtitles.push(currentLine); + currentLine = null; + } + } else { + if (currentLine) { + subtitles.push(currentLine); + currentLine = null; + } + } + } + + if (currentLine) { + subtitles.push(currentLine); + } + + return subtitles; } - let lines = this.#processSubtitles({ events }); - const isPoor = this.#isQualityPoor(lines); + let subtitles = this.#processSubtitles({ flatEvents }); + const isPoor = this.#isQualityPoor(subtitles); if (isPoor) { - lines = this.#processSubtitles({ events, usePause: true }); + subtitles = this.#processSubtitles({ flatEvents, usePause: true }); } - return lines; + return subtitles; } #isQualityPoor(lines, lengthThreshold = 250, percentageThreshold = 0.1) { @@ -426,9 +506,9 @@ class YouTubeCaptionProvider { } #processSubtitles({ - events, + flatEvents, usePause = false, - timeout = 
1500, + timeout = 1000, maxWords = 15, } = {}) { const groupedPauseWords = { @@ -516,67 +596,54 @@ class YouTubeCaptionProvider { let currentBuffer = []; let bufferWordCount = 0; - const joinSegs = (segs) => ({ - text: segs - .map((s) => s.text) - .join(" ") - .trim(), - start: segs[0].start, - end: segs[segs.length - 1].end, - }); - const flushBuffer = () => { if (currentBuffer.length > 0) { - sentences.push(joinSegs(currentBuffer)); + sentences.push({ + text: currentBuffer + .map((s) => s.text) + .join(" ") + .trim(), + start: currentBuffer[0].start, + end: currentBuffer[currentBuffer.length - 1].end, + }); } currentBuffer = []; bufferWordCount = 0; }; - events.forEach(({ segs = [], tStartMs = 0, dDurationMs = 0 }) => { - segs.forEach(({ utf8 = "", tOffsetMs = 0 }, j) => { - const text = utf8?.trim().replace(/\s+/g, " ") || ""; - if (!text) return; + flatEvents.forEach((segment) => { + if (!segment.text) return; - const start = tStartMs + tOffsetMs; - const lastSegment = currentBuffer[currentBuffer.length - 1]; + const lastSegment = currentBuffer[currentBuffer.length - 1]; - if (lastSegment) { - if (!lastSegment.end || lastSegment.end > start) { - lastSegment.end = start; - } + if (lastSegment) { + const isEndOfSentence = /[.?!…\])]$/.test(lastSegment.text); + const isPauseOfSentence = /[,]$/.test(lastSegment.text); + const isTimeout = segment.start - lastSegment.end > timeout; + const isWordLimitExceeded = + (usePause || isPauseOfSentence) && bufferWordCount >= maxWords; - const isEndOfSentence = /[.?!…\])]$/.test(lastSegment.text); - const isPauseOfSentence = /[,]$/.test(lastSegment.text); - const isTimeout = start - lastSegment.end > timeout; - const isWordLimitExceeded = - (usePause || isPauseOfSentence) && bufferWordCount >= maxWords; + const startsWithSign = /^[[(♪]/.test(segment.text); + const startsWithPauseWord = + usePause && + groupedPauseWords["1"].has( + segment.text.toLowerCase().split(" ")[0] + ) && + currentBuffer.length > 1; - const 
startsWithSign = /^[[(♪]/.test(text); - const startsWithPauseWord = - usePause && - groupedPauseWords["1"].has(text.toLowerCase().split(" ")[0]) && // todo: 考虑连词开头 - currentBuffer.length > 1; - - if ( - isEndOfSentence || - isTimeout || - isWordLimitExceeded || - startsWithSign || - startsWithPauseWord - ) { - flushBuffer(); - } + if ( + isEndOfSentence || + isTimeout || + isWordLimitExceeded || + startsWithSign || + startsWithPauseWord + ) { + flushBuffer(); } + } - const currentSegment = { text, start }; - if (j === segs.length - 1) { - currentSegment.end = tStartMs + dDurationMs; - } - - currentBuffer.push(currentSegment); - bufferWordCount += text.split(/\s+/).length; - }); + currentBuffer.push(segment); + bufferWordCount += segment.text.split(/\s+/).length; }); flushBuffer(); @@ -614,7 +681,114 @@ class YouTubeCaptionProvider { segments.push(buffer); - return segments.filter((item) => item.text); + return segments; + } + + #splitEventsIntoChunks(flatEvents, chunkLength = 1000) { + if (!flatEvents || flatEvents.length === 0) { + return []; + } + + const eventChunks = []; + let currentChunk = []; + let currentChunkTextLength = 0; + const MAX_CHUNK_LENGTH = chunkLength + 500; + const PAUSE_THRESHOLD_MS = 1000; + + for (let i = 0; i < flatEvents.length; i++) { + const event = flatEvents[i]; + currentChunk.push(event); + currentChunkTextLength += event.text.length; + + const isLastEvent = i === flatEvents.length - 1; + if (isLastEvent) { + continue; + } + + let shouldSplit = false; + + if (currentChunkTextLength >= MAX_CHUNK_LENGTH) { + shouldSplit = true; + } else if (currentChunkTextLength >= chunkLength) { + const isEndOfSentence = /[.?!…\])]$/.test(event.text); + const nextEvent = flatEvents[i + 1]; + const pauseDuration = nextEvent.start - event.end; + if (isEndOfSentence || pauseDuration > PAUSE_THRESHOLD_MS) { + shouldSplit = true; + } + } + + if (shouldSplit) { + eventChunks.push(currentChunk); + currentChunk = []; + currentChunkTextLength = 0; + } + } + + 
if (currentChunk.length > 0) { + eventChunks.push(currentChunk); + } + + return eventChunks; + } + + async #processRemainingChunksAsync({ + chunks, + videoId, + fromLang, + toLang, + segApiSetting, + }) { + logger.info(`Youtube Provider: Starting for ${chunks.length} chunks.`); + + for (let i = 0; i < chunks.length; i++) { + const chunkEvents = chunks[i]; + const chunkNum = i + 2; + logger.info( + `Youtube Provider: Processing subtitle chunk ${chunkNum}/${chunks.length + 1}...` + ); + + let subtitlesForThisChunk = []; + + try { + const aiSubtitles = await this.#aiSegment({ + videoId, + chunkEvents, + fromLang, + toLang, + segApiSetting, + }); + + if (aiSubtitles?.length > 0) { + subtitlesForThisChunk = aiSubtitles; + } else { + logger.info( + `Youtube Provider: AI segmentation for chunk ${chunkNum} returned no data.` + ); + subtitlesForThisChunk = this.#formatSubtitles(chunkEvents, fromLang); + } + } catch (chunkError) { + subtitlesForThisChunk = this.#formatSubtitles(chunkEvents, fromLang); + } + + if (this.#videoId !== videoId) { + logger.info("Youtube Provider: videoId changed!"); + break; + } + + if (subtitlesForThisChunk.length > 0 && this.#managerInstance) { + logger.info( + `Youtube Provider: Appending ${subtitlesForThisChunk.length} subtitles from chunk ${chunkNum}.` + ); + this.#managerInstance.appendSubtitles(subtitlesForThisChunk); + } else { + logger.info(`Youtube Provider: Chunk ${chunkNum} no subtitles.`); + } + + await sleep(randomBetween(500, 1000)); + } + + logger.info("Youtube Provider: All subtitle chunks processed."); } } diff --git a/src/views/Options/Subtitle.js b/src/views/Options/Subtitle.js index 30ce80d..bd5b4a2 100644 --- a/src/views/Options/Subtitle.js +++ b/src/views/Options/Subtitle.js @@ -10,6 +10,7 @@ import Alert from "@mui/material/Alert"; import Switch from "@mui/material/Switch"; import { useSubtitle } from "../../hooks/Subtitle"; import { useApiList } from "../../hooks/Api"; +import { limitNumber } from "../../libs/utils"; 
export default function SubtitleSetting() { const i18n = useI18n(); @@ -19,6 +20,12 @@ export default function SubtitleSetting() { const handleChange = (e) => { e.preventDefault(); let { name, value } = e.target; + switch (name) { + case "chunkLength": + value = limitNumber(value, 200, 20000); + break; + default: + } updateSubtitle({ [name]: value, }); @@ -28,6 +35,7 @@ export default function SubtitleSetting() { enabled, apiSlug, segSlug, + chunkLength, toLang, isBilingual, windowStyle, @@ -96,6 +104,17 @@ export default function SubtitleSetting() { ))} + + +