2025-10-07 16:35:00 +08:00
|
|
|
import { logger } from "../libs/log.js";
|
2025-10-09 23:55:06 +08:00
|
|
|
import { apiSubtitle, apiTranslate } from "../apis/index.js";
|
2025-10-07 16:35:00 +08:00
|
|
|
import { BilingualSubtitleManager } from "./BilingualSubtitleManager.js";
|
2025-10-10 13:49:04 +08:00
|
|
|
import {
|
|
|
|
|
MSG_XHR_DATA_YOUTUBE,
|
|
|
|
|
APP_NAME,
|
|
|
|
|
OPT_LANGS_TO_CODE,
|
|
|
|
|
OPT_TRANS_MICROSOFT,
|
|
|
|
|
} from "../config";
|
|
|
|
|
import { sleep } from "../libs/utils.js";
|
2025-10-07 16:35:00 +08:00
|
|
|
import { createLogoSvg } from "../libs/svg.js";
|
2025-10-09 02:15:58 +08:00
|
|
|
import { randomBetween } from "../libs/utils.js";
|
2025-10-07 16:35:00 +08:00
|
|
|
|
|
|
|
|
/** Selector used to look up the player's <video> element. */
const VIDEO_SELECT = "#container video";

/** Selector for the player controls the toggle button is inserted before. */
const CONTORLS_SELECT = ".ytp-right-controls";

/** Selector for the caption container that is hidden while the manager runs. */
const YT_CAPTION_SELECT = "#ytp-caption-window-container";
|
|
|
|
|
|
|
|
|
|
/**
 * Bilingual-subtitle provider for YouTube watch pages.
 *
 * It listens for intercepted caption ("timedtext") responses posted to the
 * window, resolves the caption track of the current video, converts the
 * caption events into sentence-level subtitles and hands them to a
 * BilingualSubtitleManager once the user enables the feature through a button
 * injected next to the player controls.
 */
class YouTubeCaptionProvider {
  /** Primary language subtags whose caption segments are joined without spaces. */
  static #NO_SPACE_LANGUAGES = new Set([
    "zh", // Chinese
    "ja", // Japanese
    "ko", // Korean (modern text uses spaces, but it can still be written joined)
    "th", // Thai
    "lo", // Lao
    "km", // Khmer
    "my", // Burmese
  ]);

  /**
   * Connective words/phrases grouped by word count. Only the single-word
   * group is consulted today; the 2- and 3-word groups are kept for the
   * pending TODO in #processSubtitles.
   */
  static #PAUSE_WORDS = Object.freeze({
    1: new Set([
      "actually",
      "also",
      "although",
      "and",
      "anyway",
      "as",
      "basically",
      "because",
      "but",
      "eventually",
      "frankly",
      "honestly",
      "hopefully",
      "however",
      "if",
      "instead",
      "it's",
      "just",
      "let's",
      "like",
      "literally",
      "maybe",
      "meanwhile",
      "nevertheless",
      "nonetheless",
      "now",
      "okay",
      "or",
      "otherwise",
      "perhaps",
      "personally",
      "probably",
      "right",
      "since",
      "so",
      "suddenly",
      "that's",
      "then",
      "there's",
      "therefore",
      "though",
      "thus",
      "unless",
      "until",
      "well",
      "while",
    ]),
    2: new Set([
      "after all",
      "at first",
      "at least",
      "even if",
      "even though",
      "for example",
      "for instance",
      "i believe",
      "i guess",
      "i mean",
      "i suppose",
      "i think",
      "in fact",
      "in the end",
      "of course",
      "then again",
      "to be fair",
      "you know",
      "you see",
    ]),
    3: new Set([
      "as a result",
      "by the way",
      "in other words",
      "in that case",
      "in this case",
      "to be clear",
      "to be honest",
    ]),
  });

  #setting = {};
  #videoId = "";
  #subtitles = [];
  #managerInstance = null;
  #toggleButton = null;
  #enabled = false;
  #ytControls = null;
  #isBusy = false;
  #fromLang = "auto";

  /**
   * @param {object} [setting] - Settings object; `segApiSetting` and `toLang`
   *   are read here, and the whole object is forwarded to the manager.
   */
  constructor(setting = {}) {
    this.#setting = setting;
  }

  /**
   * Wires up the page: listens for intercepted caption responses, resets the
   * state after each in-page navigation and injects the toggle button as soon
   * as the player controls exist.
   */
  initialize() {
    window.addEventListener("message", (event) => {
      // Only accept messages posted by this very window.
      if (event.source !== window) return;
      if (event.data?.type !== MSG_XHR_DATA_YOUTUBE) return;

      const { url, response } = event.data;
      if (url && response) {
        // The handler catches its own errors, so the promise is not awaited.
        this.#handleInterceptedRequest(url, response);
      }
    });

    document.body.addEventListener("yt-navigate-finish", () => {
      setTimeout(() => {
        if (this.#toggleButton) {
          this.#toggleButton.style.opacity = "0.5";
        }
        this.#destroyManager();
        this.#doubleClick();
      }, 1000);
    });

    this.#waitForElement(CONTORLS_SELECT, () => this.#injectToggleButton());
  }

  /**
   * Calls `callback` with the first element matching `selector`, either
   * immediately or as soon as a DOM mutation makes it appear.
   *
   * @param {string} selector - CSS selector to look for.
   * @param {(element: Element) => void} callback - Invoked once with the match.
   */
  #waitForElement(selector, callback) {
    const element = document.querySelector(selector);
    if (element) {
      callback(element);
      return;
    }

    const observer = new MutationObserver((mutations, obs) => {
      const targetNode = document.querySelector(selector);
      if (targetNode) {
        // Stop observing before notifying so the callback runs only once.
        obs.disconnect();
        callback(targetNode);
      }
    });

    observer.observe(document.body, {
      childList: true,
      subtree: true,
    });
  }

  /**
   * Clicks YouTube's own subtitles button twice with short random delays
   * (presumably so the player requests the caption data again and the
   * response can be intercepted — confirm against the interceptor).
   *
   * Does nothing when the player controls have not been located yet; this can
   * happen when a navigation finishes before the controls are found.
   */
  async #doubleClick() {
    try {
      const button = this.#ytControls?.querySelector(
        "button.ytp-subtitles-button"
      );
      if (!button) return;

      await sleep(randomBetween(50, 100));
      button.click();
      await sleep(randomBetween(500, 1000));
      button.click();
    } catch (err) {
      // Callers do not await this method, so never let it reject.
      logger.info("Youtube Provider: toggle native subtitles", err);
    }
  }

  /**
   * Creates the toggle button and inserts it right before the player controls.
   */
  #injectToggleButton() {
    this.#ytControls = document.querySelector(CONTORLS_SELECT);
    if (!this.#ytControls) {
      logger.warn("Youtube Provider: Could not find YouTube player controls.");
      return;
    }

    const kissControls = document.createElement("div");
    kissControls.className = "kiss-bilingual-subtitle-controls";
    Object.assign(kissControls.style, {
      height: "100%",
    });

    const toggleButton = document.createElement("button");
    toggleButton.className =
      "ytp-button notranslate kiss-bilingual-subtitle-button";
    toggleButton.title = APP_NAME;
    Object.assign(toggleButton.style, {
      color: "white",
      // Dimmed until subtitles for the current video are ready.
      opacity: "0.5",
    });

    toggleButton.appendChild(createLogoSvg());
    kissControls.appendChild(toggleButton);

    toggleButton.onclick = () => {
      if (!this.#enabled) {
        logger.info(`Youtube Provider: Feature toggled ON.`);
        this.#startManager();
      } else {
        logger.info(`Youtube Provider: Feature toggled OFF.`);
        this.#destroyManager();
      }
    };

    this.#toggleButton = toggleButton;
    this.#ytControls.before(kissControls);
  }

  /**
   * Extracts the lower-cased primary subtag of a language code
   * ("zh-Hans" -> "zh", "fil" -> "fil").
   *
   * @param {unknown} lang - Language code; non-strings yield "".
   * @returns {string} Primary subtag, or "" when there is none.
   */
  #primaryLang(lang) {
    if (typeof lang !== "string") return "";
    return lang.trim().split(/[-_]/)[0].toLowerCase();
  }

  /**
   * Tells whether two language codes share the same primary subtag.
   *
   * Comparing whole subtags (instead of the first two characters) keeps
   * three-letter codes such as "fil" apart from unrelated two-letter ones
   * such as "fi". Missing codes never match.
   *
   * @param {string | null | undefined} lang1
   * @param {string | null | undefined} lang2
   * @returns {boolean}
   */
  #isSameLang(lang1, lang2) {
    const primary = this.#primaryLang(lang1);
    return primary !== "" && primary === this.#primaryLang(lang2);
  }

  // TODO: refine the selection logic.
  /**
   * Picks the caption track to use: when an auto-generated ("asr") track
   * exists, a non-asr track in the same language is preferred over it;
   * otherwise the last listed track is used.
   *
   * @param {Array<object> | null | undefined} captionTracks - Not modified.
   * @returns {object | null} The chosen track, or null when none is listed.
   */
  #findCaptionTrack(captionTracks) {
    if (!captionTracks?.length) {
      return null;
    }

    const asrTrack = captionTracks.find((item) => item.kind === "asr");
    if (asrTrack) {
      const sameLangTrack = captionTracks.find(
        (item) =>
          item.kind !== "asr" &&
          this.#isSameLang(item.languageCode, asrTrack.languageCode)
      );
      return sameLangTrack ?? asrTrack;
    }

    return captionTracks.at(-1);
  }

  /**
   * Downloads the watch page of `videoId` and reads the caption track list
   * out of the embedded `ytInitialPlayerResponse` object.
   *
   * @param {string} videoId
   * @returns {Promise<Array<object>>} Caption tracks; empty when the page has
   *   none or anything goes wrong (failures are logged, never thrown).
   */
  async #getCaptionTracks(videoId) {
    try {
      const watchUrl = new URL("https://www.youtube.com/watch");
      watchUrl.searchParams.set("v", videoId);

      const res = await fetch(watchUrl.href);
      const html = await res.text();
      const match = html.match(/ytInitialPlayerResponse\s*=\s*(\{.*?\});/s);
      if (!match) return [];

      const data = JSON.parse(match[1]);
      return (
        data.captions?.playerCaptionsTracklistRenderer?.captionTracks ?? []
      );
    } catch (err) {
      logger.info("Youtube Provider: get captionTracks", err);
      return [];
    }
  }

  /**
   * Returns the caption events for the chosen track.
   *
   * When the intercepted request already targets that track (no `tlang`,
   * same `kind`, same language) the intercepted body is parsed directly.
   * Otherwise the intercepted URL is rewritten to the chosen track and
   * fetched as `json3`.
   *
   * Note: `potUrl` is updated in place on the fetch path; the caller reads
   * `lang` and `kind` from it afterwards.
   *
   * @param {URL} capUrl - Base URL of the chosen caption track.
   * @param {URL} potUrl - URL of the intercepted request (may be mutated).
   * @param {string} responseText - Body of the intercepted response.
   * @returns {Promise<Array<object> | null | undefined>} Events, or a nullish
   *   value when they could not be obtained.
   */
  async #getSubtitleEvents(capUrl, potUrl, responseText) {
    const capLang = capUrl.searchParams.get("lang");
    const capKind = capUrl.searchParams.get("kind");

    const isInterceptedTrack =
      !potUrl.searchParams.get("tlang") &&
      potUrl.searchParams.get("kind") === capKind &&
      this.#isSameLang(potUrl.searchParams.get("lang"), capLang);

    if (isInterceptedTrack) {
      try {
        const json = JSON.parse(responseText);
        return json?.events;
      } catch (err) {
        logger.info("Youtube Provider: parse responseText", err);
        return null;
      }
    }

    try {
      potUrl.searchParams.delete("tlang");
      potUrl.searchParams.set("lang", capLang);
      potUrl.searchParams.set("fmt", "json3");
      if (capKind) {
        potUrl.searchParams.set("kind", capKind);
      } else {
        potUrl.searchParams.delete("kind");
      }

      const res = await fetch(potUrl.href);
      if (res?.ok) {
        const json = await res.json();
        return json?.events;
      }
      logger.info(
        `Youtube Provider: Failed to fetch subtitles: ${res?.status}`
      );
      return null;
    } catch (error) {
      logger.info("Youtube Provider: fetching subtitles error", error);
      return null;
    }
  }

  /**
   * @returns {string | null} Value of the `v` query parameter of the page URL.
   */
  #getVideoId() {
    const docUrl = new URL(document.location.href);
    return docUrl.searchParams.get("v");
  }

  /**
   * Asks the subtitle API to segment the caption events.
   *
   * NOTE(review): the caller also passes `fromLang`, which is not forwarded
   * to `apiSubtitle` here — confirm whether the API expects it.
   *
   * @returns {Promise<Array<object>>} The API result when it is an array,
   *   otherwise an empty array (failures are logged, never thrown).
   */
  async #aiSegment({ videoId, toLang, events, segApiSetting }) {
    try {
      const subtitles = await apiSubtitle({
        videoId,
        toLang,
        events,
        apiSetting: segApiSetting,
      });
      if (Array.isArray(subtitles)) {
        return subtitles;
      }
    } catch (err) {
      logger.info("Youtube Provider: ai segmentation", err);
    }

    return [];
  }

  /**
   * Processes one intercepted caption response: validates that it belongs to
   * the current, not yet processed video, obtains the caption events, builds
   * the subtitles and publishes them. Only one run is active at a time.
   *
   * @param {string} url - URL of the intercepted request.
   * @param {string} responseText - Body of the intercepted response.
   */
  async #handleInterceptedRequest(url, responseText) {
    if (this.#isBusy) {
      logger.info("Youtube Provider is busy...");
      return;
    }
    this.#isBusy = true; // TODO: tell the user that work is in progress

    try {
      const videoId = this.#getVideoId();
      if (!videoId) {
        logger.info("Youtube Provider: videoId not found.");
        return;
      }

      if (videoId === this.#videoId) {
        logger.info("Youtube Provider: videoId already processed.");
        return;
      }

      const potUrl = new URL(url);
      if (videoId !== potUrl.searchParams.get("v")) {
        logger.info("Youtube Provider: skip other timedtext.");
        return;
      }

      const captionTracks = await this.#getCaptionTracks(videoId);
      const captionTrack = this.#findCaptionTrack(captionTracks);
      if (!captionTrack) {
        logger.info("Youtube Provider: CaptionTrack not found.");
        return;
      }

      const capUrl = new URL(captionTrack.baseUrl);
      const events = await this.#getSubtitleEvents(
        capUrl,
        potUrl,
        responseText
      );
      if (!events?.length) {
        logger.info("Youtube Provider: SubtitleEvents not got.");
        return;
      }

      const { segApiSetting, toLang } = this.#setting;

      // Map the caption language to a translator code: exact code first,
      // then its primary subtag, otherwise let the translator auto-detect.
      const lang = potUrl.searchParams.get("lang") ?? "";
      const langCodes = OPT_LANGS_TO_CODE[OPT_TRANS_MICROSOFT];
      const fromLang =
        langCodes.get(lang) ||
        langCodes.get(this.#primaryLang(lang)) ||
        "auto";

      let subtitles = [];
      if (potUrl.searchParams.get("kind") === "asr" && segApiSetting) {
        subtitles = await this.#aiSegment({
          videoId,
          events,
          fromLang,
          toLang,
          segApiSetting,
        });
      }

      // Fall back to local sentence building when AI segmentation is not
      // configured or produced nothing.
      if (!subtitles?.length) {
        subtitles = this.#formatSubtitles(events, fromLang);
      }
      if (!subtitles?.length) {
        logger.info("Youtube Provider: No subtitles after format.");
        return;
      }

      this.#onCaptionsReady({ videoId, subtitles, fromLang });
    } catch (error) {
      logger.warn("Youtube Provider: unknow error", error);
    } finally {
      this.#isBusy = false;
    }
  }

  /**
   * Stores the prepared subtitles, un-dims the toggle button and restarts the
   * manager when the feature is currently enabled.
   */
  #onCaptionsReady({ videoId, subtitles, fromLang }) {
    this.#subtitles = subtitles;
    this.#videoId = videoId;
    this.#fromLang = fromLang;

    if (this.#toggleButton) {
      this.#toggleButton.style.opacity = subtitles.length ? "1" : "0.5";
    }

    if (this.#enabled) {
      this.#destroyManager();
      this.#startManager();
    }
  }

  /**
   * Enables the feature. The manager is only created when a video element
   * exists and subtitles for the current video are available; otherwise the
   * feature stays enabled so that #onCaptionsReady can start it later.
   */
  #startManager() {
    if (this.#enabled || this.#managerInstance) {
      return;
    }
    this.#enabled = true;
    this.#toggleButton?.replaceChildren(createLogoSvg({ isSelected: true }));

    const videoEl = document.querySelector(VIDEO_SELECT);
    if (!videoEl) {
      logger.warn("Youtube Provider: No video element found");
      return;
    }

    const videoId = this.#getVideoId();
    if (!this.#subtitles?.length || this.#videoId !== videoId) {
      // TODO: wait and show a hint to the user
      logger.info("Youtube Provider: No subtitles");
      this.#doubleClick();
      return;
    }

    logger.info("Youtube Provider: Starting manager...");

    this.#managerInstance = new BilingualSubtitleManager({
      videoEl,
      formattedSubtitles: this.#subtitles,
      translationService: apiTranslate,
      setting: { ...this.#setting, fromLang: this.#fromLang },
    });
    this.#managerInstance.start();

    // Hide YouTube's own caption window while the manager is running.
    const ytCaption = document.querySelector(YT_CAPTION_SELECT);
    ytCaption && (ytCaption.style.display = "none");
  }

  /**
   * Disables the feature: resets the button icon, shows YouTube's caption
   * window again and destroys the running manager, if any.
   */
  #destroyManager() {
    if (!this.#enabled) {
      return;
    }
    this.#enabled = false;
    this.#toggleButton?.replaceChildren(createLogoSvg());

    logger.info("Youtube Provider: Destroying manager...");

    const ytCaption = document.querySelector(YT_CAPTION_SELECT);
    ytCaption && (ytCaption.style.display = "block");

    if (this.#managerInstance) {
      this.#managerInstance.destroy();
      this.#managerInstance = null;
    }
  }

  /**
   * Converts caption events into subtitle records.
   *
   * For languages written without spaces each event becomes one record
   * (`text`, `start`, `end`). For all other languages the segments are merged
   * into sentences and every record also carries `duration`.
   *
   * @param {Array<object>} events - Caption events (`segs`, `tStartMs`,
   *   `dDurationMs`).
   * @param {string} [lang] - Source language code.
   * @returns {Array<object>} Subtitle records; empty for empty input.
   */
  #formatSubtitles(events, lang) {
    if (!events?.length) return [];

    // Match on the primary subtag so that e.g. "kmr" is not taken for "km".
    const isNoSpaceLang = YouTubeCaptionProvider.#NO_SPACE_LANGUAGES.has(
      this.#primaryLang(lang)
    );
    if (isNoSpaceLang) {
      return events
        .map(({ segs = [], tStartMs = 0, dDurationMs = 0 }) => ({
          text: segs
            .map(({ utf8 = "" }) => utf8)
            .join("")
            .trim(),
          start: tStartMs,
          end: tStartMs + dDurationMs,
        }))
        .filter((item) => item.text);
    }

    let lines = this.#processSubtitles({ events });
    // Too many over-long lines: split again, this time also on pause words
    // and on the word limit alone.
    if (this.#isQualityPoor(lines)) {
      lines = this.#processSubtitles({ events, usePause: true });
    }

    return lines.map((item) => ({
      ...item,
      duration: Math.max(0, item.end - item.start),
    }));
  }

  /**
   * Reports whether the share of over-long lines exceeds a threshold.
   *
   * @param {Array<{text: string}>} lines
   * @param {number} [lengthThreshold=250] - Text length above which a line
   *   counts as too long.
   * @param {number} [percentageThreshold=0.1] - Tolerated share of long lines.
   * @returns {boolean} False for an empty list.
   */
  #isQualityPoor(lines, lengthThreshold = 250, percentageThreshold = 0.1) {
    if (lines.length === 0) return false;
    const longLinesCount = lines.filter(
      (line) => line.text.length > lengthThreshold
    ).length;
    return longLinesCount / lines.length > percentageThreshold;
  }

  /**
   * Merges caption segments into sentences.
   *
   * A new sentence starts before a segment when the previous segment ends
   * with sentence punctuation, when the gap to it exceeds `timeout`, when the
   * word limit is reached (after a comma, or always with `usePause`), when the
   * segment starts with "[", "(" or "♪", or — with `usePause` — when it starts
   * with a single-word pause word and the buffer holds more than one segment.
   *
   * @param {object} [options]
   * @param {Array<object>} [options.events] - Caption events.
   * @param {boolean} [options.usePause=false] - Enables the pause-word and
   *   unconditional word-limit splits.
   * @param {number} [options.timeout=1500] - Maximum gap in milliseconds.
   * @param {number} [options.maxWords=15] - Word limit per sentence.
   * @returns {Array<{text: string, start: number, end: number}>}
   */
  #processSubtitles({
    events,
    usePause = false,
    timeout = 1500,
    maxWords = 15,
  } = {}) {
    const singlePauseWords = YouTubeCaptionProvider.#PAUSE_WORDS[1];

    const sentences = [];
    let currentBuffer = [];
    let bufferWordCount = 0;

    const flushBuffer = () => {
      if (currentBuffer.length > 0) {
        const last = currentBuffer[currentBuffer.length - 1];
        sentences.push({
          text: currentBuffer
            .map((s) => s.text)
            .join(" ")
            .trim(),
          start: currentBuffer[0].start,
          // The last segment has no end of its own when only whitespace
          // segments followed it in its event; use the event end instead.
          end: last.end ?? last.eventEnd,
        });
      }
      currentBuffer = [];
      bufferWordCount = 0;
    };

    (events ?? []).forEach(({ segs = [], tStartMs = 0, dDurationMs = 0 }) => {
      const eventEnd = tStartMs + dDurationMs;

      segs.forEach(({ utf8 = "", tOffsetMs = 0 }, j) => {
        const text = utf8?.trim().replace(/\s+/g, " ") || "";
        if (!text) return;

        const start = tStartMs + tOffsetMs;
        const lastSegment = currentBuffer[currentBuffer.length - 1];

        if (lastSegment) {
          // Close the previous segment where this one starts when it has no
          // end yet or would overlap.
          if (!lastSegment.end || lastSegment.end > start) {
            lastSegment.end = start;
          }

          const isEndOfSentence = /[.?!…\])]$/.test(lastSegment.text);
          const isPauseOfSentence = /[,]$/.test(lastSegment.text);
          const isTimeout = start - lastSegment.end > timeout;
          const isWordLimitExceeded =
            (usePause || isPauseOfSentence) && bufferWordCount >= maxWords;

          const startsWithSign = /^[[(♪]/.test(text);
          const startsWithPauseWord =
            usePause &&
            singlePauseWords.has(text.toLowerCase().split(" ")[0]) && // TODO: also consider multi-word connectives at the start
            currentBuffer.length > 1;

          if (
            isEndOfSentence ||
            isTimeout ||
            isWordLimitExceeded ||
            startsWithSign ||
            startsWithPauseWord
          ) {
            flushBuffer();
          }
        }

        const currentSegment = { text, start, eventEnd };
        if (j === segs.length - 1) {
          currentSegment.end = eventEnd;
        }

        currentBuffer.push(currentSegment);
        bufferWordCount += text.split(/\s+/).length;
      });
    });

    flushBuffer();

    return sentences;
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * One-shot entry point: the first call creates a YouTubeCaptionProvider with
 * the given setting and initializes it; every later call does nothing.
 *
 * @param {object} setting - Passed to the YouTubeCaptionProvider constructor.
 * @returns {Promise<void>}
 */
export const YouTubeInitializer = (() => {
  // Remembered across calls by the enclosing closure.
  let hasRun = false;

  return async (setting) => {
    if (!hasRun) {
      hasRun = true;

      logger.info("Bilingual Subtitle Extension: Initializing...");
      new YouTubeCaptionProvider(setting).initialize();
    }
  };
})();
|