feat(video-pipeline): 实现 TTS 逐句分句生成与字幕精确对齐

TTS 阶段将长文本按标点切分为短句,逐句生成音频并记录每句时长到 `item.segments[]`。assemble 阶段优先使用 segments 的精确时长分配字幕时间线,无 segments 时回退到字数权重估算。同时优化音频上传流程,支持分段音频独立上传 OSS 并在配音时按段映射时间线。
This commit is contained in:
2026-05-01 14:41:28 +08:00
parent f5d47ec5db
commit 9d19437a29
4 changed files with 236 additions and 122 deletions

View File

@@ -160,6 +160,36 @@ function getManifestDir(manifestPath) {
return path.dirname(path.resolve(manifestPath))
}
// ============================================================================
// 文本切分
// ============================================================================
/**
 * Split a block of text into short, punctuation-free segments.
 *
 * Rules:
 *  - A sentence-ending mark (。 ! ? ; and the full-width forms of ! ? ;)
 *    always closes the current segment.
 *  - A clause mark (, : 、 and the full-width forms of , :) closes the
 *    current segment only when the segment, including the mark itself, is
 *    longer than 8 UTF-16 code units, so very short clauses stay attached
 *    to the text that follows them.
 *  - All of the marks above are removed from the emitted segments, and
 *    segments that end up empty (e.g. runs of punctuation) are dropped.
 *
 * @param {string} text - Text to split. `null`/`undefined` yields `[]`;
 *   other non-string values are converted with `String()`.
 * @returns {string[]} Non-empty segments in their original order.
 */
function splitTextIntoSentences(text) {
  if (text == null) return []

  // Full-width forms are written as \u escapes so they cannot be silently
  // folded into their ASCII look-alikes by editors or tooling:
  //   \uFF01 = fullwidth !   \uFF1F = fullwidth ?   \uFF1B = fullwidth ;
  //   \uFF0C = fullwidth ,   \uFF1A = fullwidth :
  const sentenceEnders = /[。!?;\uFF01\uFF1F\uFF1B]/
  // Previously an empty character class (`/[]/`), which never matches and
  // left the clause-splitting branch unreachable.
  const clauseEnders = /[,:、\uFF0C\uFF1A]/
  const punctuation = /[。!?;,:、\uFF01\uFF1F\uFF1B\uFF0C\uFF1A]/g
  const minClauseLength = 8

  const sentences = []
  let current = ''

  // Emit the pending segment (if anything is left after cleaning) and reset.
  const flush = () => {
    // Strip punctuation first, then trim, so whitespace that sat next to a
    // removed mark does not survive at the segment edges.
    const cleaned = current.replace(punctuation, '').trim()
    if (cleaned) sentences.push(cleaned)
    current = ''
  }

  for (const char of String(text)) {
    current += char
    const isHardBreak = sentenceEnders.test(char)
    const isSoftBreak = clauseEnders.test(char) && current.length > minClauseLength
    if (isHardBreak || isSoftBreak) flush()
  }
  // Trailing text without a closing mark still forms a segment.
  flush()

  return sentences
}
// ============================================================================
// Exports
// ============================================================================
@@ -178,6 +208,7 @@ module.exports = {
ensureDir,
slugify,
renameGeneratedFile,
splitTextIntoSentences,
log,
getManifestDir,
}