refactor(video-pipeline): 移除 segments 机制,改为整段音频合成

移除 TTS 阶段逐句切分及 segments 数组逻辑,统一为整段音频合成。
CapCut 字幕切分由组装阶段按字符比例分配,简化音频上传、
时间线构建和字幕生成流程,减少冗余处理分支。
This commit is contained in:
2026-05-02 02:31:55 +08:00
parent ac753ef367
commit 6097a809bf
9 changed files with 95 additions and 244 deletions

View File

@@ -1,13 +1,13 @@
/**
* Phase: tts — 语音合成(逐句分句生成)
* Phase: tts — 语音合成(整段合成)
*
* 每个 item 的 script 按标点切分为短句,每句单独生成 TTS 音频
* 统一写入 item.segments[],单句时数组仅 1 个元素
* item.audio 指向第一段,item.audioDuration 为累计时长
* 每个 item 的 script 整段合成一个音频文件,保留自然语调
* item.audio 指向完整音频,item.audioDuration 为总时长
* 字幕切分由组装阶段按字符比例分配,不在 TTS 阶段处理
*/
const path = require('path')
const { saveManifest, ensureDir, log, getManifestDir, splitTextIntoSentences } = require('./pipeline-utils')
const { saveManifest, ensureDir, log, getManifestDir } = require('./pipeline-utils')
async function phaseTts(manifest, manifestPath, options = {}) {
const dir = getManifestDir(manifestPath)
@@ -29,33 +29,18 @@ async function phaseTts(manifest, manifestPath, options = {}) {
const fullText = item.script || item.text
try {
const sentences = splitTextIntoSentences(fullText)
const segments = []
let totalDuration = 0
const { filePath, duration } = await synthesize(fullText, {
outputDir: audioDir,
id: String(item.id || idx),
voice: manifest.ttsVoice || undefined,
instruction: manifest.ttsInstruction || undefined,
rate: manifest.ttsRate || undefined,
})
for (let j = 0; j < sentences.length; j++) {
const sentence = sentences[j]
const segId = `${item.id || idx}_${j + 1}`
const { filePath, duration } = await synthesize(sentence, {
outputDir: audioDir,
id: segId,
voice: manifest.ttsVoice || undefined,
instruction: manifest.ttsInstruction || undefined,
rate: manifest.ttsRate || undefined,
})
segments.push({
text: sentence,
audio: path.relative(dir, filePath).replace(/\\/g, '/'),
duration: Math.round(duration * 1000) / 1000,
})
totalDuration += duration
}
// 统一使用 segments 数组(单句 = 1 元素,多句 = N 元素)
item.segments = segments
item.audio = segments[0].audio
item.audioDuration = Math.round(totalDuration * 1000) / 1000
log('tts', `[${idx}/${items.length}] ${totalDuration.toFixed(1)}s (${segments.length}句): ${fullText.substring(0, 30)}...`)
const totalDuration = Math.round(duration * 1000) / 1000
item.audio = path.relative(dir, filePath).replace(/\\/g, '/')
item.audioDuration = totalDuration
log('tts', `[${idx}/${items.length}] ${totalDuration.toFixed(1)}s: ${fullText.substring(0, 30)}...`)
} catch (err) {
item.status = 'failed'
item.error = `TTS失败: ${err.message}`