refactor(video-pipeline): 移除 segments 机制，改为整段音频合成

移除 TTS 阶段的逐句切分及 segments 数组逻辑，统一为整段音频合成。CapCut 字幕切分改由组装阶段按字符比例分配，从而简化音频上传、时间线构建和字幕生成流程，减少冗余处理分支。
2026-05-02 02:31:55 +08:00
parent ac753ef367
commit 6097a809bf
9 changed files with 95 additions and 244 deletions
--- a/.claude/skills/video-from-script/scripts/lib/phase-tts.js
+++ b/.claude/skills/video-from-script/scripts/lib/phase-tts.js
@@ -1,13 +1,13 @@
 /**
- * Phase: tts — 语音合成（逐句分句生成）
+ * Phase: tts — 语音合成（整段合成）
 *
- * 将每个 item 的 script 按标点切分为短句，每句单独生成 TTS 音频。
- * 统一写入 item.segments[]，单句时数组仅 1 个元素。
- * item.audio 指向第一段，item.audioDuration 为累计时长。
+ * 每个 item 的 script 整段合成一个音频文件，保留自然语调。
+ * item.audio 指向完整音频，item.audioDuration 为总时长。
+ * 字幕切分由组装阶段按字符比例分配，不在 TTS 阶段处理。
 */

 const path = require('path')
-const { saveManifest, ensureDir, log, getManifestDir, splitTextIntoSentences } = require('./pipeline-utils')
+const { saveManifest, ensureDir, log, getManifestDir } = require('./pipeline-utils')

 async function phaseTts(manifest, manifestPath, options = {}) {
  const dir = getManifestDir(manifestPath)
@@ -29,33 +29,18 @@ async function phaseTts(manifest, manifestPath, options = {}) {
    const fullText = item.script || item.text

    try {
-      const sentences = splitTextIntoSentences(fullText)
-      const segments = []
-      let totalDuration = 0
+      const { filePath, duration } = await synthesize(fullText, {
+        outputDir: audioDir,
+        id: String(item.id || idx),
+        voice: manifest.ttsVoice || undefined,
+        instruction: manifest.ttsInstruction || undefined,
+        rate: manifest.ttsRate || undefined,
+      })

-      for (let j = 0; j < sentences.length; j++) {
-        const sentence = sentences[j]
-        const segId = `${item.id || idx}_${j + 1}`
-        const { filePath, duration } = await synthesize(sentence, {
-          outputDir: audioDir,
-          id: segId,
-          voice: manifest.ttsVoice || undefined,
-          instruction: manifest.ttsInstruction || undefined,
-          rate: manifest.ttsRate || undefined,
-        })
-        segments.push({
-          text: sentence,
-          audio: path.relative(dir, filePath).replace(/\\/g, '/'),
-          duration: Math.round(duration * 1000) / 1000,
-        })
-        totalDuration += duration
-      }
-
-      // 统一使用 segments 数组（单句 = 1 元素，多句 = N 元素）
-      item.segments = segments
-      item.audio = segments[0].audio
-      item.audioDuration = Math.round(totalDuration * 1000) / 1000
-      log('tts', `[${idx}/${items.length}] ${totalDuration.toFixed(1)}s (${segments.length}句): ${fullText.substring(0, 30)}...`)
+      const totalDuration = Math.round(duration * 1000) / 1000
+      item.audio = path.relative(dir, filePath).replace(/\\/g, '/')
+      item.audioDuration = totalDuration
+      log('tts', `[${idx}/${items.length}] ${totalDuration.toFixed(1)}s: ${fullText.substring(0, 30)}...`)
    } catch (err) {
      item.status = 'failed'
      item.error = `TTS失败: ${err.message}`