feat(capcut): 优化音频/字幕添加策略并重构语音切分逻辑

- 音频和字幕 API 调用改为先批量添加，批量失败时逐个兜底
- 重写 `splitIntoAudioSegments`：在原文标点处切分并保留原始标点，合并短片段
- `qwen-tts.js` 补充中文逗号作为句末标点判断
2026-05-06 23:21:40 +08:00
parent 6eec0e8889
commit b309f54430
4 changed files with 94 additions and 117 deletions
--- a/.claude/skills/video-from-script/scripts/lib/phase-tts.js
+++ b/.claude/skills/video-from-script/scripts/lib/phase-tts.js
@@ -13,7 +13,7 @@
 */

 const path = require('path')
-const { saveManifest, ensureDir, log, getManifestDir, splitTextIntoSentences } = require('./pipeline-utils')
+const { saveManifest, ensureDir, log, getManifestDir } = require('./pipeline-utils')

 /**
 * 在语义断点处将文案切分为音频片段
@@ -25,73 +25,59 @@ const { saveManifest, ensureDir, log, getManifestDir, splitTextIntoSentences } =
 * @returns {Array<{text, estimatedDuration}>}
 */
 function splitIntoAudioSegments(text, videoDur, charsPerSec = 5) {
-  // 优先在自然断点切分（句号/感叹号/分号）
-  const naturalBreaks = splitTextIntoSentences(text)
-  if (naturalBreaks.length <= 1) {
-    // 无自然断点：在半段处（含小数点）切分
-    const chars = text.length
-    const estimatedTotal = chars / charsPerSec
-    if (estimatedTotal <= videoDur) {
-      // 整段可容纳
-      return [{ text, estimatedDuration: estimatedTotal }]
+  const estimatedTotal = text.length / charsPerSec
+  if (estimatedTotal <= videoDur) {
+    return [{ text, estimatedDuration: estimatedTotal }]
+  }
+
+  // 在原文标点处切分，保留原始标点（不剥离、不重加）
+  const breakPattern = /[。！；，]/
+  const rawParts = []
+  let lastIdx = 0
+  for (let i = 0; i < text.length; i++) {
+    if (breakPattern.test(text[i])) {
+      rawParts.push(text.slice(lastIdx, i + 1))
+      lastIdx = i + 1
    }
-    // 无法单段容纳，在中间逗号处切
-    const mid = Math.floor(chars / 2)
-    const breakIdx = text.indexOf('，', mid)
-    if (breakIdx > 0) {
-      return [
-        { text: text.slice(0, breakIdx + 1), estimatedDuration: (breakIdx + 1) / charsPerSec },
-        { text: text.slice(breakIdx + 1), estimatedDuration: (chars - breakIdx - 1) / charsPerSec },
-      ]
-    }
-    // 强制按字数切
-    const halfChars = Math.floor(chars / 2)
+  }
+  if (lastIdx < text.length) {
+    rawParts.push(text.slice(lastIdx))
+  }
+
+  // 无标点断点，强制对半切
+  if (rawParts.length <= 1) {
+    const half = Math.floor(text.length / 2)
    return [
-      { text: text.slice(0, halfChars), estimatedDuration: halfChars / charsPerSec },
-      { text: text.slice(halfChars), estimatedDuration: (chars - halfChars) / charsPerSec },
+      { text: text.slice(0, half), estimatedDuration: half / charsPerSec },
+      { text: text.slice(half), estimatedDuration: (text.length - half) / charsPerSec },
    ]
  }

-  // 多个自然句：逐句判断，合并短句
+  // 合并短片段，确保每段 ≤ videoDur
  const result = []
-  let currentText = ''
-  let currentEstDur = 0
+  let curText = ''
+  let curDur = 0

-  for (let i = 0; i < naturalBreaks.length; i++) {
-    const sentence = naturalBreaks[i]
-    const sentenceLen = sentence.length
-    const sentenceEstDur = sentenceLen / charsPerSec
-
-    if (currentEstDur + sentenceEstDur <= videoDur) {
-      // 可以合并到当前段
-      currentText += sentence + '。'
-      currentEstDur += sentenceEstDur
+  for (const part of rawParts) {
+    const partDur = part.length / charsPerSec
+    if (curDur + partDur <= videoDur) {
+      curText += part
+      curDur += partDur
    } else {
-      // 先保存当前段
-      if (currentText) {
-        result.push({ text: currentText.trim(), estimatedDuration: currentEstDur })
-      }
-      currentText = sentence + '。'
-      currentEstDur = sentenceEstDur
-
-      // 单句本身超长（超 videoDur）
-      if (sentenceEstDur > videoDur) {
-        // 按半段切
-        const halfLen = Math.floor(sentenceLen / 2)
-        const half1 = sentence.slice(0, halfLen)
-        const half2 = sentence.slice(halfLen)
-        // 回退上一段，用两个半段替代
-        result.pop()
-        result.push({ text: half1, estimatedDuration: halfLen / charsPerSec })
-        currentText = half2 + '。'
-        currentEstDur = (sentenceLen - halfLen) / charsPerSec
+      if (curText) result.push({ text: curText, estimatedDuration: curDur })
+      // 单段超长，强制对半切
+      if (partDur > videoDur) {
+        const half = Math.floor(part.length / 2)
+        result.push({ text: part.slice(0, half), estimatedDuration: half / charsPerSec })
+        curText = part.slice(half)
+        curDur = (part.length - half) / charsPerSec
+      } else {
+        curText = part
+        curDur = partDur
      }
    }
  }
-
-  if (currentText) {
-    result.push({ text: currentText.trim(), estimatedDuration: currentEstDur })
-  }
+  if (curText) result.push({ text: curText, estimatedDuration: curDur })

  return result
 }