feat(capcut): 优化音频/字幕添加策略并重构语音切分逻辑

- 音频和字幕 API 调用改为先批量添加，批量失败时逐个兜底
- 重写 `splitIntoAudioSegments`，基于原始标点保留切分，合并短片段
- `qwen-tts.js` 补充中文逗号作为句末标点判断
2026-05-06 23:21:40 +08:00
parent 6eec0e8889
commit b309f54430
4 changed files with 94 additions and 117 deletions
--- a/.claude/skills/video-from-script/scripts/lib/capcut-tracks.js
+++ b/.claude/skills/video-from-script/scripts/lib/capcut-tracks.js
@@ -10,8 +10,9 @@
 */

 const path = require('path')
-const { api, US } = require('./capcut-api')
-const { splitTextIntoSentences, loadAccountConfig: loadAccountConfigFromUtils } = require('./pipeline-utils')
+const fs = require('fs')
+const { api, US, getConfig } = require('./capcut-api')
+const { splitTextIntoSentences, loadAccountConfig: loadAccountConfigFromUtils, getManifestDir } = require('./pipeline-utils')

 // ============================================================================
 // 账号配置读取
@@ -314,17 +315,11 @@ async function addVideos(draftUrl, inputDir, items, timeline, width, height, tra
 // ============================================================================

 async function addSlots(draftUrl, items, timeline) {
-  const { api: capcutApi, US } = require('./capcut-api')
-  const { getManifestDir } = require('./pipeline-utils')
-  const path = require('path')
-
  // 获取当前云端草稿的 draft_content，获取第一个 video track 的 id
  let draftData
  try {
-    draftData = (await capcutApi('get_draft', { draft_url: draftUrl })).data || {}
+    draftData = (await api('get_draft', { draft_url: draftUrl })).data || {}
  } catch (err) {
-    // get_draft 接口不可用，尝试从本地 manifest 目录寻找草稿
-    const manifestDir = path.dirname(draftUrl.startsWith('http') ? draftUrl : '')
    console.log('   get_draft 不可用，切换本地写入模式')
    return addSlotsLocally(draftUrl, items, timeline)
  }
@@ -336,30 +331,12 @@ async function addSlots(draftUrl, items, timeline) {
    return
  }

-  // 构造 slot 数据
+  // 构造 slot 数据（复用 buildSlot）
  const slots = []
  for (let i = 0; i < items.length; i++) {
-    const item = items[i]
-    const tl = timeline[i]
-    const segId = item.segmentId || item._segmentId
+    const segId = items[i].segmentId || items[i]._segmentId
    if (!segId) continue
-
-    const slotId = generateUUID()
-    slots.push({
-      id: slotId,
-      material_id: segId,
-      track_id: videoTrack.id,
-      render_index: i,
-      type: 'video',
-      common_property: {
-        start_time: tl.start,
-        source_timerange: { start: 0, duration: tl.duration },
-        target_timerange: { start: tl.start, duration: tl.duration },
-        is_avatar: false,
-        audio_fade: { fade_in_duration: 0, fade_out_duration: 0 },
-        volume: 1.0,
-      },
-    })
+    slots.push(buildSlot(segId, videoTrack.id, i, timeline[i]))
  }

  if (slots.length === 0) {
@@ -369,13 +346,12 @@ async function addSlots(draftUrl, items, timeline) {

  // 通过 add_slots API 写入
  try {
-    await capcutApi('add_slots', {
+    await api('add_slots', {
      draft_url: draftUrl,
      slots: JSON.stringify(slots),
    })
    console.log(`   已写入 ${slots.length} 个 slot 到视频轨道`)
  } catch (err) {
-    // API 不支持时，降级为本地写入
    console.log(`   add_slots API 不可用: ${err.message}，降级为本地写入`)
    await addSlotsLocally(draftUrl, items, timeline, videoTrack.id)
  }
@@ -384,9 +360,6 @@ async function addSlots(draftUrl, items, timeline) {
 // 直接写入本地 draft_content.json 的 slot
 // options.draftId: 可选，直接指定 draftId（优先使用），否则从 draftUrl 提取
 async function addSlotsLocally(draftUrl, items, timeline, trackId, options = {}) {
-  const { api: capcutApi, US } = require('./capcut-api')
-  const fs = require('fs')
-
  // 优先使用 options.draftId，否则从 draftUrl 提取
  let draftId = options.draftId || null
  if (!draftId) {
@@ -403,7 +376,6 @@ async function addSlotsLocally(draftUrl, items, timeline, trackId, options = {})
    return
  }

-  const { getConfig } = require('./capcut-api')
  const jianyingPath = getConfig().jianyingDraftPath
  const draftPath = path.join(jianyingPath, draftId, 'draft_content.json')
  if (!fs.existsSync(draftPath)) {
@@ -461,7 +433,7 @@ async function addSlotsLocally(draftUrl, items, timeline, trackId, options = {})
  }
 }

-function buildSlot(segId, trackId, index, tl, US) {
+function buildSlot(segId, trackId, index, tl) {
  return {
    id: generateUUID(),
    material_id: segId,
@@ -548,17 +520,26 @@ async function addVoiceover(draftUrl, inputDir, items, timeline, audioUrls = {})
    return
  }

-  // 逐个添加音频（CapCut API 批量添加不稳定）
+  // 批量添加音频（同一轨道），失败时逐个兜底
  let addedCount = 0
-  for (const audioInfo of segmentsFlat) {
-    try {
-      await api('add_audios', {
-        draft_url: draftUrl,
-        audio_infos: JSON.stringify([audioInfo]),
-      })
-      addedCount++
-    } catch (err) {
-      console.error(`   音频添加失败: ${err.message.slice(0, 80)}`)
+  try {
+    await api('add_audios', {
+      draft_url: draftUrl,
+      audio_infos: JSON.stringify(segmentsFlat),
+    })
+    addedCount = segmentsFlat.length
+  } catch (err) {
+    console.log(`   批量添加音频失败 (${err.message.slice(0, 60)})，逐个添加...`)
+    for (const audioInfo of segmentsFlat) {
+      try {
+        await api('add_audios', {
+          draft_url: draftUrl,
+          audio_infos: JSON.stringify([audioInfo]),
+        })
+        addedCount++
+      } catch (e2) {
+        console.error(`   音频添加失败: ${e2.message.slice(0, 80)}`)
+      }
    }
  }
  const ossCount = segmentsFlat.filter(a => a.audio_url.startsWith('http')).length
@@ -702,18 +683,28 @@ async function addSubtitles(draftUrl, items, timeline, style = {}, split = false
    style_text: 0,
  }

-  // 逐条添加字幕（CapCut API 批量添加不稳定）
+  // 批量添加字幕（同一轨道），失败时逐条兜底
  let addedCount = 0
-  for (const cap of captions) {
-    try {
-      await api('add_captions', {
-        draft_url: draftUrl,
-        captions: JSON.stringify([cap]),
-        ...commonStyle,
-      })
-      addedCount++
-    } catch (err) {
-      console.error(`   字幕添加失败: ${err.message.slice(0, 80)}`)
+  try {
+    await api('add_captions', {
+      draft_url: draftUrl,
+      captions: JSON.stringify(captions),
+      ...commonStyle,
+    })
+    addedCount = captions.length
+  } catch (err) {
+    console.log(`   批量添加字幕失败 (${err.message.slice(0, 60)})，逐条添加...`)
+    for (const cap of captions) {
+      try {
+        await api('add_captions', {
+          draft_url: draftUrl,
+          captions: JSON.stringify([cap]),
+          ...commonStyle,
+        })
+        addedCount++
+      } catch (e2) {
+        console.error(`   字幕添加失败: ${e2.message.slice(0, 80)}`)
+      }
    }
  }
  console.log(`   已添加 ${addedCount}/${captions.length} 条字幕${split ? ' (分句模式)' : ''} (字体: ${style.font || '默认'}, 动画: ${animStyle.inAnimation || '无'} → ${animStyle.outAnimation || '无'})`)
--- a/.claude/skills/video-from-script/scripts/lib/phase-tts.js
+++ b/.claude/skills/video-from-script/scripts/lib/phase-tts.js
@@ -13,7 +13,7 @@
 */

 const path = require('path')
-const { saveManifest, ensureDir, log, getManifestDir, splitTextIntoSentences } = require('./pipeline-utils')
+const { saveManifest, ensureDir, log, getManifestDir } = require('./pipeline-utils')

 /**
 * 在语义断点处将文案切分为音频片段
@@ -25,73 +25,59 @@ const { saveManifest, ensureDir, log, getManifestDir, splitTextIntoSentences } =
 * @returns {Array<{text, estimatedDuration}>}
 */
 function splitIntoAudioSegments(text, videoDur, charsPerSec = 5) {
-  // 优先在自然断点切分（句号/感叹号/分号）
-  const naturalBreaks = splitTextIntoSentences(text)
-  if (naturalBreaks.length <= 1) {
-    // 无自然断点：在半段处（含小数点）切分
-    const chars = text.length
-    const estimatedTotal = chars / charsPerSec
-    if (estimatedTotal <= videoDur) {
-      // 整段可容纳
-      return [{ text, estimatedDuration: estimatedTotal }]
+  const estimatedTotal = text.length / charsPerSec
+  if (estimatedTotal <= videoDur) {
+    return [{ text, estimatedDuration: estimatedTotal }]
+  }
+
+  // 在原文标点处切分，保留原始标点（不剥离、不重加）
+  const breakPattern = /[。！；，]/
+  const rawParts = []
+  let lastIdx = 0
+  for (let i = 0; i < text.length; i++) {
+    if (breakPattern.test(text[i])) {
+      rawParts.push(text.slice(lastIdx, i + 1))
+      lastIdx = i + 1
    }
-    // 无法单段容纳，在中间逗号处切
-    const mid = Math.floor(chars / 2)
-    const breakIdx = text.indexOf('，', mid)
-    if (breakIdx > 0) {
-      return [
-        { text: text.slice(0, breakIdx + 1), estimatedDuration: (breakIdx + 1) / charsPerSec },
-        { text: text.slice(breakIdx + 1), estimatedDuration: (chars - breakIdx - 1) / charsPerSec },
-      ]
-    }
-    // 强制按字数切
-    const halfChars = Math.floor(chars / 2)
+  }
+  if (lastIdx < text.length) {
+    rawParts.push(text.slice(lastIdx))
+  }
+
+  // 无标点断点，强制对半切
+  if (rawParts.length <= 1) {
+    const half = Math.floor(text.length / 2)
    return [
-      { text: text.slice(0, halfChars), estimatedDuration: halfChars / charsPerSec },
-      { text: text.slice(halfChars), estimatedDuration: (chars - halfChars) / charsPerSec },
+      { text: text.slice(0, half), estimatedDuration: half / charsPerSec },
+      { text: text.slice(half), estimatedDuration: (text.length - half) / charsPerSec },
    ]
  }

-  // 多个自然句：逐句判断，合并短句
+  // 合并短片段，确保每段 ≤ videoDur
  const result = []
-  let currentText = ''
-  let currentEstDur = 0
+  let curText = ''
+  let curDur = 0

-  for (let i = 0; i < naturalBreaks.length; i++) {
-    const sentence = naturalBreaks[i]
-    const sentenceLen = sentence.length
-    const sentenceEstDur = sentenceLen / charsPerSec
-
-    if (currentEstDur + sentenceEstDur <= videoDur) {
-      // 可以合并到当前段
-      currentText += sentence + '。'
-      currentEstDur += sentenceEstDur
+  for (const part of rawParts) {
+    const partDur = part.length / charsPerSec
+    if (curDur + partDur <= videoDur) {
+      curText += part
+      curDur += partDur
    } else {
-      // 先保存当前段
-      if (currentText) {
-        result.push({ text: currentText.trim(), estimatedDuration: currentEstDur })
-      }
-      currentText = sentence + '。'
-      currentEstDur = sentenceEstDur
-
-      // 单句本身超长（超 videoDur）
-      if (sentenceEstDur > videoDur) {
-        // 按半段切
-        const halfLen = Math.floor(sentenceLen / 2)
-        const half1 = sentence.slice(0, halfLen)
-        const half2 = sentence.slice(halfLen)
-        // 回退上一段，用两个半段替代
-        result.pop()
-        result.push({ text: half1, estimatedDuration: halfLen / charsPerSec })
-        currentText = half2 + '。'
-        currentEstDur = (sentenceLen - halfLen) / charsPerSec
+      if (curText) result.push({ text: curText, estimatedDuration: curDur })
+      // 单段超长，强制对半切
+      if (partDur > videoDur) {
+        const half = Math.floor(part.length / 2)
+        result.push({ text: part.slice(0, half), estimatedDuration: half / charsPerSec })
+        curText = part.slice(half)
+        curDur = (part.length - half) / charsPerSec
+      } else {
+        curText = part
+        curDur = partDur
      }
    }
  }
-
-  if (currentText) {
-    result.push({ text: currentText.trim(), estimatedDuration: currentEstDur })
-  }
+  if (curText) result.push({ text: curText, estimatedDuration: curDur })

  return result
 }