feat(capcut): 优化音频/字幕添加策略并重构语音切分逻辑

- 音频和字幕 API 调用改为先批量添加，批量失败时逐个兜底
- 重写 `splitIntoAudioSegments`：在原文标点处切分并保留原始标点，合并短片段
- `qwen-tts.js` 补充中文分号、逗号作为句末标点判断
2026-05-06 23:21:40 +08:00
parent 6eec0e8889
commit b309f54430
4 changed files with 94 additions and 117 deletions
--- a/.claude/skills/video-from-script/scripts/capcut_assemble.js
+++ b/.claude/skills/video-from-script/scripts/capcut_assemble.js
@@ -173,7 +173,7 @@ async function assemble(args) {
  if (items.length === 0) throw new Error('没有可用的素材文件')
-  // ffprobe 测量实际时长
+  // 测量实际时长
  let audioMeasured = 0, videoMeasured = 0
  for (const item of items) {
    if (item.audio && !item.audio.startsWith('http')) {
--- a/.claude/skills/video-from-script/scripts/lib/capcut-tracks.js
+++ b/.claude/skills/video-from-script/scripts/lib/capcut-tracks.js
@@ -10,8 +10,9 @@
 */
 const path = require('path')
-const { api, US } = require('./capcut-api')
+const fs = require('fs')
-const { splitTextIntoSentences, loadAccountConfig: loadAccountConfigFromUtils } = require('./pipeline-utils')
+const { api, US, getConfig } = require('./capcut-api')
 const { splitTextIntoSentences, loadAccountConfig: loadAccountConfigFromUtils, getManifestDir } = require('./pipeline-utils')
 // ============================================================================
 // 账号配置读取
@@ -314,17 +315,11 @@ async function addVideos(draftUrl, inputDir, items, timeline, width, height, tra
 // ============================================================================
 async function addSlots(draftUrl, items, timeline) {
  const { api: capcutApi, US } = require('./capcut-api')
  const { getManifestDir } = require('./pipeline-utils')
  const path = require('path')
  // 获取当前云端草稿的 draft_content，获取第一个 video track 的 id
  let draftData
  try {
-    draftData = (await capcutApi('get_draft', { draft_url: draftUrl })).data || {}
+    draftData = (await api('get_draft', { draft_url: draftUrl })).data || {}
  } catch (err) {
    // get_draft 接口不可用，尝试从本地 manifest 目录寻找草稿
    const manifestDir = path.dirname(draftUrl.startsWith('http') ? draftUrl : '')
    console.log('   get_draft 不可用，切换本地写入模式')
    return addSlotsLocally(draftUrl, items, timeline)
  }
@@ -336,30 +331,12 @@ async function addSlots(draftUrl, items, timeline) {
    return
  }
-  // 构造 slot 数据
+  // 构造 slot 数据（复用 buildSlot）
  const slots = []
  for (let i = 0; i < items.length; i++) {
-    const item = items[i]
+    const segId = items[i].segmentId || items[i]._segmentId
    const tl = timeline[i]
    const segId = item.segmentId || item._segmentId
    if (!segId) continue
-
+    slots.push(buildSlot(segId, videoTrack.id, i, timeline[i]))
    const slotId = generateUUID()
    slots.push({
      id: slotId,
      material_id: segId,
      track_id: videoTrack.id,
      render_index: i,
      type: 'video',
      common_property: {
        start_time: tl.start,
        source_timerange: { start: 0, duration: tl.duration },
        target_timerange: { start: tl.start, duration: tl.duration },
        is_avatar: false,
        audio_fade: { fade_in_duration: 0, fade_out_duration: 0 },
        volume: 1.0,
      },
    })
  }
  if (slots.length === 0) {
@@ -369,13 +346,12 @@ async function addSlots(draftUrl, items, timeline) {
  // 通过 add_slots API 写入
  try {
-    await capcutApi('add_slots', {
+    await api('add_slots', {
      draft_url: draftUrl,
      slots: JSON.stringify(slots),
    })
    console.log(`   已写入 ${slots.length} 个 slot 到视频轨道`)
  } catch (err) {
    // API 不支持时，降级为本地写入
    console.log(`   add_slots API 不可用: ${err.message}，降级为本地写入`)
    await addSlotsLocally(draftUrl, items, timeline, videoTrack.id)
  }
@@ -384,9 +360,6 @@ async function addSlots(draftUrl, items, timeline) {
 // 直接写入本地 draft_content.json 的 slot
 // options.draftId: 可选，直接指定 draftId（优先使用），否则从 draftUrl 提取
 async function addSlotsLocally(draftUrl, items, timeline, trackId, options = {}) {
  const { api: capcutApi, US } = require('./capcut-api')
  const fs = require('fs')
  // 优先使用 options.draftId，否则从 draftUrl 提取
  let draftId = options.draftId || null
  if (!draftId) {
@@ -403,7 +376,6 @@ async function addSlotsLocally(draftUrl, items, timeline, trackId, options = {})
    return
  }
  const { getConfig } = require('./capcut-api')
  const jianyingPath = getConfig().jianyingDraftPath
  const draftPath = path.join(jianyingPath, draftId, 'draft_content.json')
  if (!fs.existsSync(draftPath)) {
@@ -461,7 +433,7 @@ async function addSlotsLocally(draftUrl, items, timeline, trackId, options = {})
  }
 }
-function buildSlot(segId, trackId, index, tl, US) {
+function buildSlot(segId, trackId, index, tl) {
  return {
    id: generateUUID(),
    material_id: segId,
@@ -548,17 +520,26 @@ async function addVoiceover(draftUrl, inputDir, items, timeline, audioUrls = {})
    return
  }
-  // 逐个添加音频（CapCut API 批量添加不稳定）
+  // 批量添加音频（同一轨道），失败时逐个兜底
  let addedCount = 0
-  for (const audioInfo of segmentsFlat) {
+  try {
-    try {
+    await api('add_audios', {
-      await api('add_audios', {
+      draft_url: draftUrl,
-        draft_url: draftUrl,
+      audio_infos: JSON.stringify(segmentsFlat),
-        audio_infos: JSON.stringify([audioInfo]),
+    })
-      })
+    addedCount = segmentsFlat.length
-      addedCount++
+  } catch (err) {
-    } catch (err) {
+    console.log(`   批量添加音频失败 (${err.message.slice(0, 60)})，逐个添加...`)
-      console.error(`   音频添加失败: ${err.message.slice(0, 80)}`)
+    for (const audioInfo of segmentsFlat) {
      try {
        await api('add_audios', {
          draft_url: draftUrl,
          audio_infos: JSON.stringify([audioInfo]),
        })
        addedCount++
      } catch (e2) {
        console.error(`   音频添加失败: ${e2.message.slice(0, 80)}`)
      }
    }
  }
  const ossCount = segmentsFlat.filter(a => a.audio_url.startsWith('http')).length
@@ -702,18 +683,28 @@ async function addSubtitles(draftUrl, items, timeline, style = {}, split = false
    style_text: 0,
  }
-  // 逐条添加字幕（CapCut API 批量添加不稳定）
+  // 批量添加字幕（同一轨道），失败时逐条兜底
  let addedCount = 0
-  for (const cap of captions) {
+  try {
-    try {
+    await api('add_captions', {
-      await api('add_captions', {
+      draft_url: draftUrl,
-        draft_url: draftUrl,
+      captions: JSON.stringify(captions),
-        captions: JSON.stringify([cap]),
+      ...commonStyle,
-        ...commonStyle,
+    })
-      })
+    addedCount = captions.length
-      addedCount++
+  } catch (err) {
-    } catch (err) {
+    console.log(`   批量添加字幕失败 (${err.message.slice(0, 60)})，逐条添加...`)
-      console.error(`   字幕添加失败: ${err.message.slice(0, 80)}`)
+    for (const cap of captions) {
      try {
        await api('add_captions', {
          draft_url: draftUrl,
          captions: JSON.stringify([cap]),
          ...commonStyle,
        })
        addedCount++
      } catch (e2) {
        console.error(`   字幕添加失败: ${e2.message.slice(0, 80)}`)
      }
    }
  }
  console.log(`   已添加 ${addedCount}/${captions.length} 条字幕${split ? ' (分句模式)' : ''} (字体: ${style.font || '默认'}, 动画: ${animStyle.inAnimation || '无'} → ${animStyle.outAnimation || '无'})`)
--- a/.claude/skills/video-from-script/scripts/lib/phase-tts.js
+++ b/.claude/skills/video-from-script/scripts/lib/phase-tts.js
@@ -13,7 +13,7 @@
 */
 const path = require('path')
-const { saveManifest, ensureDir, log, getManifestDir, splitTextIntoSentences } = require('./pipeline-utils')
+const { saveManifest, ensureDir, log, getManifestDir } = require('./pipeline-utils')
 /**
 * 在语义断点处将文案切分为音频片段
@@ -25,73 +25,59 @@ const { saveManifest, ensureDir, log, getManifestDir, splitTextIntoSentences } =
 * @returns {Array<{text, estimatedDuration}>}
 */
 function splitIntoAudioSegments(text, videoDur, charsPerSec = 5) {
-  // 优先在自然断点切分（句号/感叹号/分号）
+  const estimatedTotal = text.length / charsPerSec
-  const naturalBreaks = splitTextIntoSentences(text)
+  if (estimatedTotal <= videoDur) {
-  if (naturalBreaks.length <= 1) {
+    return [{ text, estimatedDuration: estimatedTotal }]
-    // 无自然断点：在半段处（含小数点）切分
+  }
-    const chars = text.length
+
-    const estimatedTotal = chars / charsPerSec
+  // 在原文标点处切分，保留原始标点（不剥离、不重加）
-    if (estimatedTotal <= videoDur) {
+  const breakPattern = /[。！；，]/
-      // 整段可容纳
+  const rawParts = []
-      return [{ text, estimatedDuration: estimatedTotal }]
+  let lastIdx = 0
  for (let i = 0; i < text.length; i++) {
    if (breakPattern.test(text[i])) {
      rawParts.push(text.slice(lastIdx, i + 1))
      lastIdx = i + 1
    }
-    // 无法单段容纳，在中间逗号处切
+  }
-    const mid = Math.floor(chars / 2)
+  if (lastIdx < text.length) {
-    const breakIdx = text.indexOf('，', mid)
+    rawParts.push(text.slice(lastIdx))
-    if (breakIdx > 0) {
+  }
-      return [
+
-        { text: text.slice(0, breakIdx + 1), estimatedDuration: (breakIdx + 1) / charsPerSec },
+  // 无标点断点，强制对半切
-        { text: text.slice(breakIdx + 1), estimatedDuration: (chars - breakIdx - 1) / charsPerSec },
+  if (rawParts.length <= 1) {
-      ]
+    const half = Math.floor(text.length / 2)
    }
    // 强制按字数切
    const halfChars = Math.floor(chars / 2)
    return [
-      { text: text.slice(0, halfChars), estimatedDuration: halfChars / charsPerSec },
+      { text: text.slice(0, half), estimatedDuration: half / charsPerSec },
-      { text: text.slice(halfChars), estimatedDuration: (chars - halfChars) / charsPerSec },
+      { text: text.slice(half), estimatedDuration: (text.length - half) / charsPerSec },
    ]
  }
-  // 多个自然句：逐句判断，合并短句
+  // 合并短片段，确保每段 ≤ videoDur
  const result = []
-  let currentText = ''
+  let curText = ''
-  let currentEstDur = 0
+  let curDur = 0
-  for (let i = 0; i < naturalBreaks.length; i++) {
+  for (const part of rawParts) {
-    const sentence = naturalBreaks[i]
+    const partDur = part.length / charsPerSec
-    const sentenceLen = sentence.length
+    if (curDur + partDur <= videoDur) {
-    const sentenceEstDur = sentenceLen / charsPerSec
+      curText += part
-
+      curDur += partDur
    if (currentEstDur + sentenceEstDur <= videoDur) {
      // 可以合并到当前段
      currentText += sentence + '。'
      currentEstDur += sentenceEstDur
    } else {
-      // 先保存当前段
+      if (curText) result.push({ text: curText, estimatedDuration: curDur })
-      if (currentText) {
+      // 单段超长，强制对半切
-        result.push({ text: currentText.trim(), estimatedDuration: currentEstDur })
+      if (partDur > videoDur) {
-      }
+        const half = Math.floor(part.length / 2)
-      currentText = sentence + '。'
+        result.push({ text: part.slice(0, half), estimatedDuration: half / charsPerSec })
-      currentEstDur = sentenceEstDur
+        curText = part.slice(half)
-
+        curDur = (part.length - half) / charsPerSec
-      // 单句本身超长（超 videoDur）
+      } else {
-      if (sentenceEstDur > videoDur) {
+        curText = part
-        // 按半段切
+        curDur = partDur
        const halfLen = Math.floor(sentenceLen / 2)
        const half1 = sentence.slice(0, halfLen)
        const half2 = sentence.slice(halfLen)
        // 回退上一段，用两个半段替代
        result.pop()
        result.push({ text: half1, estimatedDuration: halfLen / charsPerSec })
        currentText = half2 + '。'
        currentEstDur = (sentenceLen - halfLen) / charsPerSec
      }
    }
  }
-
+  if (curText) result.push({ text: curText, estimatedDuration: curDur })
  if (currentText) {
    result.push({ text: currentText.trim(), estimatedDuration: currentEstDur })
  }
  return result
 }
--- a/.claude/skills/video-from-script/scripts/qwen-tts.js
+++ b/.claude/skills/video-from-script/scripts/qwen-tts.js
@@ -71,7 +71,7 @@ function synthesize(text, options = {}) {
    fs.mkdirSync(outputDir, { recursive: true })
    text = text.trimEnd()
-    if (!/[。！？.!?…]$/.test(text)) text += '。'
+    if (!/[。！？；，.!?…]$/.test(text)) text += '。'
    const id = options.id || 1
    const fileName = `seg_${String(id).padStart(3, '0')}.mp3`