feat(capcut): 优化音频/字幕添加策略并重构语音切分逻辑

- 音频和字幕 API 调用改为先批量添加,批量失败时逐个兜底
- 重写 `splitIntoAudioSegments`,基于原始标点保留切分,合并短片段
- `qwen-tts.js` 补充中文逗号作为句末标点判断
This commit is contained in:
2026-05-06 23:21:40 +08:00
parent 6eec0e8889
commit b309f54430
4 changed files with 94 additions and 117 deletions

View File

@@ -10,8 +10,9 @@
*/
const path = require('path')
const { api, US } = require('./capcut-api')
const { splitTextIntoSentences, loadAccountConfig: loadAccountConfigFromUtils } = require('./pipeline-utils')
const fs = require('fs')
const { api, US, getConfig } = require('./capcut-api')
const { splitTextIntoSentences, loadAccountConfig: loadAccountConfigFromUtils, getManifestDir } = require('./pipeline-utils')
// ============================================================================
// 账号配置读取
@@ -314,17 +315,11 @@ async function addVideos(draftUrl, inputDir, items, timeline, width, height, tra
// ============================================================================
async function addSlots(draftUrl, items, timeline) {
const { api: capcutApi, US } = require('./capcut-api')
const { getManifestDir } = require('./pipeline-utils')
const path = require('path')
// 获取当前云端草稿的 draft_content,获取第一个 video track 的 id
let draftData
try {
draftData = (await capcutApi('get_draft', { draft_url: draftUrl })).data || {}
draftData = (await api('get_draft', { draft_url: draftUrl })).data || {}
} catch (err) {
// get_draft 接口不可用,尝试从本地 manifest 目录寻找草稿
const manifestDir = path.dirname(draftUrl.startsWith('http') ? draftUrl : '')
console.log(' get_draft 不可用,切换本地写入模式')
return addSlotsLocally(draftUrl, items, timeline)
}
@@ -336,30 +331,12 @@ async function addSlots(draftUrl, items, timeline) {
return
}
// 构造 slot 数据
// 构造 slot 数据(复用 buildSlot)
const slots = []
for (let i = 0; i < items.length; i++) {
const item = items[i]
const tl = timeline[i]
const segId = item.segmentId || item._segmentId
const segId = items[i].segmentId || items[i]._segmentId
if (!segId) continue
const slotId = generateUUID()
slots.push({
id: slotId,
material_id: segId,
track_id: videoTrack.id,
render_index: i,
type: 'video',
common_property: {
start_time: tl.start,
source_timerange: { start: 0, duration: tl.duration },
target_timerange: { start: tl.start, duration: tl.duration },
is_avatar: false,
audio_fade: { fade_in_duration: 0, fade_out_duration: 0 },
volume: 1.0,
},
})
slots.push(buildSlot(segId, videoTrack.id, i, timeline[i]))
}
if (slots.length === 0) {
@@ -369,13 +346,12 @@ async function addSlots(draftUrl, items, timeline) {
// 通过 add_slots API 写入
try {
await capcutApi('add_slots', {
await api('add_slots', {
draft_url: draftUrl,
slots: JSON.stringify(slots),
})
console.log(` 已写入 ${slots.length} 个 slot 到视频轨道`)
} catch (err) {
// API 不支持时,降级为本地写入
console.log(` add_slots API 不可用: ${err.message},降级为本地写入`)
await addSlotsLocally(draftUrl, items, timeline, videoTrack.id)
}
@@ -384,9 +360,6 @@ async function addSlots(draftUrl, items, timeline) {
// 直接写入本地 draft_content.json 的 slot
// options.draftId: 可选,直接指定 draftId,优先使用,否则从 draftUrl 提取
async function addSlotsLocally(draftUrl, items, timeline, trackId, options = {}) {
const { api: capcutApi, US } = require('./capcut-api')
const fs = require('fs')
// 优先使用 options.draftId,否则从 draftUrl 提取
let draftId = options.draftId || null
if (!draftId) {
@@ -403,7 +376,6 @@ async function addSlotsLocally(draftUrl, items, timeline, trackId, options = {})
return
}
const { getConfig } = require('./capcut-api')
const jianyingPath = getConfig().jianyingDraftPath
const draftPath = path.join(jianyingPath, draftId, 'draft_content.json')
if (!fs.existsSync(draftPath)) {
@@ -461,7 +433,7 @@ async function addSlotsLocally(draftUrl, items, timeline, trackId, options = {})
}
}
function buildSlot(segId, trackId, index, tl, US) {
function buildSlot(segId, trackId, index, tl) {
return {
id: generateUUID(),
material_id: segId,
@@ -548,17 +520,26 @@ async function addVoiceover(draftUrl, inputDir, items, timeline, audioUrls = {})
return
}
// 逐个添加音频(CapCut API 批量添加不稳定)
// 批量添加音频(同一轨道),失败时逐个兜底
let addedCount = 0
for (const audioInfo of segmentsFlat) {
try {
await api('add_audios', {
draft_url: draftUrl,
audio_infos: JSON.stringify([audioInfo]),
})
addedCount++
} catch (err) {
console.error(` 音频添加失败: ${err.message.slice(0, 80)}`)
try {
await api('add_audios', {
draft_url: draftUrl,
audio_infos: JSON.stringify(segmentsFlat),
})
addedCount = segmentsFlat.length
} catch (err) {
console.log(` 批量添加音频失败 (${err.message.slice(0, 60)}),逐个添加...`)
for (const audioInfo of segmentsFlat) {
try {
await api('add_audios', {
draft_url: draftUrl,
audio_infos: JSON.stringify([audioInfo]),
})
addedCount++
} catch (e2) {
console.error(` 音频添加失败: ${e2.message.slice(0, 80)}`)
}
}
}
const ossCount = segmentsFlat.filter(a => a.audio_url.startsWith('http')).length
@@ -702,18 +683,28 @@ async function addSubtitles(draftUrl, items, timeline, style = {}, split = false
style_text: 0,
}
// 逐条添加字幕(CapCut API 批量添加不稳定)
// 批量添加字幕(同一轨道),失败时逐条兜底
let addedCount = 0
for (const cap of captions) {
try {
await api('add_captions', {
draft_url: draftUrl,
captions: JSON.stringify([cap]),
...commonStyle,
})
addedCount++
} catch (err) {
console.error(` 字幕添加失败: ${err.message.slice(0, 80)}`)
try {
await api('add_captions', {
draft_url: draftUrl,
captions: JSON.stringify(captions),
...commonStyle,
})
addedCount = captions.length
} catch (err) {
console.log(` 批量添加字幕失败 (${err.message.slice(0, 60)}),逐条添加...`)
for (const cap of captions) {
try {
await api('add_captions', {
draft_url: draftUrl,
captions: JSON.stringify([cap]),
...commonStyle,
})
addedCount++
} catch (e2) {
console.error(` 字幕添加失败: ${e2.message.slice(0, 80)}`)
}
}
}
console.log(` 已添加 ${addedCount}/${captions.length} 条字幕${split ? ' (分句模式)' : ''} (字体: ${style.font || '默认'}, 动画: ${animStyle.inAnimation || '无'}${animStyle.outAnimation || '无'})`)

View File

@@ -13,7 +13,7 @@
*/
const path = require('path')
const { saveManifest, ensureDir, log, getManifestDir, splitTextIntoSentences } = require('./pipeline-utils')
const { saveManifest, ensureDir, log, getManifestDir } = require('./pipeline-utils')
/**
* 在语义断点处将文案切分为音频片段
@@ -25,73 +25,59 @@ const { saveManifest, ensureDir, log, getManifestDir, splitTextIntoSentences } =
* @returns {Array<{text, estimatedDuration}>}
*/
function splitIntoAudioSegments(text, videoDur, charsPerSec = 5) {
// 优先在自然断点切分(句号/感叹号/分号)
const naturalBreaks = splitTextIntoSentences(text)
if (naturalBreaks.length <= 1) {
// 无自然断点:在半段处(含小数点)切分
const chars = text.length
const estimatedTotal = chars / charsPerSec
if (estimatedTotal <= videoDur) {
// 整段可容纳
return [{ text, estimatedDuration: estimatedTotal }]
const estimatedTotal = text.length / charsPerSec
if (estimatedTotal <= videoDur) {
return [{ text, estimatedDuration: estimatedTotal }]
}
// 在原文标点处切分,保留原始标点(不剥离、不重加)
const breakPattern = /[。!;,]/
const rawParts = []
let lastIdx = 0
for (let i = 0; i < text.length; i++) {
if (breakPattern.test(text[i])) {
rawParts.push(text.slice(lastIdx, i + 1))
lastIdx = i + 1
}
// 无法单段容纳,在中间逗号处切
const mid = Math.floor(chars / 2)
const breakIdx = text.indexOf('', mid)
if (breakIdx > 0) {
return [
{ text: text.slice(0, breakIdx + 1), estimatedDuration: (breakIdx + 1) / charsPerSec },
{ text: text.slice(breakIdx + 1), estimatedDuration: (chars - breakIdx - 1) / charsPerSec },
]
}
// 强制按字数切
const halfChars = Math.floor(chars / 2)
}
if (lastIdx < text.length) {
rawParts.push(text.slice(lastIdx))
}
// 无标点断点,强制对半切
if (rawParts.length <= 1) {
const half = Math.floor(text.length / 2)
return [
{ text: text.slice(0, halfChars), estimatedDuration: halfChars / charsPerSec },
{ text: text.slice(halfChars), estimatedDuration: (chars - halfChars) / charsPerSec },
{ text: text.slice(0, half), estimatedDuration: half / charsPerSec },
{ text: text.slice(half), estimatedDuration: (text.length - half) / charsPerSec },
]
}
// 多个自然句:逐句判断,合并短句
// 合并短片段,确保每段 ≤ videoDur
const result = []
let currentText = ''
let currentEstDur = 0
let curText = ''
let curDur = 0
for (let i = 0; i < naturalBreaks.length; i++) {
const sentence = naturalBreaks[i]
const sentenceLen = sentence.length
const sentenceEstDur = sentenceLen / charsPerSec
if (currentEstDur + sentenceEstDur <= videoDur) {
// 可以合并到当前段
currentText += sentence + '。'
currentEstDur += sentenceEstDur
for (const part of rawParts) {
const partDur = part.length / charsPerSec
if (curDur + partDur <= videoDur) {
curText += part
curDur += partDur
} else {
// 先保存当前段
if (currentText) {
result.push({ text: currentText.trim(), estimatedDuration: currentEstDur })
}
currentText = sentence + '。'
currentEstDur = sentenceEstDur
// 单句本身超长(超 videoDur)
if (sentenceEstDur > videoDur) {
// 按半段切
const halfLen = Math.floor(sentenceLen / 2)
const half1 = sentence.slice(0, halfLen)
const half2 = sentence.slice(halfLen)
// 回退上一段,用两个半段替代
result.pop()
result.push({ text: half1, estimatedDuration: halfLen / charsPerSec })
currentText = half2 + '。'
currentEstDur = (sentenceLen - halfLen) / charsPerSec
if (curText) result.push({ text: curText, estimatedDuration: curDur })
// 单段超长,强制对半切
if (partDur > videoDur) {
const half = Math.floor(part.length / 2)
result.push({ text: part.slice(0, half), estimatedDuration: half / charsPerSec })
curText = part.slice(half)
curDur = (part.length - half) / charsPerSec
} else {
curText = part
curDur = partDur
}
}
}
if (currentText) {
result.push({ text: currentText.trim(), estimatedDuration: currentEstDur })
}
if (curText) result.push({ text: curText, estimatedDuration: curDur })
return result
}