feat(capcut): 优化音频/字幕添加策略并重构语音切分逻辑

- 音频和字幕 API 调用改为先批量添加,批量失败时逐个兜底
- 重写 `splitIntoAudioSegments`,基于原始标点保留切分,合并短片段
- `qwen-tts.js` 补充中文逗号作为句末标点判断
This commit is contained in:
2026-05-06 23:21:40 +08:00
parent 6eec0e8889
commit b309f54430
4 changed files with 94 additions and 117 deletions

View File

@@ -173,7 +173,7 @@ async function assemble(args) {
if (items.length === 0) throw new Error('没有可用的素材文件') if (items.length === 0) throw new Error('没有可用的素材文件')
// ffprobe 测量实际时长 // 测量实际时长
let audioMeasured = 0, videoMeasured = 0 let audioMeasured = 0, videoMeasured = 0
for (const item of items) { for (const item of items) {
if (item.audio && !item.audio.startsWith('http')) { if (item.audio && !item.audio.startsWith('http')) {

View File

@@ -10,8 +10,9 @@
*/ */
const path = require('path') const path = require('path')
const { api, US } = require('./capcut-api') const fs = require('fs')
const { splitTextIntoSentences, loadAccountConfig: loadAccountConfigFromUtils } = require('./pipeline-utils') const { api, US, getConfig } = require('./capcut-api')
const { splitTextIntoSentences, loadAccountConfig: loadAccountConfigFromUtils, getManifestDir } = require('./pipeline-utils')
// ============================================================================ // ============================================================================
// 账号配置读取 // 账号配置读取
@@ -314,17 +315,11 @@ async function addVideos(draftUrl, inputDir, items, timeline, width, height, tra
// ============================================================================ // ============================================================================
async function addSlots(draftUrl, items, timeline) { async function addSlots(draftUrl, items, timeline) {
const { api: capcutApi, US } = require('./capcut-api')
const { getManifestDir } = require('./pipeline-utils')
const path = require('path')
// 获取当前云端草稿的 draft_content,获取第一个 video track 的 id // 获取当前云端草稿的 draft_content,获取第一个 video track 的 id
let draftData let draftData
try { try {
draftData = (await capcutApi('get_draft', { draft_url: draftUrl })).data || {} draftData = (await api('get_draft', { draft_url: draftUrl })).data || {}
} catch (err) { } catch (err) {
// get_draft 接口不可用,尝试从本地 manifest 目录寻找草稿
const manifestDir = path.dirname(draftUrl.startsWith('http') ? draftUrl : '')
console.log(' get_draft 不可用,切换本地写入模式') console.log(' get_draft 不可用,切换本地写入模式')
return addSlotsLocally(draftUrl, items, timeline) return addSlotsLocally(draftUrl, items, timeline)
} }
@@ -336,30 +331,12 @@ async function addSlots(draftUrl, items, timeline) {
return return
} }
// 构造 slot 数据 // 构造 slot 数据(复用 buildSlot)
const slots = [] const slots = []
for (let i = 0; i < items.length; i++) { for (let i = 0; i < items.length; i++) {
const item = items[i] const segId = items[i].segmentId || items[i]._segmentId
const tl = timeline[i]
const segId = item.segmentId || item._segmentId
if (!segId) continue if (!segId) continue
slots.push(buildSlot(segId, videoTrack.id, i, timeline[i]))
const slotId = generateUUID()
slots.push({
id: slotId,
material_id: segId,
track_id: videoTrack.id,
render_index: i,
type: 'video',
common_property: {
start_time: tl.start,
source_timerange: { start: 0, duration: tl.duration },
target_timerange: { start: tl.start, duration: tl.duration },
is_avatar: false,
audio_fade: { fade_in_duration: 0, fade_out_duration: 0 },
volume: 1.0,
},
})
} }
if (slots.length === 0) { if (slots.length === 0) {
@@ -369,13 +346,12 @@ async function addSlots(draftUrl, items, timeline) {
// 通过 add_slots API 写入 // 通过 add_slots API 写入
try { try {
await capcutApi('add_slots', { await api('add_slots', {
draft_url: draftUrl, draft_url: draftUrl,
slots: JSON.stringify(slots), slots: JSON.stringify(slots),
}) })
console.log(` 已写入 ${slots.length} 个 slot 到视频轨道`) console.log(` 已写入 ${slots.length} 个 slot 到视频轨道`)
} catch (err) { } catch (err) {
// API 不支持时,降级为本地写入
console.log(` add_slots API 不可用: ${err.message},降级为本地写入`) console.log(` add_slots API 不可用: ${err.message},降级为本地写入`)
await addSlotsLocally(draftUrl, items, timeline, videoTrack.id) await addSlotsLocally(draftUrl, items, timeline, videoTrack.id)
} }
@@ -384,9 +360,6 @@ async function addSlots(draftUrl, items, timeline) {
// 直接写入本地 draft_content.json 的 slot // 直接写入本地 draft_content.json 的 slot
// options.draftId: 可选,直接指定 draftId(优先使用),否则从 draftUrl 提取 // options.draftId: 可选,直接指定 draftId(优先使用),否则从 draftUrl 提取
async function addSlotsLocally(draftUrl, items, timeline, trackId, options = {}) { async function addSlotsLocally(draftUrl, items, timeline, trackId, options = {}) {
const { api: capcutApi, US } = require('./capcut-api')
const fs = require('fs')
// 优先使用 options.draftId,否则从 draftUrl 提取 // 优先使用 options.draftId,否则从 draftUrl 提取
let draftId = options.draftId || null let draftId = options.draftId || null
if (!draftId) { if (!draftId) {
@@ -403,7 +376,6 @@ async function addSlotsLocally(draftUrl, items, timeline, trackId, options = {})
return return
} }
const { getConfig } = require('./capcut-api')
const jianyingPath = getConfig().jianyingDraftPath const jianyingPath = getConfig().jianyingDraftPath
const draftPath = path.join(jianyingPath, draftId, 'draft_content.json') const draftPath = path.join(jianyingPath, draftId, 'draft_content.json')
if (!fs.existsSync(draftPath)) { if (!fs.existsSync(draftPath)) {
@@ -461,7 +433,7 @@ async function addSlotsLocally(draftUrl, items, timeline, trackId, options = {})
} }
} }
function buildSlot(segId, trackId, index, tl, US) { function buildSlot(segId, trackId, index, tl) {
return { return {
id: generateUUID(), id: generateUUID(),
material_id: segId, material_id: segId,
@@ -548,8 +520,16 @@ async function addVoiceover(draftUrl, inputDir, items, timeline, audioUrls = {})
return return
} }
// 逐个添加音频(CapCut API 批量添加不稳定) // 批量添加音频(同一轨道),失败时逐个兜底
let addedCount = 0 let addedCount = 0
try {
await api('add_audios', {
draft_url: draftUrl,
audio_infos: JSON.stringify(segmentsFlat),
})
addedCount = segmentsFlat.length
} catch (err) {
console.log(` 批量添加音频失败 (${err.message.slice(0, 60)}),逐个添加...`)
for (const audioInfo of segmentsFlat) { for (const audioInfo of segmentsFlat) {
try { try {
await api('add_audios', { await api('add_audios', {
@@ -557,8 +537,9 @@ async function addVoiceover(draftUrl, inputDir, items, timeline, audioUrls = {})
audio_infos: JSON.stringify([audioInfo]), audio_infos: JSON.stringify([audioInfo]),
}) })
addedCount++ addedCount++
} catch (err) { } catch (e2) {
console.error(` 音频添加失败: ${err.message.slice(0, 80)}`) console.error(` 音频添加失败: ${e2.message.slice(0, 80)}`)
}
} }
} }
const ossCount = segmentsFlat.filter(a => a.audio_url.startsWith('http')).length const ossCount = segmentsFlat.filter(a => a.audio_url.startsWith('http')).length
@@ -702,8 +683,17 @@ async function addSubtitles(draftUrl, items, timeline, style = {}, split = false
style_text: 0, style_text: 0,
} }
// 逐条添加字幕(CapCut API 批量添加不稳定) // 批量添加字幕(同一轨道),失败时逐条兜底
let addedCount = 0 let addedCount = 0
try {
await api('add_captions', {
draft_url: draftUrl,
captions: JSON.stringify(captions),
...commonStyle,
})
addedCount = captions.length
} catch (err) {
console.log(` 批量添加字幕失败 (${err.message.slice(0, 60)}),逐条添加...`)
for (const cap of captions) { for (const cap of captions) {
try { try {
await api('add_captions', { await api('add_captions', {
@@ -712,8 +702,9 @@ async function addSubtitles(draftUrl, items, timeline, style = {}, split = false
...commonStyle, ...commonStyle,
}) })
addedCount++ addedCount++
} catch (err) { } catch (e2) {
console.error(` 字幕添加失败: ${err.message.slice(0, 80)}`) console.error(` 字幕添加失败: ${e2.message.slice(0, 80)}`)
}
} }
} }
console.log(` 已添加 ${addedCount}/${captions.length} 条字幕${split ? ' (分句模式)' : ''} (字体: ${style.font || '默认'}, 动画: ${animStyle.inAnimation || '无'}${animStyle.outAnimation || '无'})`) console.log(` 已添加 ${addedCount}/${captions.length} 条字幕${split ? ' (分句模式)' : ''} (字体: ${style.font || '默认'}, 动画: ${animStyle.inAnimation || '无'}${animStyle.outAnimation || '无'})`)

View File

@@ -13,7 +13,7 @@
*/ */
const path = require('path') const path = require('path')
const { saveManifest, ensureDir, log, getManifestDir, splitTextIntoSentences } = require('./pipeline-utils') const { saveManifest, ensureDir, log, getManifestDir } = require('./pipeline-utils')
/** /**
* 在语义断点处将文案切分为音频片段 * 在语义断点处将文案切分为音频片段
@@ -25,73 +25,59 @@ const { saveManifest, ensureDir, log, getManifestDir, splitTextIntoSentences } =
* @returns {Array<{text, estimatedDuration}>} * @returns {Array<{text, estimatedDuration}>}
*/ */
function splitIntoAudioSegments(text, videoDur, charsPerSec = 5) { function splitIntoAudioSegments(text, videoDur, charsPerSec = 5) {
// 优先在自然断点切分(句号/感叹号/分号) const estimatedTotal = text.length / charsPerSec
const naturalBreaks = splitTextIntoSentences(text)
if (naturalBreaks.length <= 1) {
// 无自然断点:在半段处(含小数点)切分
const chars = text.length
const estimatedTotal = chars / charsPerSec
if (estimatedTotal <= videoDur) { if (estimatedTotal <= videoDur) {
// 整段可容纳
return [{ text, estimatedDuration: estimatedTotal }] return [{ text, estimatedDuration: estimatedTotal }]
} }
// 无法单段容纳,在中间逗号处切
const mid = Math.floor(chars / 2) // 在原文标点处切分,保留原始标点(不剥离、不重加)
const breakIdx = text.indexOf('', mid) const breakPattern = /[。!;,]/
if (breakIdx > 0) { const rawParts = []
return [ let lastIdx = 0
{ text: text.slice(0, breakIdx + 1), estimatedDuration: (breakIdx + 1) / charsPerSec }, for (let i = 0; i < text.length; i++) {
{ text: text.slice(breakIdx + 1), estimatedDuration: (chars - breakIdx - 1) / charsPerSec }, if (breakPattern.test(text[i])) {
] rawParts.push(text.slice(lastIdx, i + 1))
lastIdx = i + 1
} }
// 强制按字数切 }
const halfChars = Math.floor(chars / 2) if (lastIdx < text.length) {
rawParts.push(text.slice(lastIdx))
}
// 无标点断点,强制对半切
if (rawParts.length <= 1) {
const half = Math.floor(text.length / 2)
return [ return [
{ text: text.slice(0, halfChars), estimatedDuration: halfChars / charsPerSec }, { text: text.slice(0, half), estimatedDuration: half / charsPerSec },
{ text: text.slice(halfChars), estimatedDuration: (chars - halfChars) / charsPerSec }, { text: text.slice(half), estimatedDuration: (text.length - half) / charsPerSec },
] ]
} }
// 多个自然句:逐句判断,合并短句 // 合并短片段,确保每段 ≤ videoDur
const result = [] const result = []
let currentText = '' let curText = ''
let currentEstDur = 0 let curDur = 0
for (let i = 0; i < naturalBreaks.length; i++) { for (const part of rawParts) {
const sentence = naturalBreaks[i] const partDur = part.length / charsPerSec
const sentenceLen = sentence.length if (curDur + partDur <= videoDur) {
const sentenceEstDur = sentenceLen / charsPerSec curText += part
curDur += partDur
if (currentEstDur + sentenceEstDur <= videoDur) {
// 可以合并到当前段
currentText += sentence + '。'
currentEstDur += sentenceEstDur
} else { } else {
// 先保存当前段 if (curText) result.push({ text: curText, estimatedDuration: curDur })
if (currentText) { // 单段超长,强制对半切
result.push({ text: currentText.trim(), estimatedDuration: currentEstDur }) if (partDur > videoDur) {
} const half = Math.floor(part.length / 2)
currentText = sentence + '。' result.push({ text: part.slice(0, half), estimatedDuration: half / charsPerSec })
currentEstDur = sentenceEstDur curText = part.slice(half)
curDur = (part.length - half) / charsPerSec
// 单句本身超长(超 videoDur) } else {
if (sentenceEstDur > videoDur) { curText = part
// 按半段切 curDur = partDur
const halfLen = Math.floor(sentenceLen / 2)
const half1 = sentence.slice(0, halfLen)
const half2 = sentence.slice(halfLen)
// 回退上一段,用两个半段替代
result.pop()
result.push({ text: half1, estimatedDuration: halfLen / charsPerSec })
currentText = half2 + '。'
currentEstDur = (sentenceLen - halfLen) / charsPerSec
} }
} }
} }
if (curText) result.push({ text: curText, estimatedDuration: curDur })
if (currentText) {
result.push({ text: currentText.trim(), estimatedDuration: currentEstDur })
}
return result return result
} }

View File

@@ -71,7 +71,7 @@ function synthesize(text, options = {}) {
fs.mkdirSync(outputDir, { recursive: true }) fs.mkdirSync(outputDir, { recursive: true })
text = text.trimEnd() text = text.trimEnd()
if (!/[。!?.!?…]$/.test(text)) text += '。' if (!/[。!?.!?…]$/.test(text)) text += '。'
const id = options.id || 1 const id = options.id || 1
const fileName = `seg_${String(id).padStart(3, '0')}.mp3` const fileName = `seg_${String(id).padStart(3, '0')}.mp3`