feat(capcut): 优化音频/字幕添加策略并重构语音切分逻辑
- 音频和字幕 API 调用改为先批量添加,批量失败时逐个兜底
- 重写 `splitIntoAudioSegments`,基于原始标点保留切分,合并短片段
- `qwen-tts.js` 补充中文分号和逗号作为句末标点判断
This commit is contained in:
@@ -173,7 +173,7 @@ async function assemble(args) {
|
||||
|
||||
if (items.length === 0) throw new Error('没有可用的素材文件')
|
||||
|
||||
// ffprobe 测量实际时长
|
||||
// 测量实际时长
|
||||
let audioMeasured = 0, videoMeasured = 0
|
||||
for (const item of items) {
|
||||
if (item.audio && !item.audio.startsWith('http')) {
|
||||
|
||||
@@ -10,8 +10,9 @@
|
||||
*/
|
||||
|
||||
const path = require('path')
|
||||
const { api, US } = require('./capcut-api')
|
||||
const { splitTextIntoSentences, loadAccountConfig: loadAccountConfigFromUtils } = require('./pipeline-utils')
|
||||
const fs = require('fs')
|
||||
const { api, US, getConfig } = require('./capcut-api')
|
||||
const { splitTextIntoSentences, loadAccountConfig: loadAccountConfigFromUtils, getManifestDir } = require('./pipeline-utils')
|
||||
|
||||
// ============================================================================
|
||||
// 账号配置读取
|
||||
@@ -314,17 +315,11 @@ async function addVideos(draftUrl, inputDir, items, timeline, width, height, tra
|
||||
// ============================================================================
|
||||
|
||||
async function addSlots(draftUrl, items, timeline) {
|
||||
const { api: capcutApi, US } = require('./capcut-api')
|
||||
const { getManifestDir } = require('./pipeline-utils')
|
||||
const path = require('path')
|
||||
|
||||
// 获取当前云端草稿的 draft_content,获取第一个 video track 的 id
|
||||
let draftData
|
||||
try {
|
||||
draftData = (await capcutApi('get_draft', { draft_url: draftUrl })).data || {}
|
||||
draftData = (await api('get_draft', { draft_url: draftUrl })).data || {}
|
||||
} catch (err) {
|
||||
// get_draft 接口不可用,尝试从本地 manifest 目录寻找草稿
|
||||
const manifestDir = path.dirname(draftUrl.startsWith('http') ? draftUrl : '')
|
||||
console.log(' get_draft 不可用,切换本地写入模式')
|
||||
return addSlotsLocally(draftUrl, items, timeline)
|
||||
}
|
||||
@@ -336,30 +331,12 @@ async function addSlots(draftUrl, items, timeline) {
|
||||
return
|
||||
}
|
||||
|
||||
// 构造 slot 数据
|
||||
// 构造 slot 数据(复用 buildSlot)
|
||||
const slots = []
|
||||
for (let i = 0; i < items.length; i++) {
|
||||
const item = items[i]
|
||||
const tl = timeline[i]
|
||||
const segId = item.segmentId || item._segmentId
|
||||
const segId = items[i].segmentId || items[i]._segmentId
|
||||
if (!segId) continue
|
||||
|
||||
const slotId = generateUUID()
|
||||
slots.push({
|
||||
id: slotId,
|
||||
material_id: segId,
|
||||
track_id: videoTrack.id,
|
||||
render_index: i,
|
||||
type: 'video',
|
||||
common_property: {
|
||||
start_time: tl.start,
|
||||
source_timerange: { start: 0, duration: tl.duration },
|
||||
target_timerange: { start: tl.start, duration: tl.duration },
|
||||
is_avatar: false,
|
||||
audio_fade: { fade_in_duration: 0, fade_out_duration: 0 },
|
||||
volume: 1.0,
|
||||
},
|
||||
})
|
||||
slots.push(buildSlot(segId, videoTrack.id, i, timeline[i]))
|
||||
}
|
||||
|
||||
if (slots.length === 0) {
|
||||
@@ -369,13 +346,12 @@ async function addSlots(draftUrl, items, timeline) {
|
||||
|
||||
// 通过 add_slots API 写入
|
||||
try {
|
||||
await capcutApi('add_slots', {
|
||||
await api('add_slots', {
|
||||
draft_url: draftUrl,
|
||||
slots: JSON.stringify(slots),
|
||||
})
|
||||
console.log(` 已写入 ${slots.length} 个 slot 到视频轨道`)
|
||||
} catch (err) {
|
||||
// API 不支持时,降级为本地写入
|
||||
console.log(` add_slots API 不可用: ${err.message},降级为本地写入`)
|
||||
await addSlotsLocally(draftUrl, items, timeline, videoTrack.id)
|
||||
}
|
||||
@@ -384,9 +360,6 @@ async function addSlots(draftUrl, items, timeline) {
|
||||
// 直接写入本地 draft_content.json 的 slot
|
||||
// options.draftId: 可选,直接指定 draftId(优先使用),否则从 draftUrl 提取
|
||||
async function addSlotsLocally(draftUrl, items, timeline, trackId, options = {}) {
|
||||
const { api: capcutApi, US } = require('./capcut-api')
|
||||
const fs = require('fs')
|
||||
|
||||
// 优先使用 options.draftId,否则从 draftUrl 提取
|
||||
let draftId = options.draftId || null
|
||||
if (!draftId) {
|
||||
@@ -403,7 +376,6 @@ async function addSlotsLocally(draftUrl, items, timeline, trackId, options = {})
|
||||
return
|
||||
}
|
||||
|
||||
const { getConfig } = require('./capcut-api')
|
||||
const jianyingPath = getConfig().jianyingDraftPath
|
||||
const draftPath = path.join(jianyingPath, draftId, 'draft_content.json')
|
||||
if (!fs.existsSync(draftPath)) {
|
||||
@@ -461,7 +433,7 @@ async function addSlotsLocally(draftUrl, items, timeline, trackId, options = {})
|
||||
}
|
||||
}
|
||||
|
||||
function buildSlot(segId, trackId, index, tl, US) {
|
||||
function buildSlot(segId, trackId, index, tl) {
|
||||
return {
|
||||
id: generateUUID(),
|
||||
material_id: segId,
|
||||
@@ -548,17 +520,26 @@ async function addVoiceover(draftUrl, inputDir, items, timeline, audioUrls = {})
|
||||
return
|
||||
}
|
||||
|
||||
// 逐个添加音频(CapCut API 批量添加不稳定)
|
||||
// 批量添加音频(同一轨道),失败时逐个兜底
|
||||
let addedCount = 0
|
||||
for (const audioInfo of segmentsFlat) {
|
||||
try {
|
||||
await api('add_audios', {
|
||||
draft_url: draftUrl,
|
||||
audio_infos: JSON.stringify([audioInfo]),
|
||||
})
|
||||
addedCount++
|
||||
} catch (err) {
|
||||
console.error(` 音频添加失败: ${err.message.slice(0, 80)}`)
|
||||
try {
|
||||
await api('add_audios', {
|
||||
draft_url: draftUrl,
|
||||
audio_infos: JSON.stringify(segmentsFlat),
|
||||
})
|
||||
addedCount = segmentsFlat.length
|
||||
} catch (err) {
|
||||
console.log(` 批量添加音频失败 (${err.message.slice(0, 60)}),逐个添加...`)
|
||||
for (const audioInfo of segmentsFlat) {
|
||||
try {
|
||||
await api('add_audios', {
|
||||
draft_url: draftUrl,
|
||||
audio_infos: JSON.stringify([audioInfo]),
|
||||
})
|
||||
addedCount++
|
||||
} catch (e2) {
|
||||
console.error(` 音频添加失败: ${e2.message.slice(0, 80)}`)
|
||||
}
|
||||
}
|
||||
}
|
||||
const ossCount = segmentsFlat.filter(a => a.audio_url.startsWith('http')).length
|
||||
@@ -702,18 +683,28 @@ async function addSubtitles(draftUrl, items, timeline, style = {}, split = false
|
||||
style_text: 0,
|
||||
}
|
||||
|
||||
// 逐条添加字幕(CapCut API 批量添加不稳定)
|
||||
// 批量添加字幕(同一轨道),失败时逐条兜底
|
||||
let addedCount = 0
|
||||
for (const cap of captions) {
|
||||
try {
|
||||
await api('add_captions', {
|
||||
draft_url: draftUrl,
|
||||
captions: JSON.stringify([cap]),
|
||||
...commonStyle,
|
||||
})
|
||||
addedCount++
|
||||
} catch (err) {
|
||||
console.error(` 字幕添加失败: ${err.message.slice(0, 80)}`)
|
||||
try {
|
||||
await api('add_captions', {
|
||||
draft_url: draftUrl,
|
||||
captions: JSON.stringify(captions),
|
||||
...commonStyle,
|
||||
})
|
||||
addedCount = captions.length
|
||||
} catch (err) {
|
||||
console.log(` 批量添加字幕失败 (${err.message.slice(0, 60)}),逐条添加...`)
|
||||
for (const cap of captions) {
|
||||
try {
|
||||
await api('add_captions', {
|
||||
draft_url: draftUrl,
|
||||
captions: JSON.stringify([cap]),
|
||||
...commonStyle,
|
||||
})
|
||||
addedCount++
|
||||
} catch (e2) {
|
||||
console.error(` 字幕添加失败: ${e2.message.slice(0, 80)}`)
|
||||
}
|
||||
}
|
||||
}
|
||||
console.log(` 已添加 ${addedCount}/${captions.length} 条字幕${split ? ' (分句模式)' : ''} (字体: ${style.font || '默认'}, 动画: ${animStyle.inAnimation || '无'} → ${animStyle.outAnimation || '无'})`)
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
*/
|
||||
|
||||
const path = require('path')
|
||||
const { saveManifest, ensureDir, log, getManifestDir, splitTextIntoSentences } = require('./pipeline-utils')
|
||||
const { saveManifest, ensureDir, log, getManifestDir } = require('./pipeline-utils')
|
||||
|
||||
/**
|
||||
* 在语义断点处将文案切分为音频片段
|
||||
@@ -25,73 +25,59 @@ const { saveManifest, ensureDir, log, getManifestDir, splitTextIntoSentences } =
|
||||
* @returns {Array<{text, estimatedDuration}>}
|
||||
*/
|
||||
function splitIntoAudioSegments(text, videoDur, charsPerSec = 5) {
|
||||
// 优先在自然断点切分(句号/感叹号/分号)
|
||||
const naturalBreaks = splitTextIntoSentences(text)
|
||||
if (naturalBreaks.length <= 1) {
|
||||
// 无自然断点:在半段处(含小数点)切分
|
||||
const chars = text.length
|
||||
const estimatedTotal = chars / charsPerSec
|
||||
if (estimatedTotal <= videoDur) {
|
||||
// 整段可容纳
|
||||
return [{ text, estimatedDuration: estimatedTotal }]
|
||||
const estimatedTotal = text.length / charsPerSec
|
||||
if (estimatedTotal <= videoDur) {
|
||||
return [{ text, estimatedDuration: estimatedTotal }]
|
||||
}
|
||||
|
||||
// 在原文标点处切分,保留原始标点(不剥离、不重加)
|
||||
const breakPattern = /[。!;,]/
|
||||
const rawParts = []
|
||||
let lastIdx = 0
|
||||
for (let i = 0; i < text.length; i++) {
|
||||
if (breakPattern.test(text[i])) {
|
||||
rawParts.push(text.slice(lastIdx, i + 1))
|
||||
lastIdx = i + 1
|
||||
}
|
||||
// 无法单段容纳,在中间逗号处切
|
||||
const mid = Math.floor(chars / 2)
|
||||
const breakIdx = text.indexOf(',', mid)
|
||||
if (breakIdx > 0) {
|
||||
return [
|
||||
{ text: text.slice(0, breakIdx + 1), estimatedDuration: (breakIdx + 1) / charsPerSec },
|
||||
{ text: text.slice(breakIdx + 1), estimatedDuration: (chars - breakIdx - 1) / charsPerSec },
|
||||
]
|
||||
}
|
||||
// 强制按字数切
|
||||
const halfChars = Math.floor(chars / 2)
|
||||
}
|
||||
if (lastIdx < text.length) {
|
||||
rawParts.push(text.slice(lastIdx))
|
||||
}
|
||||
|
||||
// 无标点断点,强制对半切
|
||||
if (rawParts.length <= 1) {
|
||||
const half = Math.floor(text.length / 2)
|
||||
return [
|
||||
{ text: text.slice(0, halfChars), estimatedDuration: halfChars / charsPerSec },
|
||||
{ text: text.slice(halfChars), estimatedDuration: (chars - halfChars) / charsPerSec },
|
||||
{ text: text.slice(0, half), estimatedDuration: half / charsPerSec },
|
||||
{ text: text.slice(half), estimatedDuration: (text.length - half) / charsPerSec },
|
||||
]
|
||||
}
|
||||
|
||||
// 多个自然句:逐句判断,合并短句
|
||||
// 合并短片段,确保每段 ≤ videoDur
|
||||
const result = []
|
||||
let currentText = ''
|
||||
let currentEstDur = 0
|
||||
let curText = ''
|
||||
let curDur = 0
|
||||
|
||||
for (let i = 0; i < naturalBreaks.length; i++) {
|
||||
const sentence = naturalBreaks[i]
|
||||
const sentenceLen = sentence.length
|
||||
const sentenceEstDur = sentenceLen / charsPerSec
|
||||
|
||||
if (currentEstDur + sentenceEstDur <= videoDur) {
|
||||
// 可以合并到当前段
|
||||
currentText += sentence + '。'
|
||||
currentEstDur += sentenceEstDur
|
||||
for (const part of rawParts) {
|
||||
const partDur = part.length / charsPerSec
|
||||
if (curDur + partDur <= videoDur) {
|
||||
curText += part
|
||||
curDur += partDur
|
||||
} else {
|
||||
// 先保存当前段
|
||||
if (currentText) {
|
||||
result.push({ text: currentText.trim(), estimatedDuration: currentEstDur })
|
||||
}
|
||||
currentText = sentence + '。'
|
||||
currentEstDur = sentenceEstDur
|
||||
|
||||
// 单句本身超长(超 videoDur)
|
||||
if (sentenceEstDur > videoDur) {
|
||||
// 按半段切
|
||||
const halfLen = Math.floor(sentenceLen / 2)
|
||||
const half1 = sentence.slice(0, halfLen)
|
||||
const half2 = sentence.slice(halfLen)
|
||||
// 回退上一段,用两个半段替代
|
||||
result.pop()
|
||||
result.push({ text: half1, estimatedDuration: halfLen / charsPerSec })
|
||||
currentText = half2 + '。'
|
||||
currentEstDur = (sentenceLen - halfLen) / charsPerSec
|
||||
if (curText) result.push({ text: curText, estimatedDuration: curDur })
|
||||
// 单段超长,强制对半切
|
||||
if (partDur > videoDur) {
|
||||
const half = Math.floor(part.length / 2)
|
||||
result.push({ text: part.slice(0, half), estimatedDuration: half / charsPerSec })
|
||||
curText = part.slice(half)
|
||||
curDur = (part.length - half) / charsPerSec
|
||||
} else {
|
||||
curText = part
|
||||
curDur = partDur
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (currentText) {
|
||||
result.push({ text: currentText.trim(), estimatedDuration: currentEstDur })
|
||||
}
|
||||
if (curText) result.push({ text: curText, estimatedDuration: curDur })
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -71,7 +71,7 @@ function synthesize(text, options = {}) {
|
||||
fs.mkdirSync(outputDir, { recursive: true })
|
||||
|
||||
text = text.trimEnd()
|
||||
if (!/[。!?.!?…]$/.test(text)) text += '。'
|
||||
if (!/[。!?;,.!?…]$/.test(text)) text += '。'
|
||||
|
||||
const id = options.id || 1
|
||||
const fileName = `seg_${String(id).padStart(3, '0')}.mp3`
|
||||
|
||||
Reference in New Issue
Block a user