feat(skills): 完善视频生产 pipeline 及新增健身跟练账号

- SKILL.md: 新增工作流阶段定义、质量卡点、分镜规则
- manifest-schema.md: 补充完整字段规范及类型定义
- phase-tts.js: 优化 TTS 分段合成逻辑,添加进度追踪
- capcut-tracks.js: 扩展轨道构建能力,支持更多元素类型
- capcut-timeline.js: 改进时间线生成,支持淡入淡出
- capcut_assemble.js: 新增 assemble 阶段完整实现
- cmd-init.js: 完善 init 命令逻辑
- qwen-tts.js: 调整超时配置
- accounts/禁忌帝王学: 更新拆分/图像/台词提示词
- accounts/健身跟练: 新增账号含 account.json 及全套提示词模板
- 新增 workflow-issues-20260501.md 参考文档

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
lc
2026-05-06 22:53:37 +08:00
parent e6daf7a8d8
commit 6eec0e8889
28 changed files with 2199 additions and 253 deletions

View File

@@ -3,12 +3,15 @@
*
* 核心算法模块。纯函数 + ffmpeg自包含可测试。
*
* 规则:
* 铁律(固化,不可绕过):
* 音频生成后不可调速TTS=1.15xCapCut无speed字段
* 视频:始终配合音频时长(只允许加速/截断,不允许慢放/冻结)
*
* 时间线规则:
* 图片模式: TTS 音频时长 = 画面时长,无音频 = 跳过
* 视频模式: TTS 为主轴,视频通过策略适配
* 视频比音频长 → 加速(≤2x) / 裁剪(>2x)
* 视频比音频短 → 放缓(≥0.5x) / 画面停顿(<0.5x)
* 所有策略失败 → 兜底截断
* 视频比音频短 → 禁止!应在分镜阶段拆分 shot不允许慢放/冻结补齐
*/
const fs = require('fs')
@@ -20,6 +23,20 @@ const { US } = require('./capcut-api')
// 时间线构建
// ============================================================================
/**
* 构建时间线条目
*
* @param {Array} items - manifest items
* @returns {Array} timeline entries
*
* 策略选择(固化,按 ratio = videoDur / audioDur
* ≥ 1.1, ≤ 2 → speed_up (视频加速追上音频,最优)
* > 2 → trim (视频截断至音频时长)
* 0.9 ~ 1.1 → none (接近匹配,无需调整)
* < 0.9 → 禁止!音频时长超过视频,分镜阶段未正确拆分 shot
*
* 铁律:不允许 slow_down / freeze不允许音频调速
*/
function buildTimeline(items) {
let offset = 0
return items.map(item => {
@@ -46,7 +63,7 @@ function buildTimeline(items) {
return entry
}
// 视频模式:策略选择
// 视频模式:策略选择(铁律:不允许音频>视频)
const ratio = videoDur / audioDur
if (ratio > 1.1) {
@@ -59,23 +76,25 @@ function buildTimeline(items) {
offset += dur
return entry
}
} else if (ratio < 0.9) {
if (ratio >= 0.5) {
const entry = { start: offset, end: offset + dur, duration: dur, speed: ratio, strategy: 'slow_down' }
offset += dur
return entry
} else {
const entry = {
start: offset, end: offset + dur, duration: dur, speed: 1,
strategy: 'freeze', freezeExtra: dur - videoDur,
}
offset += dur
return entry
}
} else {
} else if (ratio >= 0.9) {
// 0.9 ~ 1.1:无需调整
const entry = { start: offset, end: offset + dur, duration: dur, speed: 1, strategy: 'none' }
offset += dur
return entry
} else {
// ratio < 0.9:音频时长超过视频!
// 铁律禁止:不允许慢放/冻结/拼接补齐。此情况应在分镜阶段拆分 shot。
// 强制截断并打印错误标记,由主 Agent 上报给用户/打回分镜重做。
const entry = {
start: offset, end: offset + dur, duration: dur, speed: 1,
strategy: 'FORBIDDEN_audio_gt_video',
ratio: parseFloat(ratio.toFixed(3)),
videoDur: parseFloat((videoDur / US).toFixed(2)),
audioDur: parseFloat((audioDur / US).toFixed(2)),
error: '音频时长(' + (audioDur / US).toFixed(2) + 's) > 视频时长(' + (videoDur / US).toFixed(2) + 's),分镜阶段 shot 未正确拆分,请打回重新切割',
}
offset += dur
return entry
}
})
}
@@ -87,16 +106,18 @@ function buildTimeline(items) {
/**
* ffmpeg 视频调整:根据策略适配音频时长
*
* 策略(按 ratio = videoDur / audioDur 选择):
* speed_up (ratio > 1.1, ≤2x) → setpts 压缩时间(加速)
* trim (ratio > 2x) → 截断到目标时长
* slow_down (ratio < 0.9, ≥0.5x) → setpts 拉长时间(慢放)
* freeze (ratio < 0.5x) → 视频原速 + 最后一帧冻结补时长
* 允许策略(按 ratio = videoDur / audioDur 选择):
* speed_up (ratio > 1.1, ≤2x) → setpts 压缩时间(加速),最优
* trim (ratio > 2x) → 截断到目标时长,次选
* none (0.9~1.1) → 无需调整
*
* 禁止策略(已删除):
* slow_down (ratio < 0.9) → ❌ 音频不可调速!
* freeze (ratio < 0.5) → ❌ 不允许冻结帧补齐!
*
* 所有策略失败后兜底:截断到目标时长
*/
async function adjustVideoSpeed(videoPath, targetDurationSec, strategy = 'none', speed = 1, freezeExtraUs = 0) {
async function adjustVideoSpeed(videoPath, targetDurationSec, strategy = 'none', speed = 1) {
if (!fs.existsSync(videoPath)) return videoPath
if (strategy === 'none') return videoPath
@@ -150,72 +171,9 @@ async function adjustVideoSpeed(videoPath, targetDurationSec, strategy = 'none',
console.log(` 加速: ${videoDur.toFixed(1)}s → ${targetDurationSec.toFixed(1)}s (${speedVal}x)`)
resolve(outPath)
})
} else if (strategy === 'slow_down') {
const factor = (1 / speed).toFixed(3)
execFile('ffmpeg', [
'-y', '-i', videoPath,
'-filter_complex', `setpts=PTS*${factor}`,
'-an',
outPath
], { timeout: 30000 }, (err) => {
if (err) {
console.log(` 放缓失败,兜底截断: ${err.message}`)
fallbackTrim(resolve)
return
}
console.log(` 放缓: ${videoDur.toFixed(1)}s → ${targetDurationSec.toFixed(1)}s (${speed.toFixed(2)}x speed)`)
resolve(outPath)
})
} else if (strategy === 'freeze') {
const freezeSec = freezeExtraUs / US
execFile('ffmpeg', [
'-y', '-i', videoPath,
'-filter_complex', `tpad=stop=-1:stop_duration=${freezeSec.toFixed(3)}`,
'-an',
outPath
], { timeout: 30000 }, (err) => {
if (err) {
console.log(` tpad freeze 失败,尝试 concat 方案: ${err.message}`)
const lastFrame = videoPath.replace(/(\.\w+)$/, '_lastframe.png')
const frozenVideo = videoPath.replace(/(\.\w+)$/, '_frozen.mp4')
execFile('ffmpeg', [
'-y', '-sseof', '-0.1', '-i', videoPath,
'-frames:v', '1', lastFrame
], { timeout: 10000 }, (err2) => {
if (err2) { console.log(` concat 方案也失败,兜底截断`); fallbackTrim(resolve); return }
execFile('ffmpeg', [
'-y', '-loop', '1', '-i', lastFrame,
'-t', String(freezeSec.toFixed(3)),
'-pix_fmt', 'yuv420p',
'-vf', 'scale=trunc(iw/2)*2:trunc(ih/2)*2',
frozenVideo
], { timeout: 15000 }, (err3) => {
if (err3) {
try { fs.unlinkSync(lastFrame) } catch (_) {}
console.log(` 冻结帧视频生成失败,兜底截断`)
fallbackTrim(resolve)
return
}
const concatList = path.join(path.dirname(videoPath), '_freeze_concat.txt')
fs.writeFileSync(concatList, `file '${videoPath}'\nfile '${frozenVideo}'\n`)
execFile('ffmpeg', [
'-y', '-f', 'concat', '-safe', '0', '-i', concatList,
'-c', 'copy', outPath
], { timeout: 30000 }, (err4) => {
try { fs.unlinkSync(lastFrame); fs.unlinkSync(frozenVideo); fs.unlinkSync(concatList) } catch (_) {}
if (err4) { console.log(` 拼接失败,兜底截断`); fallbackTrim(resolve); return }
console.log(` 画面停顿: ${videoDur.toFixed(1)}s + 冻结 ${freezeSec.toFixed(1)}s = ${targetDurationSec.toFixed(1)}s`)
resolve(outPath)
})
})
})
return
}
console.log(` 画面停顿: ${videoDur.toFixed(1)}s + 冻结 ${freezeSec.toFixed(1)}s = ${targetDurationSec.toFixed(1)}s`)
resolve(outPath)
})
} else {
resolve(videoPath)
// 未知策略,兜底截断
fallbackTrim(resolve)
}
})
})

View File

@@ -3,6 +3,10 @@
*
* 所有 add* 函数 + 转场策略 + 账号配置读取。
* Agent 修改字幕风格、Ken Burns、转场、特效等只需关注此文件。
*
* 音频策略(固化铁律):
* - 音频由 TTS 1.15x 生成,导入 CapCut 时无 speed 字段(不可调速)
* - 每个 item 的 segments[] 逐段添加,各段 start 按 startOffset 精确对齐
*/
const path = require('path')
@@ -303,33 +307,233 @@ async function addVideos(draftUrl, inputDir, items, timeline, width, height, tra
return allSegmentIds
}
// ============================================================================
// Write segments into the video track timeline (slots).
//
// Background: add_videos only registers clips in the material library and
// does not place them on the timeline. Call this after add_videos succeeds
// to write every segment_id into the first video track.
//
// Falls back to addSlotsLocally() when the cloud API is unavailable.
// ============================================================================
async function addSlots(draftUrl, items, timeline) {
  const { api: capcutApi, US } = require('./capcut-api')

  // Fetch the cloud draft content to locate the first video track.
  let draftData
  try {
    draftData = (await capcutApi('get_draft', { draft_url: draftUrl })).data || {}
  } catch (err) {
    // get_draft endpoint unavailable — switch to local draft editing.
    console.log(' get_draft 不可用,切换本地写入模式')
    return addSlotsLocally(draftUrl, items, timeline)
  }

  const videoTrack = (draftData.tracks || []).find(t => t.type === 'video')
  if (!videoTrack) {
    console.log(' 未找到 video track跳过 slot 写入')
    return
  }

  // Build slot payloads; items without a resolved segment id are skipped.
  // Reuses buildSlot() so the cloud and local paths produce identical slots.
  const slots = []
  for (let i = 0; i < items.length; i++) {
    const segId = items[i].segmentId || items[i]._segmentId
    if (!segId) continue
    slots.push(buildSlot(segId, videoTrack.id, i, timeline[i], US))
  }
  if (slots.length === 0) {
    console.log(' 无有效 slot 数据,跳过')
    return
  }

  // Write via the add_slots API; degrade to local file writes on failure.
  try {
    await capcutApi('add_slots', {
      draft_url: draftUrl,
      slots: JSON.stringify(slots),
    })
    console.log(` 已写入 ${slots.length} 个 slot 到视频轨道`)
  } catch (err) {
    console.log(` add_slots API 不可用: ${err.message},降级为本地写入`)
    await addSlotsLocally(draftUrl, items, timeline, videoTrack.id)
  }
}
// Write slots directly into the local draft_content.json.
//
// @param {string} draftUrl - draft URL; its draft_id query param identifies the draft
// @param {Array}  items - manifest items (item._segmentId may be filled in here)
// @param {Array}  timeline - timeline entries aligned 1:1 with items
// @param {string} [trackId] - explicit video track id; first video track otherwise
// @param {Object} [options] - options.draftId takes precedence over the URL param
async function addSlotsLocally(draftUrl, items, timeline, trackId, options = {}) {
  const { US, getConfig } = require('./capcut-api')
  const fs = require('fs')

  // Resolve the draft id: explicit option first, then the URL query string.
  let draftId = options.draftId || null
  if (!draftId) {
    try {
      draftId = new URL(draftUrl).searchParams.get('draft_id')
    } catch {
      console.log(' 无法解析 draftUrl跳过本地 slot 写入')
      return
    }
  }
  if (!draftId) {
    console.log(' 无法提取 draft_id跳过本地 slot 写入')
    return
  }

  const jianyingPath = getConfig().jianyingDraftPath
  const draftPath = path.join(jianyingPath, draftId, 'draft_content.json')
  if (!fs.existsSync(draftPath)) {
    console.log(` 本地草稿不存在: ${draftPath},跳过 slot 写入`)
    return
  }

  let draft
  try {
    draft = JSON.parse(fs.readFileSync(draftPath, 'utf-8'))
  } catch {
    console.log(' draft_content.json 读取失败,跳过')
    return
  }

  // Locate the target video track (guard against a missing tracks array).
  const tracks = draft.tracks || []
  const videoTrack = trackId
    ? tracks.find(t => t.id === trackId)
    : tracks.find(t => t.type === 'video')
  if (!videoTrack) {
    console.log(' 未找到 video track跳过')
    return
  }

  const slots = []
  for (let i = 0; i < items.length; i++) {
    const item = items[i]
    const tl = timeline[i]
    let segId = item.segmentId || item._segmentId
    if (!segId) {
      // Fall back to matching the clip in materials.videos by file name.
      const fname = item.video ? path.basename(item.video) : ''
      const matVideo = (draft.materials?.videos || []).find(v => {
        const matFname = path.basename(v.path || '')
        return fname && matFname.includes(fname.replace('videos/', ''))
      })
      if (matVideo) {
        // Cache the resolved id on the item for later phases.
        items[i]._segmentId = matVideo.id
        segId = matVideo.id
      }
    }
    if (segId) slots.push(buildSlot(segId, videoTrack.id, i, tl, US))
  }

  if (slots.length > 0) {
    videoTrack.slots = slots
    // Draft duration = end of the last timeline entry (microseconds).
    draft.duration = timeline.length > 0 ? timeline[timeline.length - 1].end : 0
    fs.writeFileSync(draftPath, JSON.stringify(draft, null, 2), 'utf-8')
    console.log(` 已本地写入 ${slots.length} 个 slot 到视频轨道`)
    // Nudge JianYing to rescan the drafts directory.
    triggerDirScan(path.dirname(draftPath))
  }
}
// Assemble one timeline slot descriptor for a video segment.
// `tl.start` / `tl.duration` are already in timeline units; `US` is accepted
// for signature compatibility with callers but is not consumed here.
function buildSlot(segId, trackId, index, tl, US) {
  const placement = {
    start_time: tl.start,
    source_timerange: { start: 0, duration: tl.duration },
    target_timerange: { start: tl.start, duration: tl.duration },
    is_avatar: false,
    audio_fade: { fade_in_duration: 0, fade_out_duration: 0 },
    volume: 1.0,
  }
  return {
    id: generateUUID(),
    material_id: segId,
    track_id: trackId,
    render_index: index,
    type: 'video',
    common_property: placement,
  }
}
// Generate an uppercase RFC-4122-shaped v4 UUID using Math.random.
// NOTE(review): Math.random is not cryptographically secure — acceptable
// here presumably because the ids only label draft slots; confirm.
function generateUUID() {
  const randomHex = (c) => {
    const r = Math.floor(Math.random() * 16)
    const v = c === 'x' ? r : (r & 0x3) | 0x8
    return v.toString(16).toUpperCase()
  }
  let out = ''
  for (const c of 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx') {
    out += (c === 'x' || c === 'y') ? randomHex(c) : c
  }
  return out
}
// Best-effort nudge to make the JianYing app notice on-disk draft changes.
// macOS only: copies the draft directory to a temp sibling via rsync and
// deletes the copy again, producing filesystem activity next to the drafts.
// NOTE(review): presumably JianYing watches this tree via FSEvents and the
// churn triggers a rescan — confirm against the app's behavior.
// No-op on other platforms; rsync/cleanup errors are intentionally ignored.
function triggerDirScan(dir) {
  const { execFile } = require('child_process')
  const tmp = dir + '.slot_tmp'
  if (process.platform === 'darwin') {
    // Fire-and-forget: the callback only cleans up the temp copy,
    // whether or not rsync succeeded.
    execFile('rsync', ['-a', dir + '/', tmp], (err) => {
      try { require('fs').rmSync(tmp, { recursive: true, force: true }) } catch {}
    })
  }
}
// ============================================================================
// 添加 TTS 配音
// ============================================================================
async function addVoiceover(draftUrl, inputDir, items, timeline, audioUrls = {}) {
const audioItems = items.filter(item => item.audio)
if (audioItems.length === 0) {
console.log(' 无 TTS 音频文件,跳过')
return
}
const audioInfos = []
const resolveAudio = (relPath) => {
if (relPath.startsWith('http')) return relPath
if (audioUrls[relPath]) return audioUrls[relPath]
return path.isAbsolute(relPath) ? relPath : path.resolve(inputDir, relPath)
}
// 优先使用 segments[] 逐段添加(精确对齐)
// 无 segments 时降级为旧的整段方式
const segmentsFlat = []
for (let i = 0; i < items.length; i++) {
const item = items[i]
const tl = timeline[i]
if (!item.audio) continue
if (item.audio) {
const audioUrl = resolveAudio(item.audio)
if (item.segments && item.segments.length > 0) {
// 使用 segments 精确添加
for (const seg of item.segments) {
if (!seg.audio || seg.error) continue
const audioUrl = seg.audio.startsWith('http')
? seg.audio
: (audioUrls[seg.audio] || path.resolve(inputDir, seg.audio))
const segDurUs = Math.round(seg.duration * US)
const segStartUs = tl.start + Math.round(seg.startOffset * US)
segmentsFlat.push({
audio_url: audioUrl,
start: segStartUs,
end: segStartUs + segDurUs,
duration: segDurUs,
volume: 1.0,
})
}
} else {
// 降级:整段添加
const audioUrl = item.audio.startsWith('http')
? item.audio
: (audioUrls[item.audio] || path.resolve(inputDir, item.audio))
const audioDurUs = item.audioDuration ? item.audioDuration * US : tl.duration
audioInfos.push({
segmentsFlat.push({
audio_url: audioUrl,
start: tl.start,
end: tl.start + audioDurUs,
@@ -339,17 +543,26 @@ async function addVoiceover(draftUrl, inputDir, items, timeline, audioUrls = {})
}
}
if (audioInfos.length === 0) {
console.log(' 无可用音频,跳过配音')
if (segmentsFlat.length === 0) {
console.log(' 无 TTS 音频文件,跳过')
return
}
await api('add_audios', {
draft_url: draftUrl,
audio_infos: JSON.stringify(audioInfos),
})
const ossCount = audioInfos.filter(a => a.audio_url.startsWith('http')).length
console.log(` 已添加 ${audioInfos.length} 段 TTS 配音 (${ossCount > 0 ? `${ossCount} 段 OSS + ` : ''}${audioInfos.length - ossCount} 段本地)`)
// 逐个添加音频CapCut API 批量添加不稳定)
let addedCount = 0
for (const audioInfo of segmentsFlat) {
try {
await api('add_audios', {
draft_url: draftUrl,
audio_infos: JSON.stringify([audioInfo]),
})
addedCount++
} catch (err) {
console.error(` 音频添加失败: ${err.message.slice(0, 80)}`)
}
}
const ossCount = segmentsFlat.filter(a => a.audio_url.startsWith('http')).length
console.log(` 已添加 ${addedCount}/${segmentsFlat.length} 段 TTS 配音 (${ossCount} 段 OSS)`)
}
// ============================================================================
@@ -402,7 +615,24 @@ async function addSubtitles(draftUrl, items, timeline, style = {}, split = false
const tl = timeline[i]
if (split) {
if (split && item.segments && item.segments.length > 0) {
// 精确字幕模式:使用 segments 实测时长,逐段添加字幕
for (const seg of item.segments) {
if (seg.error || !seg.text) continue
const segStartUs = tl.start + Math.round(seg.startOffset * US)
const segDurUs = Math.round(seg.duration * US)
const cap = {
start: segStartUs,
end: segStartUs + segDurUs,
text: seg.text,
}
applyAnimationProps(cap, animStyle)
captions.push(cap)
}
} else if (split) {
// 降级:按字符比例分配(无 segments 时)
const sentences = splitTextIntoSentences(text)
if (sentences.length === 0) continue
@@ -447,9 +677,7 @@ async function addSubtitles(draftUrl, items, timeline, style = {}, split = false
return
}
await api('add_captions', {
draft_url: draftUrl,
captions: JSON.stringify(captions),
const commonStyle = {
font: style.font || null,
font_size: style.fontSize || 15,
text_color: style.color || '#ffffff',
@@ -472,9 +700,23 @@ async function addSubtitles(draftUrl, items, timeline, style = {}, split = false
transform_x: 0,
transform_y: style.transformY || 0,
style_text: 0,
})
}
console.log(` 已添加 ${captions.length} 条字幕${split ? ' (分句模式)' : ''} (字体: ${style.font || '默认'}, 动画: ${animStyle.inAnimation || '无'}${animStyle.outAnimation || '无'})`)
// 逐条添加字幕CapCut API 批量添加不稳定)
let addedCount = 0
for (const cap of captions) {
try {
await api('add_captions', {
draft_url: draftUrl,
captions: JSON.stringify([cap]),
...commonStyle,
})
addedCount++
} catch (err) {
console.error(` 字幕添加失败: ${err.message.slice(0, 80)}`)
}
}
console.log(` 已添加 ${addedCount}/${captions.length} 条字幕${split ? ' (分句模式)' : ''} (字体: ${style.font || '默认'}, 动画: ${animStyle.inAnimation || '无'}${animStyle.outAnimation || '无'})`)
}
// ============================================================================
@@ -583,6 +825,8 @@ module.exports = {
addBGM,
addSubtitles,
addKeywordOverlays,
addSlots,
addSlotsLocally,
addEffects,
addFilter,
}

View File

@@ -72,6 +72,28 @@ function initManifest(options) {
console.log(`${refsWithoutUrl.length} 个参考图缺少 OSS URLimages 阶段会自动上传`)
}
// 从 videoModel 推算固定时长(秒)
const videoModelFixedDurations = {
'kling': 6,
'kling-v2-5-turbo': 6,
'veo3-fast': 8,
'veo3-fast-frames': 8,
'grok-video-3': 6,
}
const estimatedVideoDuration = videoModelFixedDurations[options.videoModel || accountConfig.videoModel] || 6
// 校验时长约束
for (let i = 0; i < rawItems.length; i++) {
const item = rawItems[i]
const dur = Number(item.duration) || 5
if (dur > estimatedVideoDuration) {
console.error(`错误: items[${i}] 的 TTS 估算 duration=${dur}s > videoModel 固定时长 ${estimatedVideoDuration}s`)
console.error(` 必须先拆分 shot 再执行 init`)
console.error(` script: "${item.script}"`)
process.exit(1)
}
}
// 构建 items
const items = rawItems.map((raw, i) => {
const slug = slugify(raw.shotDesc || raw.script || `scene_${i + 1}`)
@@ -81,7 +103,8 @@ function initManifest(options) {
file: `images/scene_${String(i + 1).padStart(2, '0')}_${slug}.jpeg`,
shotDesc: raw.shotDesc || '',
script: raw.script || '',
duration: raw.duration || 5,
duration: Number(raw.duration) || 5,
estimatedVideoDuration,
imagePrompt: raw.imagePrompt,
confirmed: false,
}
@@ -102,8 +125,10 @@ function initManifest(options) {
references,
...(accountConfig.ttsVoice ? { ttsVoice: accountConfig.ttsVoice } : {}),
...(accountConfig.ttsInstruction ? { ttsInstruction: accountConfig.ttsInstruction } : {}),
...(accountConfig.ttsRate ? { ttsRate: accountConfig.ttsRate } : {}),
// 铁律ttsRate 写死 1.15x,不允许配置覆盖(除非显式传入)
ttsRate: options.ttsRate || 1.15,
items,
estimatedVideoDuration, // 顶层冗余,便于 assemble 直接读取
}
// 创建输出目录(自增序号)

View File

@@ -1,13 +1,100 @@
/**
* Phase: tts — 语音合成(整段合成)
* Phase: tts — 语音合成(先分段,后合成)
*
* 每个 item 的 script 整段合成一个音频文件,保留自然语调
* item.audio 指向完整音频item.audioDuration 为总时长。
* 字幕切分由组装阶段按字符比例分配,不在 TTS 阶段处理。
* 核心变化:音频分段优先于生图
*
* 1. 在生成图片之前,先将文案按语义断点切分为多个音频片段
* 2. 每个片段时长 < videoModel 固定时长Kling=6s
* 3. 逐段合成,记录实测时长,写入 manifest.segments[]
* 4. manifest.items[n].segments = [{text, audio, duration, startOffset}, ...]
* 5. manifest.items[n].audioDuration = 片段总和(供 assemble 计算 ratio
*
* 流程顺序变为tts → images → upload → videos → assemble
*/
const path = require('path')
const { saveManifest, ensureDir, log, getManifestDir } = require('./pipeline-utils')
const { saveManifest, ensureDir, log, getManifestDir, splitTextIntoSentences } = require('./pipeline-utils')
/**
 * Split narration text into audio segments at semantic break points.
 *
 * Each segment's estimated duration must stay below the video model's
 * fixed clip length (e.g. Kling = 6s) so every TTS segment can be covered
 * by a single generated shot.
 *
 * @param {string} text - full narration text
 * @param {number} videoDur - video model fixed duration in seconds (e.g. 6)
 * @param {number} [charsPerSec=5] - estimated speech rate, characters/second
 * @returns {Array<{text: string, estimatedDuration: number}>}
 */
function splitIntoAudioSegments(text, videoDur, charsPerSec = 5) {
  // Prefer natural sentence breaks (delegated to pipeline-utils).
  const sentences = splitTextIntoSentences(text)

  if (sentences.length <= 1) {
    const chars = text.length
    const estimatedTotal = chars / charsPerSec
    if (estimatedTotal <= videoDur) {
      // The whole text fits into a single clip.
      return [{ text, estimatedDuration: estimatedTotal }]
    }
    // Too long for one clip and no sentence break: cut at the first Chinese
    // comma at/after the midpoint. (The original literal was garbled to an
    // empty string, which made indexOf always return `mid`; ',' restores
    // the documented "cut at the middle comma" intent — TODO confirm.)
    const mid = Math.floor(chars / 2)
    const breakIdx = text.indexOf(',', mid)
    if (breakIdx > 0 && breakIdx < chars - 1) {
      return [
        { text: text.slice(0, breakIdx + 1), estimatedDuration: (breakIdx + 1) / charsPerSec },
        { text: text.slice(breakIdx + 1), estimatedDuration: (chars - breakIdx - 1) / charsPerSec },
      ]
    }
    // Last resort: hard split at the character midpoint.
    const half = Math.floor(chars / 2)
    return [
      { text: text.slice(0, half), estimatedDuration: half / charsPerSec },
      { text: text.slice(half), estimatedDuration: (chars - half) / charsPerSec },
    ]
  }

  // Multiple sentences: greedily pack consecutive sentences into segments
  // while the estimated duration stays within videoDur.
  // NOTE(review): '。' is re-appended to each sentence, which assumes
  // splitTextIntoSentences strips terminal punctuation — confirm.
  const result = []
  let currentText = ''
  let currentEstDur = 0
  for (const sentence of sentences) {
    const sentenceEstDur = sentence.length / charsPerSec
    if (currentEstDur + sentenceEstDur <= videoDur) {
      // Sentence fits in the current segment; keep accumulating.
      currentText += sentence + '。'
      currentEstDur += sentenceEstDur
      continue
    }
    // Close the current segment before starting a new one.
    if (currentText) {
      result.push({ text: currentText.trim(), estimatedDuration: currentEstDur })
    }
    if (sentenceEstDur > videoDur) {
      // A single sentence is itself too long for one clip: hard-split it in
      // half. (Fixed: the original called result.pop() here, which silently
      // dropped the previously saved segment's text.)
      const halfLen = Math.floor(sentence.length / 2)
      result.push({ text: sentence.slice(0, halfLen), estimatedDuration: halfLen / charsPerSec })
      currentText = sentence.slice(halfLen) + '。'
      currentEstDur = (sentence.length - halfLen) / charsPerSec
    } else {
      currentText = sentence + '。'
      currentEstDur = sentenceEstDur
    }
  }
  if (currentText) {
    result.push({ text: currentText.trim(), estimatedDuration: currentEstDur })
  }
  return result
}
async function phaseTts(manifest, manifestPath, options = {}) {
const dir = getManifestDir(manifestPath)
@@ -16,38 +103,89 @@ async function phaseTts(manifest, manifestPath, options = {}) {
const { synthesize } = require('../qwen-tts')
const items = manifest.items.filter(it =>
it.status === 'done' && (it.script || it.text) && !it.audio
)
if (items.length === 0) { log('tts', '无待处理 item跳过'); return }
const videoDur = manifest.estimatedVideoDuration || 6
const ttsRate = manifest.ttsRate || 1.15
log('tts', `${items.length}`)
const items = manifest.items.filter(it =>
(it.script || it.text) && !it.audio
)
if (items.length === 0) { log('tts', '无待处理 item已合成跳过'); return }
log('tts', `${items.length} 段, 视频固定时长=${videoDur}s, TTS语速=${ttsRate}x`)
for (let i = 0; i < items.length; i++) {
const item = items[i]
const idx = i + 1
const fullText = item.script || item.text
const fullText = (item.script || item.text).trim()
try {
const { filePath, duration } = await synthesize(fullText, {
outputDir: audioDir,
id: String(item.id || idx),
voice: manifest.ttsVoice || undefined,
instruction: manifest.ttsInstruction || undefined,
rate: manifest.ttsRate || undefined,
})
const totalDuration = Math.round(duration * 1000) / 1000
item.audio = path.relative(dir, filePath).replace(/\\/g, '/')
item.audioDuration = totalDuration
log('tts', `[${idx}/${items.length}] ${totalDuration.toFixed(1)}s: ${fullText.substring(0, 30)}...`)
} catch (err) {
item.status = 'failed'
item.error = `TTS失败: ${err.message}`
log('tts', `[${idx}/${items.length}] 失败: ${err.message}`)
// Step 1: 计算音频分段
const rawSegments = splitIntoAudioSegments(fullText, videoDur)
log('tts', `[${idx}/${items.length}] 原始分段: ${rawSegments.length}`)
for (const seg of rawSegments) {
log('tts', ` 分段估算: ${seg.estimatedDuration.toFixed(2)}s / ${seg.text.slice(0, 20)}...`)
}
// Step 2: 逐段合成
const segments = []
let globalOffset = 0
for (let j = 0; j < rawSegments.length; j++) {
const segInput = rawSegments[j]
const segId = `${item.id}_${j + 1}`
try {
const { filePath, duration: realDuration } = await synthesize(segInput.text, {
outputDir: audioDir,
id: segId,
voice: manifest.ttsVoice || undefined,
instruction: manifest.ttsInstruction || undefined,
rate: ttsRate,
})
const segment = {
id: segId,
text: segInput.text,
audio: path.relative(dir, filePath).replace(/\\/g, '/'),
estimatedDuration: Math.round(segInput.estimatedDuration * 1000) / 1000,
duration: Math.round(realDuration * 1000) / 1000,
startOffset: Math.round(globalOffset * 1000) / 1000,
}
segments.push(segment)
globalOffset += realDuration
log('tts', `[${idx}/${items.length}] 段${j + 1}: 估算${segInput.estimatedDuration.toFixed(2)}s → 实测${realDuration.toFixed(2)}s | ${segInput.text.slice(0, 15)}...`)
} catch (err) {
log('tts', `[${idx}/${items.length}] 段${j + 1} 合成失败: ${err.message}`)
segments.push({
id: segId,
text: segInput.text,
audio: '',
estimatedDuration: segInput.estimatedDuration,
duration: 0,
startOffset: globalOffset,
error: err.message,
})
globalOffset += segInput.estimatedDuration
}
}
// Step 3: 汇总到 item
const totalAudioDuration = Math.round(globalOffset * 1000) / 1000
item.segments = segments
item.audio = segments[0]?.audio || ''
item.audioDuration = totalAudioDuration
item.segmentCount = segments.length
// Step 4: 时长合规诊断
const ratio = videoDur / totalAudioDuration
if (ratio < 0.9) {
item._timelineWarning = `⚠ audioDur(${totalAudioDuration.toFixed(1)}s) > videoDur(${videoDur}s)ratio=${ratio.toFixed(2)}assemble 将截断`
}
log('tts', `[${idx}/${items.length}] 完成: ${segments.length}段, 总音频${totalAudioDuration.toFixed(1)}s, ratio=${ratio.toFixed(2)}`)
saveManifest(manifestPath, manifest)
}
}
module.exports = { phaseTts }
module.exports = { phaseTts, splitIntoAudioSegments }