Files
video-create/.claude/skills/video-from-script/scripts/lib/phase-tts.js
lc 8787d369d3 feat(video-from-script): 批量生产元数据增强 — 选题/转发文案/草稿命名/导出/草稿箱改名
- batch-pipeline.js: 新增 mark 元数据字段(topicA/B, draftName, forwardCopy, hashtags)
- batch-pipeline.js: 新增 export 命令导出 CSV/XLSX 最终表格
- batch-pipeline.js: 新增 rename-drafts 命令批量重命名剪映草稿(Mac 直接 mv 文件夹)
- batch-pipeline.js: 完善 displayTitle 向后兼容旧 topic 字段
- lib/phase-tts: 增强 TTS 生成稳定性
- lib/phase-videos: 视频生成优化
- lib/video-poll-utils: 提取轮询重试共享工具
- CLAUDE.md: 补充批量生产选题/转发文案/草稿命名/导出/草稿箱改名文档
- 执黑先行 account.json: 配置更新

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-14 23:16:12 +08:00

196 lines
6.7 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Phase: tts — 语音合成(先分段,后合成)
*
* 核心变化:音频分段优先于生图。
*
* 1. 在生成图片之前,先将文案按语义断点切分为多个音频片段
* 2. 每个片段时长 < videoModel 固定时长Kling=6s
* 3. 逐段合成,记录实测时长,写入 manifest.segments[]
* 4. manifest.items[n].segments = [{text, audio, duration, startOffset}, ...]
* 5. manifest.items[n].audioDuration = 片段总和(供 assemble 计算 ratio
*
* 流程顺序变为tts → images → upload → videos → assemble
*/
const path = require('path')
const { saveManifest, ensureDir, log, getManifestDir } = require('./pipeline-utils')
/**
* 在语义断点处将文案切分为音频片段
* 每段时长(估算)必须 < videoDuration且尽量接近最佳 ratio 接近1.0
*
* @param {string} text - 完整文案
* @param {number} videoDur - 视频模型固定时长(秒),如 6
* @param {number} charsPerSec - 语速(字/秒),固定 5
* @returns {Array<{text, estimatedDuration}>}
*/
function splitIntoAudioSegments(text, videoDur, charsPerSec = 5) {
const estimatedTotal = text.length / charsPerSec
if (estimatedTotal <= videoDur) {
return [{ text, estimatedDuration: estimatedTotal }]
}
// 在原文标点处切分,保留原始标点(不剥离、不重加)
const breakPattern = /[。!;,]/
const rawParts = []
let lastIdx = 0
for (let i = 0; i < text.length; i++) {
if (breakPattern.test(text[i])) {
rawParts.push(text.slice(lastIdx, i + 1))
lastIdx = i + 1
}
}
if (lastIdx < text.length) {
rawParts.push(text.slice(lastIdx))
}
// 无标点断点,强制对半切
if (rawParts.length <= 1) {
const half = Math.floor(text.length / 2)
return [
{ text: text.slice(0, half), estimatedDuration: half / charsPerSec },
{ text: text.slice(half), estimatedDuration: (text.length - half) / charsPerSec },
]
}
// 合并短片段,确保每段 ≤ videoDur
const result = []
let curText = ''
let curDur = 0
for (const part of rawParts) {
const partDur = part.length / charsPerSec
if (curDur + partDur <= videoDur) {
curText += part
curDur += partDur
} else {
if (curText) result.push({ text: curText, estimatedDuration: curDur })
// 单段超长,强制对半切
if (partDur > videoDur) {
const half = Math.floor(part.length / 2)
result.push({ text: part.slice(0, half), estimatedDuration: half / charsPerSec })
curText = part.slice(half)
curDur = (part.length - half) / charsPerSec
} else {
curText = part
curDur = partDur
}
}
}
if (curText) result.push({ text: curText, estimatedDuration: curDur })
return result
}
async function phaseTts(manifest, manifestPath, options = {}) {
const dir = getManifestDir(manifestPath)
const audioDir = path.join(dir, 'audio')
ensureDir(audioDir)
const { synthesize } = require('../qwen-tts')
const videoDur = manifest.estimatedVideoDuration || 6
const ttsRate = manifest.ttsRate || 1.15
const items = manifest.items.filter(it =>
(it.script || it.text) && !it.audio
)
if (items.length === 0) { log('tts', '无待处理 item已合成跳过'); return }
log('tts', `${items.length} 段, 视频固定时长=${videoDur}s, TTS语速=${ttsRate}x`)
for (let i = 0; i < items.length; i++) {
const item = items[i]
const idx = i + 1
const fullText = (item.script || item.text).trim()
// Step 1: 计算音频分段
const rawSegments = splitIntoAudioSegments(fullText, videoDur)
log('tts', `[${idx}/${items.length}] 原始分段: ${rawSegments.length}`)
for (const seg of rawSegments) {
log('tts', ` 分段估算: ${seg.estimatedDuration.toFixed(2)}s / ${seg.text.slice(0, 20)}...`)
}
// Step 2: 逐段合成
const segments = []
let globalOffset = 0
for (let j = 0; j < rawSegments.length; j++) {
const segInput = rawSegments[j]
const segId = `${item.id}_${j + 1}`
// 带重试的合成最多3次指数退避
let synthResult = null
let lastErr = null
for (let retry = 0; retry < 3; retry++) {
try {
synthResult = await synthesize(segInput.text, {
outputDir: audioDir,
id: segId,
voice: manifest.ttsVoice || undefined,
model: manifest.ttsModel || undefined,
instruction: manifest.ttsInstruction || undefined,
rate: ttsRate,
})
break
} catch (e) {
lastErr = e
if (retry < 2) {
const delay = Math.pow(2, retry) * 3000
log('tts', `[${idx}/${items.length}] 段${j + 1} 重试 ${retry + 1}/3, ${delay / 1000}s 后重试...`)
await new Promise(r => setTimeout(r, delay))
}
}
}
if (synthResult) {
const { filePath, duration: realDuration } = synthResult
const segment = {
id: segId,
text: segInput.text,
audio: path.relative(dir, filePath).replace(/\\/g, '/'),
estimatedDuration: Math.round(segInput.estimatedDuration * 1000) / 1000,
duration: Math.round(realDuration * 1000) / 1000,
startOffset: Math.round(globalOffset * 1000) / 1000,
}
segments.push(segment)
globalOffset += realDuration
log('tts', `[${idx}/${items.length}] 段${j + 1}: 估算${segInput.estimatedDuration.toFixed(2)}s → 实测${realDuration.toFixed(2)}s | ${segInput.text.slice(0, 15)}...`)
} else {
log('tts', `[${idx}/${items.length}] 段${j + 1} 合成失败(重试3次后): ${lastErr?.message || '未知错误'}`)
segments.push({
id: segId,
text: segInput.text,
audio: '',
estimatedDuration: segInput.estimatedDuration,
duration: 0,
startOffset: globalOffset,
error: lastErr?.message || '未知错误',
})
globalOffset += segInput.estimatedDuration
}
}
// Step 3: 汇总到 item
const totalAudioDuration = Math.round(globalOffset * 1000) / 1000
item.segments = segments
item.audio = segments[0]?.audio || ''
item.audioDuration = totalAudioDuration
item.segmentCount = segments.length
// Step 4: 时长合规诊断
const ratio = videoDur / totalAudioDuration
if (ratio < 0.9) {
item._timelineWarning = `⚠ audioDur(${totalAudioDuration.toFixed(1)}s) > videoDur(${videoDur}s)ratio=${ratio.toFixed(2)}assemble 将截断`
}
log('tts', `[${idx}/${items.length}] 完成: ${segments.length}段, 总音频${totalAudioDuration.toFixed(1)}s, ratio=${ratio.toFixed(2)}`)
saveManifest(manifestPath, manifest)
}
}
module.exports = { phaseTts, splitIntoAudioSegments }