2026-04-30 21:18:31 +08:00
|
|
|
|
/**
|
2026-05-01 14:41:28 +08:00
|
|
|
|
* Phase: tts — 语音合成(逐句分句生成)
|
2026-04-30 21:18:31 +08:00
|
|
|
|
*
|
2026-05-01 14:41:28 +08:00
|
|
|
|
* 将每个 item 的 script 按标点切分为短句,每句单独生成 TTS 音频。
|
2026-05-02 00:14:40 +08:00
|
|
|
|
* 统一写入 item.segments[],单句时数组仅 1 个元素。
|
|
|
|
|
|
* item.audio 指向第一段,item.audioDuration 为累计时长。
|
2026-04-30 21:18:31 +08:00
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
const path = require('path')
|
2026-05-01 14:41:28 +08:00
|
|
|
|
const { saveManifest, ensureDir, log, getManifestDir, splitTextIntoSentences } = require('./pipeline-utils')
|
2026-04-30 21:18:31 +08:00
|
|
|
|
|
2026-05-01 00:44:18 +08:00
|
|
|
|
async function phaseTts(manifest, manifestPath, options = {}) {
|
2026-04-30 21:18:31 +08:00
|
|
|
|
const dir = getManifestDir(manifestPath)
|
|
|
|
|
|
const audioDir = path.join(dir, 'audio')
|
|
|
|
|
|
ensureDir(audioDir)
|
|
|
|
|
|
|
|
|
|
|
|
const { synthesize } = require('../qwen-tts')
|
|
|
|
|
|
|
|
|
|
|
|
const items = manifest.items.filter(it =>
|
2026-05-01 01:52:02 +08:00
|
|
|
|
it.status === 'done' && (it.script || it.text) && !it.audio
|
2026-04-30 21:18:31 +08:00
|
|
|
|
)
|
|
|
|
|
|
if (items.length === 0) { log('tts', '无待处理 item,跳过'); return }
|
|
|
|
|
|
|
|
|
|
|
|
log('tts', `共 ${items.length} 段`)
|
|
|
|
|
|
|
|
|
|
|
|
for (let i = 0; i < items.length; i++) {
|
|
|
|
|
|
const item = items[i]
|
|
|
|
|
|
const idx = i + 1
|
2026-05-01 14:41:28 +08:00
|
|
|
|
const fullText = item.script || item.text
|
|
|
|
|
|
|
2026-04-30 21:18:31 +08:00
|
|
|
|
try {
|
2026-05-01 14:41:28 +08:00
|
|
|
|
const sentences = splitTextIntoSentences(fullText)
|
2026-05-02 00:14:40 +08:00
|
|
|
|
const segments = []
|
|
|
|
|
|
let totalDuration = 0
|
2026-05-01 14:41:28 +08:00
|
|
|
|
|
2026-05-02 00:14:40 +08:00
|
|
|
|
for (let j = 0; j < sentences.length; j++) {
|
|
|
|
|
|
const sentence = sentences[j]
|
|
|
|
|
|
const segId = `${item.id || idx}_${j + 1}`
|
|
|
|
|
|
const { filePath, duration } = await synthesize(sentence, {
|
2026-05-01 14:41:28 +08:00
|
|
|
|
outputDir: audioDir,
|
2026-05-02 00:14:40 +08:00
|
|
|
|
id: segId,
|
2026-05-01 14:41:28 +08:00
|
|
|
|
voice: manifest.ttsVoice || undefined,
|
|
|
|
|
|
instruction: manifest.ttsInstruction || undefined,
|
|
|
|
|
|
rate: manifest.ttsRate || undefined,
|
|
|
|
|
|
})
|
2026-05-02 00:14:40 +08:00
|
|
|
|
segments.push({
|
|
|
|
|
|
text: sentence,
|
|
|
|
|
|
audio: path.relative(dir, filePath).replace(/\\/g, '/'),
|
|
|
|
|
|
duration: Math.round(duration * 1000) / 1000,
|
|
|
|
|
|
})
|
|
|
|
|
|
totalDuration += duration
|
2026-05-01 14:41:28 +08:00
|
|
|
|
}
|
2026-05-02 00:14:40 +08:00
|
|
|
|
|
|
|
|
|
|
// 统一使用 segments 数组(单句 = 1 元素,多句 = N 元素)
|
|
|
|
|
|
item.segments = segments
|
|
|
|
|
|
item.audio = segments[0].audio
|
|
|
|
|
|
item.audioDuration = Math.round(totalDuration * 1000) / 1000
|
|
|
|
|
|
log('tts', `[${idx}/${items.length}] ${totalDuration.toFixed(1)}s (${segments.length}句): ${fullText.substring(0, 30)}...`)
|
2026-04-30 21:18:31 +08:00
|
|
|
|
} catch (err) {
|
|
|
|
|
|
item.status = 'failed'
|
|
|
|
|
|
item.error = `TTS失败: ${err.message}`
|
|
|
|
|
|
log('tts', `[${idx}/${items.length}] 失败: ${err.message}`)
|
|
|
|
|
|
}
|
|
|
|
|
|
saveManifest(manifestPath, manifest)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Module exports: phaseTts is the only name exposed by this file.
module.exports = { phaseTts }
|