/**
 * Phase: tts — speech synthesis (generated sentence by sentence).
 *
 * Splits each item's script into short sentences at punctuation and generates
 * one TTS audio file per sentence.
 * Results are always written to item.segments[]; a single-sentence script
 * yields an array with exactly one element.
 * item.audio points at the first segment and item.audioDuration is the
 * accumulated duration of all segments.
 */
const path = require('path')
const { saveManifest, ensureDir, log, getManifestDir, splitTextIntoSentences } = require('./pipeline-utils')

/**
 * Round a number to three decimal places.
 *
 * @param {number} value
 * @returns {number}
 */
function roundTo3(value) {
  return Math.round(value * 1000) / 1000
}

/**
 * Synthesize every sentence of one text and collect the resulting segments.
 *
 * @param {string} fullText - Text to split and synthesize.
 * @param {string|number} idPrefix - Prefix for segment ids (`<prefix>_<n>`, n starting at 1).
 * @param {object} ctx - Shared per-run values: synthesize, dir, audioDir, voice, instruction, rate.
 * @returns {Promise<{segments: Array<{text: string, audio: string, duration: number}>, totalDuration: number}>}
 * @throws {Error} When the text yields no sentences, or when synthesize rejects.
 */
async function synthesizeSentences(fullText, idPrefix, ctx) {
  const sentences = splitTextIntoSentences(fullText)
  // Fail with a meaningful message instead of an opaque TypeError on segments[0] later.
  if (!sentences || sentences.length === 0) {
    throw new Error('分句结果为空')
  }

  const segments = []
  let totalDuration = 0
  // Sentences are synthesized one at a time, in order.
  for (let j = 0; j < sentences.length; j++) {
    const sentence = sentences[j]
    const { filePath, duration } = await ctx.synthesize(sentence, {
      outputDir: ctx.audioDir,
      id: `${idPrefix}_${j + 1}`,
      voice: ctx.voice,
      instruction: ctx.instruction,
      rate: ctx.rate,
    })
    segments.push({
      text: sentence,
      // Stored relative to the manifest directory, always with forward slashes.
      audio: path.relative(ctx.dir, filePath).replace(/\\/g, '/'),
      duration: roundTo3(duration),
    })
    totalDuration += duration
  }
  return { segments, totalDuration }
}

/**
 * Run the TTS phase over a manifest.
 *
 * Processes every item whose status is 'done', that has a script or text, and
 * that has no audio yet. On success the item receives segments, audio and
 * audioDuration; on failure its status becomes 'failed' and error holds the
 * message. The manifest is saved after each item, whether it succeeded or not.
 *
 * @param {object} manifest - Manifest object; reads items, ttsVoice, ttsInstruction, ttsRate.
 * @param {string} manifestPath - Path of the manifest file; audio goes to an `audio` folder under its directory.
 * @param {object} [options={}] - Currently unused; kept for interface compatibility.
 * @returns {Promise<void>}
 */
async function phaseTts(manifest, manifestPath, options = {}) {
  const dir = getManifestDir(manifestPath)
  const audioDir = path.join(dir, 'audio')
  ensureDir(audioDir)

  const { synthesize } = require('../qwen-tts')

  // Remember each pending item's 1-based position in the full manifest so the
  // fallback id below stays stable across re-runs.
  const pending = []
  manifest.items.forEach((item, index) => {
    if (item.status === 'done' && (item.script || item.text) && !item.audio) {
      pending.push({ item, position: index + 1 })
    }
  })

  if (pending.length === 0) {
    log('tts', '无待处理 item,跳过')
    return
  }
  log('tts', `共 ${pending.length} 段`)

  // Per-manifest TTS settings do not change between sentences; resolve them once.
  const ctx = {
    synthesize,
    dir,
    audioDir,
    voice: manifest.ttsVoice || undefined,
    instruction: manifest.ttsInstruction || undefined,
    rate: manifest.ttsRate || undefined,
  }

  for (let i = 0; i < pending.length; i++) {
    const { item, position } = pending[i]
    const idx = i + 1
    const fullText = item.script || item.text
    try {
      // For items without an id, fall back to the position in manifest.items
      // rather than the position in the pending list: the pending list shrinks
      // on a resumed run, so its indices would repeat ids already used for
      // other items. NOTE(review): presumably synthesize derives the output
      // file name from `id` — confirm against ../qwen-tts.
      const { segments, totalDuration } = await synthesizeSentences(fullText, item.id || position, ctx)

      // Always use the segments array (one sentence = 1 element, several = N elements).
      item.segments = segments
      item.audio = segments[0].audio
      item.audioDuration = roundTo3(totalDuration)
      log('tts', `[${idx}/${pending.length}] ${totalDuration.toFixed(1)}s (${segments.length}句): ${fullText.substring(0, 30)}...`)
    } catch (err) {
      item.status = 'failed'
      item.error = `TTS失败: ${err.message}`
      log('tts', `[${idx}/${pending.length}] 失败: ${err.message}`)
    }
    // Persist after every item so progress survives an interrupted run.
    saveManifest(manifestPath, manifest)
  }
}

module.exports = { phaseTts }