#!/usr/bin/env node
/**
 * Alibaba Cloud Qwen-TTS batch speech-synthesis script.
 *
 * Input JSON file format:
 * {
 *   "segments": [
 *     {"id": 1, "text": "first segment text"},
 *     {"id": 2, "text": "second segment text"}
 *   ],
 *   "voice": "Cherry",        // optional, overrides config
 *   "output_dir": "./audio"   // optional, defaults to ./audio
 * }
 *
 * Output JSON (stdout):
 * {
 *   "segments": [
 *     {"id": 1, "text": "...", "audio": "<resolved path>/seg_001.wav", "duration": 3.456},
 *     ...
 *   ]
 * }
 *
 * Can also be used as a module:
 *   const { synthesize } = require('./qwen-tts')
 *   const { filePath, duration } = await synthesize('some text', { voice: 'Cherry' })
 */

const axios = require('axios')
const fs = require('fs')
const path = require('path')

const CONFIG_PATH = path.join(__dirname, '..', '..', 'config.json')

// Sentence-final punctuation: CJK full stop, full-width "!" and "?", ellipsis,
// and their ASCII counterparts. Written with escapes so the full-width
// characters survive editors/tools that normalise punctuation.
const SENTENCE_END_RE = /[\u3002\uFF01\uFF1F\u2026.!?]$/

// Default pause appended after every segment (breathing gap between sentences).
const DEFAULT_SILENCE_SEC = 0.3

// Default delay between two consecutive API calls in a batch (rate limiting).
const DEFAULT_BATCH_INTERVAL_MS = 500

/**
 * Read and parse the project-level config.json.
 * @returns {object} parsed configuration
 * @throws {Error} if the file does not exist or is not valid JSON
 */
function loadConfig() {
  if (!fs.existsSync(CONFIG_PATH)) throw new Error(`config.json 不存在: ${CONFIG_PATH}`)
  return JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf-8'))
}

/**
 * Walk the RIFF chunk list of a WAV buffer and locate its `fmt ` and `data` chunks.
 *
 * A declared data size of 0, or one larger than the bytes actually present,
 * is treated as "data runs to the end of the buffer" so that headers written
 * with a placeholder size are still usable.
 *
 * @param {Buffer} wav - raw WAV file contents
 * @returns {{format: object, sizeFieldOffset: number, dataStart: number, dataSize: number} | null}
 *   chunk locations, or null when the buffer is not a RIFF/WAVE file with a
 *   `fmt ` chunk preceding its `data` chunk
 */
function locateWavData(wav) {
  const isRiffWave = wav.length >= 12 &&
    wav.toString('latin1', 0, 4) === 'RIFF' &&
    wav.toString('latin1', 8, 12) === 'WAVE'
  if (!isRiffWave) return null

  let format = null
  let offset = 12
  while (offset + 8 <= wav.length) {
    const chunkId = wav.toString('latin1', offset, offset + 4)
    const declaredSize = wav.readUInt32LE(offset + 4)
    const bodyStart = offset + 8

    if (chunkId === 'fmt ' && bodyStart + 16 <= wav.length) {
      format = {
        audioFormat: wav.readUInt16LE(bodyStart),
        sampleRate: wav.readUInt32LE(bodyStart + 4),
        byteRate: wav.readUInt32LE(bodyStart + 8),
        blockAlign: wav.readUInt16LE(bodyStart + 12),
        bitsPerSample: wav.readUInt16LE(bodyStart + 14),
      }
    } else if (chunkId === 'data') {
      if (!format) return null
      const available = wav.length - bodyStart
      const dataSize = declaredSize === 0 || declaredSize > available ? available : declaredSize
      return { format, sizeFieldOffset: offset + 4, dataStart: bodyStart, dataSize }
    }

    // RIFF chunks are word-aligned: an odd-sized body is followed by one pad byte.
    offset = bodyStart + declaredSize + (declaredSize % 2)
  }
  return null
}

/**
 * Append trailing silence to the audio data of a WAV buffer and fix up the
 * RIFF and data chunk sizes. The input buffer is not modified.
 *
 * The silence is inserted at the end of the `data` chunk (before any trailing
 * chunks) and is always a whole number of sample frames.
 *
 * @param {Buffer} wav - raw WAV file contents
 * @param {number} silenceSec - seconds of silence to append (>= 0)
 * @returns {{buffer: Buffer, duration: number}} patched file and the duration
 *   in seconds of its audio data (including the appended silence)
 * @throws {Error} if the buffer is not a usable WAV file
 */
function appendSilence(wav, silenceSec) {
  const located = locateWavData(wav)
  if (!located || !located.format.sampleRate || !located.format.blockAlign) {
    throw new Error('TTS 返回的音频不是有效的 WAV 文件')
  }
  const { format, sizeFieldOffset, dataStart, dataSize } = located
  const byteRate = format.byteRate || format.sampleRate * format.blockAlign

  // Pad by whole frames so multi-byte samples stay aligned.
  const silenceBytes = Math.round(format.sampleRate * silenceSec) * format.blockAlign
  // 8-bit PCM is unsigned, so its zero level is 0x80 rather than 0x00.
  const fillByte = format.audioFormat === 1 && format.bitsPerSample === 8 ? 0x80 : 0

  const dataEnd = dataStart + dataSize
  const trailing = wav.subarray(Math.min(dataEnd + (dataSize % 2), wav.length))
  const newDataSize = dataSize + silenceBytes

  const patched = Buffer.concat([
    wav.subarray(0, dataEnd),
    Buffer.alloc(silenceBytes, fillByte),
    Buffer.alloc(newDataSize % 2), // RIFF pad byte when the data size is odd
    trailing,
  ])
  patched.writeUInt32LE(patched.length - 8, 4)
  patched.writeUInt32LE(newDataSize, sizeFieldOffset)

  return { buffer: patched, duration: newDataSize / byteRate }
}

/**
 * Synthesize a single text segment (non-streaming) and save it as a WAV file.
 *
 * Settings are resolved in the order: `options` -> config.json -> built-in default.
 *
 * @param {string} text - text to synthesize
 * @param {object} [options]
 * @param {string} [options.apiKey] - overrides config.ttsApiKey
 * @param {string} [options.apiBaseUrl] - overrides config.ttsApiBaseUrl
 * @param {string} [options.model] - overrides config.ttsModel (default 'qwen-tts')
 * @param {string} [options.voice] - overrides config.ttsVoice (default 'Cherry')
 * @param {string} [options.language] - overrides config.ttsLanguage (default 'Chinese')
 * @param {string} [options.outputDir] - output directory (default './audio'), created if missing
 * @param {number|string} [options.id] - segment id used in the file name `seg_<id>.wav`,
 *   zero-padded to 3 characters (default 1)
 * @param {number} [options.silencePadding] - seconds of trailing silence (default 0.3)
 * @returns {Promise<{ filePath: string, duration: number }>} resolved path of the
 *   written file and its audio duration in seconds
 * @throws {TypeError} if `text` is not a string
 * @throws {RangeError} if `options.silencePadding` is negative or not finite
 * @throws {Error} on missing config/API key, API failure, download failure or invalid audio
 */
async function synthesize(text, options = {}) {
  if (typeof text !== 'string') throw new TypeError('text 必须是字符串')

  const silenceSec = options.silencePadding ?? DEFAULT_SILENCE_SEC
  if (!Number.isFinite(silenceSec) || silenceSec < 0) {
    throw new RangeError('silencePadding 必须是非负数')
  }

  const config = loadConfig()
  const apiKey = options.apiKey || config.ttsApiKey
  if (!apiKey) throw new Error('ttsApiKey 未配置,请在 config.json 中设置')

  const baseUrl = (options.apiBaseUrl || config.ttsApiBaseUrl || 'https://dashscope.aliyuncs.com/api/v1').replace(/\/$/, '')
  const model = options.model || config.ttsModel || 'qwen-tts'
  const voice = options.voice || config.ttsVoice || 'Cherry'
  const language = options.language || config.ttsLanguage || 'Chinese'
  const outputDir = options.outputDir || './audio'

  fs.mkdirSync(outputDir, { recursive: true })

  // Make sure the text ends with sentence-final punctuation so the TTS engine
  // produces a natural falling intonation and a short tail pause.
  let spokenText = text.trimEnd()
  if (!SENTENCE_END_RE.test(spokenText)) spokenText += '。'

  const url = `${baseUrl}/services/aigc/multimodal-generation/generation`

  let res
  try {
    res = await axios.post(url, {
      model,
      input: {
        text: spokenText,
        voice,
        language_type: language,
      },
    }, {
      headers: {
        'Authorization': `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
      },
      timeout: 60000,
    })
  } catch (err) {
    const detail = err.response?.data
    throw new Error(
      `TTS API 错误: ${err.message}${detail ? ' ' + JSON.stringify(detail) : ''}`,
      { cause: err },
    )
  }

  const audioUrl = res.data?.output?.audio?.url
  if (!audioUrl) {
    throw new Error(`TTS API 未返回音频 URL: ${JSON.stringify(res.data)}`)
  }

  // Download the generated audio. `??` (not `||`) so that id 0 is kept and
  // does not collide with segment 1.
  const id = options.id ?? 1
  const fileName = `seg_${String(id).padStart(3, '0')}.wav`
  const filePath = path.resolve(outputDir, fileName)

  let audioRes
  try {
    audioRes = await axios.get(audioUrl, { responseType: 'arraybuffer', timeout: 30000 })
  } catch (err) {
    throw new Error(`TTS 音频下载失败: ${err.message}`, { cause: err })
  }

  // Append the inter-sentence pause and update the WAV header sizes.
  const { buffer, duration } = appendSilence(Buffer.from(audioRes.data), silenceSec)
  fs.writeFileSync(filePath, buffer)

  return { filePath, duration }
}

/**
 * Synthesize a list of segments one after another.
 *
 * @param {Array<{id: number, text: string}>} segments
 * @param {object} [options] - forwarded to `synthesize` (e.g. voice, outputDir)
 * @param {number} [options.intervalMs] - pause between two API calls in
 *   milliseconds (default 500); not forwarded to `synthesize`
 * @returns {Promise<Array<{id: number, text: string, audio: string, duration: number}>>}
 *   one entry per segment, `duration` rounded to milliseconds
 * @throws {TypeError} if `segments` is not an array
 */
async function synthesizeBatch(segments, options = {}) {
  if (!Array.isArray(segments)) throw new TypeError('segments 必须是数组')

  const { intervalMs = DEFAULT_BATCH_INTERVAL_MS, ...synthOptions } = options
  const results = []

  for (const [index, seg] of segments.entries()) {
    // Space out requests to avoid rate limiting; no need to wait before the
    // first call or after the last one.
    if (index > 0 && intervalMs > 0) {
      await new Promise(resolve => setTimeout(resolve, intervalMs))
    }

    // Progress goes to stderr so stdout stays pure JSON.
    console.error(` 合成 #${seg.id}: ${String(seg.text).substring(0, 30)}...`)

    const { filePath, duration } = await synthesize(seg.text, {
      ...synthOptions,
      id: seg.id,
    })

    results.push({
      id: seg.id,
      text: seg.text,
      audio: filePath,
      duration: Math.round(duration * 1000) / 1000,
    })
  }

  return results
}

/**
 * CLI entry point: read the input JSON named by argv[2], synthesize every
 * segment and print the result JSON to stdout.
 */
async function main() {
  const inputJson = process.argv[2]
  if (!inputJson) {
    console.error('用法: node qwen-tts.js <input.json>')
    console.error('')
    console.error('input.json 格式:')
    console.error(JSON.stringify({
      segments: [{ id: 1, text: '文案' }],
      voice: 'Cherry',
      output_dir: './audio',
    }, null, 2))
    process.exit(1)
  }

  const input = JSON.parse(fs.readFileSync(inputJson, 'utf-8'))
  if (!Array.isArray(input.segments)) {
    throw new Error('input.json 缺少 segments 数组')
  }

  const options = {
    voice: input.voice,
    outputDir: input.output_dir || './audio',
  }

  const results = await synthesizeBatch(input.segments, options)

  const output = { segments: results }
  process.stdout.write(JSON.stringify(output, null, 2) + '\n')
}

if (require.main === module) {
  main().catch(err => {
    console.error('TTS 合成失败:', err.message)
    process.exit(1)
  })
}

module.exports = { synthesize, synthesizeBatch }