176 lines
5.2 KiB
JavaScript
176 lines
5.2 KiB
JavaScript
#!/usr/bin/env node
|
|
|
|
/**
|
|
* 阿里云 Qwen-TTS 批量语音合成脚本
|
|
*
|
|
* 输入 JSON 文件格式:
|
|
* {
|
|
* "segments": [
|
|
* {"id": 1, "text": "第一段文案"},
|
|
* {"id": 2, "text": "第二段文案"}
|
|
* ],
|
|
* "voice": "Cherry", // 可选,覆盖 config
|
|
* "output_dir": "./audio" // 可选,默认 ./audio
|
|
* }
|
|
*
|
|
* 输出 JSON (stdout):
|
|
* {
|
|
* "segments": [
|
|
* {"id": 1, "text": "...", "audio": "/absolute/path/to/audio/seg_001.wav", "duration": 3.456},
|
|
* ...
|
|
* ]
|
|
* }
|
|
*
|
|
* 也可作为模块调用:
|
|
* const { synthesize } = require('./qwen-tts')
|
|
* const { filePath, duration } = await synthesize('你好世界', { voice: 'Cherry' })
|
|
*/
|
|
|
|
const axios = require('axios')
|
|
const fs = require('fs')
|
|
const path = require('path')
|
|
|
|
// Location of config.json: two directory levels above the directory containing this script.
const CONFIG_PATH = path.join(__dirname, '..', '..', 'config.json')
|
|
|
|
/**
 * Read config.json from CONFIG_PATH and return its parsed contents.
 *
 * @returns {object} The parsed configuration object.
 * @throws {Error} When the file does not exist. Read and JSON syntax errors
 *   propagate unchanged from fs.readFileSync / JSON.parse.
 */
function loadConfig() {
  const configExists = fs.existsSync(CONFIG_PATH)
  if (!configExists) {
    throw new Error(`config.json 不存在: ${CONFIG_PATH}`)
  }

  const rawConfig = fs.readFileSync(CONFIG_PATH, 'utf-8')
  return JSON.parse(rawConfig)
}
|
|
|
|
/**
 * Find the sample data inside a RIFF/WAVE buffer by walking its chunk list.
 *
 * When the buffer is not RIFF/WAVE, or no `data` chunk is found, the canonical
 * layout is assumed instead: a 44-byte header carrying 24 kHz mono 16-bit PCM.
 *
 * @param {Buffer} wavBuffer - Raw bytes of the audio file.
 * @returns {{ byteRate: number, blockAlign: number, dataSizeOffset: number, dataStart: number }}
 *   Bytes per second, bytes per sample frame, offset of the data-size field,
 *   and offset of the first sample byte.
 */
function locateWavData(wavBuffer) {
  const fallback = { byteRate: 24000 * 2, blockAlign: 2, dataSizeOffset: 40, dataStart: 44 }

  const isRiffWave =
    wavBuffer.length >= 12 &&
    wavBuffer.toString('latin1', 0, 4) === 'RIFF' &&
    wavBuffer.toString('latin1', 8, 12) === 'WAVE'
  if (!isRiffWave) return fallback

  let byteRate = 0
  let blockAlign = 0
  let offset = 12
  while (offset + 8 <= wavBuffer.length) {
    const chunkId = wavBuffer.toString('latin1', offset, offset + 4)
    const chunkSize = wavBuffer.readUInt32LE(offset + 4)
    const bodyStart = offset + 8

    if (chunkId === 'fmt ' && bodyStart + 16 <= wavBuffer.length) {
      // fmt body: format(2) channels(2) sampleRate(4) byteRate(4) blockAlign(2) bits(2)
      byteRate = wavBuffer.readUInt32LE(bodyStart + 8)
      blockAlign = wavBuffer.readUInt16LE(bodyStart + 12)
    } else if (chunkId === 'data') {
      return {
        byteRate: byteRate || fallback.byteRate,
        blockAlign: blockAlign || fallback.blockAlign,
        dataSizeOffset: offset + 4,
        dataStart: bodyStart,
      }
    }

    // RIFF chunks are word-aligned: an odd-sized body is followed by one pad byte.
    offset = bodyStart + chunkSize + (chunkSize % 2)
  }
  return fallback
}

/**
 * Append trailing silence to a WAV buffer and rewrite the RIFF / data size fields.
 *
 * The sample data is treated as running to the end of the buffer; the size
 * declared in the incoming header is ignored and overwritten.
 *
 * @param {Buffer} wavBuffer - Downloaded WAV bytes.
 * @param {number} silenceSec - Seconds of silence to append; non-finite or
 *   non-positive values append nothing.
 * @returns {{ finalBuffer: Buffer, duration: number }} Padded file bytes and the
 *   resulting audio length in seconds.
 * @throws {Error} When the buffer is too short to contain a WAV header.
 */
function appendSilence(wavBuffer, silenceSec) {
  const { byteRate, blockAlign, dataSizeOffset, dataStart } = locateWavData(wavBuffer)
  if (wavBuffer.length < dataStart) {
    throw new Error(`下载的音频不是有效的 WAV 文件 (仅 ${wavBuffer.length} 字节)`)
  }

  // Pad in whole sample frames so the appended bytes never split a sample.
  const seconds = Number(silenceSec)
  const silenceFrames = Number.isFinite(seconds) && seconds > 0
    ? Math.round((byteRate * seconds) / blockAlign)
    : 0
  const silenceBuffer = Buffer.alloc(silenceFrames * blockAlign)

  const finalBuffer = Buffer.concat([wavBuffer, silenceBuffer])
  const dataBytes = finalBuffer.length - dataStart
  finalBuffer.writeUInt32LE(finalBuffer.length - 8, 4) // RIFF chunk size
  finalBuffer.writeUInt32LE(dataBytes, dataSizeOffset) // data chunk size

  return { finalBuffer, duration: dataBytes / byteRate }
}

/**
 * Synthesize one piece of text (non-streaming) and save it as a WAV file.
 *
 * Posts the text to the TTS endpoint, downloads the audio from the URL in the
 * response, appends trailing silence, and writes `seg_<id>.wav` (id zero-padded
 * to three digits) into the output directory.
 *
 * @param {string} text - Text to synthesize. A full stop is appended when the
 *   text does not already end with sentence-ending punctuation.
 * @param {object} [options]
 * @param {string} [options.apiKey] - Overrides config.ttsApiKey.
 * @param {string} [options.apiBaseUrl] - Overrides config.ttsApiBaseUrl.
 * @param {string} [options.model] - Overrides config.ttsModel (default 'qwen-tts').
 * @param {string} [options.voice] - Overrides config.ttsVoice (default 'Cherry').
 * @param {string} [options.language] - Overrides config.ttsLanguage (default 'Chinese').
 * @param {string} [options.outputDir='./audio'] - Directory for the WAV file; created if missing.
 * @param {number|string} [options.id=1] - Segment id used in the file name.
 * @param {number} [options.silencePadding=0.3] - Seconds of silence appended after the speech.
 * @returns {Promise<{ filePath: string, duration: number }>} Absolute path of the
 *   written file and its duration in seconds (including the appended silence).
 * @throws {TypeError} When `text` is not a string.
 * @throws {Error} When the API key is missing, the API request fails, the
 *   response carries no audio URL, or the downloaded audio is too short.
 */
async function synthesize(text, options = {}) {
  // Validate first so a bad argument fails before any config or filesystem work.
  if (typeof text !== 'string') {
    throw new TypeError('text 必须是字符串')
  }

  const config = loadConfig()

  const apiKey = options.apiKey || config.ttsApiKey
  if (!apiKey) throw new Error('ttsApiKey 未配置,请在 config.json 中设置')

  const baseUrl = (options.apiBaseUrl || config.ttsApiBaseUrl || 'https://dashscope.aliyuncs.com/api/v1').replace(/\/$/, '')
  const model = options.model || config.ttsModel || 'qwen-tts'
  const voice = options.voice || config.ttsVoice || 'Cherry'
  const language = options.language || config.ttsLanguage || 'Chinese'
  const outputDir = options.outputDir || './audio'
  const { silencePadding = 0.3 } = options

  fs.mkdirSync(outputDir, { recursive: true })

  // End the text with sentence punctuation so the TTS produces a natural
  // falling intonation and tail pause. Both half-width and full-width marks
  // count, so text ending in "!" or "?" does not get a stray "。".
  let spokenText = text.trimEnd()
  if (!/[。!?.!?…]$/.test(spokenText)) spokenText += '。'

  const url = `${baseUrl}/services/aigc/multimodal-generation/generation`

  let res
  try {
    res = await axios.post(url, {
      model,
      input: {
        text: spokenText,
        voice,
        language_type: language,
      },
    }, {
      headers: {
        'Authorization': `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
      },
      timeout: 60000,
    })
  } catch (err) {
    const detail = err.response?.data
    // Keep the original error reachable for callers that need the stack or status.
    throw new Error(`TTS API 错误: ${err.message}${detail ? ' ' + JSON.stringify(detail) : ''}`, { cause: err })
  }

  const audioUrl = res.data?.output?.audio?.url
  if (!audioUrl) {
    throw new Error(`TTS API 未返回音频 URL: ${JSON.stringify(res.data)}`)
  }

  // `??` rather than `||` so that id 0 yields seg_000.wav instead of colliding with id 1.
  const id = options.id ?? 1
  const fileName = `seg_${String(id).padStart(3, '0')}.wav`
  const filePath = path.resolve(outputDir, fileName)

  // Download the generated audio.
  const audioRes = await axios.get(audioUrl, { responseType: 'arraybuffer', timeout: 30000 })
  const wavBuffer = Buffer.from(audioRes.data)

  // Append a short silence (breathing gap between sentences) and fix up the header.
  const { finalBuffer, duration } = appendSilence(wavBuffer, silencePadding)
  fs.writeFileSync(filePath, finalBuffer)

  return { filePath, duration }
}
|
|
|
|
/**
 * Synthesize a list of segments sequentially.
 *
 * Each segment is passed to `synthesize` with its own `id`; a 500 ms pause is
 * inserted between consecutive requests to avoid API rate limiting. Progress
 * is logged to stderr so stdout stays free for the JSON result.
 *
 * @param {Array<{id: number, text: string}>} segments - Segments to synthesize, in order.
 * @param {object} [options] - Options forwarded to `synthesize` (e.g. voice, outputDir).
 * @returns {Promise<Array<{id: number, text: string, audio: string, duration: number}>>}
 *   One entry per segment: the audio file path and the duration rounded to milliseconds.
 * @throws {TypeError} When `segments` is not an array.
 */
async function synthesizeBatch(segments, options = {}) {
  if (!Array.isArray(segments)) {
    throw new TypeError('segments 必须是数组')
  }

  const results = []
  for (const [index, seg] of segments.entries()) {
    console.error(` 合成 #${seg.id}: ${seg.text.substring(0, 30)}...`)

    const { filePath, duration } = await synthesize(seg.text, {
      ...options,
      id: seg.id,
    })
    results.push({
      id: seg.id,
      text: seg.text,
      audio: filePath,
      duration: Math.round(duration * 1000) / 1000,
    })

    // Throttle between requests only; no request follows the last segment.
    const hasMore = index < segments.length - 1
    if (hasMore) {
      await new Promise((resolve) => setTimeout(resolve, 500))
    }
  }
  return results
}
|
|
|
|
/**
 * CLI entry point.
 *
 * Reads the JSON file named by the first command-line argument, synthesizes
 * every segment in it, and prints `{ segments: [...] }` as pretty-printed JSON
 * on stdout. Without an argument it prints usage help to stderr and exits
 * with status 1.
 */
async function main() {
  const [, , inputPath] = process.argv

  if (!inputPath) {
    const exampleInput = {
      segments: [{ id: 1, text: '文案' }],
      voice: 'Cherry',
      output_dir: './audio',
    }
    const usageLines = [
      '用法: node qwen-tts.js <input.json>',
      '',
      'input.json 格式:',
      JSON.stringify(exampleInput, null, 2),
    ]
    for (const line of usageLines) {
      console.error(line)
    }
    process.exit(1)
  }

  const request = JSON.parse(fs.readFileSync(inputPath, 'utf-8'))
  const { segments, voice } = request
  const outputDir = request.output_dir || './audio'

  const results = await synthesizeBatch(segments, { voice, outputDir })

  const serialized = JSON.stringify({ segments: results }, null, 2)
  process.stdout.write(serialized + '\n')
}
|
|
|
|
// Run the CLI only when this file is executed directly; when it is loaded via
// require() it just exposes the synthesis functions below.
const isDirectRun = require.main === module

if (isDirectRun) {
  main().catch(({ message }) => {
    console.error('TTS 合成失败:', message)
    process.exit(1)
  })
}

module.exports = { synthesize, synthesizeBatch }
|