190 lines
6.0 KiB
JavaScript
190 lines
6.0 KiB
JavaScript
|
|
#!/usr/bin/env node
|
|||
|
|
|
|||
|
|
/**
 * MiniMax TTS — speech synthesis over the T2A v2 HTTP API
 *
 * Module usage:
 *   const { synthesize } = require('./minimax-tts')
 *   const { filePath, duration } = await synthesize('你好', { voice: 'my_voice_id' })
 *
 * CLI usage:
 *   node minimax-tts.js <input.json>
 *   input.json: { "segments": [{"id": 1, "text": "..."}], "voice": "voice_id", "output_dir": "./audio" }
 */
|
|||
|
|
|
|||
|
|
const https = require('https')
|
|||
|
|
const fs = require('fs')
|
|||
|
|
const path = require('path')
|
|||
|
|
const { execFileSync } = require('child_process')
|
|||
|
|
|
|||
|
|
// Absolute path of config.json, resolved two directories above this script's directory.
const CONFIG_PATH = path.join(__dirname, '..', '..', 'config.json')
|
|||
|
|
|
|||
|
|
/**
 * Read and parse the JSON configuration file located at CONFIG_PATH.
 *
 * @returns {object} The parsed configuration object.
 * @throws {Error} When the file is missing, cannot be read, is not valid JSON,
 *   or its top-level value is not a JSON object. Every message includes the
 *   config path so the failing file can be identified.
 */
function loadConfig() {
  let raw
  try {
    // Read directly instead of existsSync + read: one syscall, no check/use race.
    raw = fs.readFileSync(CONFIG_PATH, 'utf-8')
  } catch (err) {
    if (err.code === 'ENOENT') throw new Error(`config.json 不存在: ${CONFIG_PATH}`)
    throw new Error(`config.json 读取失败: ${CONFIG_PATH}: ${err.message}`, { cause: err })
  }

  let parsed
  try {
    parsed = JSON.parse(raw)
  } catch (err) {
    // A bare SyntaxError does not say which file was malformed.
    throw new Error(`config.json 解析失败: ${CONFIG_PATH}: ${err.message}`, { cause: err })
  }

  // Callers read properties off the result, so reject null / arrays / primitives here.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error(`config.json 必须是 JSON 对象: ${CONFIG_PATH}`)
  }
  return parsed
}
|
|||
|
|
|
|||
|
|
/**
 * Translate a voice alias into a voice id using the `minimaxVoices` map in config.
 *
 * @param {string} voice - Alias or raw voice id. Falsy values are returned unchanged.
 * @param {object} config - Configuration object; `config.minimaxVoices` is an
 *   optional alias-to-id map.
 * @returns {string} The mapped voice id when `voice` is an alias with a truthy
 *   mapping, otherwise `voice` itself.
 */
function resolveVoice(voice, config) {
  if (!voice) return voice
  const voices = config?.minimaxVoices || {}
  // Own-property check: plain `voices[voice]` would also find inherited members,
  // so a voice named "toString" or "constructor" would resolve to a function.
  const isAlias = Object.prototype.hasOwnProperty.call(voices, voice)
  return (isAlias && voices[voice]) || voice
}
|
|||
|
|
|
|||
|
|
/**
 * Return the duration of an audio file in seconds.
 *
 * Asks `ffprobe` for the container duration first. If ffprobe is unavailable,
 * fails, times out (10 s), or prints something that is not a finite number
 * (for example "N/A"), the duration is estimated from the file size assuming
 * a constant 128 kbit/s stream.
 *
 * @param {string} filePath - Path of the audio file.
 * @returns {number} Duration in seconds (probed or estimated).
 * @throws {Error} If the fallback is needed and the file cannot be stat'ed.
 */
function getAudioDuration(filePath) {
  try {
    const out = execFileSync('ffprobe', [
      '-v', 'quiet', '-show_entries', 'format=duration',
      '-of', 'default=noprint_wrappers=1:nokey=1', filePath,
    ], { encoding: 'utf-8', timeout: 10000 })
    const seconds = Number.parseFloat(out.trim())
    // ffprobe can exit 0 yet print "N/A" or nothing; never hand NaN to callers.
    if (Number.isFinite(seconds) && seconds >= 0) return seconds
  } catch {
    // ffprobe missing, failed or timed out: fall through to the size estimate.
  }

  const assumedBitsPerSecond = 128000
  return fs.statSync(filePath).size * 8 / assumedBitsPerSecond
}
|
|||
|
|
|
|||
|
|
/**
 * Synthesize one text segment with the MiniMax T2A v2 HTTP API (non-streaming)
 * and write the returned audio to `<outputDir>/seg_<id>.mp3`.
 *
 * @param {string} text - Text to speak. Trailing whitespace is removed and a
 *   "。" is appended when the text does not already end in punctuation.
 * @param {object} [options]
 * @param {string} [options.voice] - Voice alias or id; falls back to `config.minimaxTtsVoice`.
 * @param {string} [options.model] - Falls back to `config.minimaxTtsModel`, then 'speech-02-hd'.
 * @param {string} [options.outputDir='./audio'] - Output directory, created if missing.
 * @param {number|string} [options.id=1] - Segment id used in the file name (zero-padded to 3).
 * @param {number} [options.rate=1.0] - Sent as `voice_setting.speed`.
 * @param {number} [options.pitch=0] - Sent as `voice_setting.pitch`.
 * @param {string} [options.emotion] - Sent as `voice_setting.emotion` when set.
 * @param {string} [options.languageBoost] - Sent as `language_boost` when set.
 * @param {string} [options.apiKey] - Falls back to `config.minimaxApiKey`.
 * @returns {Promise<{filePath: string, duration: number}>} Absolute path of the
 *   written file and its duration as reported by getAudioDuration.
 *   Rejects on missing configuration, request failure, a 90 s timeout, an API
 *   error status, or an unusable response.
 */
function synthesize(text, options = {}) {
  return new Promise((resolve, reject) => {
    // Validate before touching the filesystem or network.
    if (typeof text !== 'string') {
      reject(new TypeError('text 必须是字符串'))
      return
    }

    const config = loadConfig()

    const apiKey = options.apiKey || config.minimaxApiKey
    if (!apiKey) { reject(new Error('minimaxApiKey 未配置,请在 config.json 中添加')); return }

    const model = options.model || config.minimaxTtsModel || 'speech-02-hd'
    const voiceId = resolveVoice(options.voice || config.minimaxTtsVoice, config)
    if (!voiceId) { reject(new Error('未指定 voice_id,请在 options.voice 或 config.minimaxTtsVoice 中配置')); return }

    const outputDir = options.outputDir || './audio'
    fs.mkdirSync(outputDir, { recursive: true })

    // Make sure the text ends in punctuation. Both ASCII and full-width marks
    // count, so "你好！" is not turned into "你好！。".
    let spokenText = text.trimEnd()
    if (!/[。！？；，.!?;,…]$/.test(spokenText)) spokenText += '。'

    // `??` rather than `||`: id 0 is a valid id and must not collide with seg_001.
    const id = options.id ?? 1
    const fileName = `seg_${String(id).padStart(3, '0')}.mp3`
    const filePath = path.resolve(outputDir, fileName)

    const voiceSetting = {
      voice_id: voiceId,
      speed: options.rate || 1.0,
      vol: 1.0,
      pitch: options.pitch ?? 0,
    }
    if (options.emotion) voiceSetting.emotion = options.emotion

    const requestBody = {
      model,
      text: spokenText,
      stream: false,
      voice_setting: voiceSetting,
      audio_setting: {
        sample_rate: 32000,
        format: 'mp3',
        channel: 1,
      },
    }
    if (options.languageBoost) requestBody.language_boost = options.languageBoost

    const body = JSON.stringify(requestBody)

    // Append the endpoint to the base path so a trailing "/" on the base does
    // not produce "//v1/...", and keep any port or query string from the base.
    const url = new URL(config.minimaxApiBase || 'https://api.yunwu.ai/minimax')
    url.pathname = `${url.pathname.replace(/\/+$/, '')}/v1/t2a_v2`

    const reqOptions = {
      hostname: url.hostname,
      port: url.port || undefined,
      path: `${url.pathname}${url.search}`,
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(body),
      },
    }

    let settled = false
    let timer = null
    // Single exit point: settles the promise once and always clears the timer.
    const finish = (settle, value) => {
      if (settled) return
      settled = true
      clearTimeout(timer)
      settle(value)
    }

    const req = https.request(reqOptions, (res) => {
      // Keep raw buffers and decode once: decoding chunk by chunk corrupts
      // multi-byte UTF-8 characters that straddle a chunk boundary.
      const chunks = []
      res.on('data', (chunk) => { chunks.push(chunk) })

      res.on('end', () => {
        if (settled) return
        try {
          const payload = JSON.parse(Buffer.concat(chunks).toString('utf-8'))
          const statusCode = payload.base_resp?.status_code
          if (statusCode) {
            finish(reject, new Error(`MiniMax TTS 失败 [${statusCode}]: ${payload.base_resp.status_msg}`))
            return
          }

          const hex = payload.data?.audio
          if (!hex) { finish(reject, new Error('MiniMax TTS 未返回音频')); return }

          const audioBuffer = Buffer.from(hex, 'hex')
          // A non-hex payload decodes to nothing; do not write an empty file.
          if (audioBuffer.length === 0) { finish(reject, new Error('MiniMax TTS 未返回音频')); return }

          fs.writeFileSync(filePath, audioBuffer)
          finish(resolve, { filePath, duration: getAudioDuration(filePath) })
        } catch (e) {
          // Include the HTTP status: a proxy error page is the usual cause here.
          finish(reject, new Error(`MiniMax TTS 解析响应失败: ${e.message} (HTTP ${res.statusCode})`))
        }
      })

      res.on('error', (e) => { finish(reject, e) })
    })

    // Armed only after the request object exists: if https.request throws
    // synchronously, no timer is left behind to reference an unset `req`.
    timer = setTimeout(() => {
      finish(reject, new Error('MiniMax TTS 超时 (90s)'))
      req.destroy()
    }, 90000)

    req.on('error', (e) => {
      finish(reject, new Error(`MiniMax TTS 请求失败: ${e.message}`))
    })

    req.write(body)
    req.end()
  })
}
|
|||
|
|
|
|||
|
|
/**
 * Synthesize segments sequentially and collect one result row per segment.
 *
 * A failing segment never aborts the batch: its row carries `audio: ''`,
 * `duration: 0` and an `error` message instead.
 *
 * @param {Iterable<{id: number|string, text: string}>} segments - Segments to synthesize.
 * @param {object} [options] - Passed through to synthesize() for every segment
 *   (with `id` set per segment), except for `delayMs`.
 * @param {number} [options.delayMs=300] - Pause between consecutive requests in
 *   milliseconds; no pause follows the last segment.
 * @returns {Promise<Array<{id: *, text: *, audio: string, duration: number, error?: string}>>}
 *   Rows in input order; `duration` is rounded to three decimals.
 * @throws {TypeError} (as a rejection) when `segments` is not iterable.
 */
async function synthesizeBatch(segments, options = {}) {
  if (segments == null || typeof segments[Symbol.iterator] !== 'function') {
    throw new TypeError('segments 必须是数组')
  }

  // delayMs is a batch-level knob; keep it out of the per-segment options.
  const { delayMs = 300, ...synthOptions } = options
  const queue = Array.from(segments)
  const results = []

  for (const [index, seg] of queue.entries()) {
    const id = seg?.id
    const text = seg?.text
    try {
      // Checked inside the try so one malformed segment is recorded as an
      // error row instead of aborting every segment after it.
      if (typeof text !== 'string') throw new TypeError(`片段 #${id} 缺少 text 字符串`)
      console.error(` 合成 #${id}: ${text.substring(0, 30)}...`)
      const { filePath, duration } = await synthesize(text, { ...synthOptions, id })
      results.push({ id, text, audio: filePath, duration: Math.round(duration * 1000) / 1000 })
    } catch (err) {
      results.push({ id, text, audio: '', duration: 0, error: err.message })
    }

    // Throttle between requests only; sleeping after the last one is wasted time.
    const isLast = index === queue.length - 1
    if (!isLast && delayMs > 0) await new Promise(r => setTimeout(r, delayMs))
  }
  return results
}
|
|||
|
|
|
|||
|
|
/**
 * CLI entry point.
 *
 * Reads the JSON file named by the first command-line argument, synthesizes
 * its `segments` with synthesizeBatch (using `voice` and `output_dir` from the
 * file, the latter defaulting to './audio'), and prints
 * `{ "segments": [...] }` as pretty-printed JSON on stdout.
 *
 * With no argument, prints usage plus an example input to stderr and exits
 * with status 1.
 *
 * @returns {Promise<void>}
 * @throws {Error} (as a rejection) when the input file cannot be read or
 *   parsed, or does not contain a `segments` array.
 */
async function main() {
  const inputJson = process.argv[2]
  if (!inputJson) {
    console.error('用法: node minimax-tts.js <input.json>')
    console.error(JSON.stringify({ segments: [{ id: 1, text: '文案' }], voice: 'voice_id', output_dir: './audio' }, null, 2))
    process.exit(1)
  }

  const input = JSON.parse(fs.readFileSync(inputJson, 'utf-8'))
  // Fail with a clear message rather than a "not iterable" error deep in the batch.
  if (!Array.isArray(input?.segments)) {
    throw new Error(`${inputJson}: 缺少 segments 数组`)
  }

  const results = await synthesizeBatch(input.segments, {
    voice: input.voice,
    outputDir: input.output_dir || './audio',
  })
  process.stdout.write(`${JSON.stringify({ segments: results }, null, 2)}\n`)
}
|
|||
|
|
|
|||
|
|
// Run the CLI only when this file is executed directly, not when it is required.
if (require.main === module) {
  main().catch((err) => {
    // Report the failure on stderr and signal it through the exit status.
    console.error('合成失败:', err.message)
    process.exit(1)
  })
}
|
|||
|
|
|
|||
|
|
// Public API: single-segment synthesis and sequential batch synthesis.
module.exports = { synthesize, synthesizeBatch }
|