#!/usr/bin/env node
/**
 * MiniMax TTS — speech synthesis over the T2A v2 HTTP API.
 *
 * Module usage:
 *   const { synthesize } = require('./minimax-tts')
 *   const { filePath, duration } = await synthesize('你好', { voice: 'my_voice_id' })
 *
 * CLI usage:
 *   node minimax-tts.js <input.json>
 *   input.json: { "segments": [{"id": 1, "text": "..."}], "voice": "voice_id", "output_dir": "./audio" }
 */

const https = require('https')
const fs = require('fs')
const path = require('path')
const { execFileSync } = require('child_process')

const CONFIG_PATH = path.join(__dirname, '..', '..', 'config.json')

// Default per-request timeout; callers may override it with options.timeoutMs.
const DEFAULT_TIMEOUT_MS = 90000
// Pause inserted between consecutive requests of a batch.
const BATCH_DELAY_MS = 300
// Matches text that already ends with one of the accepted punctuation marks.
const TERMINAL_PUNCTUATION = /[。!?;,.!?…]$/

/**
 * Read and parse config.json (two directories above this file).
 * @returns {object} Parsed configuration.
 * @throws {Error} When the file is missing or is not valid JSON.
 */
function loadConfig() {
  if (!fs.existsSync(CONFIG_PATH)) throw new Error(`config.json 不存在: ${CONFIG_PATH}`)
  return JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf-8'))
}

/**
 * Translate a voice alias into a voice id through config.minimaxVoices.
 * Values without a (truthy) mapping are returned unchanged.
 * @param {string} voice - Alias or raw voice id.
 * @param {object} config - Parsed configuration.
 * @returns {string} The mapped voice id, or `voice` itself.
 */
function resolveVoice(voice, config) {
  if (!voice) return voice
  const voices = config.minimaxVoices || {}
  // Own-property lookup only, so names such as "constructor" never resolve to
  // inherited Object.prototype members.
  const mapped = Object.prototype.hasOwnProperty.call(voices, voice) ? voices[voice] : undefined
  return mapped || voice
}

/**
 * Duration of an audio file in seconds.
 * Asks ffprobe first; when ffprobe fails or prints something that is not a
 * finite number, estimates from the file size assuming 128 kbit/s.
 * @param {string} filePath - Path of the audio file.
 * @returns {number} Duration in seconds.
 */
function getAudioDuration(filePath) {
  try {
    const out = execFileSync('ffprobe', [
      '-v', 'quiet',
      '-show_entries', 'format=duration',
      '-of', 'default=noprint_wrappers=1:nokey=1',
      filePath,
    ], { encoding: 'utf-8', timeout: 10000 })
    const seconds = parseFloat(out.trim())
    if (Number.isFinite(seconds)) return seconds
  } catch {
    // ffprobe missing or failed: use the size-based estimate below.
  }
  return fs.statSync(filePath).size * 8 / 128000
}

/**
 * POST a JSON payload and collect the complete response body.
 * @param {URL} url - Endpoint; its port and query string are honoured.
 * @param {string} apiKey - Bearer token.
 * @param {string} payload - Serialized JSON request body.
 * @param {number} timeoutMs - Overall deadline in milliseconds.
 * @returns {Promise<{statusCode: number, body: string}>}
 */
function postJson(url, apiKey, payload, timeoutMs) {
  return new Promise((resolve, reject) => {
    let settled = false
    let timer = null
    // Single exit point: the first outcome wins and always clears the timer.
    const settle = (finish, value) => {
      if (settled) return
      settled = true
      clearTimeout(timer)
      finish(value)
    }

    const req = https.request(url, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(payload),
      },
    }, (res) => {
      // Keep raw buffers and decode once, so a multi-byte UTF-8 character
      // split across two chunks is not corrupted.
      const chunks = []
      res.on('data', (chunk) => { chunks.push(chunk) })
      res.on('end', () => {
        settle(resolve, { statusCode: res.statusCode, body: Buffer.concat(chunks).toString('utf-8') })
      })
      res.on('error', (e) => { settle(reject, e) })
    })

    // Armed only after the request exists, so the callback can never touch an
    // uninitialised `req` when https.request throws synchronously.
    timer = setTimeout(() => {
      settle(reject, new Error(`MiniMax TTS 超时 (${timeoutMs / 1000}s)`))
      req.destroy()
    }, timeoutMs)

    req.on('error', (e) => { settle(reject, new Error(`MiniMax TTS 请求失败: ${e.message}`)) })
    req.end(payload)
  })
}

/**
 * Synthesize one text segment (MiniMax T2A v2 over HTTP, non-streaming) and
 * write the audio to `<outputDir>/seg_<id padded to 3 digits>.mp3`.
 * @param {string} text - Text to speak; "。" is appended when it does not
 *   already end with punctuation.
 * @param {object} [options]
 * @param {string} [options.voice] - Voice alias or id (default: config.minimaxTtsVoice).
 * @param {string} [options.model] - Model name (default: config.minimaxTtsModel, then 'speech-02-hd').
 * @param {string} [options.outputDir] - Output directory (default './audio'), created when missing.
 * @param {number|string} [options.id] - Segment id used in the file name (default 1).
 * @param {number} [options.rate] - Sent as voice_setting.speed (default 1.0).
 * @param {number} [options.pitch] - Sent as voice_setting.pitch (default 0).
 * @param {string} [options.emotion] - Sent as voice_setting.emotion when set.
 * @param {string} [options.languageBoost] - Sent as language_boost when set.
 * @param {string} [options.apiKey] - API key (default: config.minimaxApiKey).
 * @param {number} [options.timeoutMs] - Request deadline in ms (default 90000).
 * @returns {Promise<{filePath: string, duration: number}>}
 */
async function synthesize(text, options = {}) {
  const config = loadConfig()

  const apiKey = options.apiKey || config.minimaxApiKey
  if (!apiKey) throw new Error('minimaxApiKey 未配置,请在 config.json 中添加')

  const model = options.model || config.minimaxTtsModel || 'speech-02-hd'
  const voiceId = resolveVoice(options.voice || config.minimaxTtsVoice, config)
  if (!voiceId) throw new Error('未指定 voice_id,请在 options.voice 或 config.minimaxTtsVoice 中配置')

  if (typeof text !== 'string') throw new TypeError('text 必须是字符串')

  const outputDir = options.outputDir || './audio'
  fs.mkdirSync(outputDir, { recursive: true })

  const trimmed = text.trimEnd()
  const spokenText = TERMINAL_PUNCTUATION.test(trimmed) ? trimmed : `${trimmed}。`

  // `??` rather than `||`: id 0 must yield seg_000, not collide with seg_001.
  const id = options.id ?? 1
  const filePath = path.resolve(outputDir, `seg_${String(id).padStart(3, '0')}.mp3`)

  const voiceSetting = {
    voice_id: voiceId,
    speed: options.rate || 1.0,
    vol: 1.0,
    pitch: options.pitch ?? 0,
  }
  if (options.emotion) voiceSetting.emotion = options.emotion

  const requestBody = {
    model,
    text: spokenText,
    stream: false,
    voice_setting: voiceSetting,
    audio_setting: {
      sample_rate: 32000,
      format: 'mp3',
      channel: 1,
    },
  }
  if (options.languageBoost) requestBody.language_boost = options.languageBoost

  const baseUrl = String(config.minimaxApiBase || 'https://api.yunwu.ai/minimax').replace(/\/+$/, '')
  const url = new URL(`${baseUrl}/v1/t2a_v2`)
  const timeoutMs = Number.isFinite(options.timeoutMs) && options.timeoutMs > 0
    ? options.timeoutMs
    : DEFAULT_TIMEOUT_MS

  const { statusCode, body } = await postJson(url, apiKey, JSON.stringify(requestBody), timeoutMs)

  let parsed
  try {
    parsed = JSON.parse(body)
  } catch (e) {
    throw new Error(`MiniMax TTS 解析响应失败: ${e.message} (HTTP ${statusCode})`)
  }

  const baseResp = parsed?.base_resp
  if (baseResp?.status_code && baseResp.status_code !== 0) {
    throw new Error(`MiniMax TTS 失败 [${baseResp.status_code}]: ${baseResp.status_msg}`)
  }
  if (statusCode >= 400) throw new Error(`MiniMax TTS 请求失败: HTTP ${statusCode}`)

  const hex = parsed?.data?.audio
  if (typeof hex !== 'string' || !hex) throw new Error('MiniMax TTS 未返回音频')

  const audioBuffer = Buffer.from(hex, 'hex')
  // Buffer.from stops at the first non-hex character, so a length mismatch
  // means the payload was corrupt or truncated.
  if (audioBuffer.length * 2 !== hex.length) throw new Error('MiniMax TTS 返回的音频数据无效')

  fs.writeFileSync(filePath, audioBuffer)
  return { filePath, duration: getAudioDuration(filePath) }
}

/**
 * Synthesize segments one after another, pausing briefly between requests.
 * A failing segment does not abort the batch; it is reported with an empty
 * `audio`, a zero `duration` and an `error` message.
 * @param {Iterable<{id: number|string, text: string}>} segments
 * @param {object} [options] - Forwarded to synthesize(); `id` is set per segment.
 * @returns {Promise<Array<{id: *, text: string, audio: string, duration: number, error?: string}>>}
 */
async function synthesizeBatch(segments, options = {}) {
  if (segments == null || typeof segments[Symbol.iterator] !== 'function') {
    throw new TypeError('segments 必须是数组')
  }

  const queue = Array.from(segments)
  const results = []
  for (let index = 0; index < queue.length; index += 1) {
    const seg = queue[index]
    try {
      const preview = typeof seg.text === 'string' ? seg.text.substring(0, 30) : ''
      console.error(` 合成 #${seg.id}: ${preview}...`)
      const { filePath, duration } = await synthesize(seg.text, { ...options, id: seg.id })
      results.push({ id: seg.id, text: seg.text, audio: filePath, duration: Math.round(duration * 1000) / 1000 })
    } catch (err) {
      results.push({ id: seg?.id, text: seg?.text, audio: '', duration: 0, error: err.message })
    }
    // Pause between requests only; there is nothing to wait for after the last one.
    if (index < queue.length - 1) {
      await new Promise((resolve) => setTimeout(resolve, BATCH_DELAY_MS))
    }
  }
  return results
}

/**
 * CLI entry point: read the input JSON named by argv[2], synthesize every
 * segment and print `{ segments: [...] }` as JSON on stdout.
 */
async function main() {
  const inputJson = process.argv[2]
  if (!inputJson) {
    console.error('用法: node minimax-tts.js <input.json>')
    console.error(JSON.stringify({
      segments: [{ id: 1, text: '文案' }],
      voice: 'voice_id',
      output_dir: './audio',
    }, null, 2))
    process.exit(1)
  }

  const input = JSON.parse(fs.readFileSync(inputJson, 'utf-8'))
  if (!Array.isArray(input?.segments)) throw new Error('input.json 缺少 segments 数组')

  const results = await synthesizeBatch(input.segments, {
    voice: input.voice,
    outputDir: input.output_dir || './audio',
  })
  process.stdout.write(JSON.stringify({ segments: results }, null, 2) + '\n')
}

if (require.main === module) {
  main().catch((err) => {
    console.error('合成失败:', err.message)
    process.exit(1)
  })
}

module.exports = { synthesize, synthesizeBatch }