Files
video-create/.claude/skills/video-from-script/scripts/minimax-tts.js

190 lines
6.0 KiB
JavaScript
Raw Normal View History

#!/usr/bin/env node
/**
* MiniMax TTS T2A v2 HTTP 语音合成
*
* 模块用法:
* const { synthesize } = require('./minimax-tts')
* const { filePath, duration } = await synthesize('你好', { voice: 'my_voice_id' })
*
* CLI 用法:
* node minimax-tts.js <input.json>
* input.json: { "segments": [{"id": 1, "text": "..."}], "voice": "voice_id", "output_dir": "./audio" }
*/
const https = require('https')
const fs = require('fs')
const path = require('path')
const { execFileSync } = require('child_process')
// Location of config.json: two directory levels above the directory that contains this script.
const CONFIG_PATH = path.join(__dirname, '..', '..', 'config.json')
/**
 * Read and parse the JSON configuration file at CONFIG_PATH.
 *
 * The file is read on every call (no caching). A leading UTF-8 BOM is
 * stripped before parsing because JSON.parse rejects it.
 *
 * @returns {object} The parsed configuration.
 * @throws {Error} When the file does not exist, cannot be read, or does not
 *   contain valid JSON (the message then includes the file path).
 */
function loadConfig() {
  let raw
  try {
    raw = fs.readFileSync(CONFIG_PATH, 'utf-8')
  } catch (err) {
    // Keep the original "missing file" message; surface any other I/O error unchanged.
    if (err && err.code === 'ENOENT') throw new Error(`config.json 不存在: ${CONFIG_PATH}`)
    throw err
  }
  try {
    return JSON.parse(raw.replace(/^\uFEFF/, ''))
  } catch (err) {
    // A bare SyntaxError does not say which file is broken, so add the path.
    throw new Error(`config.json 解析失败: ${CONFIG_PATH}: ${err.message}`, { cause: err })
  }
}
/**
 * Translate a voice alias into a voice id using `config.minimaxVoices`.
 *
 * Only the map's own keys are consulted, so names that happen to exist on
 * Object.prototype (e.g. "toString", "constructor") are not mistaken for
 * aliases.
 *
 * @param {string} voice - Alias or literal voice id; falsy values are returned unchanged.
 * @param {object} config - Configuration object; `minimaxVoices` is optional.
 * @returns {string} The mapped voice id, or `voice` itself when there is no
 *   (non-empty) mapping for it.
 */
function resolveVoice(voice, config) {
  if (!voice) return voice
  const aliases = config.minimaxVoices || {}
  const mapped = Object.prototype.hasOwnProperty.call(aliases, voice) ? aliases[voice] : undefined
  return mapped || voice
}
/**
 * Determine the duration of an audio file in seconds.
 *
 * ffprobe is asked for the container duration first. If ffprobe cannot be
 * run, fails, times out (10 s), or prints something that is not a finite
 * number (for example "N/A"), the duration is estimated from the file size
 * assuming a constant 128 kbit/s bitrate.
 *
 * @param {string} filePath - Path of the audio file.
 * @returns {number} Duration in seconds.
 * @throws {Error} When the fallback is needed and the file cannot be stat'ed.
 */
function getAudioDuration(filePath) {
  try {
    const out = execFileSync('ffprobe', [
      '-v', 'quiet', '-show_entries', 'format=duration',
      '-of', 'default=noprint_wrappers=1:nokey=1', filePath,
    ], { encoding: 'utf-8', timeout: 10000 })
    const seconds = Number.parseFloat(out.trim())
    // Never hand NaN/Infinity to callers; use the size estimate instead.
    if (Number.isFinite(seconds)) return seconds
  } catch {
    // ffprobe is missing or failed: deliberate best-effort, fall through to the estimate.
  }
  const { size } = fs.statSync(filePath)
  return size * 8 / 128000
}
/**
 * Synthesize one text segment with the MiniMax T2A v2 HTTP API
 * (non-streaming) and save the returned audio as an MP3 file.
 *
 * Configuration is loaded with loadConfig() on every call; values passed in
 * `options` take precedence over the configuration file. The request asks
 * for mono MP3 at a 32000 Hz sample rate. The audio arrives hex-encoded in
 * `data.audio` and is written to `<outputDir>/seg_<id, zero-padded to 3>.mp3`.
 *
 * @param {string} text - Text to speak. Trailing whitespace is removed and a
 *   '。' is appended when the text does not already end with punctuation.
 * @param {object} [options]
 * @param {string} [options.voice] - Voice id or alias (see resolveVoice); defaults to config.minimaxTtsVoice.
 * @param {string} [options.model] - Defaults to config.minimaxTtsModel, then 'speech-02-hd'.
 * @param {string} [options.outputDir='./audio'] - Created recursively if missing.
 * @param {number|string} [options.id=1] - Segment id used in the file name.
 * @param {number} [options.rate=1.0] - Sent as `voice_setting.speed`.
 * @param {number} [options.pitch=0] - Sent as `voice_setting.pitch`.
 * @param {string} [options.emotion] - Sent as `voice_setting.emotion` when set.
 * @param {string} [options.languageBoost] - Sent as `language_boost` when set.
 * @param {string} [options.apiKey] - Defaults to config.minimaxApiKey.
 * @returns {Promise<{filePath: string, duration: number}>} Absolute path of
 *   the written file and its duration in seconds. Rejects on missing
 *   configuration, non-string text, request errors, a 90 s timeout, an API
 *   error status, or a response without audio.
 */
function synthesize(text, options = {}) {
  return new Promise((resolve, reject) => {
    // A throw from loadConfig() (or anything else in this executor) rejects the promise.
    const config = loadConfig()
    const apiKey = options.apiKey || config.minimaxApiKey
    if (!apiKey) { reject(new Error('minimaxApiKey 未配置,请在 config.json 中添加')); return }
    const model = options.model || config.minimaxTtsModel || 'speech-02-hd'
    const voiceId = resolveVoice(options.voice || config.minimaxTtsVoice, config)
    if (!voiceId) { reject(new Error('未指定 voice_id请在 options.voice 或 config.minimaxTtsVoice 中配置')); return }
    // Validate before touching the file system so bad input leaves no directory behind.
    if (typeof text !== 'string') { reject(new TypeError('text 必须是字符串')); return }

    const outputDir = options.outputDir || './audio'
    fs.mkdirSync(outputDir, { recursive: true })

    // Make sure the sentence is terminated; both ASCII and full-width
    // (\uFF01 \uFF1F \uFF1B \uFF0C) punctuation count as terminators.
    let spokenText = text.trimEnd()
    if (!/[。!?;,.…\uFF01\uFF1F\uFF1B\uFF0C]$/.test(spokenText)) spokenText += '。'

    // `??` keeps a legitimate id of 0 instead of silently turning it into 1.
    const id = options.id ?? 1
    const fileName = `seg_${String(id).padStart(3, '0')}.mp3`
    const filePath = path.resolve(outputDir, fileName)

    const voiceSetting = {
      voice_id: voiceId,
      speed: options.rate || 1.0,
      vol: 1.0,
      pitch: options.pitch ?? 0,
    }
    if (options.emotion) voiceSetting.emotion = options.emotion
    const requestBody = {
      model,
      text: spokenText,
      stream: false,
      voice_setting: voiceSetting,
      audio_setting: {
        sample_rate: 32000,
        format: 'mp3',
        channel: 1,
      },
    }
    if (options.languageBoost) requestBody.language_boost = options.languageBoost
    const payload = JSON.stringify(requestBody)

    // Tolerate a trailing slash on the configured base URL.
    const baseUrl = String(config.minimaxApiBase || 'https://api.yunwu.ai/minimax').replace(/\/+$/, '')
    const url = new URL(`${baseUrl}/v1/t2a_v2`)
    const reqOptions = {
      hostname: url.hostname,
      // url.port is '' for the default port; undefined lets https pick 443.
      port: url.port || undefined,
      path: `${url.pathname}${url.search}`,
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(payload),
      },
    }

    // Single exit point: whichever of timeout / error / response comes first wins.
    let settled = false
    let timer = null
    const finish = (settle, value) => {
      if (settled) return
      settled = true
      clearTimeout(timer)
      settle(value)
    }

    const req = https.request(reqOptions, (res) => {
      // Collect raw buffers and decode once, so multi-byte UTF-8 sequences
      // split across chunks are not corrupted.
      const chunks = []
      res.on('data', (chunk) => { chunks.push(chunk) })
      res.on('end', () => {
        if (settled) return
        let parsed
        try {
          parsed = JSON.parse(Buffer.concat(chunks).toString('utf-8'))
        } catch (e) {
          finish(reject, new Error(`MiniMax TTS 解析响应失败 (HTTP ${res.statusCode}): ${e.message}`))
          return
        }
        const statusCode = parsed?.base_resp?.status_code
        if (statusCode) {
          finish(reject, new Error(`MiniMax TTS 失败 [${statusCode}]: ${parsed.base_resp.status_msg}`))
          return
        }
        const hex = parsed?.data?.audio
        if (!hex) { finish(reject, new Error('MiniMax TTS 未返回音频')); return }
        try {
          fs.writeFileSync(filePath, Buffer.from(hex, 'hex'))
          finish(resolve, { filePath, duration: getAudioDuration(filePath) })
        } catch (e) {
          finish(reject, new Error(`MiniMax TTS 写入音频失败: ${e.message}`))
        }
      })
      res.on('error', (e) => { finish(reject, e) })
    })
    req.on('error', (e) => {
      finish(reject, new Error(`MiniMax TTS 请求失败: ${e.message}`))
    })

    timer = setTimeout(() => {
      finish(reject, new Error('MiniMax TTS 超时 (90s)'))
      req.destroy()
    }, 90000)

    req.write(payload)
    req.end()
  })
}
/**
 * Synthesize several segments one after another.
 *
 * Segments are processed sequentially with a pause between consecutive
 * requests. A failing segment does not stop the batch: it is reported with
 * an empty `audio`, a `duration` of 0 and an `error` message. Progress is
 * logged to stderr so stdout stays free for machine-readable output.
 *
 * @param {Iterable<{id: number|string, text: string}>} segments
 * @param {object} [options] - Forwarded to synthesize() for every segment
 *   (with `id` set per segment).
 * @param {number} [options.delayMs=300] - Pause between consecutive requests
 *   in milliseconds; not forwarded to synthesize().
 * @returns {Promise<Array<{id: *, text: *, audio: string, duration: number, error?: string}>>}
 *   One entry per segment, in input order; durations are rounded to 3 decimals.
 */
async function synthesizeBatch(segments, options = {}) {
  const { delayMs = 300, ...synthOptions } = options
  const results = []
  let isFirst = true
  for (const seg of segments) {
    // Pause only between requests, not after the last one.
    if (!isFirst && delayMs > 0) await new Promise((r) => setTimeout(r, delayMs))
    isFirst = false

    const { id, text } = seg
    // Build the preview defensively so a segment without text is reported
    // as a failed segment below instead of aborting the whole batch here.
    const preview = typeof text === 'string' ? text.substring(0, 30) : String(text)
    console.error(` 合成 #${id}: ${preview}...`)
    try {
      const { filePath, duration } = await synthesize(text, { ...synthOptions, id })
      results.push({ id, text, audio: filePath, duration: Math.round(duration * 1000) / 1000 })
    } catch (err) {
      results.push({ id, text, audio: '', duration: 0, error: err.message })
    }
  }
  return results
}
/**
 * CLI entry point: read the input JSON named by the first command-line
 * argument, synthesize all of its segments and print the results as JSON
 * on stdout.
 *
 * Expected input shape:
 *   { "segments": [{ "id": 1, "text": "..." }], "voice": "voice_id", "output_dir": "./audio" }
 * `output_dir` defaults to './audio'. Without an argument, usage help is
 * printed to stderr and the process exits with code 1.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the input file cannot be read or parsed, or when it
 *   has no `segments` array.
 */
async function main() {
  const inputJson = process.argv[2]
  if (!inputJson) {
    console.error('用法: node minimax-tts.js <input.json>')
    console.error(JSON.stringify({ segments: [{ id: 1, text: '文案' }], voice: 'voice_id', output_dir: './audio' }, null, 2))
    process.exit(1)
  }
  const input = JSON.parse(fs.readFileSync(inputJson, 'utf-8'))
  // Fail with a clear message instead of a "not iterable" TypeError deep inside the batch loop.
  if (!input || !Array.isArray(input.segments)) {
    throw new Error(`输入文件缺少 segments 数组: ${inputJson}`)
  }
  const results = await synthesizeBatch(input.segments, {
    voice: input.voice,
    outputDir: input.output_dir || './audio',
  })
  process.stdout.write(`${JSON.stringify({ segments: results }, null, 2)}\n`)
}
// Start the CLI only when this file is executed directly (not when it is
// loaded with require()); any failure is reported on stderr and the
// process exits with code 1.
const isCliEntry = require.main === module
if (isCliEntry) {
  main().catch((err) => {
    console.error('合成失败:', err.message)
    process.exit(1)
  })
}

// Public module API.
module.exports = { synthesize, synthesizeBatch }