.claude/skills/video-from-script/scripts/minimax-tts.js

#!/usr/bin/env node

/**
 * MiniMax TTS — T2A v2 HTTP 语音合成
 *
 * 模块用法:
 *   const { synthesize } = require('./minimax-tts')
 *   const { filePath, duration } = await synthesize('你好', { voice: 'my_voice_id' })
 *
 * CLI 用法:
 *   node minimax-tts.js <input.json>
 *   input.json: { "segments": [{"id": 1, "text": "..."}], "voice": "voice_id", "output_dir": "./audio" }
 */

const https = require('https')
const fs = require('fs')
const path = require('path')
const { execFileSync } = require('child_process')

// Location of config.json: two directory levels above the directory that contains this script.
const CONFIG_PATH = path.join(__dirname, '..', '..', 'config.json')

/**
 * Read and parse the JSON configuration file located at CONFIG_PATH.
 *
 * @returns {object} The parsed configuration.
 * @throws {Error} When the file does not exist. Errors raised by
 *   `fs.readFileSync` or `JSON.parse` propagate unchanged.
 */
function loadConfig() {
  const configExists = fs.existsSync(CONFIG_PATH)
  if (!configExists) {
    throw new Error(`config.json 不存在: ${CONFIG_PATH}`)
  }

  const rawConfig = fs.readFileSync(CONFIG_PATH, 'utf-8')
  return JSON.parse(rawConfig)
}

/**
 * Translate a voice alias into a voice id using `config.minimaxVoices`.
 *
 * @param {string} voice - Alias or raw voice id. Falsy values are returned as-is.
 * @param {object} config - Configuration object; `minimaxVoices` (optional) maps
 *   alias -> voice id.
 * @returns {string} The mapped voice id when the alias is an own key of
 *   `minimaxVoices` with a truthy value; otherwise `voice` itself.
 */
function resolveVoice(voice, config) {
  if (!voice) return voice
  const voices = config.minimaxVoices || {}
  // Own-property check: a plain `voices[voice]` lookup would also hit inherited
  // members, so a voice called "toString" or "constructor" would resolve to a
  // function from Object.prototype instead of falling back to the raw id.
  const isAlias = Object.prototype.hasOwnProperty.call(voices, voice)
  const mapped = isAlias ? voices[voice] : undefined
  return mapped || voice
}

/**
 * Determine the duration of an audio file in seconds.
 *
 * Asks `ffprobe` for the container duration first. If ffprobe cannot be run,
 * exits with an error, times out (10 s), or prints something that does not
 * parse to a finite number, the duration is estimated from the file size
 * instead (size in bits / 128000, i.e. as if the file were encoded at a
 * constant 128 kbit/s — an approximation only).
 *
 * @param {string} filePath - Path of the audio file.
 * @returns {number} Duration in seconds.
 * @throws {Error} When the fallback is needed and `fs.statSync` fails
 *   (for example because the file does not exist).
 */
function getAudioDuration(filePath) {
  try {
    const out = execFileSync('ffprobe', [
      '-v', 'quiet', '-show_entries', 'format=duration',
      '-of', 'default=noprint_wrappers=1:nokey=1', filePath,
    ], { encoding: 'utf-8', timeout: 10000 })
    const seconds = parseFloat(out.trim())
    // ffprobe can exit successfully yet print nothing or "N/A"; only trust a real number.
    if (Number.isFinite(seconds)) return seconds
  } catch {
    // ffprobe missing, failed or timed out: use the size-based estimate below.
  }
  const stat = fs.statSync(filePath)
  return stat.size * 8 / 128000
}

/**
 * Strip trailing whitespace and make sure the text ends with punctuation.
 * A Chinese full stop is appended when the last character is not one of the
 * recognised closing marks.
 */
function normalizeTtsText(text) {
  const trimmed = text.trimEnd()
  return /[。！？；，.!?…]$/.test(trimmed) ? trimmed : `${trimmed}。`
}

/**
 * Build the JSON payload for the T2A v2 endpoint (non-streaming, mono MP3 at 32 kHz).
 * `emotion` and `language_boost` are only included when the caller supplied them.
 */
function buildTtsPayload(text, model, voiceId, options) {
  const voiceSetting = {
    voice_id: voiceId,
    speed: options.rate || 1.0,
    vol: 1.0,
    pitch: options.pitch ?? 0,
  }
  if (options.emotion) voiceSetting.emotion = options.emotion

  const payload = {
    model,
    text,
    stream: false,
    voice_setting: voiceSetting,
    audio_setting: {
      sample_rate: 32000,
      format: 'mp3',
      channel: 1,
    },
  }
  if (options.languageBoost) payload.language_boost = options.languageBoost
  return payload
}

/**
 * Turn the raw response body into an audio Buffer.
 * Throws an Error when the body is not JSON, when `base_resp.status_code`
 * reports a failure, or when `data.audio` holds no decodable hex audio.
 */
function decodeTtsAudio(rawBody, statusCode) {
  let parsed
  try {
    parsed = JSON.parse(rawBody)
  } catch (e) {
    // Gateways may answer with a non-JSON error page; the HTTP status makes that diagnosable.
    throw new Error(`MiniMax TTS 解析响应失败 (HTTP ${statusCode}): ${e.message}`)
  }

  const baseResp = parsed?.base_resp
  // Any truthy status code is a failure; 0 or an absent code means success.
  if (baseResp?.status_code) {
    throw new Error(`MiniMax TTS 失败 [${baseResp.status_code}]: ${baseResp.status_msg}`)
  }

  const hex = parsed?.data?.audio
  if (typeof hex !== 'string' || hex.length === 0) throw new Error('MiniMax TTS 未返回音频')

  // Buffer.from(..., 'hex') silently stops at the first invalid character, so an
  // empty result means there is nothing worth writing to disk.
  const audio = Buffer.from(hex, 'hex')
  if (audio.length === 0) throw new Error('MiniMax TTS 未返回音频')
  return audio
}

/**
 * Synthesize one text segment with the MiniMax T2A v2 HTTP API (non-streaming)
 * and write the result to `<outputDir>/seg_<id padded to 3 digits>.mp3`.
 *
 * @param {string} text - Text to speak. Trailing whitespace is removed and a
 *   full stop is appended when the text does not already end in punctuation.
 * @param {object} [options]
 * @param {string} [options.voice] - Voice id or alias (resolved with resolveVoice);
 *   defaults to `config.minimaxTtsVoice`.
 * @param {string} [options.model] - Defaults to `config.minimaxTtsModel`, then 'speech-02-hd'.
 * @param {string} [options.outputDir='./audio'] - Created recursively if missing.
 * @param {number|string} [options.id=1] - Segment id used in the file name; 0 is a valid id.
 * @param {number} [options.rate=1.0] - Sent as `voice_setting.speed`.
 * @param {number} [options.pitch=0] - Sent as `voice_setting.pitch`.
 * @param {string} [options.emotion] - Sent as `voice_setting.emotion` when set.
 * @param {string} [options.languageBoost] - Sent as `language_boost` when set.
 * @param {string} [options.apiKey] - Defaults to `config.minimaxApiKey`.
 * @returns {Promise<{filePath: string, duration: number}>} Absolute path of the
 *   written MP3 and its duration in seconds. The promise rejects on invalid
 *   input, missing configuration, request failure, a 90 s timeout, an API
 *   error, or a file-system error.
 */
function synthesize(text, options = {}) {
  return new Promise((resolve, reject) => {
    // Validate before touching config, disk or network so bad input fails fast and clearly.
    if (typeof text !== 'string') {
      reject(new TypeError('text 必须是字符串'))
      return
    }

    const config = loadConfig()

    const apiKey = options.apiKey || config.minimaxApiKey
    if (!apiKey) { reject(new Error('minimaxApiKey 未配置，请在 config.json 中添加')); return }

    const model = options.model || config.minimaxTtsModel || 'speech-02-hd'
    const voiceId = resolveVoice(options.voice || config.minimaxTtsVoice, config)
    if (!voiceId) { reject(new Error('未指定 voice_id，请在 options.voice 或 config.minimaxTtsVoice 中配置')); return }

    const outputDir = options.outputDir || './audio'
    fs.mkdirSync(outputDir, { recursive: true })

    // `??` rather than `||`: id 0 must map to seg_000.mp3 instead of colliding with seg_001.mp3.
    const id = options.id ?? 1
    const fileName = `seg_${String(id).padStart(3, '0')}.mp3`
    const filePath = path.resolve(outputDir, fileName)

    const payload = JSON.stringify(buildTtsPayload(normalizeTtsText(text), model, voiceId, options))

    // Drop trailing slashes so a base such as ".../minimax/" does not yield "//v1/t2a_v2".
    const baseUrl = (config.minimaxApiBase || 'https://api.yunwu.ai/minimax').replace(/\/+$/, '')
    const url = new URL(`${baseUrl}/v1/t2a_v2`)

    const reqOptions = {
      hostname: url.hostname,
      // Honour a non-default port and any query string carried by the configured base URL.
      port: url.port || undefined,
      path: `${url.pathname}${url.search}`,
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(payload),
      },
    }

    let settled = false
    let timer
    // Single exit point: settles the promise at most once and always clears the timer.
    const settle = (finish, value) => {
      if (settled) return
      settled = true
      clearTimeout(timer)
      finish(value)
    }

    const req = https.request(reqOptions, (res) => {
      // Keep raw Buffers and decode once at the end; decoding chunk by chunk can
      // split a multi-byte UTF-8 character across two chunks.
      const chunks = []

      res.on('data', (chunk) => { chunks.push(chunk) })

      res.on('end', () => {
        if (settled) return

        let audio
        try {
          audio = decodeTtsAudio(Buffer.concat(chunks).toString('utf-8'), res.statusCode)
        } catch (e) {
          settle(reject, e)
          return
        }

        try {
          fs.writeFileSync(filePath, audio)
          settle(resolve, { filePath, duration: getAudioDuration(filePath) })
        } catch (e) {
          // A disk problem is not a response-parsing problem; report it as such.
          settle(reject, new Error(`MiniMax TTS 写入音频失败: ${e.message}`))
        }
      })

      res.on('error', (e) => settle(reject, e))
    })

    // Armed only after the request exists, so a synchronous throw from
    // https.request cannot leave a pending 90 s timer behind.
    timer = setTimeout(() => {
      if (settled) return
      req.destroy()
      settle(reject, new Error('MiniMax TTS 超时 (90s)'))
    }, 90000)

    req.on('error', (e) => settle(reject, new Error(`MiniMax TTS 请求失败: ${e.message}`)))

    req.write(payload)
    req.end()
  })
}

/**
 * Synthesize several segments one after another, pausing 300 ms between
 * consecutive requests.
 *
 * A failing segment never aborts the batch: it is recorded with an empty
 * `audio`, a `duration` of 0 and the error message, and processing continues.
 *
 * @param {Iterable<{id: (number|string), text: string}>} segments - Segments to synthesize.
 * @param {object} [options] - Passed through to `synthesize`; `id` is overridden
 *   with each segment's id.
 * @returns {Promise<Array<{id: *, text: *, audio: string, duration: number, error?: string}>>}
 *   One entry per segment, in input order. `duration` is in seconds, rounded to
 *   three decimals.
 */
async function synthesizeBatch(segments, options = {}) {
  const results = []
  let isFirst = true
  for (const seg of segments) {
    // Pause between requests only; nothing follows the last segment, so no trailing wait.
    if (!isFirst) await new Promise(r => setTimeout(r, 300))
    isFirst = false

    try {
      // The progress line lives inside the try so a segment without string text
      // is recorded as a failed segment instead of rejecting the whole batch.
      console.error(`  合成 #${seg.id}: ${String(seg.text ?? '').substring(0, 30)}...`)
      const { filePath, duration } = await synthesize(seg.text, { ...options, id: seg.id })
      results.push({ id: seg.id, text: seg.text, audio: filePath, duration: Math.round(duration * 1000) / 1000 })
    } catch (err) {
      results.push({ id: seg?.id, text: seg?.text, audio: '', duration: 0, error: err.message })
    }
  }
  return results
}

/**
 * CLI entry point.
 *
 * Reads the JSON file named by the first command-line argument, synthesizes
 * every entry of its `segments` array and prints `{ segments: [...] }` as
 * pretty-printed JSON on stdout. When no argument is given, a usage message
 * with an example input is printed to stderr and the process exits with code 1.
 */
async function main() {
  const [, , inputJson] = process.argv

  if (!inputJson) {
    const exampleInput = { segments: [{ id: 1, text: '文案' }], voice: 'voice_id', output_dir: './audio' }
    console.error('用法: node minimax-tts.js <input.json>')
    console.error(JSON.stringify(exampleInput, null, 2))
    process.exit(1)
  }

  const rawInput = fs.readFileSync(inputJson, 'utf-8')
  const input = JSON.parse(rawInput)

  const batchOptions = {
    voice: input.voice,
    outputDir: input.output_dir || './audio',
  }
  const results = await synthesizeBatch(input.segments, batchOptions)

  const report = JSON.stringify({ segments: results }, null, 2)
  process.stdout.write(report + '\n')
}

if (require.main === module) {
  main().catch(err => { console.error('合成失败:', err.message); process.exit(1) })
}

module.exports = { synthesize, synthesizeBatch }