feat(video-from-script): 添加 TTS 音色管理和解析功能

- 在 config.json 中添加 `ttsVoices` 音色库，支持音色名称到 ID 的映射
- 实现 `resolveVoice` 函数，将音色名称解析为实际 ID
- 更新账号系统和批量管道，支持通过音色名称配置 TTS 语音
- Excel 导入和 CLI 参数新增音色字段，支持按行指定不同音色
2026-05-08 23:53:37 +08:00
parent 4a15e38169
commit 18fce1b5a1
12 changed files with 66 additions and 25 deletions
--- a/.claude/skills/config.json
+++ b/.claude/skills/config.json
@@ -32,6 +32,10 @@
  "ttsApiBaseUrl": "https://dashscope.aliyuncs.com/api/v1",
  "ttsApiKey": "sk-1c503705b0f844a6b4f2386f6c1cc35b",
  "ttsModel": "cosyvoice-v3.5-plus",
-  "ttsVoice": "cosyvoice-v3.5-plus-bailian-fa8787c0f70b4ba2a907c35511e6a6f6",
+  "ttsVoice": "斯内普",
-  "ttsLanguage": "Chinese"
+  "ttsLanguage": "Chinese",
  "ttsVoices": {
    "斯内普": "cosyvoice-v3.5-plus-bailian-fa8787c0f70b4ba2a907c35511e6a6f6",
    "布拉德": "cosyvoice-v3.5-plus-bailian-574be4b7013a4e1f924de08fa8b9bdef"
  }
 }
--- a/.claude/skills/video-from-script/SKILL.md
+++ b/.claude/skills/video-from-script/SKILL.md
@@ -56,7 +56,11 @@ B 模式又分两种：**单图模式**（1 图 → 1 段视频）/ **首尾帧
 3. 账号：扫描 accounts/*/account.json → 展示可用账号 → 用户选
   → 未指定让选，不匹配告知并问是否新建
-4. 参数：画幅、生图模型、(B 模式)视频模型 — 优先从 account.json 继承
+4. 音色：读取 config.json 的 ttsVoices 音色库，展示可用音色让用户选
   → 默认用 account.json 的 ttsVoice，未指定则用 config.json 全局 ttsVoice
   → 用户也可指定音色 ID
 5. 参数：画幅、生图模型、(B 模式)视频模型 — 优先从 account.json 继承
 ```
 → 5 项确认后，输出执行计划让用户最终确认。用户说"开始"才进入 Step 0。
--- a/.claude/skills/video-from-script/references/account-creation.md
+++ b/.claude/skills/video-from-script/references/account-creation.md
@@ -116,7 +116,7 @@ digraph creation_flow {
 | # | 问题 | 默认值 | 说明 |
 |---|------|--------|------|
-| 12 | TTS 音色？ | config.json 全局 ttsVoice | account.json 的 ttsVoice，留空用全局默认 |
+| 12 | TTS 音色？ | config.json 全局 ttsVoice | account.json 的 ttsVoice。从 config.json 的 `ttsVoices` 音色库中选择（如"斯内普"、"布拉德"），也可直接填音色 ID |
 | 13 | TTS 语气指令？ | 无 | account.json 的 ttsInstruction，描述期望的语气风格 |
 | 14 | 背景音乐偏好？ | 无 | account.json 的 capcut.defaultBGM。提供 URL 或描述风格，Agent 辅助查找 |
--- a/.claude/skills/video-from-script/references/account-system.md
+++ b/.claude/skills/video-from-script/references/account-system.md
@@ -49,7 +49,7 @@ accounts/                             # 项目根目录下
      ]
    }
  },
-  "ttsVoice": "cosyvoice-v3.5-plus-bailian-xxx",
+  "ttsVoice": "斯内普",
  "ttsInstruction": "用冷静理性的男性声音朗读，语速适中",
  "storyboardPrompt": "prompts/分镜.md",
  "imageStylePrompt": "prompts/图片提示词.md",
@@ -108,7 +108,7 @@ accounts/                             # 项目根目录下
 | `videoModel` | string | 默认视频模型（`veo3-fast` / `grok-video-3` / `kling`） |
 | `batchSize` | number | 默认批量生成数量 |
 | `styles` | object | 命名风格预设，每项含 `references` 数组 |
-| `ttsVoice` | string | TTS 音色 ID，留空用 config.json 全局默认 |
+| `ttsVoice` | string | TTS 音色名称（如"斯内普"）或音色 ID，留空用 config.json 全局默认。可用音色见 config.json 的 `ttsVoices` |
 | `ttsInstruction` | string | TTS 语气指令（描述期望的语气、语速、情感） |
 | `storyboardPrompt` | string | 分镜提示词模板路径（相对于账号目录） |
 | `imageStylePrompt` | string | 图片提示词模板路径（相对于账号目录） |
--- a/.claude/skills/video-from-script/scripts/batch-pipeline.js
+++ b/.claude/skills/video-from-script/scripts/batch-pipeline.js
@@ -15,7 +15,7 @@
 const fs = require('fs')
 const path = require('path')
-const { SKILLS_DIR, ACCOUNTS_DIR } = require('./lib/pipeline-utils')
+const { SKILLS_DIR, ACCOUNTS_DIR, loadConfig, resolveVoice } = require('./lib/pipeline-utils')
 // output/ 在项目根的父级（美图/output/）
 const OUTPUT_BASE = path.join(SKILLS_DIR, '..', '..', '..', 'output')
@@ -30,6 +30,7 @@ function parseArgs(argv) {
    if (argv[i] === '--file' && argv[i + 1]) args.file = argv[++i]
    else if (argv[i] === '--account' && argv[i + 1]) args.account = argv[++i]
    else if (argv[i] === '--mode' && argv[i + 1]) args.mode = argv[++i]
    else if (argv[i] === '--voice' && argv[i + 1]) args.voice = argv[++i]
    else if (argv[i] === '--row' && argv[i + 1]) args.row = parseInt(argv[++i])
    else if (argv[i] === '--status' && argv[i + 1]) args.status = argv[++i]
    else if (argv[i] === '--manifest-path' && argv[i + 1]) args.manifestPath = argv[++i]
@@ -80,6 +81,7 @@ function cmdInit(args) {
  const defaultAccount = args.account || ''
  const defaultMode = args.mode || 'single'
  const defaultVoice = args.voice || ''
  // 构建 items + 提取脚本
  const items = []
@@ -89,6 +91,7 @@ function cmdInit(args) {
    const title = extractField(row, ['选题', '标题', 'title', 'name']) || `视频${i + 1}`
    const account = extractField(row, ['账号', 'account']) || defaultAccount
    const mode = extractField(row, ['模式', 'mode']) || defaultMode
    const voiceName = extractField(row, ['音色', 'voice']) || defaultVoice
    if (!script || !script.trim()) {
      console.warn(`  ⚠ 第 ${i + 2} 行（${title}）脚本为空，跳过`)
@@ -98,11 +101,15 @@ function cmdInit(args) {
    const scriptFile = path.join(scriptsDir, `row_${String(i + 1).padStart(3, '0')}.txt`)
    fs.writeFileSync(scriptFile, script.trim(), 'utf-8')
    // 解析音色名称 → ID
    const resolvedVoice = voiceName ? resolveVoice(voiceName) : ''
    items.push({
      row: i + 1,
      title,
      account: account || defaultAccount,
      mode: mode || defaultMode,
      voice: resolvedVoice,
      scriptFile: `scripts/row_${String(i + 1).padStart(3, '0')}.txt`,
      status: 'pending',
      manifestPath: null,
@@ -122,7 +129,7 @@ function cmdInit(args) {
  const batchManifest = {
    source: path.basename(filePath),
    createdAt: new Date().toISOString(),
-    defaults: { account: defaultAccount, mode: defaultMode },
+    defaults: { account: defaultAccount, mode: defaultMode, voice: defaultVoice ? resolveVoice(defaultVoice) : '' },
    stats: calcStats(items),
    items,
  }
@@ -135,6 +142,7 @@ function cmdInit(args) {
  console.log(`  总数: ${items.length}`)
  console.log(`  默认账号: ${defaultAccount || '(未指定，需每行填写)'}`)
  console.log(`  默认模式: ${defaultMode}`)
  console.log(`  默认音色: ${defaultVoice || '(用账号配置)'}`)
  console.log(`  脚本目录: ${scriptsDir}/`)
  console.log()
 }
@@ -183,14 +191,14 @@ function cmdStatus(args) {
  if (grouped.pending.length > 0) {
    console.log(`  ⏳ 待处理 (${grouped.pending.length}):`)
    for (const it of grouped.pending) {
-      console.log(`    #${it.row} ${it.title} (账号: ${it.account || '未指定'}, 模式: ${it.mode})`)
+      console.log(`    #${it.row} ${it.title} (账号: ${it.account || '未指定'}, 模式: ${it.mode}, 音色: ${it.voice || '账号默认'})`)
    }
  }
  // 输出下一个待处理的行号（方便 AI agent 消费）
  const next = batch.items.find(it => it.status === 'pending')
  if (next) {
-    console.log(`\n  ▶ 下一条: #${next.row} (账号: ${next.account}, 模式: ${next.mode})`)
+    console.log(`\n  ▶ 下一条: #${next.row} (账号: ${next.account}, 模式: ${next.mode}, 音色: ${next.voice || '账号默认'})`)
    console.log(`    脚本文件: ${path.resolve(batchDir, next.scriptFile)}`)
  }
@@ -271,6 +279,7 @@ function cmdNext(args) {
    title: item.title,
    account: item.account,
    mode: item.mode,
    voice: item.voice || '',
    scriptFile: path.resolve(batchDir, item.scriptFile),
  }))
 }
@@ -431,7 +440,7 @@ function main() {
    console.log('批量视频生产编排器')
    console.log('')
    console.log('用法:')
-    console.log('  batch-pipeline.js init --file <xlsx/csv> [--account <账号>] [--mode <single|framePair>]')
+    console.log('  batch-pipeline.js init --file <xlsx/csv> [--account <账号>] [--mode <single|framePair>] [--voice <音色>]')
    console.log('  batch-pipeline.js status --file <batch-manifest.json>')
    console.log('  batch-pipeline.js next --file <batch-manifest.json>')
    console.log('  batch-pipeline.js mark --file <...> --row <N> --status <pending|processing|completed|failed> [--manifest-path <path>] [--error <msg>]')
@@ -443,6 +452,7 @@ function main() {
    console.log('  脚本/文案/旁白 — 口播文案（必填）')
    console.log('  账号/account — 账号ID（可选，可由 --account 指定默认值）')
    console.log('  模式/mode — single|framePair（可选，可由 --mode 指定默认值）')
    console.log('  音色/voice — 音色名称或ID（可选，可由 --voice 指定默认值）')
  }
 }
--- a/.claude/skills/video-from-script/scripts/lib/cmd-init.js
+++ b/.claude/skills/video-from-script/scripts/lib/cmd-init.js
@@ -6,7 +6,7 @@
 const fs = require('fs')
 const path = require('path')
-const { loadAccountConfig, saveManifest, ensureDir, slugify, ACCOUNTS_DIR, SKILLS_DIR } = require('./pipeline-utils')
+const { loadAccountConfig, loadConfig, resolveVoice, saveManifest, ensureDir, slugify, ACCOUNTS_DIR, SKILLS_DIR } = require('./pipeline-utils')
 function initManifest(options) {
  const { account: accountId, mode, items: itemsJson, itemsFile } = options
@@ -17,6 +17,7 @@ function initManifest(options) {
  }
  const accountConfig = loadAccountConfig(accountId)
  const globalConfig = loadConfig()
  // 解析 items
  let rawItems
@@ -123,7 +124,8 @@ function initManifest(options) {
    format: options.format || accountConfig.defaultFormat || '9:16',
    mode: resolvedMode,
    references,
-    ...(accountConfig.ttsVoice ? { ttsVoice: accountConfig.ttsVoice } : {}),
+    ...(accountConfig.ttsVoice ? { ttsVoice: resolveVoice(accountConfig.ttsVoice, globalConfig) } : {}),
    ...(options.ttsVoice ? { ttsVoice: resolveVoice(options.ttsVoice, globalConfig) } : {}),
    ...(accountConfig.ttsInstruction ? { ttsInstruction: accountConfig.ttsInstruction } : {}),
    // 铁律：ttsRate 写死 1.15x，不允许配置覆盖（除非显式传入）
    ttsRate: options.ttsRate || 1.15,
--- a/.claude/skills/video-from-script/scripts/lib/pipeline-utils.js
+++ b/.claude/skills/video-from-script/scripts/lib/pipeline-utils.js
@@ -8,11 +8,12 @@ const fs = require('fs')
 const path = require('path')
 // 路径常量（基于 lib/ 的父目录 scripts/）
-const SCRIPTS_DIR = path.join(__dirname, '..')
+const SCRIPTS_DIR = path.join(__dirname, '..')                // scripts/
-const SKILLS_DIR = path.join(SCRIPTS_DIR, '..')
+const SKILLS_DIR = path.join(SCRIPTS_DIR, '..')                // video-from-script/
-const PROJECT_ROOT = path.join(SKILLS_DIR, '..', '..')
+const SKILL_PARENT_DIR = path.join(SKILLS_DIR, '..')           // skills/
-const CONFIG_PATH = path.join(SKILLS_DIR, 'config.json')
+const PROJECT_ROOT = path.join(SKILLS_DIR, '..', '..')         // .claude/
-const ACCOUNTS_DIR = path.join(PROJECT_ROOT, '..', 'accounts')
+const CONFIG_PATH = path.join(SKILL_PARENT_DIR, 'config.json') // skills/config.json
 const ACCOUNTS_DIR = path.join(PROJECT_ROOT, '..', 'accounts') // 美图/accounts
 // ============================================================================
 // 配置 & Manifest
@@ -22,6 +23,15 @@ function loadConfig() {
  return JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf-8'))
 }
 /**
  * Resolve a TTS voice reference (library name or raw voice ID) to a voice ID.
  *
  * If `voice` is a key of the `ttsVoices` mapping in the config, the mapped
  * value is returned; otherwise `voice` is returned unchanged.
  *
  * @param {string} voice - Voice name or voice ID. Falsy values are returned as-is.
  * @param {object} [config] - Config object; when omitted, loadConfig() is called.
  * @returns {string} The mapped voice ID, or the original `voice` when no mapping applies.
  */
 function resolveVoice(voice, config) {
   if (!voice) return voice
   const voices = (config || loadConfig()).ttsVoices || {}
   // Only honour the mapping's own keys: a plain `voices[voice]` lookup would
   // also hit inherited members (e.g. "constructor", "toString", "__proto__")
   // and return a function/object instead of a voice ID.
   const isKnownName = Object.prototype.hasOwnProperty.call(voices, voice)
   return (isKnownName && voices[voice]) || voice
 }
 /**
  * Read the file at `manifestPath` as UTF-8 text and parse it as JSON.
  *
  * @param {string} manifestPath - Path of the manifest file to read.
  * @returns {*} The parsed JSON value.
  */
 function loadManifest(manifestPath) {
   const rawText = fs.readFileSync(manifestPath, 'utf-8')
   const parsed = JSON.parse(rawText)
   return parsed
 }
@@ -221,6 +231,7 @@ module.exports = {
  CONFIG_PATH,
  ACCOUNTS_DIR,
  loadConfig,
  resolveVoice,
  loadManifest,
  saveManifest,
  loadAccountConfig,
--- a/.claude/skills/video-from-script/scripts/pipeline.js
+++ b/.claude/skills/video-from-script/scripts/pipeline.js
@@ -168,6 +168,7 @@ function parseArgs(argv) {
    else if (argv[i] === '--format' && argv[i + 1]) args.format = argv[++i]
    else if (argv[i] === '--image-model' && argv[i + 1]) args.imageModel = argv[++i]
    else if (argv[i] === '--video-model' && argv[i + 1]) args.videoModel = argv[++i]
    else if (argv[i] === '--tts-voice' && argv[i + 1]) args.ttsVoice = argv[++i]
    else if (argv[i] === '--references' && argv[i + 1]) args.references = argv[++i]
    else if (argv[i] === '--all') args.all = true
    else if (!args.command) args.command = argv[i]
@@ -225,7 +226,7 @@ async function main() {
  console.log('用法:')
  console.log('  pipeline.js create-account --id <id> --name <名称> [--desc ...] [--references file1,file2]')
  console.log('  pipeline.js validate-account --account <id>')
-  console.log('  pipeline.js init --account <id> --mode <single|framePair> --items <JSON> [--items-file <path>] [--image-model gemini|gpt-image|mj] [--video-model veo3-fast|grok|kling] [--format 9:16]')
+  console.log('  pipeline.js init --account <id> --mode <single|framePair> --items <JSON> [--items-file <path>] [--image-model gemini|gpt-image|mj] [--video-model veo3-fast|grok|kling] [--format 9:16] [--tts-voice <音色>]')
  console.log('  pipeline.js validate --manifest <path>')
  console.log('  pipeline.js confirm --manifest <path> --all')
  console.log('  pipeline.js confirm --manifest <path> --items 1,3,5')
--- a/.claude/skills/video-from-script/scripts/qwen-tts.js
+++ b/.claude/skills/video-from-script/scripts/qwen-tts.js
@@ -37,6 +37,15 @@ function loadConfig() {
  return JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf-8'))
 }
 /**
  * Resolve a TTS voice reference (library name or raw voice ID) to a voice ID.
  *
  * If `voice` is a key of the `ttsVoices` mapping in the config, the mapped
  * value is returned; otherwise `voice` is returned unchanged.
  *
  * @param {string} voice - Voice name or voice ID. Falsy values are returned as-is.
  * @param {object} [config] - Config object; when omitted, loadConfig() is called
  *   instead of throwing on an undefined config.
  * @returns {string} The mapped voice ID, or the original `voice` when no mapping applies.
  */
 function resolveVoice(voice, config) {
   if (!voice) return voice
   const voices = (config || loadConfig()).ttsVoices || {}
   // Only honour the mapping's own keys: a plain `voices[voice]` lookup would
   // also hit inherited members (e.g. "constructor", "toString", "__proto__")
   // and return a function/object instead of a voice ID.
   const isKnownName = Object.prototype.hasOwnProperty.call(voices, voice)
   return (isKnownName && voices[voice]) || voice
 }
 function getAudioDuration(filePath) {
  try {
    const out = execFileSync('ffprobe', [
@@ -64,7 +73,7 @@ function synthesize(text, options = {}) {
    if (!apiKey) { reject(new Error('ttsApiKey 未配置')); return }
    const model = options.model || config.ttsModel || 'cosyvoice-v3-flash'
-    const voice = options.voice || config.ttsVoice || 'longanyang'
+    const voice = resolveVoice(options.voice || config.ttsVoice, config) || 'longanyang'
    const instruction = options.instruction || config.ttsInstruction || ''
    const outputDir = options.outputDir || './audio'
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -39,13 +39,13 @@
 用户给一个 Excel/CSV，每行一条视频，Agent 逐条 spawn Worker 子 Agent 执行完整 pipeline。
-**Excel 格式：** `选题 | 脚本 | 账号 | 模式`（账号/模式可选，可由 CLI 参数指定默认值）
+**Excel 格式：** `选题 | 脚本 | 账号 | 模式 | 音色`（账号/模式/音色可选，可由 CLI 参数指定默认值）
 **CLI 命令：**
 ```bash
 # 1. 初始化批量任务
-node .claude/skills/video-from-script/scripts/batch-pipeline.js init --file <xlsx/csv> --account <默认账号> --mode single
+node .claude/skills/video-from-script/scripts/batch-pipeline.js init --file <xlsx/csv> --account <默认账号> --mode single --voice <默认音色>
 # 2. 查看进度
 node .claude/skills/video-from-script/scripts/batch-pipeline.js status --file output/batch_XXX/batch-manifest.json
@@ -63,7 +63,7 @@ node .claude/skills/video-from-script/scripts/batch-pipeline.js retry-failed --f
 **执行策略：Orchestrator-Worker**
 - **Orchestrator（主 Agent）**：读 batch-manifest 元数据，逐条 spawn Worker 子 Agent，收集结果
- **Worker（子 Agent）**：独立上下文，处理单条视频的完整流程（分镜 → 生图 → 生视频 → TTS → 成片）
+- **Worker（子 Agent）**：独立上下文，处理单条视频的完整流程（分镜 → 生图 → 生视频 → TTS → 成片）。Worker 调用 `pipeline.js init` 时通过 `--tts-voice` 传入音色
 - Orchestrator 上下文只存 batch-manifest 元数据，不读脚本正文
 - 脚本正文通过文件路径传给 Worker，Worker 自行 Read
 - 批量模式下人工确认环节自动跳过（`confirm --all`）
--- a/accounts/军事账号/account.json
+++ b/accounts/军事账号/account.json
@@ -14,7 +14,7 @@
      ]
    }
  },
-  "ttsVoice": "cosyvoice-v3.5-plus-bailian-fa8787c0f70b4ba2a907c35511e6a6f6",
+  "ttsVoice": "斯内普",
  "ttsInstruction": "用沉稳有力的男性声音朗读，语速适中偏慢，语气低沉、坚定、有压迫感，像是一个看透人性的老手在冷静地讲述残酷的真相",
  "storyboardPrompt": "prompts/分镜.md",
  "imageStylePrompt": "prompts/图片提示词.md",
--- a/accounts/执黑先行/account.json
+++ b/accounts/执黑先行/account.json
@@ -12,7 +12,7 @@
      "references": []
    }
  },
-  "ttsVoice": "cosyvoice-v3.5-plus-bailian-fa8787c0f70b4ba2a907c35511e6a6f6",
+  "ttsVoice": "斯内普",
  "ttsInstruction": "用沉稳有力的男性声音朗读，语速适中，语气坚定有力，像是一个有经历有力量的人在平静地讲述生活的方向",
  "storyboardPrompt": "prompts/分镜.md",
  "imageStylePrompt": "prompts/图片提示词.md",