feat(video-pipeline): 重构多阶段生成管线并集成 CosyVoice TTS
- 重写 `phase-images`：改为并发 3 张并行生成，每个 item 完成立即写入 manifest，支持 MJ task ID 恢复
- 重写 `phase-videos`：先恢复已有 task ID 再提交新任务（并发 3），支持中断恢复
- 迁移 TTS 引擎：从 Qwen-TTS HTTP 接口切换为 CosyVoice WebSocket 接口，支持音色/语气参数透传
- 精简账号系统：移除 `styles/` 目录、`taskId` 过滤和 `--id` 正则校验，`references` 改为顶层字段
- 调整 `slugify`：限制中文字符 5 个、其他 10 个，避免文件名过长
- 更新文档：`manifest-schema.md` 中 `narration` 改为完整原文案，`account-creation.md` 新增 TTS 配置项
- 配置更新：默认 TTS 模型切换为 `cosyvoice-v3.5-plus`，新增 `localAudio` 参数
This commit is contained in:
@@ -28,7 +28,7 @@
|
||||
"ossExpires": 31536000,
|
||||
"ttsApiBaseUrl": "https://dashscope.aliyuncs.com/api/v1",
|
||||
"ttsApiKey": "<YOUR_DASHSCOPE_API_KEY>",
|
||||
"ttsModel": "qwen3-tts-flash",
|
||||
"ttsVoice": "Cherry",
|
||||
"ttsModel": "cosyvoice-v3.5-plus",
|
||||
"ttsVoice": "cosyvoice-v3.5-plus-bailian-fa8787c0f70b4ba2a907c35511e6a6f6",
|
||||
"ttsLanguage": "Chinese"
|
||||
}
|
||||
|
||||
@@ -89,6 +89,8 @@ Phase 4: 技术配置(有默认值,可跳过)
|
||||
| 12 | 生图模型? | gemini | account.json 的 imageModel |
|
||||
| 13 | 视频模型? | veo3-fast | account.json 的 videoModel |
|
||||
| 14 | 参考图文件? | 无 | 用户稍后放入 references/ 目录,Agent 上传 OSS 回写 URL |
|
||||
| 15 | TTS 音色? | config.json 全局 ttsVoice | account.json 的 ttsVoice,留空用全局默认 |
|
||||
| 16 | TTS 语气指令? | 无 | account.json 的 ttsInstruction,描述期望的语气风格 |
|
||||
|
||||
**运动偏好 → 视频提示词映射**:
|
||||
|
||||
@@ -128,6 +130,8 @@ Phase 4: 技术配置(有默认值,可跳过)
|
||||
- 画幅:{Q11}
|
||||
- 生图模型:{Q12}
|
||||
- 视频模型:{Q13}
|
||||
- TTS音色:{Q15}
|
||||
- TTS语气:{Q16}
|
||||
|
||||
确认 "开始" → 创建账号
|
||||
修改 → 调整后重新输出
|
||||
|
||||
@@ -58,7 +58,7 @@ node pipeline.js validate --manifest <path>
|
||||
|------|------|
|
||||
| `status` | 固定写 `"pending"` |
|
||||
| `shotDesc` | 英文分镜描述(含隐性动势,40-80词) |
|
||||
| `narration` | 中文口播旁白(≤22字) |
|
||||
| `narration` | **该段的完整原文案**(不提炼,保留论证、例子、细节)|
|
||||
| `duration` | 计划视频时长(秒),来自分镜阶段 |
|
||||
| `imagePrompt` | 英文画面描述(给 Gemini/MJ),Step 2-A 生成 |
|
||||
| `directorRef` | 导演构图参考(tarantino / kitano / fincher),三层透传 |
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* CapCut 成片组装脚本
|
||||
*
|
||||
@@ -167,6 +166,7 @@ async function assemble(args) {
|
||||
apiKey = '',
|
||||
duration = '4',
|
||||
animation = 'kenburns-zoom',
|
||||
localAudio = 'false',
|
||||
} = args
|
||||
|
||||
if (!input) throw new Error('缺少 --input 参数')
|
||||
@@ -295,7 +295,7 @@ async function assemble(args) {
|
||||
// -- 添加 TTS 配音 --
|
||||
step++; console.log(`[${step}/${totalSteps}] 添加 TTS 配音...`)
|
||||
if (voiceover === 'true' && hasTTS) {
|
||||
await addVoiceover(draftUrl, inputDir, items, timeline)
|
||||
await addVoiceover(draftUrl, inputDir, items, timeline, localAudio === 'true')
|
||||
} else {
|
||||
console.log(' 跳过(无 TTS 音频或未启用)')
|
||||
}
|
||||
@@ -567,7 +567,7 @@ async function batchUploadAudio(inputDir, items) {
|
||||
// 添加 TTS 配音(每段音频按时间线排列)
|
||||
// ============================================================================
|
||||
|
||||
async function addVoiceover(draftUrl, inputDir, items, timeline) {
|
||||
async function addVoiceover(draftUrl, inputDir, items, timeline, localAudio = false) {
|
||||
// 收集需要上传的音频
|
||||
const audioItems = items.filter(item => item.audio)
|
||||
if (audioItems.length === 0) {
|
||||
@@ -576,8 +576,10 @@ async function addVoiceover(draftUrl, inputDir, items, timeline) {
|
||||
}
|
||||
|
||||
// 上传本地音频到 OSS(已有的 URL 直接通过)
|
||||
console.log(' 上传 TTS 音频到 OSS...')
|
||||
const audioUrls = await batchUploadAudio(inputDir, items)
|
||||
// 根据 localAudio 参数决定是否上传
|
||||
const audioUrls = localAudio
|
||||
? {} // 本地模式:不上传,使用本地路径
|
||||
: await batchUploadAudio(inputDir, items)
|
||||
|
||||
const audioInfos = []
|
||||
for (let i = 0; i < items.length; i++) {
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
/**
|
||||
* Command: create-account — 一键创建账号
|
||||
*
|
||||
* 创建目录结构 → 复制参考图 → 上传 OSS → 生成 account.json + 风格骨架
|
||||
* 创建目录结构 → 复制参考图 → 上传 OSS → 生成 account.json
|
||||
* prompt 文件通过 Agent Q&A 流程生成(见 account-creation.md)
|
||||
*/
|
||||
|
||||
const fs = require('fs')
|
||||
@@ -9,10 +10,9 @@ const path = require('path')
|
||||
const { ensureDir, log, ACCOUNTS_DIR } = require('./pipeline-utils')
|
||||
|
||||
async function createAccount(args) {
|
||||
const { id, name, desc, format, imageModel, videoModel, references } = args
|
||||
const { id, name, desc, format, imageModel, videoModel, references, ttsVoice, ttsInstruction } = args
|
||||
|
||||
if (!id) { console.error('错误: 必须指定 --id <账号ID>'); process.exit(1) }
|
||||
if (!/^[a-z0-9_-]+$/.test(id)) { console.error('错误: id 只允许小写字母/数字/短横线/下划线'); process.exit(1) }
|
||||
if (!name) { console.error('错误: 必须指定 --name <账号名>'); process.exit(1) }
|
||||
|
||||
const accountDir = path.join(ACCOUNTS_DIR, id)
|
||||
@@ -21,7 +21,6 @@ async function createAccount(args) {
|
||||
ensureDir(accountDir)
|
||||
ensureDir(path.join(accountDir, 'prompts'))
|
||||
ensureDir(path.join(accountDir, 'references'))
|
||||
ensureDir(path.join(accountDir, 'styles'))
|
||||
|
||||
// 复制参考图到 references/ 并上传 OSS
|
||||
const refs = (references || '').split(',').filter(Boolean)
|
||||
@@ -47,7 +46,6 @@ async function createAccount(args) {
|
||||
}
|
||||
|
||||
// 生成 account.json
|
||||
const styleName = args.style || id
|
||||
const accountConfig = {
|
||||
id,
|
||||
name,
|
||||
@@ -56,9 +54,12 @@ async function createAccount(args) {
|
||||
imageModel: imageModel || 'gemini',
|
||||
videoModel: videoModel || '',
|
||||
batchSize: 30,
|
||||
ttsVoice: ttsVoice || '',
|
||||
ttsInstruction: ttsInstruction || '',
|
||||
storyboardPrompt: 'prompts/分镜.md',
|
||||
imageStylePrompt: 'prompts/图片提示词.md',
|
||||
videoStylePrompt: 'prompts/视频提示词.md',
|
||||
references: uploadedRefs,
|
||||
capcut: {
|
||||
effects: [],
|
||||
filter: '',
|
||||
@@ -72,65 +73,15 @@ async function createAccount(args) {
|
||||
},
|
||||
}
|
||||
|
||||
if (uploadedRefs.length > 0) {
|
||||
accountConfig.styles = {
|
||||
[styleName]: { references: uploadedRefs },
|
||||
}
|
||||
}
|
||||
|
||||
const accountPath = path.join(accountDir, 'account.json')
|
||||
fs.writeFileSync(accountPath, JSON.stringify(accountConfig, null, 2), 'utf-8')
|
||||
|
||||
// 生成默认风格骨架
|
||||
const stylePath = path.join(accountDir, 'styles', `${styleName}.md`)
|
||||
const styleContent = [
|
||||
`# ${styleName}`,
|
||||
'',
|
||||
`${desc || name} 的视觉风格。`,
|
||||
'',
|
||||
'---',
|
||||
'',
|
||||
'## 图片提示词',
|
||||
'',
|
||||
'### 核心视觉要素',
|
||||
'',
|
||||
'(待填充:描述关键视觉元素)',
|
||||
'',
|
||||
'### 色调方案',
|
||||
'',
|
||||
'(待填充)',
|
||||
'',
|
||||
'### 图片 Prompt 模板',
|
||||
'',
|
||||
'(待填充)',
|
||||
'',
|
||||
'### 图片禁止项',
|
||||
'',
|
||||
'- 文字水印',
|
||||
'- 字幕覆盖',
|
||||
'',
|
||||
'---',
|
||||
'',
|
||||
'## 视频提示词',
|
||||
'',
|
||||
'### 运镜规则',
|
||||
'',
|
||||
'(待填充)',
|
||||
'',
|
||||
'### 视频 Prompt 模板',
|
||||
'',
|
||||
'(待填充)',
|
||||
'',
|
||||
].join('\n')
|
||||
fs.writeFileSync(stylePath, styleContent, 'utf-8')
|
||||
|
||||
console.log(`\n账号已创建: ${accountDir}`)
|
||||
console.log(` ID: ${id}`)
|
||||
console.log(` 名称: ${name}`)
|
||||
console.log(` 模型: ${accountConfig.imageModel} + ${accountConfig.videoModel || '(未指定)'}`)
|
||||
console.log(` 参考图: ${uploadedRefs.length} 张(${uploadedRefs.filter(r => r.url).length} 已上传)`)
|
||||
console.log(` 风格: ${styleName}`)
|
||||
console.log(`\n下一步: 编辑 ${stylePath} 完善提示词策略\n`)
|
||||
console.log(`\n下一步: 通过 Agent Q&A 流程生成 prompts/*.md(或手动创建)\n`)
|
||||
|
||||
return accountPath
|
||||
}
|
||||
|
||||
@@ -57,11 +57,9 @@ function initManifest(options) {
|
||||
}
|
||||
}
|
||||
|
||||
// 从 account.json 继承参考图
|
||||
const styles = accountConfig.styles || {}
|
||||
const firstStyleKey = Object.keys(styles)[0]
|
||||
const styleRefs = firstStyleKey ? (styles[firstStyleKey].references || []) : []
|
||||
const references = styleRefs.map(ref => {
|
||||
// 从 account.json 继承参考图(顶层 references)
|
||||
const accountRefs = accountConfig.references || []
|
||||
const references = accountRefs.map(ref => {
|
||||
const entry = {}
|
||||
if (ref.file) entry.file = path.join(ACCOUNTS_DIR, accountId, 'references', ref.file)
|
||||
if (ref.url) entry.url = ref.url
|
||||
@@ -88,11 +86,13 @@ function initManifest(options) {
|
||||
// 组装 manifest
|
||||
const manifest = {
|
||||
account: accountId,
|
||||
imageModel: accountConfig.imageModel || 'gemini',
|
||||
videoModel: accountConfig.videoModel || 'veo3-fast-frames',
|
||||
format: accountConfig.defaultFormat || '9:16',
|
||||
imageModel: options.imageModel || accountConfig.imageModel || 'gemini',
|
||||
videoModel: options.videoModel || accountConfig.videoModel || 'veo3-fast-frames',
|
||||
format: options.format || accountConfig.defaultFormat || '9:16',
|
||||
mode: resolvedMode,
|
||||
references,
|
||||
...(accountConfig.ttsVoice ? { ttsVoice: accountConfig.ttsVoice } : {}),
|
||||
...(accountConfig.ttsInstruction ? { ttsInstruction: accountConfig.ttsInstruction } : {}),
|
||||
items,
|
||||
}
|
||||
|
||||
|
||||
@@ -72,6 +72,24 @@ function validateAccount(accountId) {
|
||||
if (!config.imageModel) issues.push('缺少 imageModel')
|
||||
if (!config.defaultFormat) issues.push('缺少 defaultFormat')
|
||||
|
||||
// 检查 prompts 文件
|
||||
const promptFiles = [
|
||||
{ field: 'storyboardPrompt', label: '分镜' },
|
||||
{ field: 'imageStylePrompt', label: '图片提示词' },
|
||||
{ field: 'videoStylePrompt', label: '视频提示词' },
|
||||
]
|
||||
for (const { field, label } of promptFiles) {
|
||||
const relPath = config[field]
|
||||
if (!relPath) {
|
||||
issues.push(`缺少 ${field}(prompts 路径)`)
|
||||
} else {
|
||||
const absPath = path.join(accountDir, relPath)
|
||||
if (!fs.existsSync(absPath)) {
|
||||
issues.push(`${label}文件不存在: ${relPath}`)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const refDir = path.join(accountDir, 'references')
|
||||
const localRefs = fs.existsSync(refDir)
|
||||
? fs.readdirSync(refDir).filter(f => /\.(png|jpg|jpeg|webp)$/i.test(f))
|
||||
|
||||
@@ -23,19 +23,23 @@ async function phaseAssemble(manifest, manifestPath, options) {
|
||||
subtitles: mode === 'images' ? 'true' : 'false',
|
||||
voiceover: manifest.items.some(it => it.audio) ? 'true' : 'false',
|
||||
duration: '4',
|
||||
animation: 'kenburns-zoom',
|
||||
animation: capcutConfig.animation || 'kenburns-zoom',
|
||||
}
|
||||
|
||||
if (capcutConfig.defaultBGM) assembleArgs.bgm = capcutConfig.defaultBGM
|
||||
if (capcutConfig.effects) assembleArgs.effects = capcutConfig.effects.join(',')
|
||||
if (capcutConfig.filter) assembleArgs.filter = capcutConfig.filter
|
||||
|
||||
log('assemble', `模式: ${mode}, 字幕: true, 配音: ${assembleArgs.voiceover}`)
|
||||
log('assemble', `模式: ${mode}, 字幕: true, 配音: ${assembleArgs.voiceover}, 动画: ${assembleArgs.animation}`)
|
||||
|
||||
const { assemble } = require('../capcut_assemble')
|
||||
await assemble(assembleArgs)
|
||||
|
||||
log('assemble', '成片完成')
|
||||
try {
|
||||
const { assemble } = require('../capcut_assemble')
|
||||
await assemble(assembleArgs)
|
||||
log('assemble', '成片完成')
|
||||
} catch (err) {
|
||||
log('assemble', `成片失败: ${err.message}`)
|
||||
throw err
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = { phaseAssemble }
|
||||
|
||||
@@ -2,11 +2,15 @@
|
||||
* Phase: images — 图片生成
|
||||
*
|
||||
* 支持 Gemini / MJ / Kling 三种模型,含首尾帧模式
|
||||
* 并发生成,支持 task ID 恢复(MJ)
|
||||
*/
|
||||
|
||||
const fs = require('fs')
|
||||
const path = require('path')
|
||||
const { saveManifest, getReferences, ensureDir, renameGeneratedFile, log, getManifestDir } = require('./pipeline-utils')
|
||||
|
||||
const IMAGE_CONCURRENCY = 3
|
||||
|
||||
async function phaseImages(manifest, manifestPath, options) {
|
||||
const dir = getManifestDir(manifestPath)
|
||||
const imagesDir = path.join(dir, 'images')
|
||||
@@ -21,104 +25,172 @@ async function phaseImages(manifest, manifestPath, options) {
|
||||
let model = options.imageModel || manifest.imageModel || accountConfig.imageModel || 'gemini'
|
||||
const ratio = manifest.format || accountConfig.defaultFormat || '9:16'
|
||||
|
||||
// 首尾帧模式:MJ 降级为 Gemini(MJ 出4张候选图无法一一对应首尾帧)
|
||||
// 首尾帧模式:MJ 降级为 Gemini
|
||||
if (model === 'mj' && manifest.mode === 'framePair') {
|
||||
log('images', '首尾帧模式不支持 MJ,自动降级为 Gemini')
|
||||
model = 'gemini'
|
||||
}
|
||||
const refs = getReferences(manifest, accountConfig)
|
||||
|
||||
log('images', `共 ${items.length} 张, 模型: ${model}, 画幅: ${ratio}, 参考图: ${refs.localPaths.length}本地/${refs.urls.length}URL`)
|
||||
log('images', `共 ${items.length} 张, 模型: ${model}, 画幅: ${ratio}, 参考图: ${refs.localPaths.length}本地/${refs.urls.length}URL, 并发: ${IMAGE_CONCURRENCY}`)
|
||||
|
||||
for (let i = 0; i < items.length; i++) {
|
||||
const item = items[i]
|
||||
const idx = i + 1
|
||||
try {
|
||||
item.status = 'generating'
|
||||
saveManifest(manifestPath, manifest)
|
||||
// 分批并发处理
|
||||
for (let batchStart = 0; batchStart < items.length; batchStart += IMAGE_CONCURRENCY) {
|
||||
const batch = items.slice(batchStart, batchStart + IMAGE_CONCURRENCY)
|
||||
|
||||
let result
|
||||
if (model === 'gemini') {
|
||||
const { generate: geminiGen, edit: geminiEdit } = require('../gemini-image-generator')
|
||||
if (refs.localPaths.length > 0) {
|
||||
log('images', `[${idx}/${items.length}] Gemini 图生图: ${item.imagePrompt.substring(0, 60)}...`)
|
||||
result = await geminiEdit(item.imagePrompt, refs.localPaths, {
|
||||
outputDir: imagesDir,
|
||||
aspectRatio: ratio,
|
||||
})
|
||||
} else {
|
||||
log('images', `[${idx}/${items.length}] Gemini 文生图: ${item.imagePrompt.substring(0, 60)}...`)
|
||||
result = await geminiGen(item.imagePrompt, {
|
||||
outputDir: imagesDir,
|
||||
aspectRatio: ratio,
|
||||
})
|
||||
}
|
||||
if (result.savedFiles && result.savedFiles.length > 0) {
|
||||
item.file = renameGeneratedFile(
|
||||
path.relative(dir, result.savedFiles[0]).replace(/\\/g, '/'),
|
||||
dir, idx, item.narration || item.shotDesc, ''
|
||||
)
|
||||
}
|
||||
} else if (model === 'mj') {
|
||||
const { generate: mjGen } = require('../mj-image-generator')
|
||||
const mjOpts = { outputDir: imagesDir, aspectRatio: ratio, split: true }
|
||||
if (refs.urls.length > 0) {
|
||||
mjOpts.referenceImages = refs.urls
|
||||
mjOpts.styleWeight = 200
|
||||
}
|
||||
log('images', `[${idx}/${items.length}] MJ 生图: ${item.imagePrompt.substring(0, 60)}...`)
|
||||
result = await mjGen(item.imagePrompt, mjOpts)
|
||||
if (result.files && result.files.length > 0) {
|
||||
item.candidates = result.files.map((f, ci) =>
|
||||
renameGeneratedFile(
|
||||
path.relative(dir, f).replace(/\\/g, '/'),
|
||||
dir, idx, item.narration || item.shotDesc, `cand${ci + 1}`
|
||||
)
|
||||
)
|
||||
item.file = item.candidates[0]
|
||||
log('images', `[${idx}/${items.length}] ${result.files.length} 张候选,默认选第1张`)
|
||||
}
|
||||
} else if (model === 'kling') {
|
||||
const { generate: klingGen } = require('../kling-image-generator')
|
||||
const klingOpts = { outputDir: imagesDir, aspectRatio: ratio }
|
||||
if (refs.urls.length > 0) {
|
||||
klingOpts.styleImageUrl = refs.urls[0]
|
||||
}
|
||||
log('images', `[${idx}/${items.length}] 可灵生图: ${item.imagePrompt.substring(0, 60)}...`)
|
||||
result = await klingGen(item.imagePrompt, klingOpts)
|
||||
if (result.savedFiles && result.savedFiles.length > 0) {
|
||||
item.file = renameGeneratedFile(
|
||||
path.relative(dir, result.savedFiles[0]).replace(/\\/g, '/'),
|
||||
dir, idx, item.narration || item.shotDesc, ''
|
||||
)
|
||||
}
|
||||
} else {
|
||||
throw new Error(`不支持的模型: ${model}(支持: gemini, mj, kling)`)
|
||||
}
|
||||
const results = await Promise.allSettled(
|
||||
batch.map(async (item) => {
|
||||
const idx = item.id
|
||||
try {
|
||||
item.status = 'generating'
|
||||
saveManifest(manifestPath, manifest)
|
||||
|
||||
if (item.file) {
|
||||
item.status = 'done'
|
||||
log('images', `[${idx}/${items.length}] 完成: ${item.file}`)
|
||||
} else {
|
||||
item.status = 'failed'
|
||||
item.error = '生成器未返回文件'
|
||||
log('images', `[${idx}/${items.length}] 失败: 生成器未返回文件`)
|
||||
}
|
||||
let result
|
||||
if (model === 'gemini') {
|
||||
result = await generateGemini(item, idx, dir, imagesDir, ratio, refs)
|
||||
} else if (model === 'mj') {
|
||||
result = await generateMJ(item, idx, dir, imagesDir, ratio, refs, manifestPath)
|
||||
} else if (model === 'kling') {
|
||||
result = await generateKling(item, idx, dir, imagesDir, ratio, refs)
|
||||
} else {
|
||||
throw new Error(`不支持的模型: ${model}(支持: gemini, mj, kling)`)
|
||||
}
|
||||
|
||||
// 首尾帧模式:生成第二张图(lastFrame)
|
||||
if (item.status === 'done' && manifest.mode === 'framePair' && item.lastFramePrompt && !item.lastFrame) {
|
||||
await generateLastFrame(item, idx, items.length, manifest, dir, imagesDir, model, ratio, manifestPath)
|
||||
}
|
||||
} catch (err) {
|
||||
item.status = 'failed'
|
||||
item.error = err.message
|
||||
log('images', `[${idx}/${items.length}] 失败: ${err.message}`)
|
||||
}
|
||||
saveManifest(manifestPath, manifest)
|
||||
if (result.file) {
|
||||
item.file = result.file
|
||||
if (result.candidates) item.candidates = result.candidates
|
||||
item.status = 'done'
|
||||
log('images', `[${idx}] 完成: ${item.file}`)
|
||||
} else {
|
||||
item.status = 'failed'
|
||||
item.error = '生成器未返回文件'
|
||||
log('images', `[${idx}] 失败: 生成器未返回文件`)
|
||||
}
|
||||
// 每个 item 完成后立即写盘,防止崩溃丢失已完成的结果
|
||||
saveManifest(manifestPath, manifest)
|
||||
|
||||
// 首尾帧模式:生成第二张图
|
||||
if (item.status === 'done' && manifest.mode === 'framePair' && item.lastFramePrompt && !item.lastFrame) {
|
||||
await generateLastFrame(item, idx, manifest, dir, imagesDir, model, ratio, manifestPath)
|
||||
}
|
||||
|
||||
return { ok: true }
|
||||
} catch (err) {
|
||||
item.status = 'failed'
|
||||
item.error = err.message
|
||||
log('images', `[${idx}] 失败: ${err.message}`)
|
||||
saveManifest(manifestPath, manifest)
|
||||
return { ok: false, error: err.message }
|
||||
}
|
||||
})
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
async function generateLastFrame(item, idx, total, manifest, dir, imagesDir, model, ratio, manifestPath) {
|
||||
// ============================================================================
|
||||
// 各模型生成逻辑
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Generate a single image for a manifest item with the Gemini generator.
 *
 * Uses the generator's `edit` entry point when local reference images are
 * available and its `generate` entry point otherwise, then renames the first
 * saved file via `renameGeneratedFile`.
 *
 * @param {object} item - Manifest item; reads `imagePrompt`, `narration`, `shotDesc`.
 * @param {*} idx - Item identifier used in log lines and passed to `renameGeneratedFile`.
 * @param {string} dir - Manifest directory; saved paths are made relative to it.
 * @param {string} imagesDir - Output directory handed to the generator.
 * @param {string} ratio - Aspect ratio handed to the generator.
 * @param {{localPaths: string[]}} refs - Reference images; only `localPaths` is read here.
 * @returns {Promise<{file: (string|null)}>} Renamed file path, or `null` when nothing was saved.
 */
async function generateGemini(item, idx, dir, imagesDir, ratio, refs) {
  const { generate: textToImage, edit: imageToImage } = require('../gemini-image-generator')
  const hasLocalRefs = refs.localPaths.length > 0

  let output
  if (hasLocalRefs) {
    log('images', `[${idx}] Gemini 图生图: ${item.imagePrompt.substring(0, 60)}...`)
    output = await imageToImage(item.imagePrompt, refs.localPaths, {
      outputDir: imagesDir,
      aspectRatio: ratio,
    })
  } else {
    log('images', `[${idx}] Gemini 文生图: ${item.imagePrompt.substring(0, 60)}...`)
    output = await textToImage(item.imagePrompt, {
      outputDir: imagesDir,
      aspectRatio: ratio,
    })
  }

  const saved = output.savedFiles
  if (!saved || saved.length === 0) {
    return { file: null }
  }

  // Normalise Windows separators so the stored path uses forward slashes.
  const relativePath = path.relative(dir, saved[0]).replace(/\\/g, '/')
  const nameHint = item.narration || item.shotDesc
  return { file: renameGeneratedFile(relativePath, dir, idx, nameHint, '') }
}
|
||||
|
||||
/**
 * Generate the Midjourney candidate images for one manifest item, resuming an
 * interrupted task when the item still carries a task ID.
 *
 * Flow: (1) if `item.taskId` is set and the item is in `generating` state, poll
 * that task instead of submitting a new one; (2) otherwise, or when the resume
 * attempt fails, submit a new task and record its ID on the item; (3) download
 * the grid image, split it, and rename each part as a candidate.
 *
 * @param {object} item - Manifest item; reads `imagePrompt`, `narration`,
 *   `shotDesc`, `status`, and reads/writes/deletes `taskId`.
 * @param {*} idx - Item identifier used in log lines and passed to `renameGeneratedFile`.
 * @param {string} dir - Manifest directory; candidate paths are made relative to it.
 * @param {string} imagesDir - Directory receiving the grid image and the split files.
 * @param {string} ratio - Aspect ratio passed to `MJApi.submit`.
 * @param {{urls: string[]}} refs - Reference images; only `urls` is read here.
 * @param {string} manifestPath - Path given to `saveManifest` when persisting the task ID.
 * @param {object} [manifest] - Manifest object to persist right after submission.
 *   Optional for backward compatibility: the previous body referenced an
 *   undeclared `manifest` identifier here and threw a ReferenceError after every
 *   successful submit. When omitted, the task ID is only set on `item` and the
 *   caller is responsible for saving it.
 * @returns {Promise<{file: (string|null), candidates?: string[]}>} First candidate
 *   plus the full candidate list, or `{ file: null }` when no files were produced.
 */
async function generateMJ(item, idx, dir, imagesDir, ratio, refs, manifestPath, manifest) {
  const { MJApi, ImageUtils } = require('../mj-image-generator')
  const referenceImages = refs.urls.length > 0 ? refs.urls : []
  const styleWeight = 200

  // Poll a task to completion, download its grid image and split it.
  // The grid file is only an intermediate artefact, so it is removed even when
  // the download or split step throws.
  // NOTE(review): the name is millisecond-timestamp based; with concurrent items
  // a collision looks possible in principle — confirm whether a per-item suffix is needed.
  const fetchSplitFiles = async (taskId) => {
    const pollResult = await MJApi.poll(taskId)
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-')
    const gridFile = path.join(imagesDir, `${timestamp}_grid.png`)
    try {
      await ImageUtils.download(pollResult.imageUrl, gridFile)
      return await ImageUtils.split4(gridFile, imagesDir, timestamp)
    } finally {
      // force: true keeps a missing file (failed download) from masking the real error.
      fs.rmSync(gridFile, { force: true })
    }
  }

  let files
  let resolved = false

  // Try to resume an interrupted MJ task.
  if (item.taskId && item.status === 'generating') {
    try {
      log('images', `[${idx}] 恢复 MJ 任务: ${item.taskId}`)
      files = await fetchSplitFiles(item.taskId)
      resolved = true
      log('images', `[${idx}] MJ 任务恢复成功`)
    } catch (err) {
      log('images', `[${idx}] MJ 任务恢复失败: ${err.message},重新提交`)
      delete item.taskId
    }
  }

  // Fresh submission.
  if (!resolved) {
    log('images', `[${idx}] MJ 生图: ${item.imagePrompt.substring(0, 60)}...`)
    const taskId = await MJApi.submit(item.imagePrompt, { referenceImages, aspectRatio: ratio, styleWeight })
    item.taskId = taskId
    // Persist the task ID before the long poll so an interrupted run can resume it.
    if (manifest) saveManifest(manifestPath, manifest)
    files = await fetchSplitFiles(taskId)
  }

  const candidates = (files && files.length > 0)
    ? files.map((f, ci) =>
      renameGeneratedFile(
        path.relative(dir, f).replace(/\\/g, '/'),
        dir, idx, item.narration || item.shotDesc, `cand${ci + 1}`
      )
    )
    : null

  // The task is finished either way; drop the ID so it is not resumed again.
  delete item.taskId

  if (candidates && candidates.length > 0) {
    log('images', `[${idx}] ${candidates.length} 张候选,默认选第1张`)
    return { file: candidates[0], candidates }
  }
  return { file: null }
}
|
||||
|
||||
/**
 * Generate a single image for a manifest item with the Kling generator.
 *
 * Passes the first reference URL (if any) as `styleImageUrl`, then renames the
 * first saved file via `renameGeneratedFile`.
 *
 * @param {object} item - Manifest item; reads `imagePrompt`, `narration`, `shotDesc`.
 * @param {*} idx - Item identifier used in log lines and passed to `renameGeneratedFile`.
 * @param {string} dir - Manifest directory; saved paths are made relative to it.
 * @param {string} imagesDir - Output directory handed to the generator.
 * @param {string} ratio - Aspect ratio handed to the generator.
 * @param {{urls: string[]}} refs - Reference images; only `urls` is read here.
 * @returns {Promise<{file: (string|null)}>} Renamed file path, or `null` when nothing was saved.
 */
async function generateKling(item, idx, dir, imagesDir, ratio, refs) {
  const { generate: runKling } = require('../kling-image-generator')

  const requestOptions = {
    outputDir: imagesDir,
    aspectRatio: ratio,
    ...(refs.urls.length > 0 ? { styleImageUrl: refs.urls[0] } : {}),
  }

  log('images', `[${idx}] 可灵生图: ${item.imagePrompt.substring(0, 60)}...`)
  const output = await runKling(item.imagePrompt, requestOptions)

  const saved = output.savedFiles
  if (!saved || saved.length === 0) {
    return { file: null }
  }

  // Normalise Windows separators so the stored path uses forward slashes.
  const relativePath = path.relative(dir, saved[0]).replace(/\\/g, '/')
  const nameHint = item.narration || item.shotDesc
  return { file: renameGeneratedFile(relativePath, dir, idx, nameHint, '') }
}
|
||||
|
||||
async function generateLastFrame(item, idx, manifest, dir, imagesDir, model, ratio, manifestPath) {
|
||||
try {
|
||||
item.status = 'generating'
|
||||
saveManifest(manifestPath, manifest)
|
||||
@@ -131,14 +203,6 @@ async function generateLastFrame(item, idx, total, manifest, dir, imagesDir, mod
|
||||
outputDir: imagesDir,
|
||||
aspectRatio: ratio,
|
||||
})
|
||||
} else if (model === 'mj') {
|
||||
const { generate: mjGen } = require('../mj-image-generator')
|
||||
const mjOpts = { outputDir: imagesDir, aspectRatio: ratio, split: false }
|
||||
if (item.url) {
|
||||
mjOpts.referenceImages = [item.url]
|
||||
mjOpts.styleWeight = 200
|
||||
}
|
||||
lastResult = await mjGen(item.lastFramePrompt, mjOpts)
|
||||
} else if (model === 'kling') {
|
||||
const { generate: klingGen } = require('../kling-image-generator')
|
||||
lastResult = await klingGen(item.lastFramePrompt, {
|
||||
@@ -156,17 +220,17 @@ async function generateLastFrame(item, idx, total, manifest, dir, imagesDir, mod
|
||||
dir, idx, item.narration || item.shotDesc, 'last'
|
||||
)
|
||||
item.status = 'done'
|
||||
log('images', `[${idx}/${total}] lastFrame 完成: ${item.lastFrame}`)
|
||||
log('images', `[${idx}] lastFrame 完成: ${item.lastFrame}`)
|
||||
} else {
|
||||
item.status = 'failed'
|
||||
item.error = 'lastFrame 生成器未返回文件'
|
||||
log('images', `[${idx}/${total}] lastFrame 失败: 未返回文件`)
|
||||
log('images', `[${idx}] lastFrame 失败: 未返回文件`)
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
item.status = 'failed'
|
||||
item.error = `lastFrame 失败: ${err.message}`
|
||||
log('images', `[${idx}/${total}] lastFrame 失败: ${err.message}`)
|
||||
log('images', `[${idx}] lastFrame 失败: ${err.message}`)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
const path = require('path')
|
||||
const { saveManifest, ensureDir, log, getManifestDir } = require('./pipeline-utils')
|
||||
|
||||
async function phaseTts(manifest, manifestPath) {
|
||||
async function phaseTts(manifest, manifestPath, options = {}) {
|
||||
const dir = getManifestDir(manifestPath)
|
||||
const audioDir = path.join(dir, 'audio')
|
||||
ensureDir(audioDir)
|
||||
@@ -28,6 +28,8 @@ async function phaseTts(manifest, manifestPath) {
|
||||
const { filePath, duration } = await synthesize(item.narration || item.text, {
|
||||
outputDir: audioDir,
|
||||
id: item.id || idx,
|
||||
voice: manifest.ttsVoice || undefined,
|
||||
instruction: manifest.ttsInstruction || undefined,
|
||||
})
|
||||
item.audio = path.relative(dir, filePath).replace(/\\/g, '/')
|
||||
item.audioDuration = Math.round(duration * 1000) / 1000
|
||||
|
||||
@@ -2,8 +2,10 @@
|
||||
* Phase: videos — 视频生成(VEO / Grok / Kling)
|
||||
*
|
||||
* 图生视频,批量提交,生成后自动上传 OSS
|
||||
* 支持 task ID 恢复:中断后重跑时优先恢复已有任务
|
||||
*/
|
||||
|
||||
const fs = require('fs')
|
||||
const path = require('path')
|
||||
const { saveManifest, ensureDir, log, getManifestDir } = require('./pipeline-utils')
|
||||
|
||||
@@ -21,63 +23,140 @@ async function phaseVideos(manifest, manifestPath, options) {
|
||||
if (items.length === 0) { log('videos', '无待处理 item,跳过'); return }
|
||||
|
||||
// 选择生成器
|
||||
let generator
|
||||
let Api, pollFn
|
||||
const modelLower = videoModel.toLowerCase()
|
||||
if (modelLower.includes('grok')) {
|
||||
generator = require('../grok-video-generator')
|
||||
const gen = require('../grok-video-generator')
|
||||
Api = gen.GrokApi; pollFn = gen.pollWithRetry
|
||||
} else if (modelLower.includes('kling')) {
|
||||
generator = require('../kling-video-generator')
|
||||
const gen = require('../kling-video-generator')
|
||||
Api = gen.KlingApi; pollFn = gen.pollWithRetry
|
||||
} else {
|
||||
generator = require('../veo-video-generator')
|
||||
const gen = require('../veo-video-generator')
|
||||
Api = gen.VeoApi; pollFn = gen.pollWithRetry
|
||||
}
|
||||
|
||||
const ratio = manifest.format || '9:16'
|
||||
log('videos', `共 ${items.length} 个, 模型: ${videoModel}`)
|
||||
|
||||
const tasks = items.map((item, i) => {
|
||||
const task = {
|
||||
id: item.id || i + 1,
|
||||
prompt: item.videoPrompt,
|
||||
image: item.url,
|
||||
outputDir: videosDir,
|
||||
}
|
||||
if (item.lastFrameUrl) {
|
||||
task.images = [item.url, item.lastFrameUrl]
|
||||
task.lastFrameUrl = item.lastFrameUrl
|
||||
// Phase 1: 恢复已有任务(有 videoTaskId 的 item)
|
||||
const recovered = []
|
||||
const needSubmit = []
|
||||
|
||||
for (const item of items) {
|
||||
if (item.videoTaskId) {
|
||||
recovered.push(item)
|
||||
} else {
|
||||
task.images = [item.url]
|
||||
needSubmit.push(item)
|
||||
}
|
||||
return task
|
||||
})
|
||||
}
|
||||
|
||||
try {
|
||||
const results = await generator.batchGenerate(tasks, {
|
||||
videoModel,
|
||||
aspectRatio: manifest.format || '9:16',
|
||||
outputDir: videosDir,
|
||||
skipManifestWrite: true,
|
||||
// 轮询恢复的任务
|
||||
if (recovered.length > 0) {
|
||||
log('videos', `尝试恢复 ${recovered.length} 个中断任务...`)
|
||||
await Promise.allSettled(
|
||||
recovered.map(async (item) => {
|
||||
try {
|
||||
log('videos', ` 恢复 item ${item.id}: ${item.videoTaskId}`)
|
||||
const result = await pollFn(item.videoTaskId, item.videoPrompt, {
|
||||
outputDir: videosDir,
|
||||
aspectRatio: ratio,
|
||||
imageUrl: item.url,
|
||||
lastFrameUrl: item.lastFrameUrl || '',
|
||||
})
|
||||
if (result.file) {
|
||||
item.video = path.relative(dir, result.file).replace(/\\/g, '/')
|
||||
item.videoDuration = result.duration
|
||||
delete item.videoTaskId
|
||||
log('videos', ` item ${item.id} 恢复成功`)
|
||||
}
|
||||
} catch (err) {
|
||||
log('videos', ` item ${item.id} 恢复失败: ${err.message},将重新提交`)
|
||||
delete item.videoTaskId
|
||||
needSubmit.push(item)
|
||||
}
|
||||
saveManifest(manifestPath, manifest)
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
if (needSubmit.length === 0) { log('videos', '全部通过恢复完成'); return }
|
||||
|
||||
// Phase 2: 提交新任务(并发 3)
|
||||
const concurrency = 3
|
||||
log('videos', `提交 ${needSubmit.length} 个新任务(并发: ${concurrency})...`)
|
||||
|
||||
const submitted = []
|
||||
for (let i = 0; i < needSubmit.length; i += concurrency) {
|
||||
const batch = needSubmit.slice(i, i + concurrency)
|
||||
const batchResults = await Promise.allSettled(
|
||||
batch.map(async (item) => {
|
||||
const images = item.lastFrameUrl
|
||||
? [item.url, item.lastFrameUrl]
|
||||
: [item.url]
|
||||
const extraOpts = item.lastFrameUrl
|
||||
? { aspectRatio: ratio, lastFrameUrl: item.lastFrameUrl }
|
||||
: { aspectRatio: ratio }
|
||||
|
||||
try {
|
||||
const taskId = await Api.create(item.url, item.videoPrompt, extraOpts)
|
||||
return { item, taskId, error: null }
|
||||
} catch (err) {
|
||||
return { item, taskId: null, error: err.message }
|
||||
}
|
||||
})
|
||||
)
|
||||
for (const r of batchResults) {
|
||||
const val = r.status === 'fulfilled' ? r.value : { item: null, taskId: null, error: r.reason }
|
||||
submitted.push(val)
|
||||
if (val.item && val.taskId) {
|
||||
val.item.videoTaskId = val.taskId
|
||||
}
|
||||
}
|
||||
saveManifest(manifestPath, manifest)
|
||||
}
|
||||
|
||||
// Phase 3: 轮询新任务
|
||||
const pending = submitted.filter(s => s.taskId)
|
||||
if (pending.length === 0) {
|
||||
log('videos', '所有任务提交失败')
|
||||
for (const s of submitted) {
|
||||
if (s.item) { s.item.status = 'failed'; s.item.error = s.error || '提交失败' }
|
||||
}
|
||||
saveManifest(manifestPath, manifest)
|
||||
return
|
||||
}
|
||||
|
||||
log('videos', `等待 ${pending.length} 个视频生成...`)
|
||||
|
||||
const pollResults = await Promise.allSettled(
|
||||
pending.map(async ({ item, taskId }) => {
|
||||
try {
|
||||
const result = await pollFn(taskId, item.videoPrompt, {
|
||||
outputDir: videosDir,
|
||||
aspectRatio: ratio,
|
||||
imageUrl: item.url,
|
||||
lastFrameUrl: item.lastFrameUrl || '',
|
||||
})
|
||||
return { item, result, ok: true }
|
||||
} catch (err) {
|
||||
return { item, error: err.message, ok: false }
|
||||
}
|
||||
})
|
||||
)
|
||||
|
||||
for (let i = 0; i < results.length; i++) {
|
||||
const result = results[i]
|
||||
const item = items[i]
|
||||
if (!item) continue
|
||||
if (result.success && result.file) {
|
||||
item.video = path.relative(dir, result.file).replace(/\\/g, '/')
|
||||
item.videoDuration = result.duration
|
||||
} else {
|
||||
item.status = 'failed'
|
||||
item.error = result.error || '视频生成失败'
|
||||
log('videos', ` item ${(item.id || '?')} 失败: ${item.error}`)
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
log('videos', `批量生成失败: ${err.message}`)
|
||||
for (const item of items) {
|
||||
if (!item.video) {
|
||||
item.status = 'failed'
|
||||
item.error = `批量生成异常: ${err.message}`
|
||||
}
|
||||
for (const r of pollResults) {
|
||||
const val = r.status === 'fulfilled' ? r.value : { ok: false, error: r.reason?.message }
|
||||
if (val.ok && val.result.file) {
|
||||
val.item.video = path.relative(dir, val.result.file).replace(/\\/g, '/')
|
||||
val.item.videoDuration = val.result.duration
|
||||
delete val.item.videoTaskId
|
||||
} else if (val.item) {
|
||||
val.item.status = 'failed'
|
||||
val.item.error = val.error || '视频生成未返回文件'
|
||||
delete val.item.videoTaskId
|
||||
}
|
||||
saveManifest(manifestPath, manifest)
|
||||
}
|
||||
|
||||
// 上传视频到 OSS
|
||||
|
||||
@@ -12,7 +12,7 @@ const SCRIPTS_DIR = path.join(__dirname, '..')
|
||||
const SKILLS_DIR = path.join(SCRIPTS_DIR, '..')
|
||||
const PROJECT_ROOT = path.join(SKILLS_DIR, '..', '..')
|
||||
const CONFIG_PATH = path.join(SKILLS_DIR, 'config.json')
|
||||
const ACCOUNTS_DIR = path.join(PROJECT_ROOT, 'accounts')
|
||||
const ACCOUNTS_DIR = path.join(PROJECT_ROOT, '..', 'accounts')
|
||||
|
||||
// ============================================================================
|
||||
// 配置 & Manifest
|
||||
@@ -64,16 +64,14 @@ function getReferences(manifest, accountConfig) {
|
||||
log('images', 'manifest.references 全部无效,尝试 account fallback')
|
||||
}
|
||||
|
||||
// Fallback 1: 从 account.json 的 styles.*.references 读取
|
||||
const styles = accountConfig.styles || {}
|
||||
for (const [, style] of Object.entries(styles)) {
|
||||
for (const ref of (style.references || [])) {
|
||||
if (ref.url) result.urls.push(ref.url)
|
||||
if (ref.file && accountId) {
|
||||
const localPath = path.join(ACCOUNTS_DIR, accountId, 'references', ref.file)
|
||||
if (fs.existsSync(localPath)) {
|
||||
result.localPaths.push(localPath)
|
||||
}
|
||||
// Fallback 1: 从 account.json 的顶层 references 读取
|
||||
const topRefs = accountConfig.references || []
|
||||
for (const ref of topRefs) {
|
||||
if (ref.url) result.urls.push(ref.url)
|
||||
if (ref.file && accountId) {
|
||||
const localPath = path.join(ACCOUNTS_DIR, accountId, 'references', ref.file)
|
||||
if (fs.existsSync(localPath)) {
|
||||
result.localPaths.push(localPath)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -111,11 +109,26 @@ function ensureDir(dir) {
|
||||
}
|
||||
|
||||
/**
 * Build a short, filesystem-friendly slug from free text.
 *
 * Keeps at most 5 Han (Chinese) characters and at most 10 other word
 * characters (`\w`: ASCII letters, digits, underscore); everything else is
 * dropped. The kept Han characters are emitted first, followed by the kept
 * word characters, so the original interleaving is not preserved.
 *
 * @param {string} text - Source text; null/undefined/non-strings are coerced.
 * @returns {string} The slug, or 'untitled' when nothing usable remains.
 */
function slugify(text) {
  const MAX_HAN = 5
  const MAX_OTHER = 10
  const hanChars = []
  const wordChars = []

  // Iterate by code point so astral-plane Han characters stay intact.
  for (const char of String(text ?? '')) {
    if (/\p{Script=Han}/u.test(char)) {
      if (hanChars.length < MAX_HAN) hanChars.push(char)
    } else if (/\w/u.test(char)) {
      if (wordChars.length < MAX_OTHER) wordChars.push(char)
    }
    // Both quotas full: nothing later in the text can change the result.
    if (hanChars.length === MAX_HAN && wordChars.length === MAX_OTHER) break
  }

  return [...hanChars, ...wordChars].join('') || 'untitled'
}
|
||||
|
||||
function renameGeneratedFile(oldRelPath, dir, seq, nameHint, suffix) {
|
||||
|
||||
@@ -216,7 +216,7 @@ async function main() {
|
||||
console.log('用法:')
|
||||
console.log(' pipeline.js create-account --id <id> --name <名称> [--desc ...] [--references file1,file2]')
|
||||
console.log(' pipeline.js validate-account --account <id>')
|
||||
console.log(' pipeline.js init --account <id> --mode <single|framePair> --items <JSON> [--items-file <path>]')
|
||||
console.log(' pipeline.js init --account <id> --mode <single|framePair> --items <JSON> [--items-file <path>] [--image-model gemini|mj] [--video-model veo3-fast|grok|kling] [--format 9:16]')
|
||||
console.log(' pipeline.js validate --manifest <path>')
|
||||
console.log(' pipeline.js confirm --manifest <path> --all')
|
||||
console.log(' pipeline.js run --manifest <path> [--account id] [--phase p1,p2] [--resume] [--retry-failed]')
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* CosyVoice TTS 批量语音合成脚本
* 通过 WebSocket 调用阿里云 DashScope CosyVoice API
|
||||
*
|
||||
* 输入 JSON 文件格式:
|
||||
* {
|
||||
@@ -9,26 +10,25 @@
|
||||
* {"id": 1, "text": "第一段文案"},
|
||||
* {"id": 2, "text": "第二段文案"}
|
||||
* ],
|
||||
* "voice": "longanyang", // 可选,覆盖 config
* "output_dir": "./audio" // 可选,默认 ./audio
|
||||
* }
|
||||
*
|
||||
* 输出 JSON (stdout):
|
||||
* {
|
||||
* "segments": [
|
||||
* {"id": 1, "text": "...", "audio": "./audio/seg_001.mp3", "duration": 3.456}
|
||||
* ]
|
||||
* }
|
||||
*
|
||||
* 也可作为模块调用:
|
||||
* const { synthesize } = require('./qwen-tts')
|
||||
* const { filePath, duration } = await synthesize('你好世界', { voice: 'longanyang' })
|
||||
*/
|
||||
|
||||
const axios = require('axios')
|
||||
const fs = require('fs')
|
||||
const path = require('path')
|
||||
const { execFileSync } = require('child_process')
|
||||
|
||||
const CONFIG_PATH = path.join(__dirname, '..', '..', 'config.json')
|
||||
|
||||
@@ -37,102 +37,185 @@ function loadConfig() {
|
||||
return JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf-8'))
|
||||
}
|
||||
|
||||
/**
 * Return the duration of an audio file in seconds.
 *
 * `ffprobe` is asked for the container duration first. If ffprobe cannot be
 * run, exits non-zero, exceeds the 10 s timeout, or prints something that is
 * not a finite number (e.g. "N/A"), the duration is estimated from the file
 * size assuming 32 000 bits per second. That estimate is only a rough guess.
 *
 * @param {string} filePath - Path of the audio file to inspect.
 * @returns {number} Duration in seconds.
 * @throws {Error} If the fallback is needed and the file cannot be stat'ed.
 */
function getAudioDuration(filePath) {
  const FALLBACK_BITS_PER_SECOND = 32000

  try {
    const out = execFileSync('ffprobe', [
      '-v', 'quiet', '-show_entries', 'format=duration',
      '-of', 'default=noprint_wrappers=1:nokey=1', filePath,
    ], { encoding: 'utf-8', timeout: 10000 })
    const seconds = Number.parseFloat(out.trim())
    // A NaN here would silently propagate into the manifest; estimate instead.
    if (Number.isFinite(seconds)) return seconds
  } catch {
    // ffprobe unavailable or failed: fall through to the size-based estimate.
  }

  const { size } = fs.statSync(filePath)
  return size * 8 / FALLBACK_BITS_PER_SECOND
}
|
||||
|
||||
/**
 * Serialize one control frame for the CosyVoice WebSocket task protocol.
 *
 * @param {string} taskId - Task id shared by every frame of one synthesis.
 * @param {string} action - 'run-task', 'continue-task' or 'finish-task'.
 * @param {object} payload - Action-specific payload fields (model, parameters, input).
 * @returns {string} JSON text ready for `ws.send`.
 */
function buildTtsFrame(taskId, action, payload) {
  return JSON.stringify({
    header: { task_id: taskId, action, streaming: 'duplex' },
    payload: {
      task_group: 'audio',
      task: 'tts',
      function: 'SpeechSynthesizer',
      ...payload,
    },
  })
}

/**
 * Synthesize a single text segment to an MP3 file over the DashScope
 * CosyVoice WebSocket endpoint.
 *
 * Flow: open socket -> `run-task` (voice parameters, empty input) -> on
 * `task-started` send `continue-task` with the text, then `finish-task` ->
 * collect binary frames -> on `task-finished` write `seg_NNN.mp3` and resolve.
 *
 * @param {string} text - Text to speak. '。' is appended when the text does not
 *   already end with sentence-final punctuation.
 * @param {object} [options]
 * @param {string} [options.apiKey] - Overrides `config.ttsApiKey`.
 * @param {string} [options.model] - Overrides `config.ttsModel`.
 * @param {string} [options.voice] - Overrides `config.ttsVoice`.
 * @param {string} [options.instruction] - Overrides `config.ttsInstruction`;
 *   only sent when non-empty.
 * @param {string} [options.outputDir='./audio'] - Created if missing.
 * @param {number|string} [options.id=1] - Used in the file name and task id.
 * @returns {Promise<{filePath: string, duration: number}>}
 *   Rejects on missing API key, socket error, premature close, `task-failed`,
 *   empty audio, file-write failure, or after 60 s without completion.
 */
function synthesize(text, options = {}) {
  return new Promise((resolve, reject) => {
    const config = loadConfig()

    const apiKey = options.apiKey || config.ttsApiKey
    if (!apiKey) { reject(new Error('ttsApiKey 未配置')); return }

    const model = options.model || config.ttsModel || 'cosyvoice-v3-flash'
    const voice = options.voice || config.ttsVoice || 'longanyang'
    const instruction = options.instruction || config.ttsInstruction || ''
    const outputDir = options.outputDir || './audio'

    fs.mkdirSync(outputDir, { recursive: true })

    // Sentence-final punctuation gives the voice a natural closing intonation.
    // \uFF01 / \uFF1F are the full-width '!' and '?' used in Chinese text.
    let spoken = text.trimEnd()
    if (!/[。.!?…\uFF01\uFF1F]$/.test(spoken)) spoken += '。'

    const id = options.id || 1
    const fileName = `seg_${String(id).padStart(3, '0')}.mp3`
    const filePath = path.resolve(outputDir, fileName)

    const wsUrl = 'wss://dashscope.aliyuncs.com/api-ws/v1/inference'
    const ws = new WebSocket(wsUrl, {
      headers: { Authorization: `bearer ${apiKey}` },
    })
    // Deliver binary frames as ArrayBuffer so they can be stored synchronously,
    // in arrival order, before the 'task-finished' text frame is handled.
    ws.binaryType = 'arraybuffer'

    const taskId = `tts_${Date.now()}_${id}`
    const chunks = [] // Buffer | Promise<Buffer>, in arrival order
    let settled = false
    let finished = false // 'task-finished' seen; a later close is expected
    let timer = null

    // Single exit point: settle once, stop the watchdog, release the socket.
    const settle = (callback, value) => {
      if (settled) return
      settled = true
      clearTimeout(timer)
      ws.close()
      callback(value)
    }
    const fail = (err) => settle(reject, err)

    timer = setTimeout(() => fail(new Error('TTS 超时 (60s)')), 60000)

    const toBuffer = (data) => {
      if (typeof Blob !== 'undefined' && data instanceof Blob) {
        // Defensive fallback; keep the slot so ordering is preserved.
        const pending = data.arrayBuffer().then((ab) => Buffer.from(ab))
        pending.catch(() => {}) // surfaced via Promise.all in finish()
        return pending
      }
      return Buffer.from(data)
    }

    const finish = async () => {
      finished = true
      try {
        const audio = Buffer.concat(await Promise.all(chunks))
        if (audio.length === 0) throw new Error('TTS 未返回音频')
        fs.writeFileSync(filePath, audio)
        settle(resolve, { filePath, duration: getAudioDuration(filePath) })
      } catch (err) {
        // Write/probe errors must reject; swallowing them would hang the caller.
        fail(err)
      }
    }

    ws.addEventListener('open', () => {
      // Step 1: run-task carries the voice parameters and an empty input.
      ws.send(buildTtsFrame(taskId, 'run-task', {
        model,
        parameters: {
          voice,
          format: 'mp3',
          sample_rate: 24000,
          volume: 50,
          rate: 1.0,
          pitch_rate: 1.0,
          text_type: 'PlainText',
          ...(instruction ? { instruction } : {}),
        },
        input: {},
      }))
    })

    ws.addEventListener('message', (event) => {
      const { data } = event
      if (typeof data !== 'string') {
        chunks.push(toBuffer(data))
        return
      }

      let msg
      try {
        msg = JSON.parse(data)
      } catch {
        return // ignore text frames that are not JSON
      }

      const evt = msg?.header?.event
      if (evt === 'task-started') {
        // Step 2: send the text, Step 3: tell the server no more text follows.
        ws.send(buildTtsFrame(taskId, 'continue-task', { model, input: { text: spoken } }))
        ws.send(buildTtsFrame(taskId, 'finish-task', { input: {} }))
      } else if (evt === 'task-finished') {
        finish()
      } else if (evt === 'task-failed') {
        fail(new Error(`TTS 失败: ${msg.header?.error_message || msg.header?.message || JSON.stringify(msg)}`))
      }
    })

    ws.addEventListener('error', (e) => {
      fail(new Error(`WebSocket 错误: ${e.message || '连接失败'}`))
    })

    ws.addEventListener('close', () => {
      // Fail fast instead of waiting for the 60 s watchdog.
      if (!finished) fail(new Error('WebSocket 连接在任务完成前关闭'))
    })
  })
}
|
||||
|
||||
/**
|
||||
* 批量语音合成
|
||||
* @param {Array<{id: number, text: string}>} segments
|
||||
* @param {object} options - { voice, outputDir }
|
||||
* @returns {Array<{id: number, text: string, audio: string, duration: number}>}
|
||||
*/
|
||||
async function synthesizeBatch(segments, options = {}) {
|
||||
const results = []
|
||||
for (const seg of segments) {
|
||||
console.error(` 合成 #${seg.id}: ${seg.text.substring(0, 30)}...`)
|
||||
const { filePath, duration } = await synthesize(seg.text, {
|
||||
...options,
|
||||
id: seg.id,
|
||||
})
|
||||
results.push({
|
||||
id: seg.id,
|
||||
text: seg.text,
|
||||
audio: filePath,
|
||||
duration: Math.round(duration * 1000) / 1000,
|
||||
})
|
||||
// 间隔 0.5 秒避免限流
|
||||
try {
|
||||
const { filePath, duration } = await synthesize(seg.text, {
|
||||
...options,
|
||||
id: seg.id,
|
||||
})
|
||||
results.push({
|
||||
id: seg.id,
|
||||
text: seg.text,
|
||||
audio: filePath,
|
||||
duration: Math.round(duration * 1000) / 1000,
|
||||
})
|
||||
} catch (err) {
|
||||
results.push({
|
||||
id: seg.id,
|
||||
text: seg.text,
|
||||
audio: '',
|
||||
duration: 0,
|
||||
error: err.message,
|
||||
})
|
||||
}
|
||||
await new Promise(r => setTimeout(r, 500))
|
||||
}
|
||||
return results
|
||||
@@ -147,22 +230,19 @@ async function main() {
|
||||
console.error('input.json 格式:')
|
||||
console.error(JSON.stringify({
|
||||
segments: [{ id: 1, text: '文案' }],
|
||||
voice: 'Cherry',
|
||||
voice: 'longanyang',
|
||||
output_dir: './audio',
|
||||
}, null, 2))
|
||||
process.exit(1)
|
||||
}
|
||||
|
||||
const config = JSON.parse(fs.readFileSync(inputJson, 'utf-8'))
|
||||
const segments = config.segments
|
||||
const options = {
|
||||
const results = await synthesizeBatch(config.segments, {
|
||||
voice: config.voice,
|
||||
outputDir: config.output_dir || './audio',
|
||||
}
|
||||
})
|
||||
|
||||
const results = await synthesizeBatch(segments, options)
|
||||
const output = { segments: results }
|
||||
process.stdout.write(JSON.stringify(output, null, 2) + '\n')
|
||||
process.stdout.write(JSON.stringify({ segments: results }, null, 2) + '\n')
|
||||
}
|
||||
|
||||
if (require.main === module) {
|
||||
|
||||
Reference in New Issue
Block a user