feat(capcut-pipeline): 将 TTS 配音切换为 CosyVoice 并重构动画系统

- 将 TTS 引擎从 Qwen-TTS 切换为阿里云 CosyVoice(DashScope WebSocket)
- 输出格式从 WAV(24kHz)改为 MP3
- 重构图片动画分拆逻辑,支持组合动画(如"渐显+放大")
- 移除字幕关键词高亮相关字段
- 移除 `uploadAudioToOSS` 函数,音频上传统一使用 `uploadToOSS`
- 更新文档和配置默认值以匹配新引擎
This commit is contained in:
2026-05-01 14:50:50 +08:00
parent 9d19437a29
commit 3a641244a5
5 changed files with 46 additions and 82 deletions

View File

@@ -334,10 +334,9 @@ node kling-video-generator.js --image <url> --prompt <prompt> -o ./videos
```
output/{name}_{YYYYMMDD}_{NNN}/
├── manifest.json # 主清单(贯穿全流程)
├── prompts.txt # 原始提示词存档
├── images/ # scene_{NN}_{slug}.jpeg(slug 从 script/shotDesc 派生,首尾帧加 _last 后缀)
├── videos/ # scene_{NN}_{slug}.mp4(与图片对应)
└── urls.json # OSS 公网 URL 映射
└── audio/ # seg_001.mp3(TTS 分句音频,多句时 seg_{id}_{j}.mp3)
```
**命名对应关系**:图片 `scene_01_觉醒.jpeg` → 视频 `scene_01_觉醒.mp4`;首尾帧尾帧 `scene_01_觉醒_last.jpeg`;MJ 候选 `scene_01_觉醒_cand1.jpeg`
@@ -396,7 +395,7 @@ output/{name}_{YYYYMMDD}_{NNN}/
所有子技能共享以下资源(位于本目录):
- `scripts/` — 共享脚本(gemini-image-generator.js, mj-image-generator.js, grok-video-generator.js, veo-video-generator.js, capcut_assemble.js, sync-to-jianying.js, oss-upload.js)
- `scripts/` — 共享脚本(gemini-image-generator.js, mj-image-generator.js, grok-video-generator.js, veo-video-generator.js, kling-video-generator.js, qwen-tts.js, capcut_assemble.js, sync-to-jianying.js, oss-upload.js)
- `accounts/` — 账号配置(项目根目录,详见 [account-system.md](references/account-system.md))
- `references/account-system.md` — 账号系统说明

View File

@@ -218,8 +218,7 @@ async function assemble(args) {
format = '9:16',
apiKey = '',
duration = '4',
animation = '缩放',
localAudio = 'true',
animation = '渐显+放大',
} = args
if (!input) throw new Error('缺少 --input 参数')
@@ -352,12 +351,11 @@ async function assemble(args) {
// Step 2: 上传(已调速的)视频到 OSS
const missingUrl = items.filter(it => it.video && !it.videoUrl)
if (missingUrl.length > 0) {
const { uploadFile } = require('./oss-upload')
console.log(` 上传 ${missingUrl.length} 个视频到 OSS...`)
for (const item of missingUrl) {
const videoPath = path.resolve(inputDir, item.video)
try {
const { url } = await uploadFile(videoPath)
const url = await uploadToOSS(videoPath)
item.videoUrl = url
// 回写 manifest
if (manifestFile) {
@@ -492,17 +490,12 @@ async function addImages(draftUrl, items, imgUrls, timeline, width, height, anim
}
if (animation) {
const parts = animation.split('+')
for (const part of parts) {
const name = part.trim()
// 组合动画(持续整段):缩放、三分割 等
if (name === '缩放' || name === '缩放 II') {
info.loop_animation = name
} else {
// 默认作为入场动画
info.in_animation = name
}
}
const parts = animation.split('+').map(p => p.trim()).filter(Boolean)
const groupNames = ['缩放', '缩放 II']
const groupAnims = parts.filter(p => groupNames.includes(p))
const inAnims = parts.filter(p => !groupNames.includes(p))
if (groupAnims.length > 0) info.loop_animation = groupAnims.join('|')
if (inAnims.length > 0) info.in_animation = inAnims.join('|')
}
return info
@@ -637,19 +630,9 @@ async function addVideos(draftUrl, inputDir, items, timeline, width, height, tra
}
// ============================================================================
// 音频上传(本地文件 → OSS 公网 URL)
// 音频批量上传(本地文件 → OSS 公网 URL)
// ============================================================================
/**
 * Upload one local file through the sibling `oss-upload` module and
 * return the `url` field of the upload result.
 *
 * The module is resolved relative to this script's directory and loaded
 * lazily, inside the try block, so a load failure is reported the same
 * way as an upload failure.
 *
 * @param {string} filePath - Path handed unchanged to `uploadFile`.
 * @returns {Promise<*>} The `url` property of whatever `uploadFile`
 *   resolves to (presumably a public URL string — confirm against
 *   oss-upload).
 * @throws {Error} Any failure is rethrown as a new Error whose message
 *   is the original message prefixed with "音频上传 OSS 失败: ".
 */
async function uploadAudioToOSS(filePath) {
  try {
    const ossModulePath = path.join(__dirname, 'oss-upload')
    const uploader = require(ossModulePath)
    const { url: publicUrl } = await uploader.uploadFile(filePath)
    return publicUrl
  } catch (uploadErr) {
    throw new Error(`音频上传 OSS 失败: ${uploadErr.message}`)
  }
}
async function batchUploadAudio(inputDir, items) {
const urls = {}
for (const item of items) {
@@ -665,7 +648,7 @@ async function batchUploadAudio(inputDir, items) {
continue
}
try {
urls[seg.audio] = await uploadAudioToOSS(filePath)
urls[seg.audio] = await uploadToOSS(filePath)
console.log(` 上传: ${path.basename(filePath)} -> OK`)
} catch (err) {
console.error(` 上传失败: ${path.basename(filePath)} - ${err.message}`)
@@ -686,7 +669,7 @@ async function batchUploadAudio(inputDir, items) {
continue
}
try {
urls[item.audio] = await uploadAudioToOSS(filePath)
urls[item.audio] = await uploadToOSS(filePath)
console.log(` 上传: ${path.basename(filePath)} -> OK`)
} catch (err) {
console.error(` 上传失败: ${path.basename(filePath)} - ${err.message}`)
@@ -868,8 +851,6 @@ async function addSubtitles(draftUrl, items, timeline, style = {}, split = false
start: currentTime,
end: currentTime + duration,
text: seg.text,
keyword: '',
keyword_color: '',
}
if (inAnimation) cap.in_animation = inAnimation
@@ -903,8 +884,6 @@ async function addSubtitles(draftUrl, items, timeline, style = {}, split = false
start: currentTime,
end: currentTime + duration,
text: sentence,
keyword: '',
keyword_color: '',
}
if (inAnimation) cap.in_animation = inAnimation
@@ -918,16 +897,10 @@ async function addSubtitles(draftUrl, items, timeline, style = {}, split = false
}
} else {
// 原始模式:一句字幕
const keyword = ''
const keywordColor = style.highlightColor || style.color || '#FFFFFF'
const cap = {
start: tl.start,
end: tl.end,
text,
keyword,
keyword_color: keyword ? keywordColor : '',
keyword_font_size: 18,
}
if (inAnimation) cap.in_animation = inAnimation
@@ -1040,7 +1013,7 @@ async function main() {
console.log(' --duration 4 默认每段时长/秒无TTS时的fallback默认 4')
console.log(' --voiceover true|false 是否添加TTS配音轨道默认 true')
console.log(' --subtitles true|false 是否添加字幕(默认 true')
console.log(' --split-captions true|false 分句字幕模式(默认 false长句按标点切分)')
console.log(' --split-captions true|false 分句字幕模式(默认 true按标点切分)')
console.log(' --bgm <url> 背景音乐 URL')
console.log(' --effects "名称1,名称2" 特效名称(逗号分隔)')
console.log(' --filter "名称:强度" 滤镜(强度 0-100')

View File

@@ -23,7 +23,7 @@ async function phaseAssemble(manifest, manifestPath, options) {
subtitles: mode === 'images' ? 'true' : 'false',
voiceover: manifest.items.some(it => it.audio) ? 'true' : 'false',
duration: '4',
animation: capcutConfig.animation || '缩放',
animation: capcutConfig.animation || '渐显+放大',
}
if (capcutConfig.defaultBGM) assembleArgs.bgm = capcutConfig.defaultBGM