feat(video-pipeline): 添加关键字氛围词花字叠加和 OSS URL 回写功能

- 新增 Q17 关键字氛围词问答项,支持关闭/默认/自定义花字效果
- 在 manifest 和 account.json 中添加 keyword 和 keywordStyle 字段
- 实现关键字氛围词在画面中央的叠加逻辑,支持动画、阴影、文字效果等配置
- 在 assemble 流程中增加 `keywords` 步骤,自动读取账号配置生成花字
- 修复音频上传后未回写 OSS URL 到 manifest 的问题,避免重复上传
This commit is contained in:
2026-05-01 15:21:59 +08:00
parent 3a641244a5
commit e4723d9ce3
8 changed files with 157 additions and 1 deletions

View File

@@ -91,6 +91,23 @@ Phase 4: 技术配置(有默认值,可跳过)
| 14 | 参考图文件? | 无 | 用户稍后放入 references/ 目录,Agent 上传 OSS 回写 URL |
| 15 | TTS 音色? | config.json 全局 ttsVoice | account.json 的 ttsVoice,留空用全局默认 |
| 16 | TTS 语气指令? | 无 | account.json 的 ttsInstruction,描述期望的语气风格 |
| 17 | 关键字氛围词? | 开启(默认样式) | 画面中央大字叠加,增强冲击力。选项:关闭 / 默认样式 / 描述期望效果 |
| | 期望的花字风格?(选填) | 按账号风格推荐 | 根据视觉基调推荐,见下方花字推荐表 |
**花字效果推荐表**(92 种免费效果,按风格分类):
| 账号风格 | 推荐花字 | 备选 |
|---------|---------|------|
| 暗黑/军事/权谋 | 暗黑潮酷黑色花字 | 潮酷 黑色紫光、简约白色黑边花字 |
| 潮酷/街头/潮流 | 潮酷 白色橙边 | 超酷发光镂空花字、潮酷橙色 |
| 科技/赛博 | 赛博朋克未来科技感立体发光花字 | 蓝色镂空发光花字、炫彩发光花字 |
| 简约/商务/知识 | 简约白色黑边花字 | 简约白色花字、简约黑色描边立体花字 |
| 清新/生活/治愈 | 小清新 白色 | 清新蓝色发光花字、梦幻粉色发光立体花字 |
| 复古/港风 | 复古 红色 | 红色朦胧港风复古花字、书法墨环图案花字 |
| 综艺/娱乐 | 综艺 白色黑边 | 立体综艺花字、综艺 橙色 |
> Agent 可调用 `get_text_effects` API 获取完整列表(92 种免费 + 更多 VIP)。
> Q17 选"默认样式"时,Agent 根据账号风格从上表自动匹配。
**运动偏好 → 视频提示词映射**
@@ -132,6 +149,7 @@ Phase 4: 技术配置(有默认值,可跳过)
- 视频模型:{Q13}
- TTS音色{Q15}
- TTS语气{Q16}
- 关键字氛围词:{Q17 开启/关闭,花字风格}
确认 "开始" → 创建账号
修改 → 调整后重新输出
@@ -159,6 +177,7 @@ Phase 4: 技术配置(有默认值,可跳过)
2. **生成 account.json**
- 从 `_template/account.json` 复制骨架
- 填入 id、name、description、模型、画幅等
- Q17 选关闭时删除 `keywordStyle` 节;选自定义花字时更新 `textEffect` 字段
3. **生成分镜.md**
- 读取 `_template/prompts/通用分镜.md`

View File

@@ -62,6 +62,7 @@ node scripts/pipeline.js validate --manifest <path>
| `duration` | 计划视频时长(秒),来自分镜阶段 |
| `imagePrompt` | 英文画面描述(给 Gemini/MJStep 2-A 生成 |
| `directorRef` | 导演构图参考tarantino / kitano / fincher三层透传 |
| `keyword` | 关键字氛围词(2-6 字),assemble 时以花字效果叠加在画面中央。可选 |
| `confirmed` | 人工确认状态,默认 `false` |
### Agent 后续回写Step 3-A 视频提示词)

View File

@@ -288,7 +288,7 @@ async function assemble(args) {
const steps = []
if (mode === 'images') steps.push('upload')
steps.push('draft', 'materials', 'audio_oss', 'voiceover', 'audio', 'subtitles', 'effects', 'filter', 'save', 'sync')
steps.push('draft', 'materials', 'audio_oss', 'voiceover', 'audio', 'subtitles', 'keywords', 'effects', 'filter', 'save', 'sync')
const totalSteps = steps.length
let step = 0
@@ -380,6 +380,25 @@ async function assemble(args) {
try {
audioUrls = await batchUploadAudio(inputDir, items)
console.log(` 成功: ${Object.keys(audioUrls).length} 段音频\n`)
// 回写 OSS URL 到 manifest避免重复上传
if (Object.keys(audioUrls).length > 0 && manifestFile) {
let changed = false
for (const item of manifest.items) {
if (item.audio && audioUrls[item.audio]) {
item.audio = audioUrls[item.audio]
changed = true
}
if (item.segments) {
for (const seg of item.segments) {
if (seg.audio && audioUrls[seg.audio]) {
seg.audio = audioUrls[seg.audio]
changed = true
}
}
}
}
if (changed) saveManifest(manifestFile, manifest)
}
} catch (err) {
console.log(` OSS 上传失败,将尝试本地路径: ${err.message}\n`)
}
@@ -415,6 +434,15 @@ async function assemble(args) {
console.log(' 跳过')
}
// -- 添加关键字氛围词 --
step++; console.log(`[${step}/${totalSteps}] 添加关键字氛围词...`)
const keywordStyle = loadKeywordStyle(manifest)
if (Object.keys(keywordStyle).length > 0 && items.some(i => i.keyword)) {
await addKeywordOverlays(draftUrl, items, timeline, keywordStyle)
} else {
console.log(' 跳过(无关键字或未配置 keywordStyle')
}
// -- 添加特效 --
step++; console.log(`[${step}/${totalSteps}] 添加特效...`)
if (effectsStr) {
@@ -796,6 +824,75 @@ function loadSubtitleStyle(manifest) {
} catch { return {} }
}
/**
 * Load the keyword-overlay style configured for the manifest's account.
 *
 * Reads `accounts/<account>/account.json`, resolved three directories above
 * this script, and returns its `capcut.keywordStyle` value.
 *
 * @param {object} manifest - Manifest object; only `manifest.account` is read.
 * @returns {object} The configured keyword style, or `{}` when the manifest has
 *   no account, the account file does not exist, the file cannot be read or
 *   parsed, or no `capcut.keywordStyle` is set.
 */
function loadKeywordStyle(manifest) {
  const { account: accountId } = manifest
  if (!accountId) return {}

  const configPath = path.join(__dirname, '..', '..', '..', 'accounts', accountId, 'account.json')
  if (!fs.existsSync(configPath)) return {}

  try {
    const raw = fs.readFileSync(configPath, 'utf-8')
    const parsed = JSON.parse(raw)
    const configured = parsed.capcut?.keywordStyle
    return configured || {}
  } catch {
    // Best effort: an unreadable or malformed account file means "no style".
    return {}
  }
}
// ============================================================================
// Keyword / mood-word overlays (large text at the centre of the frame)
// ============================================================================
/**
 * Add one caption per item that has a `keyword`, timed to that item's timeline
 * entry, by sending a single `add_captions` request.
 *
 * @param {string} draftUrl - Draft identifier forwarded as `draft_url`.
 * @param {Array<object>} items - Manifest items; only `item.keyword` is read.
 * @param {Array<{start: number, end: number}>} timeline - Timing entries,
 *   index-aligned with `items`.
 * @param {object} [style={}] - Keyword style. Fields read: `inAnimation`,
 *   `outAnimation`, `inAnimDuration`, `outAnimDuration`, `font`, `fontSize`,
 *   `color`, `bold`, `hasShadow`, `shadowAlpha`, `shadowColor`, `alpha`,
 *   `transformY`, `textEffect`.
 * @returns {Promise<void>} Resolves once the request has completed, or
 *   immediately when there is nothing to add.
 */
async function addKeywordOverlays(draftUrl, items, timeline, style = {}) {
  if (!items.some(item => item.keyword)) {
    console.log(' 无关键字,跳过')
    return
  }

  const captions = []
  for (let i = 0; i < items.length; i++) {
    const item = items[i]
    if (!item.keyword) continue

    // A keyword without a matching timeline entry cannot be timed; skip it
    // instead of letting a TypeError abort the whole assemble run.
    const tl = timeline?.[i]
    if (!tl) {
      console.log(` 跳过第 ${i + 1} 段关键字(缺少时间线)`)
      continue
    }

    const cap = {
      start: tl.start,
      end: tl.end,
      text: item.keyword,
    }
    // Animation fields are only sent when configured.
    if (style.inAnimation) cap.in_animation = style.inAnimation
    if (style.outAnimation) cap.out_animation = style.outAnimation
    if (style.inAnimDuration) cap.in_animation_duration = style.inAnimDuration
    if (style.outAnimDuration) cap.out_animation_duration = style.outAnimDuration
    captions.push(cap)
  }
  if (captions.length === 0) return

  await api('add_captions', {
    draft_url: draftUrl,
    captions: JSON.stringify(captions),
    font: style.font || null,
    font_size: style.fontSize || 60,
    text_color: style.color || '#FFFFFF',
    alignment: 1, // NOTE(review): presumably "centered" — confirm against the add_captions API
    bold: style.bold || false,
    has_shadow: style.hasShadow || false,
    // Shadow details are only sent when a shadow alpha is configured; the
    // diffuse/distance/angle values are fixed here.
    shadow_info: style.shadowAlpha ? {
      shadow_alpha: style.shadowAlpha,
      shadow_color: style.shadowColor || '#000000',
      shadow_diffuse: 15,
      shadow_distance: 5,
      shadow_angle: -45,
    } : undefined,
    // `??` rather than `||` so an explicit alpha of 0 is not replaced by 1.
    alpha: style.alpha ?? 1,
    scale_x: 1, scale_y: 1,
    transform_x: 0,
    transform_y: style.transformY || 0,
    text_effect: style.textEffect || null,
  })
  console.log(` 已添加 ${captions.length} 个关键字氛围词 (效果: ${style.textEffect || '无'})`)
}
function loadTransitions(manifest) {
const account = manifest.account
if (!account) return null

View File

@@ -79,6 +79,7 @@ function initManifest(options) {
}
if (raw.directorRef) item.directorRef = raw.directorRef
if (raw.videoPrompt) item.videoPrompt = raw.videoPrompt
if (raw.keyword) item.keyword = raw.keyword
if (resolvedMode === 'framePair') item.lastFramePrompt = raw.lastFramePrompt
return item
})

View File

@@ -21,6 +21,17 @@
"highlightColor": "#FF6B35",
"bold": true
},
"keywordStyle": {
"textEffect": "简约白色黑边花字",
"fontSize": 60,
"color": "#FFFFFF",
"bold": true,
"transformY": 0,
"inAnimation": "打字机效果",
"outAnimation": "模糊淡出",
"inAnimDuration": 300000,
"outAnimDuration": 300000
},
"defaultBGM": "",
"transitions": {
"strategy": "rhythm",

View File

@@ -317,12 +317,17 @@ undone
"id": 1,
"shotDesc": "英文画面描述图文50-80词 / 视频30-60词",
"script": "该段完整原文逐字摘取,一字不改,禁止改写/提炼/摘要",
"keyword": "2-6字氛围关键词可选",
"duration": 5,
"directorRef": "tarantino / kitano / fincher"
}
]
```
**字段说明**
- `script`:该段的**完整原文逐字摘取,一字不改**。原文怎么写就怎么贴,禁止改写、提炼、摘要、概括、换词
- `keyword`:该段的**氛围关键词**(可选),2-6 个字,以花字效果叠加在画面中央增强冲击力。提炼该段最核心的意象/概念,偏向名词或动名词,有画面感。无合适关键词时省略该字段
## 九、启动指令与自检
收到文案后:

View File

@@ -36,6 +36,17 @@
"inAnimation": "淡入",
"outAnimation": "淡出"
},
"keywordStyle": {
"textEffect": "简约白色黑边花字",
"fontSize": 60,
"color": "#FFFFFF",
"bold": true,
"transformY": 0,
"inAnimation": "打字机效果",
"outAnimation": "模糊淡出",
"inAnimDuration": 300000,
"outAnimDuration": 300000
},
"defaultBGM": "",
"transitions": {
"strategy": "rhythm",

View File

@@ -344,6 +344,7 @@ something that cannot be undone
"id": 1,
"shotDesc": "英文画面描述图文50-80词 / 视频30-60词",
"script": "该段的完整原文案,不提炼,保留论证、例子、细节",
"keyword": "2-6字氛围关键词",
"duration": 5,
"directorRef": "tarantino / kitano / fincher"
}
@@ -352,6 +353,10 @@ something that cannot be undone
**字段说明**
- `script`:该段的**完整原文逐字摘取,一字不改**。原文怎么写就怎么贴,禁止改写、提炼、摘要、概括、换词。包含论证、例子、细节,不是金句
- `keyword`:该段的**氛围关键词**,2-6 个字,以花字效果叠加在画面中央增强冲击力。规则:
- 提炼该段最核心的意象/概念,不是复述文案
- 偏向名词或动名词,有画面感:"权力""沉默""位置""逐帧拆""审视"
- 不是每段都必须有,无合适关键词时省略该字段
- 按语义单元切割,确保每段表达一个完整观点或例子
## 十一、完整示例
@@ -381,6 +386,7 @@ something that cannot be undone
"id": 1,
"shotDesc": "a solitary figure in a dark traditional robe seated at the far end of a dim wooden hall, three-quarters of the frame filled with empty floor and gathering shadow — the man occupies only the leftmost edge of the composition, back straight, shoulders set with the stillness of someone who has already decided. The space around him continues to darken.",
"script": "权力从来不大声说话。它藏在一个人坐在哪里,看向哪里,在哪句话之后沉默了三秒。",
"keyword": "权力",
"duration": 6,
"directorRef": "kitano"
},
@@ -388,6 +394,7 @@ something that cannot be undone
"id": 2,
"shotDesc": "extreme close-up of a man's eyes, half-lowered, tracking slowly across the room with the precision of someone reading a document no one else can see — his gaze moves but his head does not. In the blurred background, the edge of another figure waits, unknowingly being measured and filed away.",
"script": "权力藏在一个人坐在哪里,看向哪里。你看到的是他的位置,他看到的是整个房间的结构。",
"keyword": "位置",
"duration": 6,
"directorRef": "tarantino"
},
@@ -395,6 +402,7 @@ something that cannot be undone
"id": 3,
"shotDesc": "a near-symmetrical frame — two hands visible on a low table, one pair relaxed and open, one pair with fingers slowly pressing flat, knuckles beginning to whiten. The geometric precision of the table edge divides the frame exactly in half. The whitening knuckles are the only thing breaking the symmetry — and the silence.",
"script": "在哪句话之后沉默了三秒。这种沉默不是等待,是审视。今天,我们逐帧拆解权力运行的底层逻辑。",
"keyword": "审视",
"duration": 6,
"directorRef": "fincher"
}
@@ -426,6 +434,7 @@ something that cannot be undone
"id": 1,
"shotDesc": "a figure seated at the far end of a dim wooden hall, three-quarters of the frame filled with empty floor — the man's body is perfectly still but his head is beginning a slow almost imperceptible turn toward the door at the right edge of frame, as if he has heard something the camera has not yet revealed. The shadow on the floor continues to lengthen.",
"script": "权力从来不大声说话。它藏在一个人坐在哪里,看向哪里,在哪句话之后沉默了三秒。",
"keyword": "权力",
"duration": 5,
"directorRef": "kitano"
},
@@ -433,6 +442,7 @@ something that cannot be undone
"id": 2,
"shotDesc": "extreme close-up of a man's eyes, half-lowered, beginning to track slowly to the left with the precision of someone reading a document no one else can see — his gaze shifts but his head does not move yet, and in the blurred background a second figure's shoulder is beginning to come into focus.",
"script": "权力藏在一个人坐在哪里,看向哪里。你看到的是他的位置,他看到的是整个房间的结构。",
"keyword": "位置",
"duration": 5,
"directorRef": "tarantino"
},
@@ -440,6 +450,7 @@ something that cannot be undone
"id": 3,
"shotDesc": "a near-symmetrical frame — two pairs of hands on a low table, one pair relaxed, the other with fingers slowly pressing flat and knuckles beginning to whiten. The table edge divides the frame exactly in half. The whitening knuckles are the only motion in the frame, pressing harder, as the geometric order begins its quiet collapse.",
"script": "在哪句话之后沉默了三秒。这种沉默不是等待,是审视。今天,我们逐帧拆解权力运行的底层逻辑。",
"keyword": "审视",
"duration": 5,
"directorRef": "fincher"
}