feat(video-pipeline): 用 ffprobe 实际测量音视频时长并统一字段名为 script

将项目中的 `narration` 字段统一重命名为 `script`,并新增 `getAudioDurationSec` 函数,通过 `ffprobe` 实际测量音频和视频文件的时长,替代 Manifest 中的估计值,提高时间线组装的准确性。同时优化字幕逻辑,仅在有 TTS 音频时调整视频速度。
This commit is contained in:
2026-05-01 01:52:02 +08:00
parent 7d526d2b60
commit c878abc39b
8 changed files with 130 additions and 41 deletions

View File

@@ -123,6 +123,19 @@ async function batchUploadToOSS(inputDir, files) {
return urls
}
/**
 * Ask `ffprobe` for the container-level duration of a media file.
 *
 * Despite the name, the probe reads `format=duration`, so callers in this
 * file also use it for video files.
 *
 * @param {string} filePath - Path of the media file handed to ffprobe.
 * @returns {Promise<number|null>} Duration in seconds, or `null` when
 *   ffprobe reports an error or prints something that does not parse to a
 *   positive number.
 */
function getAudioDurationSec(filePath) {
  // Print only the bare duration value: no log noise, no section/key prefix.
  const probeArgs = [
    '-v', 'quiet',
    '-show_entries', 'format=duration',
    '-of', 'csv=p=0',
    filePath
  ]

  return new Promise((resolve) => {
    const onProbeDone = (err, stdout) => {
      if (err) {
        // Probe failures are reported as "unknown duration", not as a rejection.
        resolve(null)
        return
      }
      const seconds = parseFloat(stdout.trim())
      // NaN (e.g. "N/A" or empty output) and non-positive values fail this check.
      if (seconds > 0) {
        resolve(seconds)
      } else {
        resolve(null)
      }
    }

    execFile('ffprobe', probeArgs, onProbeDone)
  })
}
// ============================================================================
// 主流程
// ============================================================================
@@ -131,7 +144,7 @@ function buildTimeline(items, defaultDurationUs) {
// 音频为主轴,视频调速适配(≤2x 加速,>2x 截断)
let offset = 0
return items.map(item => {
const audioDur = (item.audioDuration != null) ? item.audioDuration * US : (item.duration != null) ? item.duration * US : 0
const audioDur = (item.audioDuration != null) ? item.audioDuration * US : 0
const videoDur = (item.videoDuration != null) ? item.videoDuration * US : 0
// 无 TTS:用视频时长或固定时长
if (audioDur <= 0) {
@@ -193,10 +206,37 @@ async function assemble(args) {
if (items.length === 0) throw new Error('没有可用的素材文件')
// 统一时间线:由 duration 驱动(TTS 音频时长)或 fallback 到固定时长
// 用 ffprobe 测量实际音频/视频时长,替代 manifest 中的估计值
let audioMeasured = 0, videoMeasured = 0
for (const item of items) {
// 测量 TTS 音频实际时长
if (item.audio && !item.audio.startsWith('http')) {
const audioPath = path.isAbsolute(item.audio)
? item.audio
: path.resolve(inputDir, item.audio)
if (fs.existsSync(audioPath)) {
const actualDur = await getAudioDurationSec(audioPath)
if (actualDur != null) { item.audioDuration = actualDur; audioMeasured++ }
}
}
// 测量视频文件实际时长(生成器返回的是硬编码常量,不准确)
if (item.video) {
const videoPath = path.isAbsolute(item.video)
? item.video
: path.resolve(inputDir, item.video)
if (fs.existsSync(videoPath)) {
const actualDur = await getAudioDurationSec(videoPath) // ffprobe 对音视频通用
if (actualDur != null) { item.videoDuration = actualDur; videoMeasured++ }
}
}
}
if (audioMeasured > 0 || videoMeasured > 0) {
console.log(` 实际时长测量: 音频 ${audioMeasured} 个, 视频 ${videoMeasured}`)
}
const timeline = buildTimeline(items, defaultDurationUs)
const totalDurationUs = timeline.length > 0 ? timeline[timeline.length - 1].end : 0
const hasTTS = items.some(item => item.audio && (item.audioDuration != null || item.duration != null))
const hasTTS = items.some(item => item.audio && item.audioDuration != null)
console.log(`\nCapCut 成片组装`)
console.log(` 模式: ${mode} 画幅: ${format} (${width}x${height})`)
@@ -280,7 +320,7 @@ async function assemble(args) {
if (manifestFile) {
try {
const m = JSON.parse(fs.readFileSync(manifestFile, 'utf-8'))
const mi = m.items.find(i => i.id === item.id || i.narration === (item.narration || item.text) || i.text === (item.narration || item.text))
const mi = m.items.find(i => i.id === item.id || i.script === item.script || i.text === item.text)
if (mi) { mi.videoUrl = url; fs.writeFileSync(manifestFile, JSON.stringify(m, null, 2)) }
} catch (_) {}
}
@@ -316,7 +356,7 @@ async function assemble(args) {
// -- 添加字幕 --
step++; console.log(`[${step}/${totalSteps}] 添加字幕...`)
if (subtitles === 'true' && items.some(i => i.narration || i.text)) {
if (subtitles === 'true' && items.some(i => i.script || i.text)) {
await addSubtitles(draftUrl, items, timeline, subtitleStyle)
} else {
console.log(' 跳过')
@@ -669,7 +709,7 @@ async function addSubtitles(draftUrl, items, timeline, style = {}) {
for (let i = 0; i < items.length; i++) {
const item = items[i]
const text = item.narration || item.text || item.caption || ''
const text = item.script || item.text || item.caption || ''
if (!text) continue
const tl = timeline[i]

View File

@@ -40,7 +40,7 @@ function initManifest(options) {
}
// 校验必填字段
const requiredFields = ['shotDesc', 'narration', 'imagePrompt']
const requiredFields = ['shotDesc', 'script', 'imagePrompt']
const resolvedMode = mode || 'single'
for (let i = 0; i < rawItems.length; i++) {
@@ -72,7 +72,7 @@ function initManifest(options) {
id: i + 1,
status: 'pending',
shotDesc: raw.shotDesc || '',
narration: raw.narration || raw.text || '',
script: raw.script || '',
duration: raw.duration || 5,
imagePrompt: raw.imagePrompt,
confirmed: false,

View File

@@ -32,7 +32,7 @@ function validateManifest(manifestPath) {
if (manifest.items && Array.isArray(manifest.items)) {
manifest.items.forEach((item, i) => {
const prefix = `items[${i}]`
if (!item.narration && !item.text) issues.push(`${prefix} 缺少 narration 或 text中文旁白`)
if (!item.script && !item.text) issues.push(`${prefix} 缺少 script 或 text中文文案`)
if (!item.shotDesc) issues.push(`${prefix} 缺少 shotDesc分镜描述`)
if (!item.imagePrompt) issues.push(`${prefix} 缺少 imagePrompt`)
if (manifest.mode === 'framePair' && !item.lastFramePrompt) {

View File

@@ -110,7 +110,7 @@ async function generateGemini(item, idx, dir, imagesDir, ratio, refs) {
const file = (result.savedFiles && result.savedFiles.length > 0)
? renameGeneratedFile(
path.relative(dir, result.savedFiles[0]).replace(/\\/g, '/'),
dir, idx, item.narration || item.shotDesc, ''
dir, idx, item.script || item.shotDesc, ''
)
: null
return { file }
@@ -184,7 +184,7 @@ async function generateKling(item, idx, dir, imagesDir, ratio, refs) {
const file = (result.savedFiles && result.savedFiles.length > 0)
? renameGeneratedFile(
path.relative(dir, result.savedFiles[0]).replace(/\\/g, '/'),
dir, idx, item.narration || item.shotDesc, ''
dir, idx, item.script || item.shotDesc, ''
)
: null
return { file }

View File

@@ -15,7 +15,7 @@ async function phaseTts(manifest, manifestPath, options = {}) {
const { synthesize } = require('../qwen-tts')
const items = manifest.items.filter(it =>
it.status === 'done' && (it.narration || it.text) && !it.audio
it.status === 'done' && (it.script || it.text) && !it.audio
)
if (items.length === 0) { log('tts', '无待处理 item跳过'); return }
@@ -25,7 +25,7 @@ async function phaseTts(manifest, manifestPath, options = {}) {
const item = items[i]
const idx = i + 1
try {
const { filePath, duration } = await synthesize(item.narration || item.text, {
const { filePath, duration } = await synthesize(item.script || item.text, {
outputDir: audioDir,
id: item.id || idx,
voice: manifest.ttsVoice || undefined,
@@ -33,7 +33,7 @@ async function phaseTts(manifest, manifestPath, options = {}) {
})
item.audio = path.relative(dir, filePath).replace(/\\/g, '/')
item.audioDuration = Math.round(duration * 1000) / 1000
log('tts', `[${idx}/${items.length}] ${duration.toFixed(1)}s: ${(item.narration || item.text).substring(0, 30)}...`)
log('tts', `[${idx}/${items.length}] ${duration.toFixed(1)}s: ${(item.script || item.text).substring(0, 30)}...`)
} catch (err) {
item.status = 'failed'
item.error = `TTS失败: ${err.message}`