feat(video-pipeline): 用 ffprobe 实际测量音视频时长并统一字段名为 script
将项目中的 `narration` 字段统一重命名为 `script`,并新增 `getAudioDurationSec` 函数,通过 `ffprobe` 实际测量音频和视频文件的时长,替代 Manifest 中的估计值,提高时间线组装的准确性。同时优化字幕逻辑,仅在有 TTS 音频时调整视频速度。
This commit is contained in:
@@ -123,6 +123,19 @@ async function batchUploadToOSS(inputDir, files) {
|
||||
return urls
|
||||
}
|
||||
|
||||
/**
 * Measure the duration of a media file by invoking `ffprobe`.
 *
 * Runs `ffprobe -v quiet -show_entries format=duration -of csv=p=0 <file>`
 * and parses the single value it prints as a floating-point number of seconds.
 * The same probe is used for both audio and video files.
 *
 * This function never rejects: any failure is reported as `null` so callers
 * can keep whatever duration they already had.
 *
 * @param {string} filePath - Path of the media file to probe.
 * @returns {Promise<number|null>} Duration in seconds, or `null` when
 *   `ffprobe` reports an error or its output is not a finite number > 0.
 */
function getAudioDurationSec(filePath) {
  return new Promise((resolve) => {
    execFile('ffprobe', [
      '-v', 'quiet', '-show_entries', 'format=duration',
      '-of', 'csv=p=0', filePath
    ], (err, stdout) => {
      // Spawn failure or non-zero exit: treat as "could not measure".
      if (err) { resolve(null); return }
      const dur = Number.parseFloat(String(stdout).trim())
      // Reject NaN (e.g. "N/A"), zero/negative values and non-finite values
      // so a bogus duration never reaches the timeline arithmetic.
      resolve(Number.isFinite(dur) && dur > 0 ? dur : null)
    })
  })
}
|
||||
|
||||
// ============================================================================
|
||||
// 主流程
|
||||
// ============================================================================
|
||||
@@ -131,7 +144,7 @@ function buildTimeline(items, defaultDurationUs) {
|
||||
// 音频为主轴,视频调速适配(≤2x 加速,>2x 截断)
|
||||
let offset = 0
|
||||
return items.map(item => {
|
||||
const audioDur = (item.audioDuration != null) ? item.audioDuration * US : (item.duration != null) ? item.duration * US : 0
|
||||
const audioDur = (item.audioDuration != null) ? item.audioDuration * US : 0
|
||||
const videoDur = (item.videoDuration != null) ? item.videoDuration * US : 0
|
||||
// 无 TTS:用视频时长或固定时长
|
||||
if (audioDur <= 0) {
|
||||
@@ -193,10 +206,37 @@ async function assemble(args) {
|
||||
|
||||
if (items.length === 0) throw new Error('没有可用的素材文件')
|
||||
|
||||
// 统一时间线:由 duration 驱动(TTS 音频时长)或 fallback 到固定时长
|
||||
// 用 ffprobe 测量实际音频/视频时长,替代 manifest 中的估计值
|
||||
let audioMeasured = 0, videoMeasured = 0
|
||||
for (const item of items) {
|
||||
// 测量 TTS 音频实际时长
|
||||
if (item.audio && !item.audio.startsWith('http')) {
|
||||
const audioPath = path.isAbsolute(item.audio)
|
||||
? item.audio
|
||||
: path.resolve(inputDir, item.audio)
|
||||
if (fs.existsSync(audioPath)) {
|
||||
const actualDur = await getAudioDurationSec(audioPath)
|
||||
if (actualDur != null) { item.audioDuration = actualDur; audioMeasured++ }
|
||||
}
|
||||
}
|
||||
// 测量视频文件实际时长(生成器返回的是硬编码常量,不准确)
|
||||
if (item.video) {
|
||||
const videoPath = path.isAbsolute(item.video)
|
||||
? item.video
|
||||
: path.resolve(inputDir, item.video)
|
||||
if (fs.existsSync(videoPath)) {
|
||||
const actualDur = await getAudioDurationSec(videoPath) // ffprobe 对音视频通用
|
||||
if (actualDur != null) { item.videoDuration = actualDur; videoMeasured++ }
|
||||
}
|
||||
}
|
||||
}
|
||||
if (audioMeasured > 0 || videoMeasured > 0) {
|
||||
console.log(` 实际时长测量: 音频 ${audioMeasured} 个, 视频 ${videoMeasured} 个`)
|
||||
}
|
||||
|
||||
const timeline = buildTimeline(items, defaultDurationUs)
|
||||
const totalDurationUs = timeline.length > 0 ? timeline[timeline.length - 1].end : 0
|
||||
const hasTTS = items.some(item => item.audio && (item.audioDuration != null || item.duration != null))
|
||||
const hasTTS = items.some(item => item.audio && item.audioDuration != null)
|
||||
|
||||
console.log(`\nCapCut 成片组装`)
|
||||
console.log(` 模式: ${mode} 画幅: ${format} (${width}x${height})`)
|
||||
@@ -280,7 +320,7 @@ async function assemble(args) {
|
||||
if (manifestFile) {
|
||||
try {
|
||||
const m = JSON.parse(fs.readFileSync(manifestFile, 'utf-8'))
|
||||
const mi = m.items.find(i => i.id === item.id || i.narration === (item.narration || item.text) || i.text === (item.narration || item.text))
|
||||
const mi = m.items.find(i => i.id === item.id || i.script === item.script || i.text === item.text)
|
||||
if (mi) { mi.videoUrl = url; fs.writeFileSync(manifestFile, JSON.stringify(m, null, 2)) }
|
||||
} catch (_) {}
|
||||
}
|
||||
@@ -316,7 +356,7 @@ async function assemble(args) {
|
||||
|
||||
// -- 添加字幕 --
|
||||
step++; console.log(`[${step}/${totalSteps}] 添加字幕...`)
|
||||
if (subtitles === 'true' && items.some(i => i.narration || i.text)) {
|
||||
if (subtitles === 'true' && items.some(i => i.script || i.text)) {
|
||||
await addSubtitles(draftUrl, items, timeline, subtitleStyle)
|
||||
} else {
|
||||
console.log(' 跳过')
|
||||
@@ -669,7 +709,7 @@ async function addSubtitles(draftUrl, items, timeline, style = {}) {
|
||||
|
||||
for (let i = 0; i < items.length; i++) {
|
||||
const item = items[i]
|
||||
const text = item.narration || item.text || item.caption || ''
|
||||
const text = item.script || item.text || item.caption || ''
|
||||
if (!text) continue
|
||||
|
||||
const tl = timeline[i]
|
||||
|
||||
@@ -40,7 +40,7 @@ function initManifest(options) {
|
||||
}
|
||||
|
||||
// 校验必填字段
|
||||
const requiredFields = ['shotDesc', 'narration', 'imagePrompt']
|
||||
const requiredFields = ['shotDesc', 'script', 'imagePrompt']
|
||||
const resolvedMode = mode || 'single'
|
||||
|
||||
for (let i = 0; i < rawItems.length; i++) {
|
||||
@@ -72,7 +72,7 @@ function initManifest(options) {
|
||||
id: i + 1,
|
||||
status: 'pending',
|
||||
shotDesc: raw.shotDesc || '',
|
||||
narration: raw.narration || raw.text || '',
|
||||
script: raw.script || '',
|
||||
duration: raw.duration || 5,
|
||||
imagePrompt: raw.imagePrompt,
|
||||
confirmed: false,
|
||||
|
||||
@@ -32,7 +32,7 @@ function validateManifest(manifestPath) {
|
||||
if (manifest.items && Array.isArray(manifest.items)) {
|
||||
manifest.items.forEach((item, i) => {
|
||||
const prefix = `items[${i}]`
|
||||
if (!item.narration && !item.text) issues.push(`${prefix} 缺少 narration 或 text(中文旁白)`)
|
||||
if (!item.script && !item.text) issues.push(`${prefix} 缺少 script 或 text(中文文案)`)
|
||||
if (!item.shotDesc) issues.push(`${prefix} 缺少 shotDesc(分镜描述)`)
|
||||
if (!item.imagePrompt) issues.push(`${prefix} 缺少 imagePrompt`)
|
||||
if (manifest.mode === 'framePair' && !item.lastFramePrompt) {
|
||||
|
||||
@@ -110,7 +110,7 @@ async function generateGemini(item, idx, dir, imagesDir, ratio, refs) {
|
||||
const file = (result.savedFiles && result.savedFiles.length > 0)
|
||||
? renameGeneratedFile(
|
||||
path.relative(dir, result.savedFiles[0]).replace(/\\/g, '/'),
|
||||
dir, idx, item.narration || item.shotDesc, ''
|
||||
dir, idx, item.script || item.shotDesc, ''
|
||||
)
|
||||
: null
|
||||
return { file }
|
||||
@@ -184,7 +184,7 @@ async function generateKling(item, idx, dir, imagesDir, ratio, refs) {
|
||||
const file = (result.savedFiles && result.savedFiles.length > 0)
|
||||
? renameGeneratedFile(
|
||||
path.relative(dir, result.savedFiles[0]).replace(/\\/g, '/'),
|
||||
dir, idx, item.narration || item.shotDesc, ''
|
||||
dir, idx, item.script || item.shotDesc, ''
|
||||
)
|
||||
: null
|
||||
return { file }
|
||||
|
||||
@@ -15,7 +15,7 @@ async function phaseTts(manifest, manifestPath, options = {}) {
|
||||
const { synthesize } = require('../qwen-tts')
|
||||
|
||||
const items = manifest.items.filter(it =>
|
||||
it.status === 'done' && (it.narration || it.text) && !it.audio
|
||||
it.status === 'done' && (it.script || it.text) && !it.audio
|
||||
)
|
||||
if (items.length === 0) { log('tts', '无待处理 item,跳过'); return }
|
||||
|
||||
@@ -25,7 +25,7 @@ async function phaseTts(manifest, manifestPath, options = {}) {
|
||||
const item = items[i]
|
||||
const idx = i + 1
|
||||
try {
|
||||
const { filePath, duration } = await synthesize(item.narration || item.text, {
|
||||
const { filePath, duration } = await synthesize(item.script || item.text, {
|
||||
outputDir: audioDir,
|
||||
id: item.id || idx,
|
||||
voice: manifest.ttsVoice || undefined,
|
||||
@@ -33,7 +33,7 @@ async function phaseTts(manifest, manifestPath, options = {}) {
|
||||
})
|
||||
item.audio = path.relative(dir, filePath).replace(/\\/g, '/')
|
||||
item.audioDuration = Math.round(duration * 1000) / 1000
|
||||
log('tts', `[${idx}/${items.length}] ${duration.toFixed(1)}s: ${(item.narration || item.text).substring(0, 30)}...`)
|
||||
log('tts', `[${idx}/${items.length}] ${duration.toFixed(1)}s: ${(item.script || item.text).substring(0, 30)}...`)
|
||||
} catch (err) {
|
||||
item.status = 'failed'
|
||||
item.error = `TTS失败: ${err.message}`
|
||||
|
||||
Reference in New Issue
Block a user