feat(video-pipeline): 重构视频流水线,优化成片时间线规则和状态管理

- 引入 manifest.json 作为唯一状态源,所有子 Agent 操作回写 manifest
- 重构 timebuilder 逻辑,支持四种视频适配策略(加速/裁剪/放缓/画面停顿)
- 统一 TTS 阶段输出结构,单句和多句均写入 segments[]
- 重写字幕和配音生成,基于 segments 精确时长实现音画同步
- 新增 confirm 命令支持按 id 范围确认,上传阶段分离图片和视频
- 添加中间产物写入 output/ 目录的约束,清理废弃配置参数
This commit is contained in:
2026-05-02 00:14:40 +08:00
parent b4b92854db
commit 0998fd6ae1
14 changed files with 457 additions and 205 deletions

View File

@@ -215,28 +215,89 @@ function getAudioDurationSec(filePath) {
// 主流程
// ============================================================================
/**
 * Build the composition timeline: one slot per manifest item, laid out
 * back to back starting at offset 0.
 *
 * Core rules:
 *   Image mode: an image has no intrinsic duration — the TTS audio length
 *   IS the slot length; with no audio the slot is zero-length and marked
 *   `skip` so downstream stages can drop it.
 *   Video mode: TTS audio is the master track; footage adapts to it.
 *   Strategy is chosen from ratio = videoDur / audioDur:
 *     ratio > 1.1 (video longer):  ≤ 2x → speed_up (setpts compression)
 *                                  > 2x → trim (cut to audio length)
 *     ratio < 0.9 (video shorter): ≥ 0.5x → slow_down (setpts stretch)
 *                                  < 0.5x → freeze (normal speed + frozen
 *                                           last frame to pad the slot)
 *     0.9 ~ 1.1 → none (close enough, leave untouched)
 *
 * @param {Array<Object>} items manifest items; may carry segments[] (with
 *   per-segment measured `duration` in seconds), audioDuration,
 *   videoDuration; `video`/`videoUrl`/`url` mark video footage
 * @returns {Array<Object>} slots in microseconds:
 *   {start, end, duration, speed, strategy[, skip][, freezeExtra]}
 */
function buildTimeline(items) {
  let offset = 0
  return items.map(item => {
    // Prefer the sum of per-segment measured durations (exactly matches the
    // audio files on disk); fall back to the whole-item estimate.
    let audioDur
    if (item.segments && item.segments.length > 0) {
      audioDur = item.segments.reduce((sum, s) => sum + (s.duration || 0), 0) * US
    } else {
      audioDur = (item.audioDuration != null) ? item.audioDuration * US : 0
    }
    const videoDur = (item.videoDuration != null) ? item.videoDuration * US : 0
    const hasVideo = !!(item.video || item.videoUrl || item.url)
    // No TTS audio for this item
    if (audioDur <= 0) {
      if (hasVideo && videoDur > 0) {
        // Video without audio: keep the footage's own duration.
        const entry = { start: offset, end: offset + videoDur, duration: videoDur, speed: 1, strategy: 'none' }
        offset += videoDur
        return entry
      }
      // Image without audio: zero-length slot, flagged for skipping.
      return { start: offset, end: offset, duration: 0, speed: 1, strategy: 'none', skip: true }
    }
    // TTS present: the audio duration drives the slot.
    const dur = audioDur
    if (!hasVideo || videoDur <= 0) {
      // Image mode: slot length == audio length.
      const entry = { start: offset, end: offset + dur, duration: dur, speed: 1, strategy: 'none' }
      offset += dur
      return entry
    }
    // Video mode: pick an adaptation strategy from the length ratio.
    const ratio = videoDur / audioDur
    let entry
    if (ratio > 1.1) {
      // Video longer than audio: speed up (≤2x) or trim (>2x).
      entry = ratio <= 2
        ? { start: offset, end: offset + dur, duration: dur, speed: ratio, strategy: 'speed_up' }
        : { start: offset, end: offset + dur, duration: dur, speed: 1, strategy: 'trim' }
    } else if (ratio < 0.9) {
      // Video shorter than audio: slow down (≥0.5x) or freeze last frame.
      entry = ratio >= 0.5
        ? { start: offset, end: offset + dur, duration: dur, speed: ratio, strategy: 'slow_down' }
        : { start: offset, end: offset + dur, duration: dur, speed: 1, strategy: 'freeze', freezeExtra: dur - videoDur }
    } else {
      // 0.9 ~ 1.1: close enough — no adjustment needed.
      entry = { start: offset, end: offset + dur, duration: dur, speed: 1, strategy: 'none' }
    }
    offset += dur
    return entry
  })
}
@@ -253,7 +314,6 @@ async function assemble(args) {
filter: filterStr,
format = '9:16',
apiKey = '',
duration = '4',
animation = '轻微放大',
} = args
@@ -284,22 +344,44 @@ async function assemble(args) {
}
const { width, height } = getResolution(format)
const defaultDurationUs = parseFloat(duration) * US
// 过滤出实际存在的文件
const missingFileItems = []
const items = manifest.items.filter(item => {
if (item.url) return true // 视频模式可能用 URL
if (item.video) return true // 视频模式本地文件
if (!item.file) {
missingFileItems.push(item.id || '?')
return false
}
const filePath = path.join(inputDir, item.file)
return fs.existsSync(filePath)
})
if (items.length === 0) {
if (missingFileItems.length > 0) {
throw new Error(`没有可用的素材文件 — ${missingFileItems.length} 个 item 缺少 file 字段id: ${missingFileItems.join(', ')}),请先运行 images 阶段`)
}
throw new Error('没有可用的素材文件')
}
if (items.length === 0) throw new Error('没有可用的素材文件')
// 用 ffprobe 测量实际音频/视频时长,替代 manifest 中的估计值
let audioMeasured = 0, videoMeasured = 0
for (const item of items) {
// 测量 TTS 音频实际时长(有 segments 时跳过audioDuration 已是精确累计值)
if (item.audio && !item.audio.startsWith('http') && !item.segments) {
// 测量各 segment 音频文件实际时长
if (item.segments && item.segments.length > 0) {
for (const seg of item.segments) {
if (!seg.audio || seg.audio.startsWith('http')) continue
const audioPath = path.isAbsolute(seg.audio)
? seg.audio
: path.resolve(inputDir, seg.audio)
if (!fs.existsSync(audioPath)) continue
const actualDur = await getAudioDurationSec(audioPath)
if (actualDur != null) { seg.duration = actualDur; audioMeasured++ }
}
} else if (item.audio && !item.audio.startsWith('http')) {
const audioPath = path.isAbsolute(item.audio)
? item.audio
: path.resolve(inputDir, item.audio)
@@ -323,16 +405,32 @@ async function assemble(args) {
console.log(` 实际时长测量: 音频 ${audioMeasured} 个, 视频 ${videoMeasured}`)
}
const timeline = buildTimeline(items, defaultDurationUs)
const timeline = buildTimeline(items)
const totalDurationUs = timeline.length > 0 ? timeline[timeline.length - 1].end : 0
const hasTTS = items.some(item => item.audio && item.audioDuration != null)
// 时间轴诊断
for (let i = 0; i < items.length; i++) {
const item = items[i]
const tl = timeline[i]
if (tl.skip) { console.log(` [${i + 1}] 跳过(无音频)`); continue }
const audioDur = item.segments
? item.segments.reduce((s, seg) => s + (seg.duration || 0), 0)
: (item.audioDuration || 0)
const slotDur = tl.duration / US
const diff = slotDur - audioDur
const videoDur = (item.videoDuration || 0)
const stratInfo = tl.strategy && tl.strategy !== 'none' ? ` 策略=${tl.strategy}` : ''
const marker = Math.abs(diff) > 0.05 ? ' ⚠️ 不对齐' : ''
console.log(` [${i + 1}] 画面=${slotDur.toFixed(2)}s 音频=${audioDur.toFixed(2)}s 视频=${videoDur.toFixed(2)}s${stratInfo}${marker}`)
}
// -- 读取转场策略(在 addImages/addVideos 之前) --
const transitionConfig = loadTransitions(manifest)
console.log(`\nCapCut 成片组装`)
console.log(` 模式: ${mode} 画幅: ${format} (${width}x${height})`)
console.log(` 时间线: ${hasTTS ? 'TTS音频驱动' : `固定${duration}s/段`} 总时长: ${(totalDurationUs / US).toFixed(1)}s`)
console.log(` 时间线: ${hasTTS ? 'TTS音频驱动' : '视频原始时长'} 总时长: ${(totalDurationUs / US).toFixed(1)}s`)
console.log(` 字幕: ${subtitles} 配音: ${voiceover} 动画: ${animation}`)
if (finalEffects) console.log(` 特效: ${finalEffects}`)
if (finalFilter) console.log(` 滤镜: ${finalFilter}`)
@@ -386,10 +484,10 @@ async function assemble(args) {
for (let i = 0; i < items.length; i++) {
const item = items[i]
const tl = timeline[i]
if (tl.needAdjust && item.video) {
if (tl.strategy && tl.strategy !== 'none' && item.video) {
const videoPath = path.resolve(inputDir, item.video)
const audioDur = tl.duration / US
const adjustedPath = await adjustVideoSpeed(videoPath, audioDur)
const adjustedPath = await adjustVideoSpeed(videoPath, audioDur, tl.strategy, tl.speed, tl.freezeExtra || 0)
if (adjustedPath !== videoPath) {
item.video = path.relative(inputDir, adjustedPath)
item.videoDuration = audioDur
@@ -398,7 +496,7 @@ async function assemble(args) {
}
}
if (adjustedCount > 0) {
console.log(` 视频调: ${adjustedCount}/${items.length}`)
console.log(` 视频调: ${adjustedCount}/${items.length}`)
}
// Step 2: 上传(已调速的)视频到 OSS
@@ -547,7 +645,7 @@ async function assemble(args) {
console.log(` 草稿ID: ${draftId}`)
console.log(` 总时长: ${(totalDurationUs / US).toFixed(1)}s`)
console.log(` 素材数: ${items.length}`)
console.log(` 时间线: ${hasTTS ? 'TTS音频驱动' : '固定时长'}`)
console.log(` 时间线: ${hasTTS ? 'TTS音频驱动' : '视频原始时长'}`)
if (mode === 'videos' && subtitles === 'false') {
console.log(`\n >> 视频模式未加字幕,请在剪映中打开草稿 → 识别字幕 → 语音识别生成\n`)
}
@@ -713,54 +811,142 @@ async function addKenBurns(draftUrl, segmentIds, items, timeline, manifest) {
// ============================================================================
/**
 * ffmpeg video adaptation: fit a clip to its audio-driven slot length.
 *
 * Strategy (selected upstream from ratio = videoDur / audioDur):
 *   speed_up  (1.1 < ratio ≤ 2)   → setpts time compression (faster)
 *   trim      (ratio > 2)         → cut to the target duration
 *   slow_down (0.5 ≤ ratio < 0.9) → setpts time stretch (slow motion)
 *   freeze    (ratio < 0.5)       → normal speed + last-frame freeze padding
 *   none      (0.9 ~ 1.1)         → no adjustment
 *
 * Any strategy that fails falls back to a plain stream-copy trim; if even
 * the trim fails, the original path is returned unchanged.
 *
 * @param {string} videoPath path to the source clip
 * @param {number} targetDurationSec slot length in seconds (audio master)
 * @param {string} [strategy='none'] one of the strategies above
 * @param {number} [speed=1] speed factor for speed_up / slow_down
 * @param {number} [freezeExtraUs=0] freeze padding in microseconds
 * @returns {Promise<string>} adjusted file path (original path on failure)
 */
async function adjustVideoSpeed(videoPath, targetDurationSec, strategy = 'none', speed = 1, freezeExtraUs = 0) {
  if (!fs.existsSync(videoPath)) return videoPath
  if (strategy === 'none') return videoPath
  // Last-resort fallback after a strategy failed: stream-copy trim.
  function fallbackTrim(cb) {
    execFile('ffmpeg', [
      '-y', '-i', videoPath,
      '-t', String(targetDurationSec),
      '-c', 'copy',
      videoPath.replace(/(\.\w+)$/, '_adj$1')
    ], { timeout: 30000 }, (err) => {
      if (err) { cb(videoPath); return }
      cb(videoPath.replace(/(\.\w+)$/, '_adj$1'))
    })
  }
  return new Promise((resolve) => {
    // Probe the clip's real duration before adjusting.
    execFile('ffprobe', [
      '-v', 'quiet', '-show_entries', 'format=duration',
      '-of', 'csv=p=0', videoPath
    ], (err, stdout) => {
      if (err) { fallbackTrim(resolve); return }
      const videoDur = parseFloat(stdout.trim())
      if (!videoDur || videoDur <= 0) { fallbackTrim(resolve); return }
      const outPath = videoPath.replace(/(\.\w+)$/, '_adj$1')
      if (strategy === 'trim') {
        // Keep only the first targetDurationSec seconds (no re-encode).
        execFile('ffmpeg', [
          '-y', '-i', videoPath,
          '-t', String(targetDurationSec),
          '-c', 'copy',
          outPath
        ], { timeout: 30000 }, (err) => {
          if (err) { console.log(`   截断失败: ${err.message}`); resolve(videoPath); return }
          console.log(`   截断: ${videoDur.toFixed(1)}s → ${targetDurationSec.toFixed(1)}s`)
          resolve(outPath)
        })
      } else if (strategy === 'speed_up') {
        // setpts=PTS/speed compresses the timeline; audio track is dropped
        // (-an) since the TTS track replaces it downstream.
        const speedVal = speed.toFixed(3)
        execFile('ffmpeg', [
          '-y', '-i', videoPath,
          '-filter_complex', `setpts=PTS/${speedVal}`,
          '-an',
          outPath
        ], { timeout: 30000 }, (err) => {
          if (err) {
            console.log(`   加速失败,兜底截断: ${err.message}`)
            fallbackTrim(resolve)
            return
          }
          console.log(`   加速: ${videoDur.toFixed(1)}s → ${targetDurationSec.toFixed(1)}s (${speedVal}x)`)
          resolve(outPath)
        })
      } else if (strategy === 'slow_down') {
        // speed < 1 here, so setpts=PTS*(1/speed) stretches the timeline.
        const factor = (1 / speed).toFixed(3)
        execFile('ffmpeg', [
          '-y', '-i', videoPath,
          '-filter_complex', `setpts=PTS*${factor}`,
          '-an',
          outPath
        ], { timeout: 30000 }, (err) => {
          if (err) {
            console.log(`   放缓失败,兜底截断: ${err.message}`)
            fallbackTrim(resolve)
            return
          }
          console.log(`   放缓: ${videoDur.toFixed(1)}s → ${targetDurationSec.toFixed(1)}s (${speed.toFixed(2)}x speed)`)
          resolve(outPath)
        })
      } else if (strategy === 'freeze') {
        // Freeze: play at normal speed, then hold the last frame via tpad.
        const freezeSec = freezeExtraUs / US
        execFile('ffmpeg', [
          '-y', '-i', videoPath,
          '-filter_complex', `tpad=stop=-1:stop_duration=${freezeSec.toFixed(3)}`,
          '-an',
          outPath
        ], { timeout: 30000 }, (err) => {
          if (err) {
            // Fallback chain: grab last frame → render a still clip → concat.
            console.log(`   tpad freeze 失败,尝试 concat 方案: ${err.message}`)
            const lastFrame = videoPath.replace(/(\.\w+)$/, '_lastframe.png')
            const frozenVideo = videoPath.replace(/(\.\w+)$/, '_frozen.mp4')
            execFile('ffmpeg', [
              '-y', '-sseof', '-0.1', '-i', videoPath,
              '-frames:v', '1', lastFrame
            ], { timeout: 10000 }, (err2) => {
              if (err2) { console.log(`   concat 方案也失败,兜底截断`); fallbackTrim(resolve); return }
              execFile('ffmpeg', [
                '-y', '-loop', '1', '-i', lastFrame,
                '-t', String(freezeSec.toFixed(3)),
                '-pix_fmt', 'yuv420p',
                // Force even dimensions — yuv420p requires them.
                '-vf', 'scale=trunc(iw/2)*2:trunc(ih/2)*2',
                frozenVideo
              ], { timeout: 15000 }, (err3) => {
                if (err3) {
                  try { fs.unlinkSync(lastFrame) } catch (_) {}
                  console.log(`   冻结帧视频生成失败,兜底截断`)
                  fallbackTrim(resolve)
                  return
                }
                const concatList = path.join(path.dirname(videoPath), '_freeze_concat.txt')
                fs.writeFileSync(concatList, `file '${videoPath}'\nfile '${frozenVideo}'\n`)
                // NOTE(review): '-c copy' concat assumes both parts share
                // codec parameters — may fail for some sources; fallbackTrim
                // covers that case.
                execFile('ffmpeg', [
                  '-y', '-f', 'concat', '-safe', '0', '-i', concatList,
                  '-c', 'copy', outPath
                ], { timeout: 30000 }, (err4) => {
                  try { fs.unlinkSync(lastFrame); fs.unlinkSync(frozenVideo); fs.unlinkSync(concatList) } catch (_) {}
                  if (err4) { console.log(`   拼接失败,兜底截断`); fallbackTrim(resolve); return }
                  console.log(`   画面停顿: ${videoDur.toFixed(1)}s + 冻结 ${freezeSec.toFixed(1)}s = ${targetDurationSec.toFixed(1)}s`)
                  resolve(outPath)
                })
              })
            })
            return
          }
          console.log(`   画面停顿: ${videoDur.toFixed(1)}s + 冻结 ${freezeSec.toFixed(1)}s = ${targetDurationSec.toFixed(1)}s`)
          resolve(outPath)
        })
      } else {
        // Unknown strategy: leave the clip untouched.
        resolve(videoPath)
      }
    })
  })
}
@@ -829,8 +1015,8 @@ async function addVideos(draftUrl, inputDir, items, timeline, width, height, tra
async function batchUploadAudio(inputDir, items) {
const urls = {}
for (const item of items) {
// 上传 segments 中的每段音频
if (item.segments && item.segments.length > 1) {
// 上传所有 segment 音频文件
if (item.segments && item.segments.length > 0) {
for (const seg of item.segments) {
if (!seg.audio || seg.audio.startsWith('http') || urls[seg.audio]) continue
const filePath = path.isAbsolute(seg.audio)
@@ -848,7 +1034,7 @@ async function batchUploadAudio(inputDir, items) {
}
}
}
// 上传 item.audio单段或 segments 的第一段
// 上传 item.audio向后兼容,segments[0].audio 通常等于此值
if (!item.audio || item.audio.startsWith('http')) {
if (item.audio) urls[item.audio] = item.audio
continue
@@ -893,24 +1079,29 @@ async function addVoiceover(draftUrl, inputDir, items, timeline, audioUrls = {})
for (let i = 0; i < items.length; i++) {
const item = items[i]
const tl = timeline[i]
const segments = item.segments && item.segments.length > 1 ? item.segments : null
if (segments) {
// 多段音频:按 segment 逐段添加,使用精确时长
const slots = distributeSegments(tl, segments)
for (const slot of slots) {
const audioUrl = resolveAudio(slot.audio)
if (item.segments && item.segments.length > 0) {
// 逐段添加,每段使用实际音频文件时长(不做比例分配,消除留白)
let currentTime = tl.start
for (let si = 0; si < item.segments.length; si++) {
const seg = item.segments[si]
const audioUrl = resolveAudio(seg.audio)
const segDurUs = (seg.duration || 0) * US
if (segDurUs <= 0) continue
// 最后一段对齐 timeline 末尾,吃掉浮点误差
const isLast = si === item.segments.length - 1
const endTime = isLast ? tl.end : currentTime + segDurUs
audioInfos.push({
audio_url: audioUrl,
start: slot.start,
end: slot.end,
duration: slot.duration,
start: currentTime,
end: endTime,
duration: endTime - currentTime,
volume: 1.0,
})
currentTime = endTime
}
} else if (item.audio) {
// 单段音频:用实际音频时长,不超过 timeline 时长
// 无 segments用实际音频时长
const audioUrl = resolveAudio(item.audio)
const audioDurUs = item.audioDuration ? item.audioDuration * US : tl.duration
@@ -981,23 +1172,6 @@ function applyAnimationProps(cap, style = {}) {
if (style.outAnimDuration) cap.out_animation_duration = style.outAnimDuration
}
// Proportionally split a timeline slot across TTS segments (DRY helper).
// Each segment's share of the slot follows its duration weight; the final
// segment absorbs rounding drift so the slots end exactly at tl.end, and
// every slot is floored at 100ms.
function distributeSegments(tl, segments) {
  const weightTotal = segments.reduce((acc, seg) => acc + (seg.duration || 0) * US, 0)
  if (weightTotal <= 0) return []
  const slotSpan = tl.end - tl.start
  const slots = []
  let cursor = tl.start
  for (let idx = 0; idx < segments.length; idx++) {
    const seg = segments[idx]
    const segUs = Math.round((seg.duration || 0) * US)
    let span = Math.round(slotSpan * (segUs / weightTotal))
    if (idx === segments.length - 1) span = tl.end - cursor
    span = Math.max(span, 100000)
    slots.push({ start: cursor, end: cursor + span, duration: span, text: seg.text, audio: seg.audio })
    cursor += span
  }
  return slots
}
function loadAccountConfig(manifest) {
const account = manifest.account
if (!account) return {}
@@ -1093,17 +1267,19 @@ async function addSubtitles(draftUrl, items, timeline, style = {}, split = false
const tl = timeline[i]
if (split) {
// 分句模式:优先用 segmentsTTS 逐句生成的精确时长),回退到字数估算
const segments = item.segments && item.segments.length > 1 ? item.segments : null
if (segments) {
// 精确模式:用 segments 的实际音频时长
const slots = distributeSegments(tl, segments)
for (const slot of slots) {
const cap = { start: slot.start, end: slot.end, text: slot.text }
// 分句模式:优先用 segments 精确时长(与 addVoiceover 同步),回退到字数估算
if (item.segments && item.segments.length > 0) {
let currentTime = tl.start
for (let si = 0; si < item.segments.length; si++) {
const seg = item.segments[si]
const segDurUs = (seg.duration || 0) * US
if (segDurUs <= 0) continue
const isLast = si === item.segments.length - 1
const endTime = isLast ? tl.end : currentTime + segDurUs
const cap = { start: currentTime, end: endTime, text: seg.text }
applyAnimationProps(cap, animStyle)
captions.push(cap)
currentTime = endTime
}
} else {
// 回退:字数权重估算
@@ -1246,7 +1422,6 @@ async function main() {
console.log('选项:')
console.log(' --mode images|videos 素材类型(默认 images')
console.log(' --format 9:16 画幅比例')
console.log(' --duration 4 默认每段时长/秒无TTS时的fallback默认 4')
console.log(' --voiceover true|false 是否添加TTS配音轨道默认 true')
console.log(' --subtitles true|false 是否添加字幕(默认 true')
console.log(' --split-captions true|false 分句字幕模式(默认 true按标点切分')
@@ -1256,12 +1431,12 @@ async function main() {
console.log(' --apiKey <key> 云渲染 API Key可选')
console.log(' --manifest <path> manifest.json 路径')
console.log('')
console.log('时间线模式:')
console.log(' manifest.json 中每段包含 audio + duration → TTS音频驱动时间线')
console.log(' 无 audio/duration → 按 --duration 固定时长')
console.log('')
console.log('manifest.json 示例TTS驱动:')
console.log(' {"items":[{"file":"1.png","text":"文案","audio":"seg_1.mp3","duration":3.5}]}')
console.log('时间线规则:')
console.log(' 图片模式: TTS 音频时长 = 画面时长,无音频则跳过')
console.log(' 视频模式: TTS 为主轴,视频通过以下策略适配:')
console.log(' 视频比音频长 → 加速(≤2x) 或 裁剪(>2x)')
console.log(' 视频比音频短 → 放缓(≥0.5x) 或 画面停顿(<0.5x)')
console.log(' 所有策略失败 → 兜底截断')
console.log('')
console.log('配置:')
console.log(' 请运行 node setup.js 生成配置')