feat(skills): 完善视频生产 pipeline 及新增健身跟练账号

- SKILL.md: 新增工作流阶段定义、质量卡点、分镜规则
- manifest-schema.md: 补充完整字段规范及类型定义
- phase-tts.js: 优化 TTS 合成长逻辑，添加进度追踪
- capcut-tracks.js: 扩展轨道构建能力，支持更多元素类型
- capcut-timeline.js: 改进时间线生成，支持淡入淡出
- capcut_assemble.js: 新增 assemble 阶段完整实现
- cmd-init.js: 完善 init 命令逻辑
- qwen-tts.js: 调整超时配置
- accounts/禁忌帝王学: 更新拆分/图像/台词提示词
- accounts/健身跟练: 新增账号含 account.json 及全套提示词模板
- 新增 workflow-issues-20260501.md 参考文档

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-06 22:53:37 +08:00
parent e6daf7a8d8
commit 6eec0e8889
28 changed files with 2199 additions and 253 deletions
--- a/.claude/skills/video-from-script/scripts/lib/capcut-timeline.js
+++ b/.claude/skills/video-from-script/scripts/lib/capcut-timeline.js
@@ -3,12 +3,15 @@
 *
 * 核心算法模块。纯函数 + ffmpeg，自包含可测试。
 *
- * 规则:
+ * 铁律（固化，不可绕过）：
+ *   音频：生成后不可调速（TTS=1.15x，CapCut无speed字段）
+ *   视频：始终配合音频时长（只允许加速/截断，不允许慢放/冻结）
+ *
+ * 时间线规则：
 *   图片模式: TTS 音频时长 = 画面时长，无音频 = 跳过
 *   视频模式: TTS 为主轴，视频通过策略适配
 *     视频比音频长 → 加速(≤2x) / 裁剪(>2x)
- *     视频比音频短 → 放缓(≥0.5x) / 画面停顿(<0.5x)
- *     所有策略失败 → 兜底截断
+ *     视频比音频短 → 禁止！应在分镜阶段拆分 shot，不允许慢放/冻结补齐
 */

 const fs = require('fs')
@@ -20,6 +23,20 @@ const { US } = require('./capcut-api')
 // 时间线构建
 // ============================================================================

+/**
+ * 构建时间线条目
+ *
+ * @param {Array} items - manifest items
+ * @returns {Array} timeline entries
+ *
+ * 策略选择（固化，按 ratio = videoDur / audioDur）：
+ *   > 1.1, ≤ 2   → speed_up   （视频加速追上音频，最优）
+ *   > 2          → trim       （视频截断至音频时长）
+ *   0.9 ~ 1.1    → none       （接近匹配，无需调整）
+ *   < 0.9        → 禁止！音频时长超过视频，分镜阶段未正确拆分 shot
+ *
+ * 铁律：不允许 slow_down / freeze，不允许音频调速
+ */
 function buildTimeline(items) {
  let offset = 0
  return items.map(item => {
@@ -46,7 +63,7 @@ function buildTimeline(items) {
      return entry
    }

-    // 视频模式：策略选择
+    // 视频模式：策略选择（铁律：不允许音频>视频）
    const ratio = videoDur / audioDur

    if (ratio > 1.1) {
@@ -59,23 +76,25 @@ function buildTimeline(items) {
        offset += dur
        return entry
      }
-    } else if (ratio < 0.9) {
-      if (ratio >= 0.5) {
-        const entry = { start: offset, end: offset + dur, duration: dur, speed: ratio, strategy: 'slow_down' }
-        offset += dur
-        return entry
-      } else {
-        const entry = {
-          start: offset, end: offset + dur, duration: dur, speed: 1,
-          strategy: 'freeze', freezeExtra: dur - videoDur,
-        }
-        offset += dur
-        return entry
-      }
-    } else {
+    } else if (ratio >= 0.9) {
+      // 0.9 ~ 1.1：无需调整
      const entry = { start: offset, end: offset + dur, duration: dur, speed: 1, strategy: 'none' }
      offset += dur
      return entry
+    } else {
+      // ratio < 0.9：音频时长超过视频！
+      // 铁律禁止：不允许慢放/冻结/拼接补齐。此情况应在分镜阶段拆分 shot。
+      // 强制截断并打印错误标记，由主 Agent 上报给用户/打回分镜重做。
+      const entry = {
+        start: offset, end: offset + dur, duration: dur, speed: 1,
+        strategy: 'FORBIDDEN_audio_gt_video',
+        ratio: parseFloat(ratio.toFixed(3)),
+        videoDur: parseFloat((videoDur / US).toFixed(2)),
+        audioDur: parseFloat((audioDur / US).toFixed(2)),
+        error: '音频时长(' + (audioDur / US).toFixed(2) + 's) > 视频时长(' + (videoDur / US).toFixed(2) + 's)，分镜阶段 shot 未正确拆分，请打回重新切割',
+      }
+      offset += dur
+      return entry
    }
  })
 }
@@ -87,16 +106,18 @@ function buildTimeline(items) {
 /**
 * ffmpeg 视频调整：根据策略适配音频时长
 *
- * 策略（按 ratio = videoDur / audioDur 选择）:
- *   speed_up  (ratio > 1.1, ≤2x)  → setpts 压缩时间（加速）
- *   trim      (ratio > 2x)        → 截断到目标时长
- *   slow_down (ratio < 0.9, ≥0.5x) → setpts 拉长时间（慢放）
- *   freeze    (ratio < 0.5x)      → 视频原速 + 最后一帧冻结补时长
+ * 允许策略（按 ratio = videoDur / audioDur 选择）:
+ *   speed_up  (ratio > 1.1, ≤2x)  → setpts 压缩时间（加速），最优
+ *   trim      (ratio > 2x)        → 截断到目标时长，次选
 *   none      (0.9~1.1)           → 无需调整
 *
+ * 禁止策略（已删除）:
+ *   slow_down (ratio < 0.9)       → ❌ 不允许慢放视频补齐！
+ *   freeze    (ratio < 0.5)        → ❌ 不允许冻结帧补齐！
+ *
 * 所有策略失败后兜底：截断到目标时长
 */
-async function adjustVideoSpeed(videoPath, targetDurationSec, strategy = 'none', speed = 1, freezeExtraUs = 0) {
+async function adjustVideoSpeed(videoPath, targetDurationSec, strategy = 'none', speed = 1) {
  if (!fs.existsSync(videoPath)) return videoPath
  if (strategy === 'none') return videoPath

@@ -150,72 +171,9 @@ async function adjustVideoSpeed(videoPath, targetDurationSec, strategy = 'none',
          console.log(`     加速: ${videoDur.toFixed(1)}s → ${targetDurationSec.toFixed(1)}s (${speedVal}x)`)
          resolve(outPath)
        })
-      } else if (strategy === 'slow_down') {
-        const factor = (1 / speed).toFixed(3)
-        execFile('ffmpeg', [
-          '-y', '-i', videoPath,
-          '-filter_complex', `setpts=PTS*${factor}`,
-          '-an',
-          outPath
-        ], { timeout: 30000 }, (err) => {
-          if (err) {
-            console.log(`     放缓失败，兜底截断: ${err.message}`)
-            fallbackTrim(resolve)
-            return
-          }
-          console.log(`     放缓: ${videoDur.toFixed(1)}s → ${targetDurationSec.toFixed(1)}s (${speed.toFixed(2)}x speed)`)
-          resolve(outPath)
-        })
-      } else if (strategy === 'freeze') {
-        const freezeSec = freezeExtraUs / US
-        execFile('ffmpeg', [
-          '-y', '-i', videoPath,
-          '-filter_complex', `tpad=stop=-1:stop_duration=${freezeSec.toFixed(3)}`,
-          '-an',
-          outPath
-        ], { timeout: 30000 }, (err) => {
-          if (err) {
-            console.log(`     tpad freeze 失败，尝试 concat 方案: ${err.message}`)
-            const lastFrame = videoPath.replace(/(\.\w+)$/, '_lastframe.png')
-            const frozenVideo = videoPath.replace(/(\.\w+)$/, '_frozen.mp4')
-            execFile('ffmpeg', [
-              '-y', '-sseof', '-0.1', '-i', videoPath,
-              '-frames:v', '1', lastFrame
-            ], { timeout: 10000 }, (err2) => {
-              if (err2) { console.log(`     concat 方案也失败，兜底截断`); fallbackTrim(resolve); return }
-              execFile('ffmpeg', [
-                '-y', '-loop', '1', '-i', lastFrame,
-                '-t', String(freezeSec.toFixed(3)),
-                '-pix_fmt', 'yuv420p',
-                '-vf', 'scale=trunc(iw/2)*2:trunc(ih/2)*2',
-                frozenVideo
-              ], { timeout: 15000 }, (err3) => {
-                if (err3) {
-                  try { fs.unlinkSync(lastFrame) } catch (_) {}
-                  console.log(`     冻结帧视频生成失败，兜底截断`)
-                  fallbackTrim(resolve)
-                  return
-                }
-                const concatList = path.join(path.dirname(videoPath), '_freeze_concat.txt')
-                fs.writeFileSync(concatList, `file '${videoPath}'\nfile '${frozenVideo}'\n`)
-                execFile('ffmpeg', [
-                  '-y', '-f', 'concat', '-safe', '0', '-i', concatList,
-                  '-c', 'copy', outPath
-                ], { timeout: 30000 }, (err4) => {
-                  try { fs.unlinkSync(lastFrame); fs.unlinkSync(frozenVideo); fs.unlinkSync(concatList) } catch (_) {}
-                  if (err4) { console.log(`     拼接失败，兜底截断`); fallbackTrim(resolve); return }
-                  console.log(`     画面停顿: ${videoDur.toFixed(1)}s + 冻结 ${freezeSec.toFixed(1)}s = ${targetDurationSec.toFixed(1)}s`)
-                  resolve(outPath)
-                })
-              })
-            })
-            return
-          }
-          console.log(`     画面停顿: ${videoDur.toFixed(1)}s + 冻结 ${freezeSec.toFixed(1)}s = ${targetDurationSec.toFixed(1)}s`)
-          resolve(outPath)
-        })
      } else {
-        resolve(videoPath)
+        // 未知策略，兜底截断
+        fallbackTrim(resolve)
      }
    })
  })
--- a/.claude/skills/video-from-script/scripts/lib/capcut-tracks.js
+++ b/.claude/skills/video-from-script/scripts/lib/capcut-tracks.js
@@ -3,6 +3,10 @@
 *
 * 所有 add* 函数 + 转场策略 + 账号配置读取。
 * Agent 修改字幕风格、Ken Burns、转场、特效等只需关注此文件。
+ *
+ * 音频策略（固化铁律）：
+ *   - 音频由 TTS 1.15x 生成，导入 CapCut 时无 speed 字段（不可调速）
+ *   - 每个 item 的 segments[] 逐段添加，各段 start 按 startOffset 精确对齐
 */

 const path = require('path')
@@ -303,33 +307,233 @@ async function addVideos(draftUrl, inputDir, items, timeline, width, height, tra
  return allSegmentIds
 }

+// ============================================================================
+// 将 segment 写入视频轨道时间线（slot）
+// 背景：add_videos 只负责把视频加入素材库，不自动上时间线。
+//       此函数在 add_videos 成功后调用，将每个 segment_id 写入第一个 video track。
+// ============================================================================
+
+/**
+ * Place every uploaded video segment on the draft's first video track.
+ *
+ * Reads the draft through the `get_draft` API to find the first track whose
+ * type is 'video', builds one slot per item that has both a segment id and a
+ * timeline entry, and submits the slots through the `add_slots` API.
+ * Falls back to addSlotsLocally() when either API call throws.
+ *
+ * Slots are produced by buildSlot(), so the API path and the local fallback
+ * write the same payload structure.
+ *
+ * Logs a message and returns without writing when the draft has no video
+ * track or no item can be placed.
+ *
+ * @param {string} draftUrl - draft URL, sent to the API as `draft_url`
+ * @param {Array} items - manifest items; `segmentId` / `_segmentId` is read
+ * @param {Array} timeline - entries parallel to items; `start` / `duration` are read
+ * @returns {Promise<void>} resolves after the slots are written or the step is skipped
+ */
+async function addSlots(draftUrl, items, timeline) {
+  const { api: capcutApi, US } = require('./capcut-api')
+
+  // Fetch the cloud draft to learn the id of its first video track.
+  let draftData
+  try {
+    draftData = (await capcutApi('get_draft', { draft_url: draftUrl })).data || {}
+  } catch (err) {
+    // get_draft failed: write into the local draft file instead.
+    console.log('   get_draft 不可用，切换本地写入模式')
+    return addSlotsLocally(draftUrl, items, timeline)
+  }
+
+  // The returned draft data may omit `tracks`; treat that as no tracks.
+  const videoTrack = (draftData.tracks || []).find(t => t.type === 'video')
+  if (!videoTrack) {
+    console.log('   未找到 video track，跳过 slot 写入')
+    return
+  }
+
+  // Build one slot per placeable item; render_index is the item's position.
+  const slots = []
+  for (let i = 0; i < items.length; i++) {
+    const segId = items[i].segmentId || items[i]._segmentId
+    const tl = timeline[i]
+    // An item without a segment id or a timeline entry cannot be placed.
+    if (!segId || !tl) continue
+    slots.push(buildSlot(segId, videoTrack.id, i, tl, US))
+  }
+
+  if (slots.length === 0) {
+    console.log('   无有效 slot 数据，跳过')
+    return
+  }
+
+  // Preferred path: let the API write the slots.
+  try {
+    await capcutApi('add_slots', {
+      draft_url: draftUrl,
+      slots: JSON.stringify(slots),
+    })
+    console.log(`   已写入 ${slots.length} 个 slot 到视频轨道`)
+  } catch (err) {
+    // add_slots failed: degrade to editing the local draft file.
+    console.log(`   add_slots API 不可用: ${err.message}，降级为本地写入`)
+    await addSlotsLocally(draftUrl, items, timeline, videoTrack.id)
+  }
+}
+
+/**
+ * Write video slots directly into the local draft_content.json of a draft.
+ *
+ * @param {string} draftUrl - draft URL; its `draft_id` query parameter is used when options.draftId is absent
+ * @param {Array} items - manifest items; an item matched by video file name gets `_segmentId` set
+ * @param {Array} timeline - entries parallel to items
+ * @param {string} [trackId] - id of the track to fill; defaults to the first track of type 'video'
+ * @param {{draftId?: string}} [options] - options.draftId takes precedence over draftUrl
+ * @returns {Promise<void>}
+ */
+async function addSlotsLocally(draftUrl, items, timeline, trackId, options = {}) {
+  const { US, getConfig } = require('./capcut-api')
+  const fs = require('fs')
+
+  // Resolve the draft id: explicit option first, then the URL query parameter.
+  let draftId = options.draftId || null
+  if (!draftId) {
+    try {
+      draftId = new URL(draftUrl).searchParams.get('draft_id')
+    } catch {
+      console.log('   无法解析 draftUrl，跳过本地 slot 写入')
+      return
+    }
+  }
+  if (!draftId) {
+    console.log('   无法提取 draft_id，跳过本地 slot 写入')
+    return
+  }
+
+  const draftPath = path.join(getConfig().jianyingDraftPath, draftId, 'draft_content.json')
+  if (!fs.existsSync(draftPath)) {
+    console.log(`   本地草稿不存在: ${draftPath}，跳过 slot 写入`)
+    return
+  }
+
+  let draft
+  try {
+    draft = JSON.parse(fs.readFileSync(draftPath, 'utf-8'))
+  } catch {
+    console.log('   draft_content.json 读取失败，跳过')
+    return
+  }
+
+  // A draft file without `tracks` or `materials` must not crash this fallback.
+  const tracks = (draft && Array.isArray(draft.tracks)) ? draft.tracks : []
+  const materialVideos = (draft && draft.materials && draft.materials.videos) || []
+  const videoTrack = trackId
+    ? tracks.find(t => t.id === trackId)
+    : tracks.find(t => t.type === 'video')
+  if (!videoTrack) {
+    console.log('   未找到 video track，跳过')
+    return
+  }
+
+  const slots = []
+  for (let i = 0; i < items.length; i++) {
+    const item = items[i]
+    const tl = timeline[i]
+    if (!tl) continue
+    let segId = item.segmentId || item._segmentId
+    if (!segId && item.video) {
+      // No segment id recorded: look the material up by video file name.
+      const fname = path.basename(item.video)
+      const matVideo = materialVideos.find(v => fname && path.basename(v.path || '').includes(fname))
+      if (matVideo) {
+        segId = matVideo.id
+        item._segmentId = segId
+      }
+    }
+    if (segId) slots.push(buildSlot(segId, videoTrack.id, i, tl, US))
+  }
+  if (slots.length === 0) return
+
+  videoTrack.slots = slots
+  draft.duration = timeline.length > 0 ? timeline[timeline.length - 1].end : 0
+  fs.writeFileSync(draftPath, JSON.stringify(draft, null, 2), 'utf-8')
+  console.log(`   已本地写入 ${slots.length} 个 slot 到视频轨道`)
+  triggerDirScan(path.dirname(draftPath))
+}
+
+/** Build the slot payload that places material `segId` on track `trackId` at timeline entry `tl`. */
+function buildSlot(segId, trackId, index, tl, US) {
+  // `US` is accepted for call-site compatibility; nothing here reads it.
+  const { start, duration } = tl
+  // The source is read from its beginning; the target range sits at the timeline position.
+  const commonProperty = {
+    start_time: start,
+    source_timerange: { start: 0, duration },
+    target_timerange: { start, duration },
+    is_avatar: false,
+    audio_fade: { fade_in_duration: 0, fade_out_duration: 0 },
+    volume: 1.0,
+  }
+  const slot = { id: generateUUID(), material_id: segId, track_id: trackId, render_index: index, type: 'video' }
+  slot.common_property = commonProperty
+  return slot
+}
+
+function generateUUID() {
+  // Fill the v4 template: 'x' becomes a random hex digit, 'y' the variant nibble (8, 9, A or B).
+  const fill = (ch) => { const nib = Math.floor(Math.random() * 16); return ch === 'x' ? nib : ((nib & 0x3) | 0x8) }
+  const hex = (ch) => fill(ch).toString(16).toUpperCase()
+  return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, hex)
+}
+
+/** Copy `dir` to a temporary sibling with rsync and delete the copy again (macOS only); intended to make Jianying rescan the draft. */
+function triggerDirScan(dir) {
+  if (process.platform !== 'darwin') return
+  const { execFile } = require('child_process')
+  const scratchDir = `${dir}.slot_tmp`
+  execFile('rsync', ['-a', `${dir}/`, scratchDir], () => {
+    try { require('fs').rmSync(scratchDir, { recursive: true, force: true }) } catch {}
+  })
+}
+
 // ============================================================================
 // 添加 TTS 配音
 // ============================================================================

 async function addVoiceover(draftUrl, inputDir, items, timeline, audioUrls = {}) {
-  const audioItems = items.filter(item => item.audio)
-  if (audioItems.length === 0) {
-    console.log('   无 TTS 音频文件，跳过')
-    return
-  }
-
-  const audioInfos = []
-  const resolveAudio = (relPath) => {
-    if (relPath.startsWith('http')) return relPath
-    if (audioUrls[relPath]) return audioUrls[relPath]
-    return path.isAbsolute(relPath) ? relPath : path.resolve(inputDir, relPath)
-  }
+  // 优先使用 segments[] 逐段添加（精确对齐）
+  // 无 segments 时降级为旧的整段方式
+  const segmentsFlat = []

  for (let i = 0; i < items.length; i++) {
    const item = items[i]
    const tl = timeline[i]
+    if (!item.audio) continue

-    if (item.audio) {
-      const audioUrl = resolveAudio(item.audio)
+    if (item.segments && item.segments.length > 0) {
+      // 使用 segments 精确添加
+      for (const seg of item.segments) {
+        if (!seg.audio || seg.error) continue
+        const audioUrl = seg.audio.startsWith('http')
+          ? seg.audio
+          : (audioUrls[seg.audio] || path.resolve(inputDir, seg.audio))
+        const segDurUs = Math.round(seg.duration * US)
+        const segStartUs = tl.start + Math.round(seg.startOffset * US)
+        segmentsFlat.push({
+          audio_url: audioUrl,
+          start: segStartUs,
+          end: segStartUs + segDurUs,
+          duration: segDurUs,
+          volume: 1.0,
+        })
+      }
+    } else {
+      // 降级：整段添加
+      const audioUrl = item.audio.startsWith('http')
+        ? item.audio
+        : (audioUrls[item.audio] || path.resolve(inputDir, item.audio))
      const audioDurUs = item.audioDuration ? item.audioDuration * US : tl.duration
-
-      audioInfos.push({
+      segmentsFlat.push({
        audio_url: audioUrl,
        start: tl.start,
        end: tl.start + audioDurUs,
@@ -339,17 +543,26 @@ async function addVoiceover(draftUrl, inputDir, items, timeline, audioUrls = {})
    }
  }

-  if (audioInfos.length === 0) {
-    console.log('   无可用音频，跳过配音')
+  if (segmentsFlat.length === 0) {
+    console.log('   无 TTS 音频文件，跳过')
    return
  }

-  await api('add_audios', {
-    draft_url: draftUrl,
-    audio_infos: JSON.stringify(audioInfos),
-  })
-  const ossCount = audioInfos.filter(a => a.audio_url.startsWith('http')).length
-  console.log(`   已添加 ${audioInfos.length} 段 TTS 配音 (${ossCount > 0 ? `${ossCount} 段 OSS + ` : ''}${audioInfos.length - ossCount} 段本地)`)
+  // 逐个添加音频（CapCut API 批量添加不稳定）
+  let addedCount = 0
+  for (const audioInfo of segmentsFlat) {
+    try {
+      await api('add_audios', {
+        draft_url: draftUrl,
+        audio_infos: JSON.stringify([audioInfo]),
+      })
+      addedCount++
+    } catch (err) {
+      console.error(`   音频添加失败: ${err.message.slice(0, 80)}`)
+    }
+  }
+  const ossCount = segmentsFlat.filter(a => a.audio_url.startsWith('http')).length
+  console.log(`   已添加 ${addedCount}/${segmentsFlat.length} 段 TTS 配音 (${ossCount} 段 OSS)`)
 }

 // ============================================================================
@@ -402,7 +615,24 @@ async function addSubtitles(draftUrl, items, timeline, style = {}, split = false

    const tl = timeline[i]

-    if (split) {
+    if (split && item.segments && item.segments.length > 0) {
+      // 精确字幕模式：使用 segments 实测时长，逐段添加字幕
+      for (const seg of item.segments) {
+        if (seg.error || !seg.text) continue
+        const segStartUs = tl.start + Math.round(seg.startOffset * US)
+        const segDurUs = Math.round(seg.duration * US)
+
+        const cap = {
+          start: segStartUs,
+          end: segStartUs + segDurUs,
+          text: seg.text,
+        }
+
+        applyAnimationProps(cap, animStyle)
+        captions.push(cap)
+      }
+    } else if (split) {
+      // 降级：按字符比例分配（无 segments 时）
      const sentences = splitTextIntoSentences(text)
      if (sentences.length === 0) continue

@@ -447,9 +677,7 @@ async function addSubtitles(draftUrl, items, timeline, style = {}, split = false
    return
  }

-  await api('add_captions', {
-    draft_url: draftUrl,
-    captions: JSON.stringify(captions),
+  const commonStyle = {
    font: style.font || null,
    font_size: style.fontSize || 15,
    text_color: style.color || '#ffffff',
@@ -472,9 +700,23 @@ async function addSubtitles(draftUrl, items, timeline, style = {}, split = false
    transform_x: 0,
    transform_y: style.transformY || 0,
    style_text: 0,
-  })
+  }

-  console.log(`   已添加 ${captions.length} 条字幕${split ? ' (分句模式)' : ''} (字体: ${style.font || '默认'}, 动画: ${animStyle.inAnimation || '无'} → ${animStyle.outAnimation || '无'})`)
+  // 逐条添加字幕（CapCut API 批量添加不稳定）
+  let addedCount = 0
+  for (const cap of captions) {
+    try {
+      await api('add_captions', {
+        draft_url: draftUrl,
+        captions: JSON.stringify([cap]),
+        ...commonStyle,
+      })
+      addedCount++
+    } catch (err) {
+      console.error(`   字幕添加失败: ${err.message.slice(0, 80)}`)
+    }
+  }
+  console.log(`   已添加 ${addedCount}/${captions.length} 条字幕${split ? ' (分句模式)' : ''} (字体: ${style.font || '默认'}, 动画: ${animStyle.inAnimation || '无'} → ${animStyle.outAnimation || '无'})`)
 }

 // ============================================================================
@@ -583,6 +825,8 @@ module.exports = {
  addBGM,
  addSubtitles,
  addKeywordOverlays,
+  addSlots,
+  addSlotsLocally,
  addEffects,
  addFilter,
 }
--- a/.claude/skills/video-from-script/scripts/lib/cmd-init.js
+++ b/.claude/skills/video-from-script/scripts/lib/cmd-init.js
@@ -72,6 +72,28 @@ function initManifest(options) {
    console.log(`  ⚠ ${refsWithoutUrl.length} 个参考图缺少 OSS URL，images 阶段会自动上传`)
  }

+  // 从 videoModel 推算固定时长（秒）
+  const videoModelFixedDurations = {
+    'kling': 6,
+    'kling-v2-5-turbo': 6,
+    'veo3-fast': 8,
+    'veo3-fast-frames': 8,
+    'grok-video-3': 6,
+  }
+  const estimatedVideoDuration = videoModelFixedDurations[options.videoModel || accountConfig.videoModel] || 6
+
+  // 校验时长约束
+  for (let i = 0; i < rawItems.length; i++) {
+    const item = rawItems[i]
+    const dur = Number(item.duration) || 5
+    if (dur > estimatedVideoDuration) {
+      console.error(`错误: items[${i}] 的 TTS 估算 duration=${dur}s > videoModel 固定时长 ${estimatedVideoDuration}s`)
+      console.error(`       必须先拆分 shot 再执行 init！`)
+      console.error(`       script: "${item.script}"`)
+      process.exit(1)
+    }
+  }
+
  // 构建 items
  const items = rawItems.map((raw, i) => {
    const slug = slugify(raw.shotDesc || raw.script || `scene_${i + 1}`)
@@ -81,7 +103,8 @@ function initManifest(options) {
      file: `images/scene_${String(i + 1).padStart(2, '0')}_${slug}.jpeg`,
      shotDesc: raw.shotDesc || '',
      script: raw.script || '',
-      duration: raw.duration || 5,
+      duration: Number(raw.duration) || 5,
+      estimatedVideoDuration,
      imagePrompt: raw.imagePrompt,
      confirmed: false,
    }
@@ -102,8 +125,10 @@ function initManifest(options) {
    references,
    ...(accountConfig.ttsVoice ? { ttsVoice: accountConfig.ttsVoice } : {}),
    ...(accountConfig.ttsInstruction ? { ttsInstruction: accountConfig.ttsInstruction } : {}),
-    ...(accountConfig.ttsRate ? { ttsRate: accountConfig.ttsRate } : {}),
+    // 铁律：ttsRate 写死 1.15x，不允许配置覆盖（除非显式传入）
+    ttsRate: options.ttsRate || 1.15,
    items,
+    estimatedVideoDuration,  // 顶层冗余，便于 assemble 直接读取
  }

  // 创建输出目录（自增序号）
--- a/.claude/skills/video-from-script/scripts/lib/phase-tts.js
+++ b/.claude/skills/video-from-script/scripts/lib/phase-tts.js
@@ -1,13 +1,100 @@
 /**
- * Phase: tts — 语音合成（整段合成）
+ * Phase: tts — 语音合成（先分段，后合成）
 *
- * 每个 item 的 script 整段合成一个音频文件，保留自然语调。
- * item.audio 指向完整音频，item.audioDuration 为总时长。
- * 字幕切分由组装阶段按字符比例分配，不在 TTS 阶段处理。
+ * 核心变化：音频分段优先于生图。
+ *
+ * 1. 在生成图片之前，先将文案按语义断点切分为多个音频片段
+ * 2. 每个片段时长 < videoModel 固定时长（Kling=6s）
+ * 3. 逐段合成，记录实测时长，写入 manifest.items[n].segments[]
+ * 4. manifest.items[n].segments = [{text, audio, duration, startOffset}, ...]
+ * 5. manifest.items[n].audioDuration = 片段总和（供 assemble 计算 ratio）
+ *
+ * 流程顺序变为：tts → images → upload → videos → assemble
 */

 const path = require('path')
-const { saveManifest, ensureDir, log, getManifestDir } = require('./pipeline-utils')
+const { saveManifest, ensureDir, log, getManifestDir, splitTextIntoSentences } = require('./pipeline-utils')
+
+/**
+ * Split a script into audio segments whose estimated duration fits one video clip.
+ *
+ * Sentences returned by splitTextIntoSentences() are merged greedily while the
+ * running estimate stays <= videoDur. Text that cannot fit is cut at a
+ * full-width comma at or after its midpoint when possible, otherwise into
+ * near-equal character chunks — as many as needed for every chunk to fit.
+ *
+ * Durations are estimates (characters / charsPerSec); the '。' appended after
+ * each merged sentence is not counted.
+ *
+ * @param {string} text - full script of one item
+ * @param {number} videoDur - fixed clip length of the video model in seconds (e.g. 6)
+ * @param {number} charsPerSec - assumed speaking rate in characters per second (default 5)
+ * @returns {Array<{text: string, estimatedDuration: number}>}
+ */
+function splitIntoAudioSegments(text, videoDur, charsPerSec = 5) {
+  const estimate = (s) => s.length / charsPerSec
+  const toSegment = (s) => ({ text: s, estimatedDuration: estimate(s) })
+
+  // Cut `s` into near-equal chunks (at least two) that each fit videoDur.
+  const hardSplit = (s) => {
+    const maxChars = Math.floor(videoDur * charsPerSec)
+    // Without a usable per-chunk limit (maxChars < 1 or NaN) fall back to two halves.
+    const parts = maxChars >= 1 ? Math.max(2, Math.ceil(s.length / maxChars)) : 2
+    const cut = (k) => Math.floor((s.length * k) / parts)
+    const chunks = Array.from({ length: parts }, (_, k) => s.slice(cut(k), cut(k + 1)))
+    return chunks.filter(Boolean)
+  }
+
+  const sentences = splitTextIntoSentences(text)
+  if (sentences.length <= 1) {
+    // No sentence boundary: keep the text whole when it fits.
+    if (estimate(text) <= videoDur) return [toSegment(text)]
+
+    // Prefer a comma at or after the midpoint, but never one that leaves an empty tail.
+    const breakIdx = text.indexOf('，', Math.floor(text.length / 2))
+    const pieces = breakIdx > 0 && breakIdx < text.length - 1
+      ? [text.slice(0, breakIdx + 1), text.slice(breakIdx + 1)]
+      : [text]
+    // Any piece that is still too long is cut by character count.
+    return pieces
+      .flatMap(p => (estimate(p) > videoDur ? hardSplit(p) : [p]))
+      .map(toSegment)
+  }
+
+  const result = []
+  let currentText = ''
+  let currentEstDur = 0
+  const flush = () => {
+    if (currentText) result.push({ text: currentText.trim(), estimatedDuration: currentEstDur })
+    currentText = ''
+    currentEstDur = 0
+  }
+
+  for (const sentence of sentences) {
+    const sentenceEstDur = estimate(sentence)
+    if (currentEstDur + sentenceEstDur <= videoDur) {
+      // Still fits: merge into the segment being built.
+      currentText += sentence + '。'
+      currentEstDur += sentenceEstDur
+      continue
+    }
+
+    // Close the segment built so far. It is kept: discarding it would
+    // silently drop script text.
+    flush()
+
+    // A sentence longer than one clip is cut into chunks; all but the last
+    // become segments, the last stays open so later sentences can merge into it.
+    const chunks = sentenceEstDur > videoDur ? hardSplit(sentence) : [sentence]
+    for (const chunk of chunks.slice(0, -1)) result.push(toSegment(chunk))
+    const tail = chunks[chunks.length - 1]
+    currentText = tail + '。'
+    currentEstDur = estimate(tail)
+  }
+
+  flush()
+  return result
+}

 async function phaseTts(manifest, manifestPath, options = {}) {
  const dir = getManifestDir(manifestPath)
@@ -16,38 +103,89 @@ async function phaseTts(manifest, manifestPath, options = {}) {

  const { synthesize } = require('../qwen-tts')

-  const items = manifest.items.filter(it =>
-    it.status === 'done' && (it.script || it.text) && !it.audio
-  )
-  if (items.length === 0) { log('tts', '无待处理 item，跳过'); return }
+  const videoDur = manifest.estimatedVideoDuration || 6
+  const ttsRate = manifest.ttsRate || 1.15

-  log('tts', `共 ${items.length} 段`)
+  const items = manifest.items.filter(it =>
+    (it.script || it.text) && !it.audio
+  )
+  if (items.length === 0) { log('tts', '无待处理 item（已合成），跳过'); return }
+
+  log('tts', `共 ${items.length} 段, 视频固定时长=${videoDur}s, TTS语速=${ttsRate}x`)

  for (let i = 0; i < items.length; i++) {
    const item = items[i]
    const idx = i + 1
-    const fullText = item.script || item.text
+    const fullText = (item.script || item.text).trim()

-    try {
-      const { filePath, duration } = await synthesize(fullText, {
-        outputDir: audioDir,
-        id: String(item.id || idx),
-        voice: manifest.ttsVoice || undefined,
-        instruction: manifest.ttsInstruction || undefined,
-        rate: manifest.ttsRate || undefined,
-      })
-
-      const totalDuration = Math.round(duration * 1000) / 1000
-      item.audio = path.relative(dir, filePath).replace(/\\/g, '/')
-      item.audioDuration = totalDuration
-      log('tts', `[${idx}/${items.length}] ${totalDuration.toFixed(1)}s: ${fullText.substring(0, 30)}...`)
-    } catch (err) {
-      item.status = 'failed'
-      item.error = `TTS失败: ${err.message}`
-      log('tts', `[${idx}/${items.length}] 失败: ${err.message}`)
+    // Step 1: 计算音频分段
+    const rawSegments = splitIntoAudioSegments(fullText, videoDur)
+    log('tts', `[${idx}/${items.length}] 原始分段: ${rawSegments.length} 段`)
+    for (const seg of rawSegments) {
+      log('tts', `        分段估算: ${seg.estimatedDuration.toFixed(2)}s / ${seg.text.slice(0, 20)}...`)
    }
+
+    // Step 2: 逐段合成
+    const segments = []
+    let globalOffset = 0
+
+    for (let j = 0; j < rawSegments.length; j++) {
+      const segInput = rawSegments[j]
+      const segId = `${item.id}_${j + 1}`
+
+      try {
+        const { filePath, duration: realDuration } = await synthesize(segInput.text, {
+          outputDir: audioDir,
+          id: segId,
+          voice: manifest.ttsVoice || undefined,
+          instruction: manifest.ttsInstruction || undefined,
+          rate: ttsRate,
+        })
+
+        const segment = {
+          id: segId,
+          text: segInput.text,
+          audio: path.relative(dir, filePath).replace(/\\/g, '/'),
+          estimatedDuration: Math.round(segInput.estimatedDuration * 1000) / 1000,
+          duration: Math.round(realDuration * 1000) / 1000,
+          startOffset: Math.round(globalOffset * 1000) / 1000,
+        }
+        segments.push(segment)
+        globalOffset += realDuration
+
+        log('tts', `[${idx}/${items.length}] 段${j + 1}: 估算${segInput.estimatedDuration.toFixed(2)}s → 实测${realDuration.toFixed(2)}s | ${segInput.text.slice(0, 15)}...`)
+      } catch (err) {
+        log('tts', `[${idx}/${items.length}] 段${j + 1} 合成失败: ${err.message}`)
+        segments.push({
+          id: segId,
+          text: segInput.text,
+          audio: '',
+          estimatedDuration: segInput.estimatedDuration,
+          duration: 0,
+          startOffset: globalOffset,
+          error: err.message,
+        })
+        globalOffset += segInput.estimatedDuration
+      }
+    }
+
+    // Step 3: 汇总到 item
+    const totalAudioDuration = Math.round(globalOffset * 1000) / 1000
+    item.segments = segments
+    item.audio = segments[0]?.audio || ''
+    item.audioDuration = totalAudioDuration
+    item.segmentCount = segments.length
+
+    // Step 4: 时长合规诊断
+    const ratio = videoDur / totalAudioDuration
+    if (ratio < 0.9) {
+      item._timelineWarning = `⚠ audioDur(${totalAudioDuration.toFixed(1)}s) > videoDur(${videoDur}s)，ratio=${ratio.toFixed(2)}，assemble 将截断`
+    }
+
+    log('tts', `[${idx}/${items.length}] 完成: ${segments.length}段, 总音频${totalAudioDuration.toFixed(1)}s, ratio=${ratio.toFixed(2)}`)
+
    saveManifest(manifestPath, manifest)
  }
 }

-module.exports = { phaseTts }
+module.exports = { phaseTts, splitIntoAudioSegments }