video-create/.claude/skills/video-from-script/scripts/capcut_assemble.js

#!/usr/bin/env node
/**
 * CapCut 成片组装脚本
 *
 * 将图片/视频素材通过 CapCut Mate API 组装为草稿，同步到本地剪映。
 *
 * 用法:
 *   node capcut_assemble.js --input ./output/batch_xxx [选项]
 *
 * 配置:
 *   请运行 node setup.js 生成配置
 * 同步方式: 纯 Node.js（sync-to-jianying.js），无需 Python/uv
 */

const axios = require('axios')
const path = require('path')
const fs = require('fs')
const { execFile } = require('child_process')
const { syncDraft, registerDraft, triggerDirectoryScan } = require('./sync-to-jianying')
const { splitTextIntoSentences } = require('./lib/pipeline-utils')

// ============================================================================
// 配置
// ============================================================================

let _config = null

/**
 * Load and cache the skill configuration from `config.json`, located two
 * directories above this script.
 *
 * The file is read at most once per process; later calls return the cached
 * object. When the file is missing, is not valid JSON, or lacks one of the
 * required fields, a hint is printed and the process exits with code 1.
 *
 * @returns {object} Parsed configuration containing at least
 *   `jianyingDraftPath`, `capcutMateDir` and `capcutMateApiBase`.
 */
function getConfig() {
  if (_config) return _config
  const configPath = path.join(__dirname, '..', '..', 'config.json')
  if (!fs.existsSync(configPath)) {
    console.error('缺少配置文件: skills/config.json')
    console.error('请运行 node setup.js 生成配置')
    process.exit(1)
  }

  // A hand-edited config can be malformed; report it like the other
  // configuration problems instead of crashing with a raw stack trace.
  let config
  try {
    config = JSON.parse(fs.readFileSync(configPath, 'utf-8'))
  } catch (err) {
    console.error(`config.json 解析失败: ${err.message}`)
    console.error('请运行 node setup.js 生成配置')
    process.exit(1)
  }

  // `!config` covers a file whose whole content is the JSON literal `null`.
  if (!config || !config.jianyingDraftPath || !config.capcutMateDir || !config.capcutMateApiBase) {
    console.error('config.json 需要填写 jianyingDraftPath、capcutMateDir 和 capcutMateApiBase')
    process.exit(1)
  }
  _config = config
  return _config
}

// Base URL of the CapCut Mate HTTP API, read from config.json when this module loads.
const BASE_URL = getConfig().capcutMateApiBase
// Microseconds per second; timeline values in this file are computed as seconds * US.
const US = 1_000_000

// ============================================================================
// CapCut API 封装
// ============================================================================

/**
 * Call one CapCut Mate endpoint and return the response body.
 *
 * `get_draft` is sent as a GET with `data` as query parameters; every other
 * endpoint is sent as a POST with `data` as the body.
 *
 * @param {string} endpoint Endpoint name appended to BASE_URL.
 * @param {object} [data={}] Query parameters (GET) or request body (POST).
 * @param {number} [timeout=60000] Request timeout in milliseconds.
 * @returns {Promise<object>} The response body (`res.data`).
 * @throws {Error} When the body carries a non-zero `code`, or when the server
 *   answered with an HTTP error (status and body are put into the message).
 *   Errors without a response (network failures, timeouts) are rethrown as-is.
 */
async function api(endpoint, data = {}, timeout = 60000) {
  const target = `${BASE_URL}/${endpoint}`

  let response
  try {
    if (endpoint === 'get_draft') {
      response = await axios.get(target, { params: data, timeout })
    } else {
      response = await axios.post(target, data, { timeout })
    }
  } catch (failure) {
    // No HTTP response attached: pass the original error through untouched
    if (!failure.response) throw failure
    throw new Error(`API [${endpoint}] HTTP ${failure.response.status}: ${JSON.stringify(failure.response.data)}`)
  }

  const payload = response.data
  const hasErrorCode = payload.code !== undefined && payload.code !== 0
  if (hasErrorCode) {
    throw new Error(`API [${endpoint}] 返回错误: ${payload.message}`)
  }
  return payload
}

// ============================================================================
// CLI 参数
// ============================================================================

/**
 * Parse `--key value`, `--key=value` and bare `--flag` command-line arguments.
 *
 * - `--key value`  -> `{ key: 'value' }`
 * - `--key=value`  -> `{ key: 'value' }` (the value may itself contain `=`)
 * - `--flag`       -> `{ flag: true }` when it is last or followed by another `--option`
 *
 * Tokens that do not start with `--` and are not consumed as a value are ignored.
 *
 * @param {string[]} argv Argument list, e.g. `process.argv.slice(2)`.
 * @returns {Object<string, string|boolean>} Option name to value map.
 */
function parseArgs(argv) {
  const args = {}
  for (let i = 0; i < argv.length; i++) {
    const token = argv[i]
    if (!token.startsWith('--')) continue

    const body = token.slice(2)
    const eq = body.indexOf('=')
    if (eq > 0) {
      // Inline form: previously stored as a flag named "key=value"
      args[body.slice(0, eq)] = body.slice(eq + 1)
      continue
    }

    const value = argv[i + 1]
    if (value && !value.startsWith('--')) {
      args[body] = value
      i++
    } else {
      args[body] = true
    }
  }
  return args
}

/**
 * Map an aspect-ratio label to the draft canvas size in pixels.
 *
 * Unknown labels fall back to portrait 9:16. A `switch` is used instead of a
 * plain-object lookup so that inherited keys such as `'constructor'` or
 * `'toString'` cannot resolve to a function instead of a size.
 *
 * @param {string} format One of `'9:16'`, `'16:9'`, `'1:1'`, `'4:3'`.
 * @returns {{width: number, height: number}} Canvas size.
 */
function getResolution(format) {
  switch (format) {
    case '16:9':
      return { width: 1920, height: 1080 }
    case '1:1':
      return { width: 1080, height: 1080 }
    case '4:3':
      return { width: 1440, height: 1080 }
    case '9:16':
    default:
      return { width: 1080, height: 1920 }
  }
}

// ============================================================================
// OSS 上传
// ============================================================================

const ossUpload = require(path.join(__dirname, 'oss-upload'))

/**
 * Upload one local file through the `oss-upload` module.
 *
 * @param {string} filePath Path of the file to upload.
 * @returns {Promise<string>} The `url` field of the upload result.
 */
async function uploadToOSS(filePath) {
  const result = await ossUpload.uploadFile(filePath)
  return result.url
}

// ============================================================================
// 转场选择策略
// ============================================================================

/**
 * Pick the transition for the clip at `index`.
 *
 * Strategies (from `transitionConfig.strategy`, default `'fixed'`):
 * - `'director'`: look up the lower-cased `item.directorRef` in `byDirector`.
 * - `'rhythm'`: choose from `byPosition` by slot — index 1 uses `hook`, the
 *   last two clips use `closing`, every index divisible by 3 uses `keypoint`,
 *   everything else uses `body`.
 * - anything else: the default transition.
 *
 * Any lookup that finds nothing falls back to `transitionConfig.default`, or
 * to `{ name: '闪白', duration: 150000 }` when no default is configured.
 *
 * @param {object} item Manifest item of the clip.
 * @param {number} index Position of the clip in the sequence.
 * @param {number} totalCount Number of clips in the sequence.
 * @param {object|null} transitionConfig Transition settings, or a falsy value for none.
 * @returns {{name: string, duration: number}} Empty name and zero duration
 *   when there is no config or for the first clip.
 */
function getTransition(item, index, totalCount, transitionConfig) {
  // No configuration, or the very first clip: nothing to transition from
  if (!transitionConfig || index === 0) return { name: '', duration: 0 }

  const fallback = transitionConfig.default || { name: '闪白', duration: 150000 }
  const strategy = transitionConfig.strategy || 'fixed'

  if (strategy === 'director') {
    const key = (item.directorRef || '').toLowerCase()
    const table = transitionConfig.byDirector || {}
    return table[key] || fallback
  }

  if (strategy === 'rhythm') {
    const { hook, closing, keypoint, body } = transitionConfig.byPosition || {}
    let chosen
    if (index === 1) {
      chosen = hook
    } else if (index >= totalCount - 2) {
      chosen = closing
    } else if (index % 3 === 0) {
      // Every third shot gets the emphasis transition
      chosen = keypoint
    } else {
      chosen = body
    }
    return chosen || fallback
  }

  return fallback
}

/**
 * Upload the given files one after another and collect their URLs.
 *
 * Files that do not exist under `inputDir` are skipped silently; a failed
 * upload is logged and left out of the result.
 *
 * @param {string} inputDir Directory the file names are relative to.
 * @param {string[]} files File names to upload.
 * @returns {Promise<Object<string, string>>} Map of file name to uploaded URL,
 *   containing only the files that were uploaded successfully.
 */
async function batchUploadToOSS(inputDir, files) {
  const uploaded = {}
  for (let idx = 0; idx < files.length; idx++) {
    const name = files[idx]
    const fullPath = path.join(inputDir, name)
    if (fs.existsSync(fullPath)) {
      try {
        uploaded[name] = await uploadToOSS(fullPath)
        console.log(`   上传: ${name} -> OK`)
      } catch (uploadError) {
        console.error(`   上传失败: ${name} - ${uploadError.message}`)
      }
    }
  }
  return uploaded
}

/**
 * Measure a media file's duration with `ffprobe`.
 *
 * @param {string} filePath Path of the file to probe.
 * @returns {Promise<number|null>} Duration in seconds, or `null` when ffprobe
 *   fails or does not report a positive number. The promise never rejects.
 */
function getAudioDurationSec(filePath) {
  const probeArgs = [
    '-v', 'quiet',
    '-show_entries', 'format=duration',
    '-of', 'csv=p=0',
    filePath,
  ]
  return new Promise((resolve) => {
    execFile('ffprobe', probeArgs, (probeError, stdout) => {
      if (probeError) {
        resolve(null)
        return
      }
      const seconds = parseFloat(stdout.trim())
      if (seconds > 0) {
        resolve(seconds)
      } else {
        // Covers zero, negative and NaN (unparseable output)
        resolve(null)
      }
    })
  })
}

// ============================================================================
// 主流程
// ============================================================================

/**
 * Lay the items out back-to-back on a microsecond timeline.
 *
 * The audio track drives the timeline: an item with a positive
 * `audioDuration` occupies exactly that long, and its video is either sped up
 * (video/audio ratio <= 2) or truncated (ratio > 2, speed stays 1). Items
 * without audio use their video duration, or `defaultDurationUs` when that is
 * missing too.
 *
 * All durations are rounded to whole microseconds before they are accumulated,
 * so `start`/`end`/`duration` are always integers. Seconds multiplied by 1e6
 * are generally not exact (e.g. 1.2345678 s), and fractional values would
 * otherwise propagate into every later offset.
 *
 * @param {Array<{audioDuration?: number, videoDuration?: number}>} items
 *   Manifest items; durations are in seconds.
 * @param {number} defaultDurationUs Fallback clip length in microseconds.
 * @returns {Array<{start: number, end: number, duration: number, speed: number, needAdjust?: boolean}>}
 *   One entry per item, in order. `needAdjust` is only present on audio-driven
 *   entries and is true when the video is more than 0.1 s longer than the audio.
 */
function buildTimeline(items, defaultDurationUs) {
  let offset = 0
  return items.map(item => {
    const audioDur = (item.audioDuration != null) ? Math.round(item.audioDuration * US) : 0
    const videoDur = (item.videoDuration != null) ? Math.round(item.videoDuration * US) : 0

    // No TTS audio: use the video length, else the fixed default
    if (audioDur <= 0) {
      const dur = videoDur > 0 ? videoDur : Math.round(defaultDurationUs)
      const entry = { start: offset, end: offset + dur, duration: dur, speed: 1 }
      offset += dur
      return entry
    }

    // TTS audio present: the audio length is authoritative
    const dur = audioDur
    const ratio = videoDur > 0 ? videoDur / audioDur : 1
    // <= 2x: speed the video up to the audio length; > 2x: truncate (only the
    // first `audioDur` of the video is used), so the speed stays at 1
    const speed = ratio <= 2 ? ratio : 1
    // Only adjust when the video exceeds the audio by more than 0.1 s
    const needAdjust = videoDur > audioDur + 100000
    const entry = { start: offset, end: offset + dur, duration: dur, speed, needAdjust }
    offset += dur
    return entry
  })
}

/**
 * Assemble a CapCut draft from a batch directory and sync it to local Jianying.
 *
 * Pipeline: read the manifest -> measure real audio/video durations with
 * ffprobe -> build the timeline -> (images mode) upload images -> create the
 * draft -> import images or videos -> upload and add TTS voice-over -> BGM ->
 * subtitles -> keyword overlays -> effects -> filter -> save -> sync ->
 * optional cloud render when `apiKey` is given.
 *
 * Side effects: manifest items are mutated in memory (measured durations,
 * adjusted video paths, uploaded URLs) and `manifest.json` is rewritten when
 * video or audio URLs are obtained.
 *
 * @param {object} args Parsed CLI options (see `parseArgs`). `input` is
 *   required. `subtitles`, `splitCaptions` and `voiceover` are enabled by the
 *   string `'true'` or by the boolean `true` that a bare `--flag` produces.
 * @returns {Promise<void>}
 * @throws {Error} On a missing `input`, invalid `duration`, missing or
 *   malformed manifest, no usable material, or any failing API step that is
 *   not explicitly tolerated (effects and filter failures are only logged).
 */
async function assemble(args) {
  const {
    input,
    manifest: manifestPath,
    mode = 'images',
    subtitles = 'true',
    splitCaptions = 'true',
    voiceover = 'true',
    bgm,
    effects: effectsStr,
    filter: filterStr,
    format = '9:16',
    apiKey = '',
    duration = '4',
    animation = '渐显+放大',
  } = args

  if (!input) throw new Error('缺少 --input 参数')

  // Reject a non-numeric or non-positive duration up front; it would otherwise
  // turn every timeline value into NaN.
  const defaultDurationUs = parseFloat(duration) * US
  if (!(defaultDurationUs > 0)) throw new Error(`--duration 参数无效: ${duration}`)

  // parseArgs stores a bare `--flag` as boolean true, so accept both spellings
  const isEnabled = (flag) => flag === true || flag === 'true'
  const subtitlesOn = isEnabled(subtitles)
  const splitCaptionsOn = isEnabled(splitCaptions)
  const voiceoverOn = isEnabled(voiceover)

  const inputDir = path.resolve(input)
  const manifestFile = manifestPath
    ? path.resolve(manifestPath)
    : path.join(inputDir, 'manifest.json')

  if (!fs.existsSync(manifestFile)) {
    throw new Error(`找不到 manifest.json: ${manifestFile}`)
  }

  const manifest = JSON.parse(fs.readFileSync(manifestFile, 'utf-8'))
  if (!manifest || !Array.isArray(manifest.items)) {
    throw new Error(`manifest.json 缺少 items 数组: ${manifestFile}`)
  }

  // Inherit effects / filter from account.json (CLI arguments take precedence)
  let finalEffects = effectsStr
  let finalFilter = filterStr
  if (!finalEffects || !finalFilter) {
    const accountData = loadAccountConfig(manifest)
    if (!finalEffects && accountData.capcut?.effects?.length) {
      finalEffects = accountData.capcut.effects.join(',')
    }
    if (!finalFilter && accountData.capcut?.filter) {
      finalFilter = accountData.capcut.filter
    }
  }

  const { width, height } = getResolution(format)

  // Keep only items whose material is actually available
  const items = manifest.items.filter(item => {
    if (item.url) return true // video mode may reference a URL
    if (item.file) return fs.existsSync(path.join(inputDir, item.file))
    // Items without `file` used to crash path.join; fall back to the video
    if (item.video) return fs.existsSync(path.resolve(inputDir, item.video))
    return Boolean(item.videoUrl)
  })

  if (items.length === 0) throw new Error('没有可用的素材文件')

  // Replace the manifest's estimated durations with ffprobe measurements
  let audioMeasured = 0, videoMeasured = 0
  for (const item of items) {
    // Measure the TTS audio (skipped when segments exist: audioDuration is
    // then already the exact accumulated value)
    if (item.audio && !item.audio.startsWith('http') && !item.segments) {
      const audioPath = path.isAbsolute(item.audio)
        ? item.audio
        : path.resolve(inputDir, item.audio)
      if (fs.existsSync(audioPath)) {
        const actualDur = await getAudioDurationSec(audioPath)
        if (actualDur != null) { item.audioDuration = actualDur; audioMeasured++ }
      }
    }
    // Measure the video file (the generator reports a hard-coded constant)
    if (item.video) {
      const videoPath = path.isAbsolute(item.video)
        ? item.video
        : path.resolve(inputDir, item.video)
      if (fs.existsSync(videoPath)) {
        const actualDur = await getAudioDurationSec(videoPath) // ffprobe handles audio and video alike
        if (actualDur != null) { item.videoDuration = actualDur; videoMeasured++ }
      }
    }
  }
  if (audioMeasured > 0 || videoMeasured > 0) {
    console.log(`  实际时长测量: 音频 ${audioMeasured} 个, 视频 ${videoMeasured} 个`)
  }

  const timeline = buildTimeline(items, defaultDurationUs)
  const totalDurationUs = timeline.length > 0 ? timeline[timeline.length - 1].end : 0
  const hasTTS = items.some(item => item.audio && item.audioDuration != null)
  const uploadVoiceover = voiceoverOn && hasTTS

  // Transition strategy must be known before addImages/addVideos
  const transitionConfig = loadTransitions(manifest)

  console.log(`\nCapCut 成片组装`)
  console.log(`  模式: ${mode}  画幅: ${format} (${width}x${height})`)
  console.log(`  时间线: ${hasTTS ? 'TTS音频驱动' : `固定${duration}s/段`}  总时长: ${(totalDurationUs / US).toFixed(1)}s`)
  console.log(`  字幕: ${subtitles}  配音: ${voiceover}  动画: ${animation}`)
  if (finalEffects) console.log(`  特效: ${finalEffects}`)
  if (finalFilter) console.log(`  滤镜: ${finalFilter}`)
  console.log(`  素材: ${items.length} 个可用\n`)

  // Only count steps that are actually announced below, so the final
  // "[n/total]" line ends at n === total.
  const steps = [
    ...(mode === 'images' ? ['upload'] : []),
    'draft', 'materials',
    ...(uploadVoiceover ? ['audio_oss'] : []),
    'voiceover', 'audio', 'subtitles', 'keywords', 'effects', 'filter', 'save', 'sync',
  ]
  const totalSteps = steps.length
  let step = 0

  // -- Upload images to OSS (URLs already in the manifest are reused) --
  let imgUrls = {}
  if (mode === 'images') {
    const needUpload = []
    for (const item of items) {
      if (item.url && item.url.startsWith('http')) {
        imgUrls[item.file] = item.url
      } else {
        needUpload.push(item.file)
      }
    }
    if (needUpload.length > 0) {
      step++; console.log(`[${step}/${totalSteps}] 上传图片到 OSS (${needUpload.length} 张需上传, ${Object.keys(imgUrls).length} 张已有URL)...`)
      const uploaded = await batchUploadToOSS(inputDir, needUpload)
      imgUrls = { ...imgUrls, ...uploaded }
    } else {
      step++; console.log(`[${step}/${totalSteps}] 所有图片已有 URL，跳过上传`)
    }
    if (Object.keys(imgUrls).length === 0) throw new Error('所有图片上传失败')
    console.log(`   成功: ${Object.keys(imgUrls).length}/${items.length}\n`)
  }

  // -- Create the draft --
  step++; console.log(`[${step}/${totalSteps}] 创建草稿...`)
  const draftRes = await api('create_draft', { width, height })
  const draftUrl = draftRes.draft_url
  if (!draftUrl) throw new Error('create_draft 未返回 draft_url')
  const draftId = new URL(draftUrl).searchParams.get('draft_id')
  console.log(`   draft_id: ${draftId}\n`)

  // -- Import materials --
  step++; console.log(`[${step}/${totalSteps}] 导入素材...`)
  if (mode === 'images') {
    await addImages(draftUrl, items, imgUrls, timeline, width, height, animation, transitionConfig)
  } else {
    // Video mode: re-time -> upload to OSS -> add to the draft
    // Step 1: re-time with ffmpeg before uploading, to avoid uploading twice
    let adjustedCount = 0
    for (let i = 0; i < items.length; i++) {
      const item = items[i]
      const tl = timeline[i]
      if (tl.needAdjust && item.video) {
        const videoPath = path.resolve(inputDir, item.video)
        const audioDur = tl.duration / US
        const adjustedPath = await adjustVideoSpeed(videoPath, audioDur)
        if (adjustedPath !== videoPath) {
          item.video = path.relative(inputDir, adjustedPath)
          item.videoDuration = audioDur
          adjustedCount++
        }
      }
    }
    if (adjustedCount > 0) {
      console.log(`  视频调速: ${adjustedCount}/${items.length} 个`)
    }

    // Locate the on-disk manifest entry for an in-memory item. Keys are only
    // compared when defined: `undefined === undefined` used to match the first
    // entry for every item lacking id/script/text. Falls back to the item's
    // position in the manifest that was loaded from the same file.
    const sameDefined = (a, b) => a != null && a === b
    const findManifestItem = (diskItems, item) =>
      diskItems.find(c =>
        sameDefined(c.id, item.id) || sameDefined(c.script, item.script) || sameDefined(c.text, item.text)
      ) || diskItems[manifest.items.indexOf(item)]

    // Step 2: upload the (possibly re-timed) videos to OSS
    const missingUrl = items.filter(it => it.video && !it.videoUrl)
    if (missingUrl.length > 0) {
      console.log(`  上传 ${missingUrl.length} 个视频到 OSS...`)
      for (const item of missingUrl) {
        const videoPath = path.resolve(inputDir, item.video)
        try {
          const url = await uploadToOSS(videoPath)
          item.videoUrl = url
          // Write the URL back to the manifest so a re-run can skip the upload
          try {
            const onDisk = JSON.parse(fs.readFileSync(manifestFile, 'utf-8'))
            const target = findManifestItem(onDisk.items, item)
            if (target) {
              target.videoUrl = url
              fs.writeFileSync(manifestFile, JSON.stringify(onDisk, null, 2))
            }
          } catch (writeErr) {
            // Best effort: the upload itself succeeded, so only report it
            console.log(`  manifest 回写失败: ${writeErr.message}`)
          }
        } catch (err) {
          console.log(`  视频上传失败: ${err.message}`)
        }
      }
    }
    await addVideos(draftUrl, inputDir, items, timeline, width, height, transitionConfig)
  }

  // -- Upload TTS audio to OSS --
  let audioUrls = {}
  if (uploadVoiceover) {
    step++; console.log(`[${step}/${totalSteps}] 上传 TTS 音频到 OSS...`)
    try {
      audioUrls = await batchUploadAudio(inputDir, items)
      console.log(`   成功: ${Object.keys(audioUrls).length} 段音频\n`)
      // Write the OSS URLs back to the manifest to avoid re-uploading
      if (Object.keys(audioUrls).length > 0 && manifestFile) {
        let changed = false
        for (const item of manifest.items) {
          if (item.audio && audioUrls[item.audio]) {
            item.audio = audioUrls[item.audio]
            changed = true
          }
          if (item.segments) {
            for (const seg of item.segments) {
              if (seg.audio && audioUrls[seg.audio]) {
                seg.audio = audioUrls[seg.audio]
                changed = true
              }
            }
          }
        }
        if (changed) fs.writeFileSync(manifestFile, JSON.stringify(manifest, null, 2))
      }
    } catch (err) {
      console.log(`   OSS 上传失败，将尝试本地路径: ${err.message}\n`)
    }
  }

  // -- Add TTS voice-over --
  step++; console.log(`[${step}/${totalSteps}] 添加 TTS 配音...`)
  if (uploadVoiceover) {
    await addVoiceover(draftUrl, inputDir, items, timeline, audioUrls)
  } else {
    console.log('   跳过（无 TTS 音频或未启用）')
  }

  // -- Add background music --
  step++; console.log(`[${step}/${totalSteps}] 添加背景音乐...`)
  if (bgm) {
    await addBGM(draftUrl, bgm, totalDurationUs)
  } else {
    console.log('   跳过（未指定 --bgm）')
  }

  // -- Load the account's subtitle style --
  const subtitleStyle = loadSubtitleStyle(manifest)
  if (Object.keys(subtitleStyle).length > 0) {
    console.log(`  字幕风格: ${subtitleStyle.font || '默认'} ${subtitleStyle.inAnimation ? subtitleStyle.inAnimation + '→' + subtitleStyle.outAnimation : ''}`)
  }

  // -- Add subtitles --
  step++; console.log(`[${step}/${totalSteps}] 添加字幕...`)
  if (subtitlesOn && items.some(i => i.script || i.text)) {
    await addSubtitles(draftUrl, items, timeline, subtitleStyle, splitCaptionsOn)
  } else {
    console.log('   跳过')
  }

  // -- Add keyword overlays --
  step++; console.log(`[${step}/${totalSteps}] 添加关键字氛围词...`)
  const keywordStyle = loadKeywordStyle(manifest)
  if (Object.keys(keywordStyle).length > 0 && items.some(i => i.keyword)) {
    await addKeywordOverlays(draftUrl, items, timeline, keywordStyle)
  } else {
    console.log('   跳过（无关键字或未配置 keywordStyle）')
  }

  // -- Add effects (failures are tolerated) --
  step++; console.log(`[${step}/${totalSteps}] 添加特效...`)
  if (finalEffects) {
    try {
      await addEffects(draftUrl, finalEffects, totalDurationUs)
    } catch (e) {
      console.log(`   特效跳过: ${e.message}`)
    }
  } else {
    console.log('   跳过（未配置特效）')
  }

  // -- Add filter (failures are tolerated) --
  step++; console.log(`[${step}/${totalSteps}] 添加滤镜...`)
  if (finalFilter) {
    try {
      await addFilter(draftUrl, finalFilter, totalDurationUs)
    } catch (e) {
      console.log(`   滤镜跳过: ${e.message}`)
    }
  } else {
    console.log('   跳过（未配置滤镜）')
  }

  // -- Save the draft --
  step++; console.log(`[${step}/${totalSteps}] 保存草稿...`)
  await api('save_draft', { draft_url: draftUrl })
  console.log('   已保存\n')

  // -- Sync to local Jianying --
  step++; console.log(`[${step}/${totalSteps}] 同步到本地剪映...`)
  await syncToLocalJianying(draftUrl, draftId, totalDurationUs)
  console.log('   同步完成\n')

  // -- Cloud render (optional) --
  if (apiKey) {
    console.log('提交云渲染...')
    await api('gen_video', { draft_url: draftUrl, apiKey })
    console.log('渲染已提交，使用 gen_video_status 查询进度')
  }

  console.log(`\n成片组装完成`)
  console.log(`  草稿ID: ${draftId}`)
  console.log(`  总时长: ${(totalDurationUs / US).toFixed(1)}s`)
  console.log(`  素材数: ${items.length}`)
  console.log(`  时间线: ${hasTTS ? 'TTS音频驱动' : '固定时长'}`)
  if (mode === 'videos' && !subtitlesOn) {
    console.log(`\n  >> 视频模式未加字幕，请在剪映中打开草稿 → 识别字幕 → 语音识别生成\n`)
  }
}

// ============================================================================
// 添加图片（自动上传到 OSS）
// ============================================================================

/**
 * Add every image to the draft in a single `add_images` request, so all of
 * them land on the same track.
 *
 * `animation` is a `+`-separated list of animation names. The names `缩放`
 * and `缩放 II` are sent as `loop_animation`; every other name is sent as
 * `in_animation`. Multiple names of one kind are joined with `|`.
 *
 * @param {string} draftUrl Draft URL returned by `create_draft`.
 * @param {object[]} items Manifest items; `item.file` keys into `imgUrls`.
 * @param {Object<string, string>} imgUrls Map of file name to image URL.
 * @param {Array<{start: number, end: number, duration: number}>} timeline
 *   Timeline entries, index-aligned with `items`.
 * @param {number} width Canvas width.
 * @param {number} height Canvas height.
 * @param {string} [animation=''] Animation spec, e.g. `'渐显+放大'`.
 * @param {object|null} [transitionConfig=null] Passed through to `getTransition`.
 * @returns {Promise<string[]>} `segment_ids` from the API, or `[]` when absent.
 * @throws {Error} When an item has no URL in `imgUrls`.
 */
async function addImages(draftUrl, items, imgUrls, timeline, width, height, animation = '', transitionConfig = null) {
  const GROUP_ANIMATIONS = ['缩放', '缩放 II']

  // Translate the animation spec into the optional loop/in animation fields
  const describeAnimation = (spec) => {
    const fields = {}
    if (!spec) return fields
    const loops = []
    const entrances = []
    for (const raw of spec.split('+')) {
      const name = raw.trim()
      if (!name) continue
      if (GROUP_ANIMATIONS.includes(name)) {
        loops.push(name)
      } else {
        entrances.push(name)
      }
    }
    if (loops.length > 0) fields.loop_animation = loops.join('|')
    if (entrances.length > 0) fields.in_animation = entrances.join('|')
    return fields
  }

  const imageInfos = []
  for (let idx = 0; idx < items.length; idx++) {
    const item = items[idx]
    const imageUrl = imgUrls[item.file]
    if (!imageUrl) throw new Error(`图片 ${item.file} 未上传成功，无法添加`)

    const slot = timeline[idx]
    const transition = getTransition(item, idx, items.length, transitionConfig)
    imageInfos.push({
      image_url: imageUrl,
      width,
      height,
      start: slot.start,
      end: slot.end,
      duration: slot.duration,
      transition: transition.name,
      transition_duration: transition.duration,
      ...describeAnimation(animation),
    })
  }

  // One request carrying every image, with a longer timeout than the default
  console.log(`   一次性添加 ${imageInfos.length} 张图片...`)
  const payload = {
    draft_url: draftUrl,
    image_infos: JSON.stringify(imageInfos),
    alpha: 1, scale_x: 1, scale_y: 1,
    transform_x: 0, transform_y: 0,
  }
  const response = await api('add_images', payload, 300000)

  console.log(`   已添加 ${items.length} 张图片`)
  return response.segment_ids || []
}

// ============================================================================
// 添加视频（从 manifest 读取时长）
// ============================================================================

/**
 * Re-time a video with ffmpeg so that it lasts `targetDurationSec`.
 *
 * - video/target ratio <= 2: the video is sped up with the `setpts` video
 *   filter and the audio track is dropped (`-an`).
 * - ratio > 2: the first `targetDurationSec` seconds are stream-copied.
 *
 * The result is written next to the input as `<name>_adj<ext>`.
 *
 * The speed-up previously put `atempo` (an audio filter) into the same filter
 * chain as `setpts` (a video filter) for `.mp4` inputs, which ffmpeg rejects,
 * so those files were never actually re-timed. The audio filter was pointless
 * anyway because the audio track is removed.
 *
 * @param {string} videoPath Path of the source video.
 * @param {number} targetDurationSec Desired duration in seconds.
 * @returns {Promise<string>} Path of the adjusted file, or `videoPath`
 *   unchanged when the file is missing, the target is not positive, the video
 *   is not more than 0.1 s longer than the target, or ffprobe/ffmpeg fails.
 */
async function adjustVideoSpeed(videoPath, targetDurationSec) {
  if (!fs.existsSync(videoPath)) return videoPath
  // A zero/negative/NaN target would produce an infinite or NaN ratio
  if (!(targetDurationSec > 0)) return videoPath

  // null means ffprobe failed or reported a non-positive duration
  const videoDur = await getAudioDurationSec(videoPath)
  if (videoDur == null || videoDur <= targetDurationSec + 0.1) return videoPath

  const ratio = videoDur / targetDurationSec
  // Built from the extension so the output can never equal the input, even
  // for a path without an extension.
  const ext = path.extname(videoPath)
  const outPath = `${videoPath.slice(0, videoPath.length - ext.length)}_adj${ext}`

  // Resolves with the ffmpeg error, or null on success; never rejects
  const runFfmpeg = (ffmpegArgs) => new Promise((resolve) => {
    execFile('ffmpeg', ffmpegArgs, { timeout: 30000 }, (err) => resolve(err || null))
  })

  if (ratio <= 2) {
    // Speed up: setpts=PTS/speed compresses the video timestamps
    const speed = ratio.toFixed(3)
    const err = await runFfmpeg([
      '-y', '-i', videoPath,
      '-vf', `setpts=PTS/${speed}`,
      '-an', // drop the audio track (video mode does not use the original audio)
      outPath,
    ])
    if (err) {
      console.log(`     调速失败，使用原始视频: ${err.message}`)
      return videoPath
    }
    console.log(`     调速: ${videoDur.toFixed(1)}s → ${targetDurationSec.toFixed(1)}s (${speed}x)`)
    return outPath
  }

  // Truncate: keep only the first targetDurationSec seconds
  const err = await runFfmpeg([
    '-y', '-i', videoPath,
    '-t', String(targetDurationSec),
    '-c', 'copy',
    outPath,
  ])
  if (err) {
    console.log(`     截断失败，使用原始视频: ${err.message}`)
    return videoPath
  }
  console.log(`     截断: ${videoDur.toFixed(1)}s → ${targetDurationSec.toFixed(1)}s`)
  return outPath
}

/**
 * Add the video clips to the draft via `add_videos`.
 *
 * All clips are submitted in one request first. If that request fails with a
 * message containing `504` or `timeout`, the clips are re-submitted in batches
 * of three, keeping their absolute timeline positions.
 *
 * The source of each clip is the first available of: `item.videoUrl`, the
 * local `item.video`, `item.url`, the local `item.file`.
 *
 * @param {string} draftUrl Draft URL returned by `create_draft`.
 * @param {string} inputDir Directory that relative `video`/`file` paths resolve against.
 * @param {object[]} items Manifest items.
 * @param {Array<{start: number, end: number, duration: number}>} timeline
 *   Timeline entries, index-aligned with `items`.
 * @param {number} width Canvas width.
 * @param {number} height Canvas height.
 * @param {object|null} [transitionConfig=null] Passed through to `getTransition`.
 * @returns {Promise<string[]>} Collected `segment_ids` from the API.
 * @throws {Error} When an item has no video source, or on any API error other
 *   than the tolerated 504/timeout of the first full submission.
 */
async function addVideos(draftUrl, inputDir, items, timeline, width, height, transitionConfig = null) {
  const videoInfos = items.map((item, i) => {
    const tl = timeline[i]
    const t = getTransition(item, i, items.length, transitionConfig)
    const source = item.videoUrl
      || (item.video ? path.resolve(inputDir, item.video) : null)
      || item.url
      || (item.file ? path.resolve(inputDir, item.file) : null)
    // Previously an item without any source crashed inside path.resolve
    if (!source) throw new Error(`视频素材 #${i + 1} 缺少 videoUrl/video/url/file`)
    return {
      video_url: source,
      width,
      height,
      start: tl.start,
      end: tl.end,
      duration: tl.duration,
      mask: '',
      transition: t.name,
      transition_duration: t.duration,
      // `??` keeps an explicit volume of 0 (muted); `||` turned it into 1
      volume: item.volume ?? 1,
    }
  })

  // Shared request shape for the full and the batched submission
  const submit = (infos) => api('add_videos', {
    draft_url: draftUrl,
    video_infos: JSON.stringify(infos),
    alpha: 1, scale_x: 1, scale_y: 1,
    transform_x: 0, transform_y: 0,
    scene_timelines: [],
  })

  // Try everything in one request first
  try {
    const res = await submit(videoInfos)
    console.log(`   已添加 ${items.length} 个视频片段（全量）`)
    return res.segment_ids || []
  } catch (err) {
    if (!err.message.includes('504') && !err.message.includes('timeout')) throw err
    console.log(`   全量提交超时，降级为分批添加...`)
  }

  // 504 fallback: submit in batches of 3 (absolute times stay unchanged)
  const BATCH_SIZE = 3
  const totalBatches = Math.ceil(videoInfos.length / BATCH_SIZE)
  const allSegmentIds = []
  for (let i = 0; i < videoInfos.length; i += BATCH_SIZE) {
    const batch = videoInfos.slice(i, i + BATCH_SIZE)
    const batchNum = Math.floor(i / BATCH_SIZE) + 1
    console.log(`   分批 [${batchNum}/${totalBatches}] 添加 ${batch.length} 个片段...`)
    const res = await submit(batch)
    if (res.segment_ids) allSegmentIds.push(...res.segment_ids)
  }

  console.log(`   已添加 ${items.length} 个视频片段（分批）`)
  return allSegmentIds
}

// ============================================================================
// 音频批量上传（本地文件 → OSS 公网 URL）
// ============================================================================

/**
 * Upload the local TTS audio files referenced by the items and map each
 * manifest reference to its uploaded URL.
 *
 * For every item, the audio of each segment is uploaded first (only when the
 * item has more than one segment), then `item.audio`. References that already
 * start with `http` are not uploaded; an `item.audio` of that kind is recorded
 * as mapping to itself. A reference is uploaded at most once. Missing files
 * and failed uploads are logged and skipped.
 *
 * @param {string} inputDir Directory that relative audio paths resolve against.
 * @param {object[]} items Manifest items with optional `audio` and `segments`.
 * @returns {Promise<Object<string, string>>} Map of audio reference to URL.
 */
async function batchUploadAudio(inputDir, items) {
  const urls = {}

  // Upload one local reference and store the result under that reference
  const uploadLocal = async (ref) => {
    const filePath = path.isAbsolute(ref) ? ref : path.resolve(inputDir, ref)
    if (!fs.existsSync(filePath)) {
      console.error(`   音频文件不存在: ${filePath}`)
      return
    }
    try {
      urls[ref] = await uploadToOSS(filePath)
      console.log(`   上传: ${path.basename(filePath)} -> OK`)
    } catch (uploadError) {
      console.error(`   上传失败: ${path.basename(filePath)} - ${uploadError.message}`)
    }
  }

  for (const item of items) {
    // Per-segment audio, only for items split into several segments
    if (item.segments && item.segments.length > 1) {
      for (const segment of item.segments) {
        const ref = segment.audio
        const isPendingLocal = ref && !ref.startsWith('http') && !urls[ref]
        if (isPendingLocal) await uploadLocal(ref)
      }
    }

    // The item's own audio (single clip, or the first segment's file)
    const main = item.audio
    if (!main) continue
    if (main.startsWith('http')) {
      urls[main] = main
      continue
    }
    if (!urls[main]) await uploadLocal(main)
  }
  return urls
}

// ============================================================================
// 添加 TTS 配音（每段音频按时间线排列）
// ============================================================================

/**
 * Place the TTS audio of every item on the timeline with one `add_audios` call.
 *
 * An item with more than one segment contributes one audio entry per segment:
 * the item's timeline slot is divided in proportion to the segment durations,
 * the last segment takes whatever remains of the slot, and every entry lasts
 * at least 100000 µs. Any other item with `audio` contributes a single entry
 * spanning its whole slot.
 *
 * Audio references are resolved in this order: already an `http` URL, an entry
 * in `audioUrls`, an absolute path, a path relative to `inputDir`.
 *
 * @param {string} draftUrl Draft URL returned by `create_draft`.
 * @param {string} inputDir Directory that relative audio paths resolve against.
 * @param {object[]} items Manifest items with optional `audio` / `segments`.
 * @param {Array<{start: number, end: number, duration: number}>} timeline
 *   Timeline entries, index-aligned with `items`.
 * @param {Object<string, string>} [audioUrls={}] Map of audio reference to uploaded URL.
 * @returns {Promise<void>}
 */
async function addVoiceover(draftUrl, inputDir, items, timeline, audioUrls = {}) {
  const hasAnyAudio = items.some(it => it.audio || (it.segments && it.segments.length > 0))
  if (!hasAnyAudio) {
    console.log('   无 TTS 音频文件，跳过')
    return
  }

  // Turn a manifest audio reference into something the API can load
  const toPlayable = (ref) => {
    if (ref.startsWith('http')) return ref
    const uploaded = audioUrls[ref]
    if (uploaded) return uploaded
    if (path.isAbsolute(ref)) return ref
    return path.resolve(inputDir, ref)
  }

  const audioInfos = []
  items.forEach((item, idx) => {
    const slot = timeline[idx]
    const parts = item.segments

    if (parts && parts.length > 1) {
      // Several segments: one entry each, sized by its share of the slot
      let totalUs = 0
      for (const part of parts) totalUs += part.duration * US
      const slotUs = slot.end - slot.start
      let cursor = slot.start

      parts.forEach((part, k) => {
        const partUs = Math.round(part.duration * US)
        const isLast = k === parts.length - 1
        // The last segment absorbs the rounding remainder of the slot
        const share = isLast ? slot.end - cursor : Math.round(slotUs * (partUs / totalUs))
        const duration = Math.max(share, 100000)

        audioInfos.push({
          audio_url: toPlayable(part.audio),
          start: cursor,
          end: cursor + duration,
          duration,
          volume: 1.0,
        })
        cursor += duration
      })
      return
    }

    if (item.audio) {
      // Single clip covering the whole slot
      audioInfos.push({
        audio_url: toPlayable(item.audio),
        start: slot.start,
        end: slot.end,
        duration: slot.duration,
        volume: 1.0,
      })
    }
  })

  if (audioInfos.length === 0) {
    console.log('   无可用音频，跳过配音')
    return
  }

  await api('add_audios', {
    draft_url: draftUrl,
    audio_infos: JSON.stringify(audioInfos),
  })

  let ossCount = 0
  for (const info of audioInfos) {
    if (info.audio_url.startsWith('http')) ossCount++
  }
  console.log(`   已添加 ${audioInfos.length} 段 TTS 配音 (${ossCount > 0 ? `${ossCount} 段 OSS + ` : ''}${audioInfos.length - ossCount} 段本地)`)
}

// ============================================================================
// 添加背景音乐
// ============================================================================

/**
 * Add a background-music track starting at 0 with volume 0.15.
 *
 * The track length is asked from the `get_audio_duration` endpoint; when that
 * call fails or reports no duration, the total video duration is used instead.
 * The track ends at the shorter of its own length and the video length.
 *
 * @param {string} draftUrl Draft URL returned by `create_draft`.
 * @param {string} bgmUrl URL of the music file.
 * @param {number} totalDurationUs Total timeline length in microseconds.
 * @returns {Promise<void>}
 */
async function addBGM(draftUrl, bgmUrl, totalDurationUs) {
  // Best effort: any failure here falls back to the video length
  const probeDuration = async () => {
    try {
      const { duration } = await api('get_audio_duration', { mp3_url: bgmUrl })
      return duration || totalDurationUs
    } catch (_) {
      return totalDurationUs
    }
  }

  const audioDuration = await probeDuration()
  const track = {
    audio_url: bgmUrl,
    duration: audioDuration,
    end: Math.min(audioDuration, totalDurationUs),
    start: 0,
    volume: 0.15,
  }

  await api('add_audios', {
    draft_url: draftUrl,
    audio_infos: JSON.stringify([track]),
  })
  console.log(`   已添加 BGM (${(audioDuration / US).toFixed(1)}s)`)
}

// ============================================================================
// 读取账号配置
// ============================================================================

/**
 * Read `accounts/<account>/account.json` for the account named in the manifest.
 * The `accounts` directory is looked up four levels above this script.
 *
 * @param {{account?: string}} manifest Parsed manifest.
 * @returns {object} The parsed account file, or `{}` when the manifest names
 *   no account, the file does not exist, or it cannot be parsed.
 */
function loadAccountConfig(manifest) {
  const { account } = manifest
  if (account) {
    const accountFile = path.join(__dirname, '..', '..', '..', '..', 'accounts', account, 'account.json')
    if (fs.existsSync(accountFile)) {
      try {
        return JSON.parse(fs.readFileSync(accountFile, 'utf-8'))
      } catch {
        // Unreadable or malformed file: fall through to the empty default
      }
    }
  }
  return {}
}

/**
 * Subtitle style of the manifest's account (`capcut.subtitleStyle`).
 *
 * @param {object} manifest Parsed manifest.
 * @returns {object} The configured style, or `{}` when none is set.
 */
function loadSubtitleStyle(manifest) {
  const { capcut } = loadAccountConfig(manifest)
  return capcut?.subtitleStyle || {}
}

/**
 * Keyword-overlay style of the manifest's account (`capcut.keywordStyle`).
 *
 * @param {object} manifest Parsed manifest.
 * @returns {object} The configured style, or `{}` when none is set.
 */
function loadKeywordStyle(manifest) {
  const { capcut } = loadAccountConfig(manifest)
  return capcut?.keywordStyle || {}
}

// ============================================================================
// 添加关键字氛围词叠加（画面中央大字）
// ============================================================================

/**
 * Overlay each item's `keyword` as a caption for the duration of its timeline
 * slot, using one `add_captions` request.
 *
 * Optional style keys read here: `inAnimation`, `outAnimation`,
 * `inAnimDuration`, `outAnimDuration`, `font`, `fontSize` (default 60),
 * `color` (default `#FFFFFF`), `bold`, `hasShadow`, `shadowAlpha`,
 * `shadowColor` (default `#000000`), `alpha` (default 1), `transformY`
 * (default 0) and `textEffect`. Shadow details are only sent when
 * `shadowAlpha` is set.
 *
 * @param {string} draftUrl Draft URL returned by `create_draft`.
 * @param {object[]} items Manifest items; those without `keyword` are skipped.
 * @param {Array<{start: number, end: number}>} timeline Timeline entries,
 *   index-aligned with `items`.
 * @param {object} [style={}] Keyword style from the account configuration.
 * @returns {Promise<void>}
 */
async function addKeywordOverlays(draftUrl, items, timeline, style = {}) {
  if (!items.some(it => it.keyword)) {
    console.log('   无关键字，跳过')
    return
  }

  // Animation fields are only attached when the style defines them
  const optionalFields = [
    ['in_animation', style.inAnimation],
    ['out_animation', style.outAnimation],
    ['in_animation_duration', style.inAnimDuration],
    ['out_animation_duration', style.outAnimDuration],
  ]

  const captions = []
  items.forEach((item, idx) => {
    if (!item.keyword) return
    const slot = timeline[idx]
    const caption = { start: slot.start, end: slot.end, text: item.keyword }
    for (const [field, value] of optionalFields) {
      if (value) caption[field] = value
    }
    captions.push(caption)
  })

  if (captions.length === 0) return

  const shadowInfo = style.shadowAlpha
    ? {
        shadow_alpha: style.shadowAlpha,
        shadow_color: style.shadowColor || '#000000',
        shadow_diffuse: 15,
        shadow_distance: 5,
        shadow_angle: -45,
      }
    : undefined

  await api('add_captions', {
    draft_url: draftUrl,
    captions: JSON.stringify(captions),
    font: style.font || null,
    font_size: style.fontSize || 60,
    text_color: style.color || '#FFFFFF',
    alignment: 1,
    bold: style.bold || false,
    has_shadow: style.hasShadow || false,
    shadow_info: shadowInfo,
    alpha: style.alpha || 1,
    scale_x: 1, scale_y: 1,
    transform_x: 0,
    transform_y: style.transformY || 0,
    text_effect: style.textEffect || null,
  })
  console.log(`   已添加 ${captions.length} 个关键字氛围词 (效果: ${style.textEffect || '无'})`)
}

/**
 * Transition settings of the manifest's account (`capcut.transitions`).
 *
 * @param {object} manifest Parsed manifest.
 * @returns {object|null} The configured transitions, or `null` when none are set.
 */
function loadTransitions(manifest) {
  const { capcut } = loadAccountConfig(manifest)
  return capcut?.transitions || null
}

// ============================================================================
// 添加字幕（支持关键词高亮 + 账号字幕风格 + 分句切分）
// ============================================================================

/**
 * Build subtitle captions from `items` and submit them in a single
 * `add_captions` request.
 *
 * Caption text is taken from `item.script`, then `item.text`, then
 * `item.caption`; items with no text are skipped. The timeline entry at the
 * same index supplies the time window.
 *
 * Modes:
 *  - `split === false`: one caption per item covering the whole window.
 *  - `split === true` and the item has more than one entry in `segments`:
 *    the window is divided in proportion to each segment's `duration`
 *    (scaled by `US`).
 *  - `split === true` otherwise: the text is split with
 *    splitTextIntoSentences and the window is divided in proportion to each
 *    sentence's length.
 *
 * In both split modes the last caption takes whatever remains of the window,
 * and every caption is then forced to last at least 1,000,000 time units.
 * NOTE(review): because of that floor, short pieces can push later captions
 * past `tl.end` — confirm this is acceptable for the downstream track.
 *
 * @param {string} draftUrl - Forwarded to the API as `draft_url`.
 * @param {Array<Object>} items - Source items (`script`/`text`/`caption`,
 *   optional `segments` of `{ text, duration }`).
 * @param {Array<{start: number, end: number}>} timeline - Entries aligned by
 *   index with `items`.
 * @param {Object} [style] - Optional styling. Fields read: `inAnimation`,
 *   `outAnimation`, `inAnimationDuration`, `outAnimationDuration` (the
 *   `inAnimDuration` / `outAnimDuration` spellings used by
 *   addKeywordOverlays are accepted as fallbacks), `font`, `fontSize`,
 *   `color`, `bold`, `hasShadow`, `shadowAlpha`, `shadowColor`,
 *   `letterSpacing`, `lineSpacing`, `alpha`, `transformY`.
 * @param {boolean} [split=false] - Enable per-sentence captions.
 * @returns {Promise<void>}
 */
async function addSubtitles(draftUrl, items, timeline, style = {}, split = false) {
  // Floor applied to every split caption's duration.
  const MIN_CAPTION_DURATION = 1000000

  // Animation parameters come from the style object.
  const inAnimation = style.inAnimation || ''
  const outAnimation = style.outAnimation || ''
  const inAnimDuration = style.inAnimationDuration || style.inAnimDuration || null
  const outAnimDuration = style.outAnimationDuration || style.outAnimDuration || null

  const captions = []

  // Append one caption, attaching only the animation fields that are set.
  const pushCaption = (start, end, text) => {
    const cap = { start, end, text }
    if (inAnimation) cap.in_animation = inAnimation
    if (outAnimation) cap.out_animation = outAnimation
    if (inAnimDuration) cap.in_animation_duration = inAnimDuration
    if (outAnimDuration) cap.out_animation_duration = outAnimDuration
    captions.push(cap)
  }

  // Lay `parts` ({ text, weight }) back to back across the window `tl`,
  // sizing each by weight / totalWeight.
  const pushWeighted = (tl, parts, totalWeight) => {
    const windowLength = tl.end - tl.start
    // A zero or NaN total (e.g. segments without a usable duration) would
    // otherwise yield NaN timestamps; share the window equally instead.
    const weightsUsable = totalWeight > 0
    let cursor = tl.start

    parts.forEach((part, idx) => {
      const share = weightsUsable ? part.weight / totalWeight : 1 / parts.length
      const isLast = idx === parts.length - 1
      // The last part absorbs rounding drift by taking the remainder.
      let duration = isLast ? tl.end - cursor : Math.round(windowLength * share)
      duration = Math.max(duration, MIN_CAPTION_DURATION)

      pushCaption(cursor, cursor + duration, part.text)
      cursor += duration
    })
  }

  items.forEach((item, i) => {
    const text = item.script || item.text || item.caption || ''
    if (!text) return

    const tl = timeline[i]

    if (!split) {
      // Plain mode: a single caption for the whole item.
      pushCaption(tl.start, tl.end, text)
      return
    }

    // Prefer per-segment durations when there is more than one segment.
    const segments = item.segments && item.segments.length > 1 ? item.segments : null
    if (segments) {
      const totalSegDur = segments.reduce((sum, s) => sum + s.duration * US, 0)
      const parts = segments.map(seg => ({
        text: seg.text,
        weight: Math.round(seg.duration * US),
      }))
      pushWeighted(tl, parts, totalSegDur)
      return
    }

    // Fallback: estimate by sentence length.
    const sentences = splitTextIntoSentences(text)
    if (sentences.length === 0) return

    const totalChars = sentences.reduce((sum, s) => sum + s.length, 0)
    const parts = sentences.map(sentence => ({ text: sentence, weight: sentence.length }))
    pushWeighted(tl, parts, totalChars)
  })

  if (captions.length === 0) {
    console.log('   无字幕内容，跳过')
    return
  }

  await api('add_captions', {
    draft_url: draftUrl,
    captions: JSON.stringify(captions),
    font: style.font || null,
    font_size: style.fontSize || 15,
    text_color: style.color || '#ffffff',
    alignment: 1,
    bold: style.bold || false,
    italic: false,
    underline: false,
    has_shadow: style.hasShadow || false,
    // Only sent when a shadow alpha is configured; geometry values are fixed.
    shadow_info: style.shadowAlpha ? {
      shadow_alpha: style.shadowAlpha,
      shadow_color: style.shadowColor || '#000000',
      shadow_diffuse: 15,
      shadow_distance: 5,
      shadow_angle: -45,
    } : undefined,
    letter_spacing: style.letterSpacing || 0,
    line_spacing: style.lineSpacing || 0,
    alpha: style.alpha || 1,
    scale_x: 1, scale_y: 1,
    transform_x: 0,
    transform_y: style.transformY || 0,
    style_text: 0,
  })

  console.log(`   已添加 ${captions.length} 条字幕${split ? ' (分句模式)' : ''} (字体: ${style.font || '默认'}, 动画: ${inAnimation || '无'} → ${outAnimation || '无'})`)
}

// ============================================================================
// 添加特效
// ============================================================================

/**
 * Apply one or more named effects over the span 0..totalDurationUs via the
 * `add_effects` endpoint.
 *
 * @param {string} draftUrl - Forwarded to the API as `draft_url`.
 * @param {string} effectsStr - Comma-separated effect names; surrounding
 *   whitespace and empty entries are discarded.
 * @param {number} totalDurationUs - Used as the `end` of every effect.
 * @returns {Promise<void>}
 */
async function addEffects(draftUrl, effectsStr, totalDurationUs) {
  const effectNames = effectsStr.split(',').map(s => s.trim()).filter(Boolean)

  // An input such as "," leaves no names; skip instead of posting an empty
  // list and logging a misleading success line.
  if (effectNames.length === 0) {
    console.log('   无特效，跳过')
    return
  }

  const effectInfos = effectNames.map(name => ({
    effect_title: name,
    start: 0,
    end: totalDurationUs,
  }))

  await api('add_effects', {
    draft_url: draftUrl,
    effect_infos: JSON.stringify(effectInfos),
  })

  console.log(`   已添加: ${effectNames.join(', ')}`)
}

// ============================================================================
// 添加滤镜
// ============================================================================

/**
 * Apply a single filter over the span 0..totalDurationUs via the
 * `add_filters` endpoint.
 *
 * @param {string} draftUrl - Forwarded to the API as `draft_url`.
 * @param {string} filterStr - `"name"` or `"name:intensity"`.
 * @param {number} totalDurationUs - Used as the filter's `end`.
 * @returns {Promise<void>}
 */
async function addFilter(draftUrl, filterStr, totalDurationUs) {
  const [rawName, rawIntensity] = filterStr.split(':')
  const filterTitle = (rawName || '').trim()

  // Fall back to 50 only when no number can be parsed. The previous
  // `parseFloat(x) || 50` also replaced an explicit intensity of 0.
  const parsedIntensity = Number.parseFloat(rawIntensity)
  const intensity = Number.isNaN(parsedIntensity) ? 50 : parsedIntensity

  await api('add_filters', {
    draft_url: draftUrl,
    filter_infos: JSON.stringify([{
      filter_title: filterTitle,
      start: 0,
      end: totalDurationUs,
      intensity,
    }]),
  })

  // Log the value that was actually sent, not the raw input text.
  console.log(`   已添加: ${filterTitle} 强度 ${intensity}`)
}

// ============================================================================
// 同步草稿到本地剪映
// ============================================================================

/**
 * Sync a draft and then register it, using `draftId` as the draft name.
 *
 * syncDraft is awaited before registerDraft is called; registerDraft's return
 * value is not awaited.
 *
 * @param {string} draftUrl - Draft URL handed to syncDraft.
 * @param {string} draftId - Used as the sync `name` and as both of
 *   registerDraft's first two arguments.
 * @param {number} totalDurationUs - Passed to registerDraft as its third
 *   argument.
 * @returns {Promise<void>}
 */
async function syncToLocalJianying(draftUrl, draftId, totalDurationUs) {
  const syncOptions = { name: draftId }
  await syncDraft(draftUrl, syncOptions)

  const registration = [draftId, draftId, totalDurationUs]
  registerDraft(...registration)
}

// ============================================================================
// 主入口
// ============================================================================

/**
 * CLI entry point: parse the command-line arguments and run assemble().
 *
 * When `--input` is missing, the usage text is printed line by line and the
 * process exits with status 0.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv.slice(2))
  const hasInput = Boolean(args.input)

  if (!hasInput) {
    // One entry per printed line; empty strings are blank separator lines.
    const usageLines = [
      '用法: node capcut_assemble.js --input <目录> [选项]',
      '',
      '必填:',
      '  --input <dir>            素材目录（含 manifest.json）',
      '',
      '选项:',
      '  --mode images|videos     素材类型（默认 images）',
      '  --format 9:16            画幅比例',
      '  --duration 4             默认每段时长/秒（无TTS时的fallback，默认 4）',
      '  --voiceover true|false   是否添加TTS配音轨道（默认 true）',
      '  --subtitles true|false   是否添加字幕（默认 true）',
      '  --split-captions true|false  分句字幕模式（默认 true，按标点切分）',
      '  --bgm <url>              背景音乐 URL',
      '  --effects "名称1,名称2"  特效名称（逗号分隔）',
      '  --filter "名称:强度"     滤镜（强度 0-100）',
      '  --apiKey <key>           云渲染 API Key（可选）',
      '  --manifest <path>        manifest.json 路径',
      '',
      '时间线模式:',
      '  manifest.json 中每段包含 audio + duration → TTS音频驱动时间线',
      '  无 audio/duration → 按 --duration 固定时长',
      '',
      'manifest.json 示例（TTS驱动）:',
      '  {"items":[{"file":"1.png","text":"文案","audio":"seg_1.mp3","duration":3.5}]}',
      '',
      '配置:',
      '  请运行 node setup.js 生成配置',
    ]
    for (const line of usageLines) {
      console.log(line)
    }
    process.exit(0)
  }

  await assemble(args)
}

// Run the CLI only when this file is executed directly; when it is loaded
// with require() only the export below takes effect.
if (require.main === module) {
  main().catch(err => {
    // A rejection is not guaranteed to be an Error instance; fall back to the
    // raw value so the message never prints as "undefined".
    console.error(`\n错误: ${err?.message ?? err}`)
    process.exit(1)
  })
}

// Expose assemble() for programmatic use by other scripts.
module.exports = { assemble }