feat(skills): 集成 GPT Image 图片生成和编辑能力

- 新增 gpt-image-generator.js 脚本，支持文生图、图生图/重绘、批量生成
- 更新 pipeline 和 phase-images，支持 GPT Image 模型
- 更新技能文档，添加 GPT Image 使用说明和 API 特点
- 新增配置文件中的 GPT Image API 参数
2026-05-05 23:49:30 +08:00
parent 823519cbf7
commit 35488beef2
8 changed files with 752 additions and 11 deletions
--- a/.claude/skills/video-from-script/scripts/gpt-image-generator.js
+++ b/.claude/skills/video-from-script/scripts/gpt-image-generator.js
@@ -0,0 +1,531 @@
+#!/usr/bin/env node
+
+/**
+ * GPT Image Generator - GPT Image 模型图片生成工具
+ *
+ * 支持模型：gpt-image-2, gpt-image-1.5, gpt-image-1
+ * 通过云雾 (yunwu.ai) API 代理调用，遵循 OpenAI Images API 格式
+ *
+ * 功能：
+ * - 文生图（Text-to-Image）— /v1/images/generations
+ * - 图生图/重绘（Image-to-Image）— /v1/images/edits (multipart)
+ * - 首尾帧编辑
+ * - 批量生成
+ *
+ * 用法：
+ *   node gpt-image-generator.js generate "a cute cat" -o ./output -r 16:9
+ *   node gpt-image-generator.js edit "add sunglasses" -i ./photo.jpg -o ./output
+ *   node gpt-image-generator.js batch ./prompts.txt -o ./output
+ */
+
+const fs = require('fs')
+const path = require('path')
+const https = require('https')
+const http = require('http')
+
+// ============================================================================
+// 配置
+// ============================================================================
+
+/**
+ * Load the shared configuration from `config.json`, two directories above this script.
+ *
+ * A missing file is not an error: an empty object is returned so callers fall
+ * back to their built-in defaults. A file that exists but cannot be read or
+ * parsed throws an error that names the file, instead of a bare JSON SyntaxError.
+ *
+ * @returns {object} Parsed configuration, or `{}` when the file is absent or holds JSON `null`.
+ * @throws {Error} If the file exists but cannot be read or is not valid JSON.
+ */
+function loadConfig() {
+  const configPath = path.join(__dirname, '..', '..', 'config.json')
+
+  // Read directly instead of existsSync + readFileSync: avoids the check-then-use race.
+  let raw
+  try {
+    raw = fs.readFileSync(configPath, 'utf-8')
+  } catch (err) {
+    if (err.code === 'ENOENT' || err.code === 'ENOTDIR') return {}
+    throw new Error(`无法读取配置文件 ${configPath}: ${err.message}`, { cause: err })
+  }
+
+  try {
+    // JSON `null` would make every later property access throw; treat it as empty.
+    return JSON.parse(raw) ?? {}
+  } catch (err) {
+    throw new Error(`配置文件不是合法的 JSON ${configPath}: ${err.message}`, { cause: err })
+  }
+}
+
+const cfg = loadConfig()
+
+// Effective settings: a truthy value from config.json wins, otherwise the built-in default.
+const Config = {
+  baseUrl: cfg.gptImageApiBaseUrl ? cfg.gptImageApiBaseUrl : 'https://yunwu.ai',
+  apiKey: cfg.gptImageApiKey ? cfg.gptImageApiKey : '',
+  model: cfg.gptImageModel ? cfg.gptImageModel : 'gpt-image-2',
+  // NOTE(review): presumably a request timeout in milliseconds — confirm where it is consumed.
+  timeout: 120000,
+}
+
+// Aspect ratio → suggested resolution
+// (gpt-image-2 constraints: max edge 3840, multiples of 16, ratio ≤ 3:1)
+const RATIO_SIZE_MAP = Object.fromEntries([
+  ['1:1', '1024x1024'],
+  ['3:2', '1536x1024'],
+  ['2:3', '1024x1536'],
+  ['3:4', '1152x1536'],
+  ['4:3', '1536x1152'],
+  ['4:5', '1024x1280'],
+  ['5:4', '1280x1024'],
+  ['9:16', '1088x1920'],
+  ['16:9', '1920x1088'],
+  ['21:9', '2048x880'],
+])
+
+// ============================================================================
+// API 调用
+// ============================================================================
+
+const GptImageApi = {
+  /**
+   * Text-to-image — POST /v1/images/generations with a JSON body.
+   *
+   * @param {string} prompt - Text description of the image to generate.
+   * @param {object} [options]
+   * @param {string} [options.model] - Model id; defaults to `Config.model`.
+   * @param {number} [options.n=1] - Number of images requested.
+   * @param {string} [options.size='1024x1024'] - Requested size string.
+   * @param {string} [options.quality='auto'] - Requested quality.
+   * @param {string} [options.format='png'] - Output image format; sent as `output_format`.
+   * @param {number} [options.outputCompression] - Sent as `output_compression` only when provided.
+   * @param {string} [options.moderation='auto'] - Sent only when it differs from 'auto'.
+   * @returns {Promise<object>} The parsed JSON response body.
+   * @throws {Error} When the response status is not 2xx (status and body text are included).
+   */
+  async generate(prompt, options = {}) {
+    const {
+      model = Config.model,
+      n = 1,
+      size = '1024x1024',
+      quality = 'auto',
+      format = 'png',
+      outputCompression,
+      moderation = 'auto',
+    } = options
+
+    const body = {
+      model,
+      prompt,
+      n,
+      size,
+      quality,
+      // The OpenAI Images API names this field `output_format`; a bare `format`
+      // is not a documented parameter, so the requested format was not honoured.
+      output_format: format,
+    }
+    if (outputCompression !== undefined) body.output_compression = outputCompression
+    if (moderation !== 'auto') body.moderation = moderation
+
+    console.log(`\n📡 GPT Image 文生图请求`)
+    console.log(`   模型: ${model}`)
+    console.log(`   尺寸: ${size}  质量: ${quality}`)
+
+    const res = await fetch(`${Config.baseUrl}/v1/images/generations`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'Authorization': `Bearer ${Config.apiKey}`,
+      },
+      body: JSON.stringify(body),
+    })
+
+    if (!res.ok) {
+      const errText = await res.text()
+      throw new Error(`GPT Image 生成失败: ${res.status} - ${errText}`)
+    }
+
+    return res.json()
+  },
+
+  /**
+   * Image-to-image / edit — POST /v1/images/edits as multipart/form-data.
+   *
+   * @param {string} prompt - Edit instruction.
+   * @param {string|string[]} imagePaths - Input image path(s); each is attached as an `image` part.
+   * @param {object} [options]
+   * @param {string} [options.model] - Model id; defaults to `Config.model`.
+   * @param {number} [options.n=1] - Number of images; only sent when greater than 1.
+   * @param {string} [options.size] - Requested size; only sent when provided.
+   * @param {string} [options.maskPath] - Optional mask file, attached as `mask` with type image/png.
+   * @returns {Promise<object>} The parsed JSON response body.
+   * @throws {Error} When no input image is given, a file cannot be read, or the response is not 2xx.
+   */
+  async edit(prompt, imagePaths, options = {}) {
+    const {
+      model = Config.model,
+      n = 1,
+      size,
+      maskPath,
+    } = options
+
+    // Accept a single path too; iterating a bare string would walk its characters.
+    const paths = Array.isArray(imagePaths) ? imagePaths : [imagePaths]
+    if (paths.length === 0 || paths.some((p) => !p)) {
+      throw new Error('GPT Image 编辑失败: 未提供输入图片')
+    }
+
+    const FormData = globalThis.FormData
+    const fd = new FormData()
+    fd.append('model', model)
+    fd.append('prompt', prompt)
+    if (n > 1) fd.append('n', String(n))
+    if (size) fd.append('size', size)
+
+    // Extension → MIME type; unknown extensions are sent as image/png.
+    const mimeMap = { '.png': 'image/png', '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.webp': 'image/webp', '.gif': 'image/gif' }
+    for (const imgPath of paths) {
+      const buf = fs.readFileSync(imgPath)
+      const mimeType = mimeMap[path.extname(imgPath).toLowerCase()] || 'image/png'
+      fd.append('image', new Blob([buf], { type: mimeType }), path.basename(imgPath))
+    }
+
+    if (maskPath) {
+      const maskBuf = fs.readFileSync(maskPath)
+      fd.append('mask', new Blob([maskBuf], { type: 'image/png' }), path.basename(maskPath))
+    }
+
+    console.log(`\n📡 GPT Image 编辑请求`)
+    console.log(`   模型: ${model}`)
+    console.log(`   输入图片: ${paths.length} 张${maskPath ? ' + 蒙版' : ''}`)
+    if (size) console.log(`   尺寸: ${size}`)
+
+    // No explicit Content-Type: fetch derives the multipart boundary from the FormData body.
+    const res = await fetch(`${Config.baseUrl}/v1/images/edits`, {
+      method: 'POST',
+      headers: {
+        'Authorization': `Bearer ${Config.apiKey}`,
+      },
+      body: fd,
+    })
+
+    if (!res.ok) {
+      const errText = await res.text()
+      throw new Error(`GPT Image 编辑失败: ${res.status} - ${errText}`)
+    }
+
+    return res.json()
+  },
+
+  /**
+   * Extract images from an API response.
+   *
+   * Items carrying `b64_json` become `{ data, url, revised_prompt }`; items with
+   * only `url` become `{ url, revised_prompt }`; anything else is dropped.
+   *
+   * @param {object} response - Parsed response body.
+   * @returns {{images: object[]}} Extracted images; empty when `data` is missing or not an array.
+   */
+  parseResponse(response) {
+    if (!response || !Array.isArray(response.data)) {
+      return { images: [] }
+    }
+
+    const images = []
+    for (const item of response.data) {
+      if (item.b64_json) {
+        images.push({ data: item.b64_json, url: item.url, revised_prompt: item.revised_prompt })
+      } else if (item.url) {
+        images.push({ url: item.url, revised_prompt: item.revised_prompt })
+      }
+    }
+    return { images }
+  },
+}
+
+// ============================================================================
+// 文件处理
+// ============================================================================
+
+const FileUtils = {
+  /**
+   * Create `dirPath` (and any missing parents) if needed.
+   * @param {string} dirPath
+   * @returns {string} The same `dirPath`.
+   */
+  ensureDir(dirPath) {
+    // `recursive: true` makes mkdir a no-op for an existing directory.
+    fs.mkdirSync(dirPath, { recursive: true })
+    return dirPath
+  },
+
+  /**
+   * Build a file name of the form `<prefix>_<timestamp>_<random>.<ext>`.
+   * The timestamp is the current ISO time with ':' and '.' replaced by '-';
+   * the random part is up to six base-36 characters.
+   */
+  generateFilename(prefix = 'image', ext = 'png') {
+    const timestamp = new Date().toISOString().replace(/[:.]/g, '-')
+    const random = Math.random().toString(36).substring(2, 8)
+    return `${prefix}_${timestamp}_${random}.${ext}`
+  },
+
+  /**
+   * Read a prompts file: one prompt per line, trimmed, blank lines skipped.
+   * @param {string} filePath
+   * @returns {string[]}
+   */
+  readPromptsFile(filePath) {
+    const content = fs.readFileSync(filePath, 'utf-8')
+    return content.split('\n').map(l => l.trim()).filter(Boolean)
+  },
+
+  /**
+   * Download `url` to `outputPath`, following HTTP redirects.
+   *
+   * The output file is only created once a 2xx response arrives, so a failed
+   * request or an error page never leaves a bogus image on disk. A partially
+   * written file is removed when the transfer fails.
+   *
+   * @param {string} url - http(s) URL of the image.
+   * @param {string} outputPath - Destination file path.
+   * @param {number} [redirectsLeft=5] - Remaining redirects that may be followed.
+   * @returns {Promise<string>} Resolves with `outputPath` once the file is fully written.
+   * @throws {Error} On network errors, non-2xx status, too many redirects, or write errors.
+   */
+  async downloadImage(url, outputPath, redirectsLeft = 5) {
+    const protocol = url.startsWith('https') ? https : http
+    return new Promise((resolve, reject) => {
+      let file = null
+      let settled = false
+
+      // Single failure path: reject once and drop any partial file.
+      const fail = (err) => {
+        if (settled) return
+        settled = true
+        if (!file) { reject(err); return }
+        file.destroy()
+        fs.rm(outputPath, { force: true }, () => reject(err))
+      }
+
+      const request = protocol.get(url, (response) => {
+        const { statusCode, headers } = response
+
+        if (statusCode >= 300 && statusCode < 400 && headers.location) {
+          response.resume() // discard the redirect body so the socket is released
+          if (redirectsLeft <= 0) {
+            fail(new Error(`图片下载失败: 重定向次数过多 - ${url}`))
+            return
+          }
+          // Location may be relative; resolve it against the current URL.
+          const nextUrl = new URL(headers.location, url).toString()
+          settled = true
+          FileUtils.downloadImage(nextUrl, outputPath, redirectsLeft - 1).then(resolve, reject)
+          return
+        }
+
+        if (statusCode < 200 || statusCode >= 300) {
+          response.resume()
+          fail(new Error(`图片下载失败: HTTP ${statusCode} - ${url}`))
+          return
+        }
+
+        file = fs.createWriteStream(outputPath)
+        file.on('error', fail)
+        response.on('error', fail)
+        file.on('finish', () => {
+          if (settled) return
+          settled = true
+          resolve(outputPath)
+        })
+        response.pipe(file)
+      })
+
+      request.on('error', fail)
+    })
+  },
+}
+
+// ============================================================================
+// 核心生成器
+// ============================================================================
+
+/**
+ * High-level generator: calls GptImageApi, then writes the returned images to disk.
+ */
+class GptImageGenerator {
+  /**
+   * @param {object} [options]
+   * @param {string} [options.outputDir='./output'] - Default output directory.
+   * @param {string} [options.size='1024x1024'] - Default size.
+   * @param {string} [options.quality='auto'] - Default quality.
+   */
+  constructor(options = {}) {
+    this.outputDir = options.outputDir || './output'
+    this.defaultSize = options.size || '1024x1024'
+    this.defaultQuality = options.quality || 'auto'
+
+    if (!Config.apiKey) {
+      console.warn('警告: 未设置 gptImageApiKey')
+    }
+  }
+
+  /**
+   * Write every image to `outputDir` and return the written paths.
+   * Base64 payloads are decoded and written directly; URL-only images are downloaded.
+   *
+   * @param {object[]} images - Items produced by GptImageApi.parseResponse.
+   * @param {string} outputDir - Target directory (created if missing).
+   * @param {(index: number) => string} nameFor - File name for the image at `index`.
+   * @returns {Promise<string[]>} Saved file paths, in response order.
+   */
+  async #saveImages(images, outputDir, nameFor) {
+    const savedFiles = []
+    FileUtils.ensureDir(outputDir)
+
+    for (const [index, img] of images.entries()) {
+      const outputPath = path.join(outputDir, nameFor(index))
+
+      if (img.data) {
+        fs.writeFileSync(outputPath, Buffer.from(img.data, 'base64'))
+      } else if (img.url) {
+        await FileUtils.downloadImage(img.url, outputPath)
+      }
+      savedFiles.push(outputPath)
+      console.log(`✅ 已保存: ${outputPath}`)
+    }
+
+    return savedFiles
+  }
+
+  /**
+   * Text-to-image: generate image(s) from a prompt and save them.
+   *
+   * @param {string} prompt
+   * @param {object} [options]
+   * @param {string} [options.size] - Defaults to the constructor's size.
+   * @param {string} [options.quality] - Defaults to the constructor's quality.
+   * @param {string} [options.format='png'] - 'jpeg' is saved with a `.jpg` extension.
+   * @param {number} [options.n=1]
+   * @param {string} [options.outputDir] - Defaults to the constructor's output directory.
+   * @param {string|null} [options.filename=null] - Fixed file name. When several images
+   *   come back, the first keeps this name and later ones get `_2`, `_3`, … before the extension.
+   * @returns {Promise<{images: object[], savedFiles: string[]}>}
+   */
+  async textToImage(prompt, options = {}) {
+    const {
+      size = this.defaultSize,
+      quality = this.defaultQuality,
+      format = 'png',
+      n = 1,
+      outputDir = this.outputDir,
+      filename = null,
+    } = options
+
+    console.log(`\n🎨 GPT Image 文生图: "${prompt.substring(0, 80)}..."`)
+    console.log(`📐 尺寸: ${size}  🎯 质量: ${quality}`)
+
+    const response = await GptImageApi.generate(prompt, { size, quality, format, n })
+    const result = GptImageApi.parseResponse(response)
+
+    const ext = format === 'jpeg' ? 'jpg' : format
+    const savedFiles = await this.#saveImages(result.images, outputDir, (index) => {
+      if (!filename) return FileUtils.generateFilename('gpt_gen', ext)
+      if (index === 0) return filename
+      // Reusing the same fixed name would overwrite the earlier images.
+      const parsed = path.parse(filename)
+      return path.format({ dir: parsed.dir, name: `${parsed.name}_${index + 1}`, ext: parsed.ext })
+    })
+
+    return { images: result.images, savedFiles }
+  }
+
+  /**
+   * Image-to-image / repaint: edit using one or more input images.
+   *
+   * @param {string} prompt - Edit instruction.
+   * @param {string|string[]} inputImages - One path or a list of paths.
+   * @param {object} [options]
+   * @param {string} [options.size] - Defaults to the constructor's size.
+   * @param {string} [options.outputDir] - Defaults to the constructor's output directory.
+   * @param {string|null} [options.maskPath=null] - Optional mask image path.
+   * @returns {Promise<{images: object[], savedFiles: string[]}>}
+   */
+  async imageToImage(prompt, inputImages, options = {}) {
+    const {
+      size = this.defaultSize,
+      outputDir = this.outputDir,
+      maskPath = null,
+    } = options
+
+    const imgPaths = Array.isArray(inputImages) ? inputImages : [inputImages]
+
+    console.log(`\n🖼️ GPT Image 编辑: "${prompt.substring(0, 80)}..."`)
+    console.log(`📁 输入图片: ${imgPaths.length} 张`)
+
+    const response = await GptImageApi.edit(prompt, imgPaths, { size, maskPath, n: 1 })
+    const result = GptImageApi.parseResponse(response)
+
+    const savedFiles = await this.#saveImages(
+      result.images,
+      outputDir,
+      () => FileUtils.generateFilename('gpt_edit', 'png'),
+    )
+
+    return { images: result.images, savedFiles }
+  }
+
+  /**
+   * Batch text-to-image: prompts are processed one after another; a failure is
+   * recorded and does not stop the remaining prompts.
+   *
+   * @param {string[]} prompts
+   * @param {object} [options] - Forwarded to textToImage (a per-item `filename` is added).
+   * @returns {Promise<object[]>} One `{ success, prompt, result | error }` entry per prompt.
+   */
+  async batchGenerate(prompts, options = {}) {
+    const results = []
+    const total = prompts.length
+    // Match the extension to the requested format instead of always writing `.png`.
+    const format = options.format || 'png'
+    const ext = format === 'jpeg' ? 'jpg' : format
+
+    console.log(`\n🚀 GPT Image 批量生成，共 ${total} 个任务`)
+
+    for (let i = 0; i < prompts.length; i++) {
+      console.log(`\n[${i + 1}/${total}] 处理中...`)
+
+      try {
+        const result = await this.textToImage(prompts[i], {
+          ...options,
+          filename: `batch_${String(i + 1).padStart(3, '0')}.${ext}`,
+        })
+        results.push({ success: true, prompt: prompts[i], result })
+      } catch (error) {
+        console.error(`❌ 失败: ${error.message}`)
+        results.push({ success: false, prompt: prompts[i], error: error.message })
+      }
+    }
+
+    const successCount = results.filter(r => r.success).length
+    console.log(`\n✨ 批量生成完成: ${successCount}/${total} 成功`)
+    return results
+  }
+}
+
+// ============================================================================
+// 便捷函数（供 pipeline 调用）
+// ============================================================================
+
+/**
+ * Map an aspect-ratio string (e.g. "16:9") to the size string from RATIO_SIZE_MAP.
+ *
+ * Only the map's own keys are matched, so inherited property names such as
+ * "constructor" fall back to the default instead of returning a non-string.
+ *
+ * @param {string} ratio - Aspect ratio key.
+ * @param {string} [quality='auto'] - Currently unused; kept for caller compatibility.
+ * @returns {string} The mapped size, or '1024x1024' for an unknown ratio.
+ */
+function ratioToSize(ratio, quality = 'auto') {
+  return Object.hasOwn(RATIO_SIZE_MAP, ratio) ? RATIO_SIZE_MAP[ratio] : '1024x1024'
+}
+
+// ============================================================================
+// CLI
+// ============================================================================
+
+/**
+ * Print CLI usage: commands, options, examples, and the ratio → size table.
+ */
+function showHelp() {
+  // One "ratio → size" entry per line, indented to line up under the heading.
+  const ratioLines = Object.entries(RATIO_SIZE_MAP)
+    .map(([ratio, size]) => `${ratio} → ${size}`)
+    .join('\n  ')
+
+  const helpText = `
+🎨 GPT Image Generator - 云雾API GPT Image 图片生成工具
+📦 模型: ${Config.model}
+
+用法:
+  node gpt-image-generator.js <command> [options]
+
+命令:
+  generate <prompt>         文生图
+  edit <prompt>             图生图/重绘（需要 -i 指定输入图片）
+  batch <file>              批量生成（从文件读取提示词）
+
+选项:
+  -o, --output <dir>        输出目录 (默认: ./output)
+  -r, --ratio <ratio>       宽高比 (1:1, 16:9, 9:16, 3:4, 4:3 等)
+  -s, --size <size>         尺寸 (1024x1024, 1088x1920, auto 等)
+  -q, --quality <q>         质量 (low, medium, high, auto)
+  -f, --format <fmt>        格式 (png, jpeg, webp)
+  -i, --input <files>       输入图片（edit 模式，逗号分隔）
+  --mask <file>             蒙版图片（edit 模式）
+  -n <num>                  生成数量 (默认: 1)
+  -h, --help                显示帮助
+
+示例:
+  # 文生图 9:16
+  node gpt-image-generator.js generate "A cat wearing a hat" -r 9:16 -q medium
+
+  # 图生图/重绘
+  node gpt-image-generator.js edit "Add sunglasses" -i ./photo.jpg
+
+  # 多张参考图编辑
+  node gpt-image-generator.js edit "Combine these items into a gift basket" -i ./a.jpg,./b.jpg
+
+  # 批量生成
+  node gpt-image-generator.js batch ./prompts.txt -r 9:16 -q low
+
+可用宽高比及默认尺寸:
+  ${ratioLines}
+`
+
+  console.log(helpText)
+}
+
+/**
+ * CLI entry point: parse argv, then run `generate` (default), `edit`, or `batch`.
+ *
+ * Prints help when called without arguments or with -h/--help. Exits with
+ * status 1 when an option is missing its value or a required input is absent.
+ *
+ * @returns {Promise<void>}
+ */
+async function main() {
+  const args = process.argv.slice(2)
+
+  if (args.length === 0 || args.includes('-h') || args.includes('--help')) {
+    showHelp()
+    return
+  }
+
+  let command = 'generate'
+  let i = 0
+  if (['batch', 'edit', 'generate'].includes(args[0])) {
+    command = args[0]
+    i = 1
+  }
+
+  const params = []
+  const options = { outputDir: './output', size: '1024x1024', quality: 'auto', format: 'png', n: 1 }
+
+  // Consume the value following an option; a trailing option without a value
+  // used to yield `undefined` (or a TypeError for -i), so fail with a clear message.
+  const takeValue = (flag) => {
+    const value = args[i + 1]
+    if (value === undefined) {
+      console.error(`选项 ${flag} 缺少参数值`)
+      process.exit(1)
+    }
+    i += 1
+    return value
+  }
+
+  for (; i < args.length; i++) {
+    const arg = args[i]
+    switch (arg) {
+      case '-o':
+      case '--output':
+        options.outputDir = takeValue(arg)
+        break
+      case '-r':
+      case '--ratio': {
+        const ratio = takeValue(arg)
+        if (Object.hasOwn(RATIO_SIZE_MAP, ratio)) {
+          options.size = RATIO_SIZE_MAP[ratio]
+        } else {
+          // Keep the existing fallback, but no longer silently.
+          console.warn(`警告: 不支持的宽高比 "${ratio}"，使用默认尺寸 1024x1024`)
+          options.size = '1024x1024'
+        }
+        break
+      }
+      case '-s':
+      case '--size':
+        options.size = takeValue(arg)
+        break
+      case '-q':
+      case '--quality':
+        options.quality = takeValue(arg)
+        break
+      case '-f':
+      case '--format':
+        options.format = takeValue(arg)
+        break
+      case '-i':
+      case '--input':
+        options.inputImages = takeValue(arg).split(',').map(s => s.trim()).filter(Boolean)
+        break
+      case '--mask':
+        options.maskPath = takeValue(arg)
+        break
+      case '-n':
+        options.n = Number.parseInt(takeValue(arg), 10) || 1
+        break
+      default:
+        // Anything that is not a known option is part of the prompt / file argument.
+        params.push(arg)
+    }
+  }
+
+  const generator = new GptImageGenerator({
+    outputDir: options.outputDir,
+    size: options.size,
+    quality: options.quality,
+  })
+
+  if (command === 'batch') {
+    const filePath = params[0]
+    if (!filePath || !fs.existsSync(filePath)) {
+      console.error('请提供提示词文件路径')
+      process.exit(1)
+    }
+    const prompts = FileUtils.readPromptsFile(filePath)
+    await generator.batchGenerate(prompts, options)
+    return
+  }
+
+  const prompt = params.join(' ')
+
+  if (command === 'edit') {
+    if (!prompt) { console.error('请提供编辑指令'); process.exit(1) }
+    if (!options.inputImages || options.inputImages.length === 0) {
+      console.error('请使用 -i 指定输入图片')
+      process.exit(1)
+    }
+    await generator.imageToImage(prompt, options.inputImages, {
+      size: options.size,
+      outputDir: options.outputDir,
+      maskPath: options.maskPath,
+    })
+    return
+  }
+
+  if (!prompt) { console.error('请提供生成提示词'); process.exit(1) }
+  await generator.textToImage(prompt, {
+    size: options.size,
+    quality: options.quality,
+    format: options.format,
+    n: options.n,
+    outputDir: options.outputDir,
+  })
+}
+
+// ============================================================================
+// 导出
+// ============================================================================
+
+// Public API: the building blocks plus one-shot helpers that create a
+// generator from `options` and forward the same `options` to the call.
+module.exports = {
+  GptImageGenerator,
+  GptImageApi,
+  Config,
+  FileUtils,
+  RATIO_SIZE_MAP,
+  ratioToSize,
+
+  /** One-shot text-to-image. */
+  async generate(prompt, options) {
+    return new GptImageGenerator(options).textToImage(prompt, options)
+  },
+
+  /** One-shot image edit. */
+  async edit(prompt, imagePaths, options) {
+    return new GptImageGenerator(options).imageToImage(prompt, imagePaths, options)
+  },
+
+  /** One-shot batch text-to-image. */
+  async batchGenerate(prompts, options) {
+    return new GptImageGenerator(options).batchGenerate(prompts, options)
+  },
+}
+
+// Run the CLI only when this file is executed directly, not when it is required.
+if (require.main === module) {
+  const exitWithError = (err) => {
+    console.error(`\n❌ 错误: ${err.message}`)
+    process.exit(1)
+  }
+  main().catch(exitWithError)
+}
--- a/.claude/skills/video-from-script/scripts/lib/cmd-validate.js
+++ b/.claude/skills/video-from-script/scripts/lib/cmd-validate.js
@@ -24,7 +24,7 @@ function validateManifest(manifestPath) {
  }

  if (!manifest.account) issues.push('缺少顶层 account')
-  if (!manifest.imageModel) issues.push('缺少顶层 imageModel（可选: gemini, mj）')
+  if (!manifest.imageModel) issues.push('缺少顶层 imageModel（可选: gemini, gpt-image, mj）')
  if (!manifest.format) issues.push('缺少顶层 format（如 9:16）')
  if (!manifest.items || !Array.isArray(manifest.items)) issues.push('缺少顶层 items 数组')
  if (!manifest.mode) issues.push('缺少顶层 mode（single 或 framePair）')
--- a/.claude/skills/video-from-script/scripts/lib/phase-images.js
+++ b/.claude/skills/video-from-script/scripts/lib/phase-images.js
@@ -1,7 +1,7 @@
 /**
 * Phase: images — 图片生成
 *
- * 支持 Gemini / MJ / Kling 三种模型，含首尾帧模式
+ * 支持 Gemini / GPT Image / MJ / Kling 四种模型，含首尾帧模式
 * 并发生成，支持 task ID 恢复（MJ）
 */

@@ -130,6 +130,32 @@ async function generateMJ(item, idx, dir, imagesDir, ratio, refs, manifestPath,
  return harvestMJ(item, idx, dir, imagesDir, ratio, refs, manifestPath, manifest)
 }

+/**
+ * Generate one item's image with GPT Image.
+ *
+ * Uses the edit helper when `refs.localPaths` is non-empty, otherwise plain
+ * text-to-image. The first saved file (if any) is passed to renameGeneratedFile
+ * as a forward-slash path relative to `dir`.
+ *
+ * @returns {Promise<{file: *}>} `file` is renameGeneratedFile's result, or null when nothing was saved.
+ */
+async function generateGptImage(item, idx, dir, imagesDir, ratio, refs) {
+  const gptImage = require('../gpt-image-generator')
+  const size = gptImage.ratioToSize(ratio)
+  const useReferences = refs.localPaths.length > 0
+
+  let result
+  if (useReferences) {
+    log('images', `[${idx}] GPT Image 图生图: ${item.imagePrompt.substring(0, 60)}...`)
+    result = await gptImage.edit(item.imagePrompt, refs.localPaths, { outputDir: imagesDir, size })
+  } else {
+    log('images', `[${idx}] GPT Image 文生图: ${item.imagePrompt.substring(0, 60)}...`)
+    result = await gptImage.generate(item.imagePrompt, { outputDir: imagesDir, size, quality: 'auto' })
+  }
+
+  const saved = result.savedFiles
+  if (!saved || saved.length === 0) {
+    return { file: null }
+  }
+
+  // Normalise Windows separators before handing the relative path on.
+  const relativePath = path.relative(dir, saved[0]).replace(/\\/g, '/')
+  const file = renameGeneratedFile(relativePath, dir, idx, item.script || item.shotDesc, '')
+  return { file }
+}
+
 async function generateKling(item, idx, dir, imagesDir, ratio, refs) {
  const { generate: klingGen } = require('../kling-image-generator')
  const klingOpts = { outputDir: imagesDir, aspectRatio: ratio }
@@ -158,6 +184,12 @@ async function generateLastFrame(item, idx, manifest, dir, imagesDir, model, rat
        outputDir: imagesDir,
        aspectRatio: ratio,
      })
+    } else if (model === 'gpt-image') {
+      const { edit: gptEdit, ratioToSize } = require('../gpt-image-generator')
+      lastResult = await gptEdit(item.lastFramePrompt, [firstFramePath], {
+        outputDir: imagesDir,
+        size: ratioToSize(ratio),
+      })
    } else if (model === 'kling') {
      const { generate: klingGen } = require('../kling-image-generator')
      lastResult = await klingGen(item.lastFramePrompt, {
@@ -273,10 +305,12 @@ async function processItem(item, manifest, manifestPath, dir, imagesDir, model,
    let result
    if (model === 'gemini') {
      result = await generateGemini(item, idx, dir, imagesDir, ratio, refs)
+    } else if (model === 'gpt-image') {
+      result = await generateGptImage(item, idx, dir, imagesDir, ratio, refs)
    } else if (model === 'kling') {
      result = await generateKling(item, idx, dir, imagesDir, ratio, refs)
    } else {
-      throw new Error(`不支持的模型: ${model}（支持: gemini, mj, kling）`)
+      throw new Error(`不支持的模型: ${model}（支持: gemini, gpt-image, mj, kling）`)
    }

    if (result.file) {
--- a/.claude/skills/video-from-script/scripts/pipeline.js
+++ b/.claude/skills/video-from-script/scripts/pipeline.js
@@ -225,7 +225,7 @@ async function main() {
  console.log('用法:')
  console.log('  pipeline.js create-account --id <id> --name <名称> [--desc ...] [--references file1,file2]')
  console.log('  pipeline.js validate-account --account <id>')
-  console.log('  pipeline.js init --account <id> --mode <single|framePair> --items <JSON> [--items-file <path>] [--image-model gemini|mj] [--video-model veo3-fast|grok|kling] [--format 9:16]')
+  console.log('  pipeline.js init --account <id> --mode <single|framePair> --items <JSON> [--items-file <path>] [--image-model gemini|gpt-image|mj] [--video-model veo3-fast|grok|kling] [--format 9:16]')
  console.log('  pipeline.js validate --manifest <path>')
  console.log('  pipeline.js confirm --manifest <path> --all')
  console.log('  pipeline.js confirm --manifest <path> --items 1,3,5')