video-create/.claude/skills/video-from-script/scripts/gpt-image-generator.js

#!/usr/bin/env node

/**
 * GPT Image Generator - GPT Image 模型图片生成工具
 *
 * 支持模型：gpt-image-2, gpt-image-1.5, gpt-image-1
 * 通过云雾 (yunwu.ai) API 代理调用，遵循 OpenAI Images API 格式
 *
 * 功能：
 * - 文生图（Text-to-Image）— /v1/images/generations
 * - 图生图/重绘（Image-to-Image）— /v1/images/edits (multipart)
 * - 首尾帧编辑
 * - 批量生成
 *
 * 用法：
 *   node gpt-image-generator.js generate "a cute cat" -o ./output -r 16:9
 *   node gpt-image-generator.js edit "add sunglasses" -i ./photo.jpg -o ./output
 *   node gpt-image-generator.js batch ./prompts.txt -o ./output
 */

const fs = require('fs')
const path = require('path')
const https = require('https')
const http = require('http')

// ============================================================================
// 配置
// ============================================================================

/**
 * Load the optional configuration file for this script.
 *
 * The file is `config.json`, located two directories above this script.
 * A missing file is not an error: an empty object is returned so callers
 * can fall back to their own defaults.
 *
 * @returns {object} The parsed configuration, or `{}` when the file is
 *   absent or its content is not a JSON object/array.
 * @throws {SyntaxError} When the file exists but is not valid JSON. The
 *   message names the file; the original parse error is attached as `cause`.
 */
function loadConfig() {
  const configPath = path.join(__dirname, '..', '..', 'config.json')

  let raw
  try {
    // Read directly instead of existsSync + read to avoid a check-then-use race.
    raw = fs.readFileSync(configPath, 'utf-8')
  } catch (err) {
    if (err.code === 'ENOENT' || err.code === 'ENOTDIR') return {}
    throw err
  }

  let parsed
  try {
    parsed = JSON.parse(raw)
  } catch (err) {
    throw new SyntaxError(`配置文件解析失败 (${configPath}): ${err.message}`, { cause: err })
  }

  // `null` or a primitive would make later property reads crash or be meaningless.
  if (parsed === null || typeof parsed !== 'object') {
    console.warn(`警告: 配置文件内容不是 JSON 对象，已忽略: ${configPath}`)
    return {}
  }
  return parsed
}

// Values read from config.json once, at module load.
const cfg = loadConfig()

/**
 * Runtime settings for the image API client.
 *
 * Each value comes from config.json when present and truthy, otherwise from
 * the default shown here.
 */
const Config = {
  // Trailing slashes are stripped so `${Config.baseUrl}/v1/...` never becomes `//v1/...`.
  baseUrl: String(cfg.gptImageApiBaseUrl || 'https://yunwu.ai').replace(/\/+$/, ''),
  // Sent as the Bearer token; empty when not configured.
  apiKey: cfg.gptImageApiKey || '',
  model: cfg.gptImageModel || 'gpt-image-2',
  // NOTE(review): presumably milliseconds; no code in this file currently reads this value.
  timeout: 120000,
}

// Aspect ratio → suggested output size, written as "WIDTHxHEIGHT".
// Original author's note on gpt-image-2 constraints: max edge 3840,
// multiples of 16, ratio ≤ 3:1.
const RATIO_SIZE_MAP = Object.fromEntries([
  ['1:1', '1024x1024'],
  ['3:2', '1536x1024'],
  ['2:3', '1024x1536'],
  ['3:4', '1152x1536'],
  ['4:3', '1536x1152'],
  ['4:5', '1024x1280'],
  ['5:4', '1280x1024'],
  ['9:16', '1088x1920'],
  ['16:9', '1920x1088'],
  ['21:9', '2048x880'],
])

// ============================================================================
// API 调用
// ============================================================================

const GptImageApi = {
  /**
   * Text-to-image: POST `${Config.baseUrl}/v1/images/generations` with a JSON body.
   *
   * @param {string} prompt - Text prompt, sent as `prompt`.
   * @param {object} [options]
   * @param {string} [options.model=Config.model]
   * @param {number} [options.n=1]
   * @param {string} [options.size='1024x1024']
   * @param {string} [options.quality='auto']
   * @param {string} [options.format='png'] - Sent as `output_format`.
   * @param {number} [options.outputCompression] - Sent as `output_compression` when defined.
   * @param {string} [options.moderation='auto'] - Sent only when it differs from 'auto'.
   * @returns {Promise<object>} The parsed JSON response body.
   * @throws {Error} When the HTTP response is not OK; the message carries the status and body text.
   */
  async generate(prompt, options = {}) {
    const {
      model = Config.model,
      n = 1,
      size = '1024x1024',
      quality = 'auto',
      format = 'png',
      outputCompression,
      moderation = 'auto',
    } = options

    const body = {
      model,
      prompt,
      n,
      size,
      quality,
      // The OpenAI Images API (whose format this client follows) names this
      // field `output_format`; a bare `format` key is not part of that API.
      // NOTE(review): confirm the configured proxy forwards `output_format`.
      output_format: format,
    }
    if (outputCompression !== undefined) body.output_compression = outputCompression
    if (moderation !== 'auto') body.moderation = moderation

    console.log(`\n📡 GPT Image 文生图请求`)
    console.log(`   模型: ${model}`)
    console.log(`   尺寸: ${size}  质量: ${quality}`)

    // NOTE(review): Config.timeout is not applied to this request.
    const res = await fetch(`${Config.baseUrl}/v1/images/generations`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${Config.apiKey}`,
      },
      body: JSON.stringify(body),
    })

    if (!res.ok) {
      const errText = await res.text()
      throw new Error(`GPT Image 生成失败: ${res.status} - ${errText}`)
    }

    return res.json()
  },

  /**
   * Image-to-image / edit: POST `${Config.baseUrl}/v1/images/edits` as multipart/form-data.
   *
   * @param {string} prompt - Edit instruction.
   * @param {string[]|string} imagePaths - Input image path(s); each is appended as an `image` part.
   *   Per the original author's note, the first is the edit target and the rest are references.
   * @param {object} [options]
   * @param {string} [options.model=Config.model]
   * @param {number} [options.n=1] - Sent only when greater than 1.
   * @param {string} [options.size] - Sent only when truthy.
   * @param {string} [options.maskPath] - Optional mask file, appended as a `mask` part typed image/png.
   * @returns {Promise<object>} The parsed JSON response body.
   * @throws {Error} When no input image is given or the HTTP response is not OK.
   */
  async edit(prompt, imagePaths, options = {}) {
    const {
      model = Config.model,
      n = 1,
      size,
      maskPath,
    } = options

    // Accept a single path as well as a list, and fail before any network call when empty.
    const paths = (Array.isArray(imagePaths) ? imagePaths : [imagePaths]).filter(Boolean)
    if (paths.length === 0) {
      throw new Error('GPT Image 编辑失败: 未提供输入图片')
    }

    const FormData = globalThis.FormData
    const fd = new FormData()
    fd.append('model', model)
    fd.append('prompt', prompt)
    if (n > 1) fd.append('n', String(n))
    if (size) fd.append('size', size)

    // Extension → MIME type; unknown extensions fall back to image/png.
    const mimeMap = {
      '.png': 'image/png',
      '.jpg': 'image/jpeg',
      '.jpeg': 'image/jpeg',
      '.webp': 'image/webp',
      '.gif': 'image/gif',
    }
    for (const imgPath of paths) {
      const buf = fs.readFileSync(imgPath)
      const ext = path.extname(imgPath).toLowerCase()
      const mimeType = mimeMap[ext] || 'image/png'
      fd.append('image', new Blob([buf], { type: mimeType }), path.basename(imgPath))
    }

    if (maskPath) {
      const maskBuf = fs.readFileSync(maskPath)
      fd.append('mask', new Blob([maskBuf], { type: 'image/png' }), path.basename(maskPath))
    }

    console.log(`\n📡 GPT Image 编辑请求`)
    console.log(`   模型: ${model}`)
    console.log(`   输入图片: ${paths.length} 张${maskPath ? ' + 蒙版' : ''}`)
    if (size) console.log(`   尺寸: ${size}`)

    // No Content-Type header here: fetch derives the multipart boundary from the FormData body.
    const res = await fetch(`${Config.baseUrl}/v1/images/edits`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${Config.apiKey}`,
      },
      body: fd,
    })

    if (!res.ok) {
      const errText = await res.text()
      throw new Error(`GPT Image 编辑失败: ${res.status} - ${errText}`)
    }

    return res.json()
  },

  /**
   * Extract images from an API response.
   *
   * Items carrying `b64_json` become `{ data, url, revised_prompt }`; items
   * carrying only `url` become `{ url, revised_prompt }`; anything else is skipped.
   *
   * @param {object} response - Parsed response body; images are read from `response.data`.
   * @returns {{images: object[]}} Extracted images; empty when `data` is missing or not an array.
   */
  parseResponse(response) {
    const items = Array.isArray(response?.data) ? response.data : []

    const images = []
    for (const item of items) {
      if (!item) continue
      if (item.b64_json) {
        images.push({ data: item.b64_json, url: item.url, revised_prompt: item.revised_prompt })
      } else if (item.url) {
        images.push({ url: item.url, revised_prompt: item.revised_prompt })
      }
    }
    return { images }
  },
}

// ============================================================================
// 文件处理
// ============================================================================

const FileUtils = {
  /**
   * Create a directory (and any missing parents) if the path does not exist yet.
   *
   * @param {string} dirPath - Directory to create.
   * @returns {string} The same `dirPath`, for chaining.
   */
  ensureDir(dirPath) {
    if (!fs.existsSync(dirPath)) {
      fs.mkdirSync(dirPath, { recursive: true })
    }
    return dirPath
  },

  /**
   * Build a file name of the form `<prefix>_<timestamp>_<random>.<ext>`.
   *
   * The timestamp is the current ISO-8601 time with ':' and '.' replaced by '-';
   * the random part is up to six base-36 characters from Math.random().
   *
   * @param {string} [prefix='image']
   * @param {string} [ext='png'] - Extension without the leading dot.
   * @returns {string}
   */
  generateFilename(prefix = 'image', ext = 'png') {
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-')
    const random = Math.random().toString(36).substring(2, 8)
    return `${prefix}_${timestamp}_${random}.${ext}`
  },

  /**
   * Read a prompts file: one prompt per line, trimmed, blank lines dropped.
   *
   * @param {string} filePath
   * @returns {string[]}
   */
  readPromptsFile(filePath) {
    return fs
      .readFileSync(filePath, 'utf-8')
      .split('\n')
      .map((line) => line.trim())
      .filter(Boolean)
  },

  /**
   * Download `url` to `outputPath` over http or https.
   *
   * Redirects (3xx with a Location header) are followed, resolving relative
   * locations against the current URL. Any final status outside 2xx rejects
   * instead of saving the error body as if it were an image. The output file
   * is only created once a 2xx response arrives, and is removed again if the
   * transfer fails part-way.
   *
   * @param {string} url - Source URL.
   * @param {string} outputPath - Destination file path.
   * @param {number} [maxRedirects=5] - Remaining redirects allowed before giving up.
   * @returns {Promise<string>} Resolves with `outputPath` once the file is fully written.
   */
  async downloadImage(url, outputPath, maxRedirects = 5) {
    const client = url.startsWith('https') ? https : http
    return new Promise((resolve, reject) => {
      const request = client.get(url, (response) => {
        const { statusCode, headers } = response

        if (statusCode >= 300 && statusCode < 400 && headers.location) {
          // Drain the redirect response so its socket is released.
          response.resume()
          if (maxRedirects <= 0) {
            reject(new Error('图片下载失败: 重定向次数过多'))
            return
          }
          let nextUrl
          try {
            nextUrl = new URL(headers.location, url).toString()
          } catch (err) {
            reject(err)
            return
          }
          FileUtils.downloadImage(nextUrl, outputPath, maxRedirects - 1).then(resolve, reject)
          return
        }

        if (statusCode < 200 || statusCode >= 300) {
          response.resume()
          reject(new Error(`图片下载失败: HTTP ${statusCode}`))
          return
        }

        const file = fs.createWriteStream(outputPath)
        let settled = false
        const fail = (err) => {
          if (settled) return
          settled = true
          file.destroy()
          // Remove the partial file; `force` ignores the case where it was never created.
          fs.rm(outputPath, { force: true }, () => reject(err))
        }

        response.on('error', fail)
        // A connection dropped mid-body closes the response without completing it.
        response.on('close', () => {
          if (!response.complete) fail(new Error('图片下载失败: 连接中断'))
        })
        file.on('error', fail)
        file.on('finish', () => {
          if (settled) return
          settled = true
          resolve(outputPath)
        })
        response.pipe(file)
      })
      request.on('error', reject)
    })
  },
}

// ============================================================================
// 核心生成器
// ============================================================================

/**
 * High-level generator: calls GptImageApi, then saves every returned image
 * into an output directory.
 */
class GptImageGenerator {
  /**
   * @param {object} [options]
   * @param {string} [options.outputDir='./output'] - Default directory for saved files.
   * @param {string} [options.size='1024x1024'] - Default size for requests.
   * @param {string} [options.quality='auto'] - Default quality for text-to-image.
   */
  constructor(options = {}) {
    this.outputDir = options.outputDir || './output'
    this.defaultSize = options.size || '1024x1024'
    this.defaultQuality = options.quality || 'auto'

    if (!Config.apiKey) {
      console.warn('警告: 未设置 gptImageApiKey')
    }
  }

  /**
   * Write one parsed image to disk: decode base64 `data` when present,
   * otherwise download from `url`.
   */
  async #saveImage(img, outputPath) {
    if (img.data) {
      fs.writeFileSync(outputPath, Buffer.from(img.data, 'base64'))
    } else if (img.url) {
      await FileUtils.downloadImage(img.url, outputPath)
    }
  }

  /**
   * Text-to-image: generate image(s) from a prompt and save them.
   *
   * @param {string} prompt
   * @param {object} [options]
   * @param {string} [options.size] - Defaults to the constructor's size.
   * @param {string} [options.quality] - Defaults to the constructor's quality.
   * @param {string} [options.format='png'] - Also determines the extension of generated names
   *   ('jpeg' becomes 'jpg').
   * @param {number} [options.n=1]
   * @param {string} [options.outputDir] - Defaults to the constructor's output directory.
   * @param {string|null} [options.filename=null] - Explicit file name. When several images are
   *   returned, the first keeps this name and the rest get `_2`, `_3`, … before the extension.
   * @returns {Promise<{images: object[], savedFiles: string[]}>}
   */
  async textToImage(prompt, options = {}) {
    const {
      size = this.defaultSize,
      quality = this.defaultQuality,
      format = 'png',
      n = 1,
      outputDir = this.outputDir,
      filename = null,
    } = options

    console.log(`\n🎨 GPT Image 文生图: "${prompt.substring(0, 80)}..."`)
    console.log(`📐 尺寸: ${size}  🎯 质量: ${quality}`)

    const response = await GptImageApi.generate(prompt, { size, quality, format, n })
    const result = GptImageApi.parseResponse(response)

    const savedFiles = []
    FileUtils.ensureDir(outputDir)
    const ext = format === 'jpeg' ? 'jpg' : format

    for (const [index, img] of result.images.entries()) {
      let outputFilename
      if (!filename) {
        outputFilename = FileUtils.generateFilename('gpt_gen', ext)
      } else if (index === 0) {
        outputFilename = filename
      } else {
        // A fixed name would make every image after the first overwrite the previous one.
        const parts = path.parse(filename)
        outputFilename = path.join(parts.dir, `${parts.name}_${index + 1}${parts.ext}`)
      }
      const outputPath = path.join(outputDir, outputFilename)

      await this.#saveImage(img, outputPath)
      savedFiles.push(outputPath)
      console.log(`✅ 已保存: ${outputPath}`)
    }

    return { images: result.images, savedFiles }
  }

  /**
   * Image-to-image / edit: send input image(s) with an instruction and save the result(s) as PNG.
   *
   * @param {string} prompt - Edit instruction.
   * @param {string|string[]} inputImages - One path or a list of paths.
   * @param {object} [options]
   * @param {string} [options.size] - Defaults to the constructor's size.
   * @param {string} [options.outputDir] - Defaults to the constructor's output directory.
   * @param {string|null} [options.maskPath=null]
   * @returns {Promise<{images: object[], savedFiles: string[]}>}
   */
  async imageToImage(prompt, inputImages, options = {}) {
    const {
      size = this.defaultSize,
      outputDir = this.outputDir,
      maskPath = null,
    } = options

    const imgPaths = Array.isArray(inputImages) ? inputImages : [inputImages]

    console.log(`\n🖼️ GPT Image 编辑: "${prompt.substring(0, 80)}..."`)
    console.log(`📁 输入图片: ${imgPaths.length} 张`)

    const response = await GptImageApi.edit(prompt, imgPaths, { size, maskPath, n: 1 })
    const result = GptImageApi.parseResponse(response)

    const savedFiles = []
    FileUtils.ensureDir(outputDir)

    for (const img of result.images) {
      const outputPath = path.join(outputDir, FileUtils.generateFilename('gpt_edit', 'png'))

      await this.#saveImage(img, outputPath)
      savedFiles.push(outputPath)
      console.log(`✅ 已保存: ${outputPath}`)
    }

    return { images: result.images, savedFiles }
  }

  /**
   * Batch text-to-image: process prompts one after another, continuing past failures.
   *
   * Files are named `batch_001.<ext>`, `batch_002.<ext>`, … where `<ext>` follows
   * `options.format` so the extension matches the requested image format.
   *
   * @param {string[]} prompts
   * @param {object} [options] - Forwarded to textToImage for every prompt.
   * @returns {Promise<object[]>} One entry per prompt: `{ success: true, prompt, result }`
   *   or `{ success: false, prompt, error }`.
   */
  async batchGenerate(prompts, options = {}) {
    const results = []
    const total = prompts.length
    const { format = 'png' } = options
    const ext = format === 'jpeg' ? 'jpg' : format

    console.log(`\n🚀 GPT Image 批量生成，共 ${total} 个任务`)

    for (let i = 0; i < prompts.length; i++) {
      console.log(`\n[${i + 1}/${total}] 处理中...`)

      try {
        const result = await this.textToImage(prompts[i], {
          ...options,
          filename: `batch_${String(i + 1).padStart(3, '0')}.${ext}`,
        })
        results.push({ success: true, prompt: prompts[i], result })
      } catch (error) {
        console.error(`❌ 失败: ${error.message}`)
        results.push({ success: false, prompt: prompts[i], error: error.message })
      }
    }

    const successCount = results.filter(r => r.success).length
    console.log(`\n✨ 批量生成完成: ${successCount}/${total} 成功`)
    return results
  }
}

// ============================================================================
// 便捷函数（供 pipeline 调用）
// ============================================================================

/**
 * Map an aspect-ratio string (e.g. "16:9") to a suggested size string.
 *
 * @param {string} ratio - Key to look up in RATIO_SIZE_MAP.
 * @param {string} [quality='auto'] - Accepted for call-site compatibility; not used by the lookup.
 * @returns {string} The mapped "WIDTHxHEIGHT" value, or '1024x1024' when `ratio` is not a known key.
 */
function ratioToSize(ratio, quality = 'auto') {
  // Own-property check: a plain lookup would return inherited members for
  // inputs such as "constructor" or "toString" instead of the fallback size.
  return Object.hasOwn(RATIO_SIZE_MAP, ratio) ? RATIO_SIZE_MAP[ratio] : '1024x1024'
}

// ============================================================================
// CLI
// ============================================================================

/**
 * Print the CLI usage text: the configured model, commands, options,
 * examples, and every aspect ratio in RATIO_SIZE_MAP with its size.
 */
function showHelp() {
  // One "ratio → size" entry per map key; entries after the first are indented by the join.
  const ratioListing = Object.entries(RATIO_SIZE_MAP)
    .map(([ratio, size]) => `${ratio} → ${size}`)
    .join('\n  ')

  // The leading and trailing empty strings reproduce the blank lines around the text.
  const helpLines = [
    '',
    '🎨 GPT Image Generator - 云雾API GPT Image 图片生成工具',
    `📦 模型: ${Config.model}`,
    '',
    '用法:',
    '  node gpt-image-generator.js <command> [options]',
    '',
    '命令:',
    '  generate <prompt>         文生图',
    '  edit <prompt>             图生图/重绘（需要 -i 指定输入图片）',
    '  batch <file>              批量生成（从文件读取提示词）',
    '',
    '选项:',
    '  -o, --output <dir>        输出目录 (默认: ./output)',
    '  -r, --ratio <ratio>       宽高比 (1:1, 16:9, 9:16, 3:4, 4:3 等)',
    '  -s, --size <size>         尺寸 (1024x1024, 1088x1920, auto 等)',
    '  -q, --quality <q>         质量 (low, medium, high, auto)',
    '  -f, --format <fmt>        格式 (png, jpeg, webp)',
    '  -i, --input <files>       输入图片（edit 模式，逗号分隔）',
    '  --mask <file>             蒙版图片（edit 模式）',
    '  -n <num>                  生成数量 (默认: 1)',
    '  -h, --help                显示帮助',
    '',
    '示例:',
    '  # 文生图 9:16',
    '  node gpt-image-generator.js generate "A cat wearing a hat" -r 9:16 -q medium',
    '',
    '  # 图生图/重绘',
    '  node gpt-image-generator.js edit "Add sunglasses" -i ./photo.jpg',
    '',
    '  # 多张参考图编辑',
    '  node gpt-image-generator.js edit "Combine these items into a gift basket" -i ./a.jpg,./b.jpg',
    '',
    '  # 批量生成',
    '  node gpt-image-generator.js batch ./prompts.txt -r 9:16 -q low',
    '',
    '可用宽高比及默认尺寸:',
    `  ${ratioListing}`,
    '',
  ]

  console.log(helpLines.join('\n'))
}

/**
 * CLI entry point: parse `process.argv`, then run one of the commands
 * `generate` (default), `edit`, or `batch`.
 *
 * Arguments that are not recognised flags are collected as positional
 * parameters: joined with spaces to form the prompt, or (for `batch`) the
 * first one is used as the prompts file path.
 *
 * @returns {Promise<void>}
 * @throws {Error} When a flag that requires a value is the last argument.
 */
async function main() {
  const args = process.argv.slice(2)

  if (args.includes('-h') || args.includes('--help') || args.length === 0) {
    showHelp()
    return
  }

  let command = 'generate'
  const params = []
  const options = { outputDir: './output', size: '1024x1024', quality: 'auto', format: 'png', n: 1 }

  let i = 0
  if (args[0] === 'batch' || args[0] === 'edit' || args[0] === 'generate') {
    command = args[0]
    i = 1
  }

  // Consume the argument following a flag; a flag at the very end has no value to read.
  const takeValue = (flag) => {
    i += 1
    const value = args[i]
    if (value === undefined) {
      throw new Error(`选项 ${flag} 缺少参数值`)
    }
    return value
  }

  while (i < args.length) {
    const arg = args[i]
    if (arg === '-o' || arg === '--output') {
      options.outputDir = takeValue(arg)
    } else if (arg === '-r' || arg === '--ratio') {
      const ratio = takeValue(arg)
      // Own-property check so names inherited from Object.prototype are not treated as ratios.
      if (Object.hasOwn(RATIO_SIZE_MAP, ratio)) {
        options.size = RATIO_SIZE_MAP[ratio]
      } else {
        console.warn(`警告: 未知宽高比 "${ratio}"，使用默认尺寸 1024x1024`)
        options.size = '1024x1024'
      }
      options._ratio = ratio
    } else if (arg === '-s' || arg === '--size') {
      options.size = takeValue(arg)
    } else if (arg === '-q' || arg === '--quality') {
      options.quality = takeValue(arg)
    } else if (arg === '-f' || arg === '--format') {
      options.format = takeValue(arg)
    } else if (arg === '-i' || arg === '--input') {
      options.inputImages = takeValue(arg).split(',').map(s => s.trim()).filter(Boolean)
    } else if (arg === '--mask') {
      options.maskPath = takeValue(arg)
    } else if (arg === '-n') {
      // Non-numeric or zero input falls back to a single image.
      options.n = Number.parseInt(takeValue(arg), 10) || 1
    } else {
      params.push(arg)
    }
    i++
  }

  const generator = new GptImageGenerator({
    outputDir: options.outputDir,
    size: options.size,
    quality: options.quality,
  })

  if (command === 'batch') {
    const filePath = params[0]
    if (!filePath || !fs.existsSync(filePath)) {
      console.error('请提供提示词文件路径')
      process.exit(1)
    }
    const prompts = FileUtils.readPromptsFile(filePath)
    await generator.batchGenerate(prompts, options)
  } else if (command === 'edit') {
    const prompt = params.join(' ')
    if (!prompt) { console.error('请提供编辑指令'); process.exit(1) }
    if (!options.inputImages || options.inputImages.length === 0) {
      console.error('请使用 -i 指定输入图片')
      process.exit(1)
    }
    await generator.imageToImage(prompt, options.inputImages, {
      size: options.size,
      outputDir: options.outputDir,
      maskPath: options.maskPath,
    })
  } else {
    const prompt = params.join(' ')
    if (!prompt) { console.error('请提供生成提示词'); process.exit(1) }
    await generator.textToImage(prompt, {
      size: options.size,
      quality: options.quality,
      format: options.format,
      n: options.n,
      outputDir: options.outputDir,
    })
  }
}

// ============================================================================
// 导出
// ============================================================================

module.exports = {
  GptImageGenerator,
  GptImageApi,
  Config,
  FileUtils,
  RATIO_SIZE_MAP,
  ratioToSize,

  generate: async (prompt, options) => {
    const generator = new GptImageGenerator(options)
    return generator.textToImage(prompt, options)
  },

  edit: async (prompt, imagePaths, options) => {
    const generator = new GptImageGenerator(options)
    return generator.imageToImage(prompt, imagePaths, options)
  },

  batchGenerate: async (prompts, options) => {
    const generator = new GptImageGenerator(options)
    return generator.batchGenerate(prompts, options)
  },
}

// Run the CLI only when this file is executed directly, not when it is require()d.
if (require.main === module) {
  // Any rejection from main() is reported on stderr and turned into exit code 1.
  const exitWithError = (err) => {
    console.error(`\n❌ 错误: ${err.message}`)
    process.exit(1)
  }
  main().catch(exitWithError)
}