// sionrui/scripts/gemini-image-generator.js

/**
 * Gemini Image Generator - 云雾API图片生成工具
 *
 * 功能：
 * - 文生图（Text-to-Image）
 * - 图生图（Image-to-Image）
 * - 多种业务场景模板
 * - 批量生成
 * - 自定义输出目录
 *
 * 使用示例：
 *   node gemini-image-generator.js generate "A cute cat" -o ./output -r 16:9
 *   node gemini-image-generator.js edit "Add sunglasses" -i ./photo.jpg
 *   node gemini-image-generator.js template logo --text "MyBrand"
 *   node gemini-image-generator.js batch ./prompts.txt
 */

const fs = require('fs')
const path = require('path')

// ============================================================================
// 配置模块
// ============================================================================

// Single source of truth for the model id; the REST endpoint below is derived from it
// so the two can never drift apart.
const GEMINI_MODEL = 'gemini-3.1-flash-image-preview'

/**
 * Static configuration shared by every module in this file.
 */
const Config = {
  // Yunwu API settings
  api: {
    baseUrl: 'https://yunwu.ai',
    model: GEMINI_MODEL,
    endpoint: `/v1beta/models/${GEMINI_MODEL}:generateContent`,
    // Prefer a key supplied through the environment (YUNWU_API_KEY).
    // FIXME(security): the literal fallback below is a credential committed to source
    // control. It is kept only so existing setups keep working; rotate it and remove
    // the fallback once every caller provides YUNWU_API_KEY.
    key: process.env.YUNWU_API_KEY || 'sk-BjGv7Nf3KJHTBT8OB8LiGM0vHISl8yFcfCxZAWIZO4yogD7N'
  },

  // Default output settings
  output: {
    defaultDir: './output',
    defaultFormat: 'png'
  },

  // Supported aspect ratios
  aspectRatios: ['1:1', '2:3', '3:2', '3:4', '4:3', '4:5', '5:4', '9:16', '16:9', '21:9'],

  // Supported resolutions
  imageSizes: ['512', '1K', '2K', '4K'],

  // Resolution used when the caller does not pick one
  defaultImageSize: '2K',

  // Response modality presets
  responseModalities: {
    textAndImage: ['TEXT', 'IMAGE'],
    imageOnly: ['IMAGE'],
    textOnly: ['TEXT']
  },

  // Request timeouts, in milliseconds
  timeout: {
    default: 120000,      // 2 minutes
    max: 300000          // 5 minutes
  }
}

// ============================================================================
// 文件处理模块
// ============================================================================

/**
 * Synchronous file-system helpers used by the generator and the CLI.
 */
const FileUtils = {
  /**
   * Create `dirPath` (including missing parents) unless it already exists.
   * @param {string} dirPath - Directory to create.
   * @returns {string} The same `dirPath`, for chaining.
   */
  ensureDir(dirPath) {
    const alreadyPresent = fs.existsSync(dirPath)
    if (alreadyPresent) {
      return dirPath
    }
    fs.mkdirSync(dirPath, { recursive: true })
    return dirPath
  },

  /**
   * Read an image file and return it as base64 plus a MIME type.
   * The MIME type is chosen from the (case-insensitive) file extension;
   * unrecognised extensions fall back to `image/png`.
   * @param {string} imagePath - Path of the image to read.
   * @returns {{mimeType: string, data: string}} MIME type and base64 payload.
   */
  imageToBase64(imagePath) {
    const fileBytes = fs.readFileSync(imagePath)

    let mimeType
    switch (path.extname(imagePath).toLowerCase()) {
      case '.jpg':
      case '.jpeg':
        mimeType = 'image/jpeg'
        break
      case '.gif':
        mimeType = 'image/gif'
        break
      case '.webp':
        mimeType = 'image/webp'
        break
      case '.png':
      default:
        mimeType = 'image/png'
    }

    return { mimeType, data: fileBytes.toString('base64') }
  },

  /**
   * Decode a base64 string and write the bytes to `outputPath`.
   * @param {string} base64Data - Base64-encoded image bytes.
   * @param {string} outputPath - Destination file path.
   * @returns {string} `outputPath`.
   */
  base64ToImage(base64Data, outputPath) {
    fs.writeFileSync(outputPath, Buffer.from(base64Data, 'base64'))
    return outputPath
  },

  /**
   * Build a file name of the form `<prefix>_<ISO timestamp>_<random>.<ext>`,
   * with `:` and `.` in the timestamp replaced by `-`.
   * @param {string} [prefix='image'] - Leading part of the name.
   * @param {string} [ext='png'] - Extension without the dot.
   * @returns {string} The generated file name.
   */
  generateFilename(prefix = 'image', ext = 'png') {
    const stamp = new Date().toISOString().replace(/[:.]/g, '-')
    const suffix = Math.random().toString(36).slice(2, 8)
    return `${prefix}_${stamp}_${suffix}.${ext}`
  },

  /**
   * Read a UTF-8 text file and return its non-blank lines, trimmed.
   * @param {string} filePath - Path of the prompts file.
   * @returns {string[]} One entry per non-blank line.
   */
  readPromptsFile(filePath) {
    const prompts = []
    for (const rawLine of fs.readFileSync(filePath, 'utf-8').split('\n')) {
      const prompt = rawLine.trim()
      if (prompt) {
        prompts.push(prompt)
      }
    }
    return prompts
  }
}

// ============================================================================
// API调用模块
// ============================================================================

/**
 * Thin client for the `generateContent` REST endpoint configured in `Config.api`.
 */
const GeminiAPI = {
  /**
   * POST a generation request and return the decoded JSON response.
   *
   * @param {Array<object>} contents - Conversation turns (`{ role, parts }`) sent as `contents`.
   * @param {object} [options]
   * @param {string} [options.aspectRatio='1:1'] - Value for `imageConfig.aspectRatio`.
   * @param {string} [options.imageSize] - Value for `imageConfig.imageSize`; defaults to `Config.defaultImageSize`.
   * @param {string[]} [options.responseModalities] - Defaults to `Config.responseModalities.textAndImage`.
   * @param {number} [options.timeout] - Milliseconds before the request is aborted; defaults to `Config.timeout.default`.
   * @returns {Promise<object>} Parsed JSON body of a successful response.
   * @throws {Error} On a non-2xx status (message carries status and body) or when the
   *   timeout elapses (the original abort error is attached as `cause`).
   */
  async generateContent(contents, options = {}) {
    const {
      aspectRatio = '1:1',
      imageSize = Config.defaultImageSize,
      responseModalities = Config.responseModalities.textAndImage,
      timeout = Config.timeout.default
    } = options

    // The key is sent both as a query parameter and as a bearer token, exactly as before;
    // URLSearchParams takes care of escaping it.
    const url = new URL(`${Config.api.baseUrl}${Config.api.endpoint}`)
    url.searchParams.set('key', Config.api.key)

    const body = {
      contents,
      generationConfig: {
        responseModalities,
        imageConfig: {
          aspectRatio,
          imageSize
        }
      }
    }

    // Log the endpoint without the query string so the key never reaches the console.
    console.log(`\n📡 API请求: ${Config.api.baseUrl}${Config.api.endpoint}`)
    console.log(`📋 模型: ${Config.api.model}`)
    console.log(`⏱️ 超时: ${timeout / 1000}秒`)

    // AbortController implements the timeout; `timedOut` lets us tell our own abort
    // apart from any other failure when translating the error below.
    const controller = new AbortController()
    let timedOut = false
    const timeoutId = setTimeout(() => {
      timedOut = true
      controller.abort()
    }, timeout)

    try {
      const response = await fetch(url.href, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${Config.api.key}`
        },
        body: JSON.stringify(body),
        signal: controller.signal
      })

      if (!response.ok) {
        const errorBody = await response.text()
        throw new Error(`API请求失败: ${response.status} - ${errorBody}`)
      }

      return await response.json()
    } catch (error) {
      if (timedOut) {
        // A bare "This operation was aborted" tells the user nothing; report the timeout.
        throw new Error(`API请求超时: ${timeout / 1000}秒内未收到响应`, { cause: error })
      }
      throw error
    } finally {
      clearTimeout(timeoutId)
    }
  },

  /**
   * Collect the text and inline images from the first candidate of a response.
   *
   * @param {object} response - Decoded JSON returned by `generateContent`.
   * @returns {{text: string, images: Array<{mimeType: string, data: string}>}}
   *   Concatenated text parts and every `inlineData` part; empty when the response
   *   has no first candidate or no parts.
   */
  parseResponse(response) {
    const result = {
      text: '',
      images: []
    }

    const parts = response?.candidates?.[0]?.content?.parts
    if (!Array.isArray(parts)) {
      return result
    }

    for (const part of parts) {
      if (part?.text) {
        result.text += part.text
      }
      if (part?.inlineData) {
        result.images.push({
          mimeType: part.inlineData.mimeType,
          data: part.inlineData.data
        })
      }
    }

    return result
  }
}

// ============================================================================
// 业务场景模板模块
// ============================================================================

/**
 * Prompt templates keyed by scenario. Each entry has a display `name` and a
 * `generate(mainInput, options)` function returning the English prompt text.
 * Prompts are assembled as a list of sentences joined by single spaces.
 */
const Templates = {
  /**
   * Photorealistic photo prompt. An empty `environment` omits the "Set in" sentence.
   */
  photorealistic: {
    name: '写实照片',
    generate(subject, options = {}) {
      const {
        cameraDetails = '85mm lens, shallow depth of field',
        environment = '',
        mood = 'serene',
        lighting = 'soft, natural golden hour light',
        shotType = 'close-up portrait'
      } = options

      const sentences = [`A photorealistic ${shotType} of ${subject}.`]
      if (environment) {
        sentences.push(`Set in ${environment}.`)
      }
      sentences.push(
        `The scene is illuminated by ${lighting}, creating a ${mood} atmosphere.`,
        `Captured with ${cameraDetails}.`,
        'Ultra-realistic, with sharp focus on key details.'
      )
      return sentences.join(' ')
    }
  },

  /**
   * Sticker / icon prompt.
   */
  sticker: {
    name: '贴纸/图标',
    generate(subject, options = {}) {
      const { background = 'white', colorPalette = 'vibrant', style = 'kawaii' } = options

      return [
        `A ${style}-style sticker of ${subject}.`,
        `The design features bold, clean outlines, simple cel-shading, and a ${colorPalette} color palette.`,
        `The background must be ${background}.`
      ].join(' ')
    }
  },

  /**
   * Logo prompt. A falsy `text` leaves out the "with the text" clause.
   */
  logo: {
    name: 'Logo设计',
    generate(text, options = {}) {
      const { shape = 'circle', colorScheme = 'black and white', style = 'modern, minimalist' } = options

      let textClause = ''
      if (text) {
        textClause = ` with the text "${text}"`
      }

      return [
        `Create a ${style} logo${textClause}.`,
        'The text should be in a clean, bold, sans-serif font.',
        `The color scheme is ${colorScheme}.`,
        `Put the logo in a ${shape}.`
      ].join(' ')
    }
  },

  /**
   * Studio product-shot prompt.
   */
  product: {
    name: '产品图',
    generate(product, options = {}) {
      const {
        background = 'minimalist',
        angle = 'slightly elevated 45-degree shot',
        lighting = 'three-point softbox setup',
        surface = 'polished concrete surface'
      } = options

      return [
        `A high-resolution, studio-lit product photograph of ${product}, presented on a ${surface}.`,
        `The lighting is a ${lighting} designed to create soft, diffused highlights and eliminate harsh shadows.`,
        `The camera angle is a ${angle} to showcase key features.`,
        'Ultra-realistic.',
        `${background} background.`
      ].join(' ')
    }
  },

  /**
   * Minimalist composition prompt with negative space.
   */
  minimalist: {
    name: '极简设计',
    generate(subject, options = {}) {
      const {
        lighting = 'soft, diffused lighting from the top left',
        backgroundColor = 'off-white canvas',
        position = 'bottom-right'
      } = options

      return [
        `A minimalist composition featuring a single, ${subject} positioned in the ${position} of the frame.`,
        `The background is a vast, empty ${backgroundColor}, creating significant negative space for text.`,
        `${lighting}.`
      ].join(' ')
    }
  },

  /**
   * Comic / storyboard prompt; the scene description is appended verbatim.
   */
  comic: {
    name: '漫画/故事板',
    generate(scene, options = {}) {
      const { panels = 3, style = 'gritty, noir' } = options

      const intro = `Make a ${panels} panel comic in a ${style} art style with high-contrast black and white inks.`
      return [intro, `${scene}`].join(' ')
    }
  },

  /**
   * Style-transfer prompt for a provided image.
   */
  styleTransfer: {
    name: '风格转换',
    generate(targetStyle, options = {}) {
      const { preserveElements = 'composition and key elements' } = options

      return [
        `Transform the provided image into the artistic style of ${targetStyle}.`,
        `Preserve the original ${preserveElements} but render with the new stylistic elements.`
      ].join(' ')
    }
  },

  /**
   * Image-edit prompt: the instruction followed by a "preserve" sentence, each ended with a period.
   */
  edit: {
    name: '图像编辑',
    generate(instruction, options = {}) {
      const {
        preserve = 'Keep everything else unchanged, preserving the original style, lighting, and composition'
      } = options

      return [`${instruction}.`, `${preserve}.`].join(' ')
    }
  },

  /**
   * Image-compositing prompt; `options` is accepted but not used.
   */
  composite: {
    name: '图像合成',
    generate(description, options = {}) {
      return [
        'Create a new image by combining the elements from the provided images.',
        `${description}`,
        'Generate a realistic result with proper lighting and shadows.'
      ].join(' ')
    }
  }
}

// ============================================================================
// 核心生成器类
// ============================================================================

/**
 * High-level image generator: builds requests, calls `GeminiAPI`, and writes the
 * returned images to disk through `FileUtils`.
 */
class GeminiImageGenerator {
  /**
   * @param {object} [options]
   * @param {string} [options.outputDir] - Default output directory (falls back to `Config.output.defaultDir`).
   * @param {string} [options.aspectRatio] - Default aspect ratio (falls back to `'1:1'`).
   * @param {string} [options.imageSize] - Default resolution (falls back to `Config.defaultImageSize`).
   */
  constructor(options = {}) {
    this.outputDir = options.outputDir || Config.output.defaultDir
    this.defaultAspectRatio = options.aspectRatio || '1:1'
    this.defaultImageSize = options.imageSize || Config.defaultImageSize

    if (!Config.api.key) {
      console.warn('警告: 未设置API密钥')
    }
  }

  /**
   * Turn one image path (or an array of paths) into `inlineData` request parts.
   * @param {string|string[]} inputImages
   * @returns {Array<object>}
   */
  #buildImageParts(inputImages) {
    const imagePaths = Array.isArray(inputImages) ? inputImages : [inputImages]
    return imagePaths.map((imgPath) => {
      const { mimeType, data } = FileUtils.imageToBase64(imgPath)
      return {
        inlineData: {
          mime_type: mimeType,
          data: data
        }
      }
    })
  }

  /**
   * Write every image to `outputDir` and return the written paths.
   *
   * Without `filename`, each file gets a generated unique name. With `filename`, the
   * first image uses it verbatim and later images get `_2`, `_3`, ... inserted before
   * the extension, so a multi-image response no longer overwrites a single file.
   *
   * @param {Array<{mimeType?: string, data: string}>} images
   * @param {string} outputDir
   * @param {string} prefix - Prefix for generated file names.
   * @param {?string} [filename] - Caller-chosen file name.
   * @returns {string[]} Paths of the saved files, in response order.
   */
  #saveImages(images, outputDir, prefix, filename = null) {
    const savedFiles = []
    FileUtils.ensureDir(outputDir)

    for (const [index, img] of images.entries()) {
      // Fall back to png when the response carries no usable MIME type.
      const ext = (typeof img.mimeType === 'string' && img.mimeType.split('/')[1]) || 'png'

      let outputFilename
      if (!filename) {
        outputFilename = FileUtils.generateFilename(prefix, ext)
      } else if (index === 0) {
        outputFilename = filename
      } else {
        const parsed = path.parse(filename)
        outputFilename = path.join(parsed.dir, `${parsed.name}_${index + 1}${parsed.ext}`)
      }

      const outputPath = path.join(outputDir, outputFilename)
      FileUtils.base64ToImage(img.data, outputPath)
      savedFiles.push(outputPath)
      console.log(`✅ 已保存: ${outputPath}`)
    }

    return savedFiles
  }

  /**
   * Send `contents`, log any text reply, save returned images and build the result object.
   * @returns {Promise<{text: string, images: Array<object>, savedFiles: string[]}>}
   */
  async #requestAndSave(contents, requestOptions, outputDir, prefix, filename = null) {
    const response = await GeminiAPI.generateContent(contents, requestOptions)
    const result = GeminiAPI.parseResponse(response)

    if (result.text) {
      console.log(`📝 模型回复: ${result.text}`)
    }

    const savedFiles = this.#saveImages(result.images, outputDir, prefix, filename)

    return {
      text: result.text,
      images: result.images,
      savedFiles
    }
  }

  /**
   * Text-to-image.
   *
   * @param {string} prompt - Prompt text.
   * @param {object} [options]
   * @param {string} [options.aspectRatio] - Defaults to the generator's aspect ratio.
   * @param {string} [options.imageSize] - Defaults to the generator's resolution.
   * @param {string} [options.outputDir] - Defaults to the generator's output directory.
   * @param {?string} [options.filename] - Fixed file name for the first image (see `#saveImages`).
   * @returns {Promise<{text: string, images: Array<object>, savedFiles: string[]}>}
   */
  async textToImage(prompt, options = {}) {
    const {
      aspectRatio = this.defaultAspectRatio,
      imageSize = this.defaultImageSize,
      outputDir = this.outputDir,
      filename = null
    } = options

    console.log(`\n🎨 生成图片: "${prompt.substring(0, 50)}..."`)
    console.log(`📐 宽高比: ${aspectRatio}`)
    console.log(`📏 分辨率: ${imageSize}`)

    const contents = [{
      role: 'user',
      parts: [{ text: prompt }]
    }]

    return this.#requestAndSave(contents, { aspectRatio, imageSize }, outputDir, 'generated', filename)
  }

  /**
   * Image-to-image: edit or combine one or more reference images.
   *
   * @param {string} prompt - Edit instruction.
   * @param {string|string[]} inputImages - Path(s) of the reference image(s).
   * @param {object} [options] - `aspectRatio`, `imageSize`, `outputDir`; each defaults to the generator's value.
   * @returns {Promise<{text: string, images: Array<object>, savedFiles: string[]}>}
   */
  async imageToImage(prompt, inputImages, options = {}) {
    const {
      aspectRatio = this.defaultAspectRatio,
      imageSize = this.defaultImageSize,
      outputDir = this.outputDir
    } = options

    console.log(`\n🖼️ 编辑图片: "${prompt.substring(0, 50)}..."`)
    console.log(`📁 输入图片: ${Array.isArray(inputImages) ? inputImages.length : 1} 张`)
    console.log(`📏 分辨率: ${imageSize}`)

    const contents = [{
      role: 'user',
      parts: [{ text: prompt }, ...this.#buildImageParts(inputImages)]
    }]

    return this.#requestAndSave(contents, { aspectRatio, imageSize }, outputDir, 'edited')
  }

  /**
   * Build a prompt from a named template and run text-to-image with it.
   *
   * All of `args` are forwarded to the template's `generate`. When the LAST argument is
   * an object it is also used as the `textToImage` options, so a two-argument call
   * shares one object between template options and generation options.
   *
   * @param {string} templateName - Key of `Templates`.
   * @param {...*} args - Template arguments, optionally ending with generation options.
   * @returns {Promise<{text: string, images: Array<object>, savedFiles: string[]}>}
   * @throws {Error} When `templateName` is not a known template.
   */
  async generateFromTemplate(templateName, ...args) {
    // Own-property check so inherited keys such as "constructor" are rejected cleanly.
    if (!Object.hasOwn(Templates, templateName)) {
      throw new Error(`未知的模板: ${templateName}。可用模板: ${Object.keys(Templates).join(', ')}`)
    }
    const template = Templates[templateName]

    const lastArg = args[args.length - 1]
    const options = lastArg !== null && typeof lastArg === 'object' ? lastArg : {}
    const prompt = template.generate(...args)

    console.log(`📋 使用模板: ${template.name}`)
    return this.textToImage(prompt, options)
  }

  /**
   * Generate one image per prompt, sequentially. A failing prompt is recorded and does
   * not stop the batch. Files are named `batch_<n>.png`.
   *
   * @param {string[]} prompts
   * @param {object} [options] - Forwarded to `textToImage` for every prompt.
   * @returns {Promise<Array<{success: boolean, prompt: string, result?: object, error?: string}>>}
   */
  async batchGenerate(prompts, options = {}) {
    const results = []
    const total = prompts.length

    console.log(`\n🚀 开始批量生成，共 ${total} 个任务`)

    for (let i = 0; i < prompts.length; i++) {
      console.log(`\n[${i + 1}/${total}] 处理中...`)

      try {
        const result = await this.textToImage(prompts[i], {
          ...options,
          filename: `batch_${i + 1}.png`
        })
        results.push({ success: true, prompt: prompts[i], result })
      } catch (error) {
        console.error(`❌ 失败: ${error.message}`)
        results.push({ success: false, prompt: prompts[i], error: error.message })
      }
    }

    const successCount = results.filter(r => r.success).length
    console.log(`\n✨ 批量生成完成: ${successCount}/${total} 成功`)

    return results
  }

  /**
   * Create a multi-turn editing session that keeps the conversation history.
   *
   * @param {object} [options] - Forwarded to `GeminiAPI.generateContent`; `outputDir`,
   *   `aspectRatio` and `imageSize` fall back to this generator's values.
   * @returns {{send: function(string, (string|string[]|null)=): Promise<object>, getHistory: function(): Array<object>}}
   */
  createChatSession(options = {}) {
    const history = []

    // Resolve everything that needs the generator here: inside `send`, `this` is the
    // session object, not the generator, so `this.outputDir` would be undefined there.
    const outputDir = options.outputDir || this.outputDir
    const requestOptions = {
      ...options,
      aspectRatio: options.aspectRatio ?? this.defaultAspectRatio,
      imageSize: options.imageSize ?? this.defaultImageSize
    }
    const buildImageParts = (inputImages) => this.#buildImageParts(inputImages)
    const saveImages = (images) => this.#saveImages(images, outputDir, 'chat')

    return {
      async send(message, inputImages = null) {
        const parts = [{ text: message }]
        if (inputImages) {
          parts.push(...buildImageParts(inputImages))
        }

        const userTurn = { role: 'user', parts }
        history.push(userTurn)

        let response
        try {
          response = await GeminiAPI.generateContent(history, requestOptions)
        } catch (error) {
          // Drop the unanswered turn so a failed request does not pollute later requests.
          const failedIndex = history.lastIndexOf(userTurn)
          if (failedIndex !== -1) {
            history.splice(failedIndex, 1)
          }
          throw error
        }
        const result = GeminiAPI.parseResponse(response)

        // Record the model turn, including image data, so follow-up edits can refer to it.
        const modelParts = []
        if (result.text) {
          modelParts.push({ text: result.text })
        }
        for (const img of result.images) {
          modelParts.push({
            inlineData: {
              mime_type: img.mimeType,
              data: img.data
            }
          })
        }
        if (modelParts.length > 0) {
          history.push({
            role: 'model',
            parts: modelParts
          })
        }

        const savedFiles = saveImages(result.images)

        return {
          text: result.text,
          images: result.images,
          savedFiles
        }
      },

      getHistory() {
        return history
      }
    }
  }
}

// ============================================================================
// CLI接口模块
// ============================================================================

/**
 * Command-line front end: argument parsing, help output and command dispatch.
 */
const CLI = {
  /**
   * Parse raw command-line arguments.
   *
   * - The first bare argument is the command; later bare arguments are `params`.
   * - `--key value`, `--key=value` and `-k value` set an option; a flag followed by
   *   nothing (or by another flag) is stored as `true`.
   * - Known short flags (`-o -r -s -i -t -h`) are expanded to their long names.
   *
   * @param {string[]} args - Arguments after the script name.
   * @returns {{command: string, params: string[], options: Object<string, (string|boolean)>}}
   */
  parseArgs(args) {
    const shortOptions = {
      'o': 'output',
      'r': 'ratio',
      's': 'size',
      'i': 'input',
      't': 'template',
      'h': 'help'
    }

    const result = {
      command: '',
      params: [],
      options: {}
    }

    let i = 0
    while (i < args.length) {
      const arg = args[i]

      if (!arg.startsWith('-')) {
        if (!result.command) {
          result.command = arg
        } else {
          result.params.push(arg)
        }
        // Positional arguments must advance the cursor too; without this the loop never ends.
        i++
        continue
      }

      let key
      if (arg.startsWith('--')) {
        key = arg.substring(2)
        const equalsAt = key.indexOf('=')
        if (equalsAt > 0) {
          result.options[key.slice(0, equalsAt)] = key.slice(equalsAt + 1)
          i++
          continue
        }
      } else {
        const shortKey = arg.substring(1)
        key = Object.hasOwn(shortOptions, shortKey) ? shortOptions[shortKey] : shortKey
      }

      const nextArg = args[i + 1]
      if (nextArg && !nextArg.startsWith('-')) {
        result.options[key] = nextArg
        i += 2
      } else {
        result.options[key] = true
        i++
      }
    }

    return result
  },

  /**
   * Print usage, options, examples and the supported ratios, sizes and templates.
   */
  showHelp() {
    console.log(`
🎨 Gemini Image Generator - 云雾API图片生成工具
📦 模型: ${Config.api.model}

用法:
  node gemini-image-generator.js <command> [options]

命令:
  generate <prompt>         文生图
  edit <prompt>             图生图（需要 -i 指定输入图片）
  template <name>           使用模板生成
  batch <file>              批量生成（从文件读取提示词）
  list-templates            列出所有可用模板

选项:
  -o, --output <dir>        输出目录 (默认: ./output)
  -r, --ratio <ratio>       宽高比 (1:1, 16:9, 9:16, 3:2, 2:3 等)
  -s, --size <size>         分辨率 (512, 1K, 2K, 4K，默认: 2K)
  -i, --input <file>        输入图片路径（用于edit命令）
  -t, --template <name>     模板名称
  --text <text>             Logo文字（用于logo模板）
  --subject <subject>       主题内容
  --style <style>           风格
  -h, --help                显示帮助信息

示例:
  # 基础文生图 16:9 2K分辨率
  node gemini-image-generator.js generate "A cute cat wearing a hat" -o ./my-images -r 16:9 -s 2K

  # 高分辨率4K图片
  node gemini-image-generator.js generate "A landscape photo" -r 16:9 -s 4K

  # 图生图编辑
  node gemini-image-generator.js edit "Add sunglasses to this person" -i ./photo.jpg

  # 使用Logo模板
  node gemini-image-generator.js template logo --text "MyBrand" --style minimalist

  # 使用产品图模板
  node gemini-image-generator.js template product --subject "a minimalist ceramic coffee mug"

  # 批量生成
  node gemini-image-generator.js batch ./prompts.txt -o ./batch-output

可用宽高比:
  ${Config.aspectRatios.join(', ')}

可用分辨率:
  ${Config.imageSizes.join(', ')}

可用模板:
  ${Object.entries(Templates).map(([k, v]) => `${k} (${v.name})`).join('\n  ')}
`)
  },

  /**
   * Print every template key with its display name.
   */
  listTemplates() {
    console.log('\n📋 可用模板:\n')
    for (const [key, template] of Object.entries(Templates)) {
      console.log(`  ${key.padEnd(15)} - ${template.name}`)
    }
    console.log('')
  },

  /**
   * Parse `args` and execute the requested command.
   *
   * Usage errors are printed and set `process.exitCode` to 1; errors thrown by the
   * generator propagate to the caller.
   *
   * @param {string[]} args - Arguments after the script name.
   * @returns {Promise<void>}
   */
  async run(args) {
    const { command, params, options } = this.parseArgs(args)

    if (options.help || command === 'help' || !command) {
      this.showHelp()
      return
    }

    // A flag given without a value is parsed as `true`; only real strings are usable values.
    const asString = (value) => (typeof value === 'string' ? value : undefined)
    // Report a usage error and make the process exit non-zero.
    const fail = (message) => {
      console.error(message)
      process.exitCode = 1
    }

    const outputDir = asString(options.output)
    const aspectRatio = asString(options.ratio)
    const imageSize = asString(options.size)
    // Undefined entries fall back to the generator defaults configured just below.
    const generationOptions = { aspectRatio, imageSize, outputDir }

    const generator = new GeminiImageGenerator({
      outputDir: outputDir || Config.output.defaultDir,
      aspectRatio: aspectRatio || '1:1',
      imageSize: imageSize || Config.defaultImageSize
    })

    switch (command) {
      case 'generate': {
        const prompt = params.join(' ')
        if (!prompt) {
          fail('❌ 请提供生成提示词')
          return
        }
        await generator.textToImage(prompt, generationOptions)
        break
      }

      case 'edit': {
        const prompt = params.join(' ')
        // `-i` without a value yields `true`, which has no split(); blank entries are dropped.
        const inputImages = (asString(options.input) || '')
          .split(',')
          .map(p => p.trim())
          .filter(Boolean)

        if (!prompt) {
          fail('❌ 请提供编辑指令')
          return
        }
        if (inputImages.length === 0) {
          fail('❌ 请使用 -i 指定输入图片')
          return
        }

        await generator.imageToImage(prompt, inputImages, generationOptions)
        break
      }

      case 'template': {
        const templateName = params[0] || asString(options.template)
        if (!templateName) {
          this.listTemplates()
          return
        }

        // Own-property check so inherited keys such as "constructor" are not treated as templates.
        if (!Object.hasOwn(Templates, templateName)) {
          fail(`❌ 未知的模板: ${templateName}`)
          this.listTemplates()
          return
        }

        // Free-text arguments after the template name, used when --subject is absent.
        const trailingText = params.slice(1).join(' ')
        const subject = asString(options.subject) || trailingText

        // Each template takes its main input from a different option.
        switch (templateName) {
          case 'logo':
            await generator.generateFromTemplate('logo', asString(options.text) || '', {
              style: asString(options.style) || 'modern, minimalist',
              colorScheme: 'black and white'
            }, generationOptions)
            break

          case 'product':
            await generator.generateFromTemplate('product', subject || 'a product', {
              surface: 'polished concrete surface'
            }, generationOptions)
            break

          case 'photorealistic':
            await generator.generateFromTemplate('photorealistic', subject || 'a person', {}, generationOptions)
            break

          case 'sticker':
            await generator.generateFromTemplate('sticker', subject || 'a cute character', {}, generationOptions)
            break

          default:
            await generator.generateFromTemplate(templateName, trailingText || '', {}, generationOptions)
        }
        break
      }

      case 'batch': {
        const filePath = params[0]
        if (!filePath) {
          fail('❌ 请提供提示词文件路径')
          return
        }

        const prompts = FileUtils.readPromptsFile(filePath)
        await generator.batchGenerate(prompts, generationOptions)
        break
      }

      case 'list-templates': {
        this.listTemplates()
        break
      }

      default:
        fail(`❌ 未知命令: ${command}`)
        this.showHelp()
    }
  }
}

// ============================================================================
// 导出模块
// ============================================================================

module.exports = {
  // 核心类
  GeminiImageGenerator,

  // 模块
  Config,
  FileUtils,
  GeminiAPI,
  Templates,
  CLI,

  // 便捷方法
  generate: async (prompt, options) => {
    const generator = new GeminiImageGenerator(options)
    return generator.textToImage(prompt, options)
  },

  edit: async (prompt, images, options) => {
    const generator = new GeminiImageGenerator(options)
    return generator.imageToImage(prompt, images, options)
  },

  fromTemplate: async (templateName, ...args) => {
    const generator = new GeminiImageGenerator(args[args.length - 1] || {})
    return generator.generateFromTemplate(templateName, ...args)
  }
}

// ============================================================================
// 主入口
// ============================================================================

// 如果直接运行此脚本
// Run the CLI only when this file is executed directly, not when it is required.
if (require.main === module) {
  // Drop the node binary and script path; any rejection is reported and exits with status 1.
  CLI.run(process.argv.slice(2)).catch((err) => {
    console.error(`\n❌ 错误: ${err.message}`)
    process.exit(1)
  })
}