Files
sionrui/scripts/gemini-image-generator.js

907 lines
24 KiB
JavaScript
Raw Normal View History

/**
 * Gemini Image Generator - image generation tool for the Yunwu API
 *
 * Features:
 * - Text-to-image
 * - Image-to-image
 * - Prompt templates for several business scenarios
 * - Batch generation
 * - Custom output directory
 *
 * Usage examples:
 * node gemini-image-generator.js generate "A cute cat" -o ./output -r 16:9
 * node gemini-image-generator.js edit "Add sunglasses" -i ./photo.jpg
 * node gemini-image-generator.js template logo --text "MyBrand"
 * node gemini-image-generator.js batch ./prompts.txt
 */
const fs = require('fs')
const path = require('path')
// ============================================================================
// 配置模块
// ============================================================================
/**
 * Static configuration shared by every module in this file: API connection
 * settings, output defaults, the option values offered to the user and the
 * request timeouts.
 */
const Config = {
  // Yunwu API connection settings
  api: {
    baseUrl: 'https://yunwu.ai',
    model: 'gemini-3.1-flash-image-preview',
    endpoint: '/v1beta/models/gemini-3.1-flash-image-preview:generateContent',
    // The key is read from the YUNWU_API_KEY environment variable when it is
    // set (an empty value falls through to the literal below).
    // FIXME(security): the literal fallback is a credential committed to
    // source control. It is kept only so existing invocations keep working;
    // rotate it and remove the fallback.
    key: process.env.YUNWU_API_KEY || 'sk-BjGv7Nf3KJHTBT8OB8LiGM0vHISl8yFcfCxZAWIZO4yogD7N'
  },
  // Default output settings
  output: {
    defaultDir: './output',
    defaultFormat: 'png'
  },
  // Aspect ratios offered to the user
  aspectRatios: ['1:1', '2:3', '3:2', '3:4', '4:3', '4:5', '5:4', '9:16', '16:9', '21:9'],
  // Image sizes offered to the user
  imageSizes: ['512', '1K', '2K', '4K'],
  // Image size used when none is requested
  defaultImageSize: '2K',
  // Response modality presets sent in generationConfig.responseModalities
  responseModalities: {
    textAndImage: ['TEXT', 'IMAGE'],
    imageOnly: ['IMAGE'],
    textOnly: ['TEXT']
  },
  // Request timeouts in milliseconds
  timeout: {
    default: 120000, // 2 minutes
    max: 300000 // 5 minutes
  }
}
// ============================================================================
// 文件处理模块
// ============================================================================
/**
 * Synchronous filesystem helpers used by the generator.
 */
const FileUtils = {
  /**
   * Create `dirPath` (including missing parents) unless something already
   * exists at that path.
   * @param {string} dirPath
   * @returns {string} the same `dirPath`, for chaining
   */
  ensureDir(dirPath) {
    const alreadyPresent = fs.existsSync(dirPath)
    if (alreadyPresent) {
      return dirPath
    }
    fs.mkdirSync(dirPath, { recursive: true })
    return dirPath
  },

  /**
   * Read an image file and return its MIME type plus base64 payload.
   * The MIME type is chosen from the file extension; unrecognised
   * extensions are reported as PNG.
   * @param {string} imagePath
   * @returns {{mimeType: string, data: string}}
   */
  imageToBase64(imagePath) {
    // Read first so a missing/invalid path fails on the read itself.
    const fileBytes = fs.readFileSync(imagePath)
    let mimeType
    switch (path.extname(imagePath).toLowerCase()) {
      case '.jpg':
      case '.jpeg':
        mimeType = 'image/jpeg'
        break
      case '.gif':
        mimeType = 'image/gif'
        break
      case '.webp':
        mimeType = 'image/webp'
        break
      case '.png':
      default:
        mimeType = 'image/png'
    }
    return { mimeType, data: fileBytes.toString('base64') }
  },

  /**
   * Decode a base64 string and write the bytes to `outputPath`.
   * @param {string} base64Data
   * @param {string} outputPath
   * @returns {string} `outputPath`
   */
  base64ToImage(base64Data, outputPath) {
    fs.writeFileSync(outputPath, Buffer.from(base64Data, 'base64'))
    return outputPath
  },

  /**
   * Build a file name of the form `<prefix>_<ISO timestamp>_<random>.<ext>`,
   * where ':' and '.' in the timestamp are replaced by '-'.
   * @param {string} [prefix='image']
   * @param {string} [ext='png']
   * @returns {string}
   */
  generateFilename(prefix = 'image', ext = 'png') {
    const stamp = new Date().toISOString().replace(/[:.]/g, '-')
    const suffix = Math.random().toString(36).substring(2, 8)
    return `${prefix}_${stamp}_${suffix}.${ext}`
  },

  /**
   * Read a UTF-8 text file and return its non-blank lines, trimmed.
   * @param {string} filePath
   * @returns {string[]}
   */
  readPromptsFile(filePath) {
    const prompts = []
    for (const rawLine of fs.readFileSync(filePath, 'utf-8').split('\n')) {
      const cleaned = rawLine.trim()
      if (cleaned) {
        prompts.push(cleaned)
      }
    }
    return prompts
  }
}
// ============================================================================
// API调用模块
// ============================================================================
/**
 * Thin client for the generateContent endpoint configured in `Config.api`.
 */
const GeminiAPI = {
  /**
   * POST a generateContent request and return the parsed JSON response.
   *
   * @param {Array<object>} contents - conversation turns (`{ role, parts }`)
   * @param {object} [options]
   * @param {string} [options.aspectRatio='1:1']
   * @param {string} [options.imageSize] - defaults to `Config.defaultImageSize`
   * @param {string[]} [options.responseModalities] - defaults to text + image
   * @param {number} [options.timeout] - milliseconds, defaults to `Config.timeout.default`
   * @returns {Promise<object>} the decoded JSON body
   * @throws {Error} on a non-2xx status (message carries status and body text)
   *   or when the request exceeds `timeout` (the abort error is kept as `cause`)
   */
  async generateContent(contents, options = {}) {
    const {
      aspectRatio = '1:1',
      imageSize = Config.defaultImageSize,
      responseModalities = Config.responseModalities.textAndImage,
      timeout = Config.timeout.default
    } = options

    const { baseUrl, endpoint, model, key } = Config.api
    // The key is sent both as a query parameter and as a bearer token;
    // encode it so an unusual character cannot corrupt the query string.
    const url = `${baseUrl}${endpoint}?key=${encodeURIComponent(key)}`
    const body = {
      contents,
      generationConfig: {
        responseModalities,
        imageConfig: { aspectRatio, imageSize }
      }
    }

    // The logged URL deliberately omits the key.
    console.log(`\n📡 API请求: ${baseUrl}${endpoint}`)
    console.log(`📋 模型: ${model}`)
    console.log(`⏱️ 超时: ${timeout / 1000}`)

    // AbortController implements the timeout; only this timer aborts it.
    const controller = new AbortController()
    const timeoutId = setTimeout(() => controller.abort(), timeout)
    try {
      const response = await fetch(url, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${key}`
        },
        body: JSON.stringify(body),
        signal: controller.signal
      })
      if (!response.ok) {
        const errorText = await response.text()
        throw new Error(`API请求失败: ${response.status} - ${errorText}`)
      }
      return await response.json()
    } catch (error) {
      // An aborted signal can only mean our timer fired: report it as a
      // timeout instead of a bare "operation was aborted".
      if (controller.signal.aborted) {
        throw new Error(`API请求超时: 超过 ${timeout / 1000} 秒未响应`, { cause: error })
      }
      throw error
    } finally {
      clearTimeout(timeoutId)
    }
  },

  /**
   * Collect the text and inline images from the first candidate of a
   * generateContent response.
   *
   * Missing or malformed responses yield an empty result rather than
   * throwing. Inline image parts are accepted in both camelCase
   * (`inlineData.mimeType`) and snake_case (`inline_data.mime_type`) form.
   *
   * @param {object|null|undefined} response
   * @returns {{text: string, images: Array<{mimeType: string, data: string}>}}
   */
  parseResponse(response) {
    const result = { text: '', images: [] }
    const parts = response?.candidates?.[0]?.content?.parts
    if (!Array.isArray(parts)) {
      return result
    }
    for (const part of parts) {
      if (!part) {
        continue
      }
      if (part.text) {
        result.text += part.text
      }
      const inline = part.inlineData || part.inline_data
      if (inline) {
        result.images.push({
          mimeType: inline.mimeType || inline.mime_type,
          data: inline.data
        })
      }
    }
    return result
  }
}
// ============================================================================
// 业务场景模板模块
// ============================================================================
/**
 * Prompt templates keyed by template id. Each entry exposes a display
 * `name` and a `generate(mainText, options)` function that returns the
 * English prompt string sent to the model.
 */
const Templates = {
  /** Photorealistic shot; an environment sentence is added only when given. */
  photorealistic: {
    name: '写实照片',
    generate(subject, options = {}) {
      const {
        shotType = 'close-up portrait',
        lighting = 'soft, natural golden hour light',
        mood = 'serene',
        environment = '',
        cameraDetails = '85mm lens, shallow depth of field'
      } = options
      const sentences = [
        `A photorealistic ${shotType} of ${subject}.`,
        environment ? `Set in ${environment}.` : '',
        `The scene is illuminated by ${lighting}, creating a ${mood} atmosphere.`,
        `Captured with ${cameraDetails}.`,
        'Ultra-realistic, with sharp focus on key details.'
      ]
      // Drop the empty environment slot so spacing stays single.
      return sentences.filter(Boolean).join(' ')
    }
  },

  /** Sticker / icon artwork. */
  sticker: {
    name: '贴纸/图标',
    generate(subject, options = {}) {
      const { style = 'kawaii', colorPalette = 'vibrant', background = 'white' } = options
      const opening = `A ${style}-style sticker of ${subject}.`
      const design = `The design features bold, clean outlines, simple cel-shading, and a ${colorPalette} color palette.`
      const backdrop = `The background must be ${background}.`
      return `${opening} ${design} ${backdrop}`
    }
  },

  /** Logo design; the quoted text clause is omitted when `text` is empty. */
  logo: {
    name: 'Logo设计',
    generate(text, options = {}) {
      const {
        style = 'modern, minimalist',
        colorScheme = 'black and white',
        shape = 'circle'
      } = options
      const textClause = text ? ` with the text "${text}"` : ''
      return [
        `Create a ${style} logo${textClause}.`,
        'The text should be in a clean, bold, sans-serif font.',
        `The color scheme is ${colorScheme}.`,
        `Put the logo in a ${shape}.`
      ].join(' ')
    }
  },

  /** Studio product photograph. */
  product: {
    name: '产品图',
    generate(product, options = {}) {
      const {
        surface = 'polished concrete surface',
        lighting = 'three-point softbox setup',
        angle = 'slightly elevated 45-degree shot',
        background = 'minimalist'
      } = options
      return [
        `A high-resolution, studio-lit product photograph of ${product}, presented on a ${surface}.`,
        `The lighting is a ${lighting} designed to create soft, diffused highlights and eliminate harsh shadows.`,
        `The camera angle is a ${angle} to showcase key features.`,
        'Ultra-realistic.',
        `${background} background.`
      ].join(' ')
    }
  },

  /** Minimalist composition with negative space. */
  minimalist: {
    name: '极简设计',
    generate(subject, options = {}) {
      const {
        position = 'bottom-right',
        backgroundColor = 'off-white canvas',
        lighting = 'soft, diffused lighting from the top left'
      } = options
      const placement = `A minimalist composition featuring a single, ${subject} positioned in the ${position} of the frame.`
      const canvas = `The background is a vast, empty ${backgroundColor}, creating significant negative space for text.`
      return `${placement} ${canvas} ${lighting}.`
    }
  },

  /** Comic strip / storyboard; the scene text is appended verbatim. */
  comic: {
    name: '漫画/故事板',
    generate(scene, options = {}) {
      const { style = 'gritty, noir', panels = 3 } = options
      const brief = `Make a ${panels} panel comic in a ${style} art style with high-contrast black and white inks.`
      return `${brief} ${scene}`
    }
  },

  /** Restyle a provided image. */
  styleTransfer: {
    name: '风格转换',
    generate(targetStyle, options = {}) {
      const { preserveElements = 'composition and key elements' } = options
      const transform = `Transform the provided image into the artistic style of ${targetStyle}.`
      const keep = `Preserve the original ${preserveElements} but render with the new stylistic elements.`
      return `${transform} ${keep}`
    }
  },

  /** Edit instruction followed by a "keep the rest" clause. */
  edit: {
    name: '图像编辑',
    generate(instruction, options = {}) {
      const {
        preserve = 'Keep everything else unchanged, preserving the original style, lighting, and composition'
      } = options
      // Each fragment is terminated with a period, as in "<instruction>. <preserve>."
      return [instruction, preserve].map((fragment) => `${fragment}.`).join(' ')
    }
  },

  /** Combine several provided images; `options` is accepted but unused. */
  composite: {
    name: '图像合成',
    generate(description, options = {}) {
      const lead = 'Create a new image by combining the elements from the provided images.'
      const tail = 'Generate a realistic result with proper lighting and shadows.'
      return `${lead} ${description} ${tail}`
    }
  }
}
// ============================================================================
// 核心生成器类
// ============================================================================
/**
 * High-level image generator: builds requests, calls `GeminiAPI`, and writes
 * every returned image to disk through `FileUtils`.
 */
class GeminiImageGenerator {
  /**
   * @param {object} [options]
   * @param {string} [options.outputDir] - defaults to `Config.output.defaultDir`
   * @param {string} [options.aspectRatio] - defaults to '1:1'
   * @param {string} [options.imageSize] - defaults to `Config.defaultImageSize`
   */
  constructor(options = {}) {
    this.outputDir = options.outputDir || Config.output.defaultDir
    this.defaultAspectRatio = options.aspectRatio || '1:1'
    this.defaultImageSize = options.imageSize || Config.defaultImageSize
    if (!Config.api.key) {
      console.warn('警告: 未设置API密钥')
    }
  }

  /**
   * Write every image to `outputDir` and return the written paths.
   *
   * Without `filename` each image gets a generated unique name. With a fixed
   * `filename` the first image uses it as-is and later images get a numeric
   * suffix before the extension, so several images never overwrite each other.
   */
  #saveImages(images, outputDir, prefix, filename = null) {
    const savedFiles = []
    FileUtils.ensureDir(outputDir)
    for (const [index, img] of images.entries()) {
      // Fall back to png when the MIME type is missing or has no subtype.
      const ext = (img.mimeType || '').split('/')[1] || 'png'
      let outputFilename
      if (!filename) {
        outputFilename = FileUtils.generateFilename(prefix, ext)
      } else if (index === 0) {
        outputFilename = filename
      } else {
        const parsed = path.parse(filename)
        outputFilename = path.join(parsed.dir, `${parsed.name}_${index + 1}${parsed.ext}`)
      }
      const outputPath = path.join(outputDir, outputFilename)
      FileUtils.base64ToImage(img.data, outputPath)
      savedFiles.push(outputPath)
      console.log(`✅ 已保存: ${outputPath}`)
    }
    return savedFiles
  }

  /** Build the `parts` of a user turn: the text followed by one inline part per image path. */
  #buildUserParts(text, imagePaths) {
    const parts = [{ text }]
    for (const imagePath of imagePaths) {
      const { mimeType, data } = FileUtils.imageToBase64(imagePath)
      parts.push({ inlineData: { mime_type: mimeType, data } })
    }
    return parts
  }

  /**
   * Text-to-image generation.
   *
   * @param {string} prompt
   * @param {object} [options]
   * @param {string} [options.aspectRatio] - defaults to the instance value
   * @param {string} [options.imageSize] - defaults to the instance value
   * @param {string} [options.outputDir] - defaults to the instance value
   * @param {string|null} [options.filename] - fixed output name; generated when omitted
   * @returns {Promise<{text: string, images: object[], savedFiles: string[]}>}
   */
  async textToImage(prompt, options = {}) {
    const {
      aspectRatio = this.defaultAspectRatio,
      imageSize = this.defaultImageSize,
      outputDir = this.outputDir,
      filename = null
    } = options
    console.log(`\n🎨 生成图片: "${prompt.substring(0, 50)}..."`)
    console.log(`📐 宽高比: ${aspectRatio}`)
    console.log(`📏 分辨率: ${imageSize}`)

    const contents = [{ role: 'user', parts: [{ text: prompt }] }]
    const response = await GeminiAPI.generateContent(contents, { aspectRatio, imageSize })
    const result = GeminiAPI.parseResponse(response)
    if (result.text) {
      console.log(`📝 模型回复: ${result.text}`)
    }
    const savedFiles = this.#saveImages(result.images, outputDir, 'generated', filename)
    return { text: result.text, images: result.images, savedFiles }
  }

  /**
   * Image-to-image generation: the prompt plus one or more reference images.
   *
   * @param {string} prompt
   * @param {string|string[]} inputImages - path or list of paths
   * @param {object} [options] - `aspectRatio`, `imageSize`, `outputDir`
   * @returns {Promise<{text: string, images: object[], savedFiles: string[]}>}
   */
  async imageToImage(prompt, inputImages, options = {}) {
    const {
      aspectRatio = this.defaultAspectRatio,
      imageSize = this.defaultImageSize,
      outputDir = this.outputDir
    } = options
    const imagePaths = Array.isArray(inputImages) ? inputImages : [inputImages]
    console.log(`\n🖼️ 编辑图片: "${prompt.substring(0, 50)}..."`)
    console.log(`📁 输入图片: ${imagePaths.length}`)
    console.log(`📏 分辨率: ${imageSize}`)

    const contents = [{ role: 'user', parts: this.#buildUserParts(prompt, imagePaths) }]
    const response = await GeminiAPI.generateContent(contents, { aspectRatio, imageSize })
    const result = GeminiAPI.parseResponse(response)
    if (result.text) {
      console.log(`📝 模型回复: ${result.text}`)
    }
    const savedFiles = this.#saveImages(result.images, outputDir, 'edited')
    return { text: result.text, images: result.images, savedFiles }
  }

  /**
   * Build a prompt from a named template and generate an image from it.
   *
   * All `args` are forwarded to the template's `generate`; the LAST argument,
   * when it is an object, is also used as the `textToImage` options.
   *
   * @param {string} templateName - a key of `Templates`
   * @param {...*} args
   * @returns {Promise<{text: string, images: object[], savedFiles: string[]}>}
   * @throws {Error} when the template name is unknown
   */
  async generateFromTemplate(templateName, ...args) {
    // Own-property check so names such as "constructor" are not treated as templates.
    const template = Object.hasOwn(Templates, templateName) ? Templates[templateName] : undefined
    if (!template) {
      throw new Error(`未知的模板: ${templateName}。可用模板: ${Object.keys(Templates).join(', ')}`)
    }
    const lastArg = args[args.length - 1]
    const options = lastArg !== null && typeof lastArg === 'object' ? lastArg : {}
    const prompt = template.generate(...args)
    console.log(`📋 使用模板: ${template.name}`)
    return this.textToImage(prompt, options)
  }

  /**
   * Generate one image per prompt, sequentially. A failing prompt is recorded
   * and does not stop the batch.
   *
   * @param {string[]} prompts
   * @param {object} [options] - forwarded to `textToImage`
   * @returns {Promise<Array<object>>} one `{ success, prompt, result | error }` per prompt
   */
  async batchGenerate(prompts, options = {}) {
    const results = []
    const total = prompts.length
    console.log(`\n🚀 开始批量生成,共 ${total} 个任务`)
    for (const [index, prompt] of prompts.entries()) {
      console.log(`\n[${index + 1}/${total}] 处理中...`)
      try {
        const result = await this.textToImage(prompt, {
          ...options,
          filename: `batch_${index + 1}.png`
        })
        results.push({ success: true, prompt, result })
      } catch (error) {
        console.error(`❌ 失败: ${error.message}`)
        results.push({ success: false, prompt, error: error.message })
      }
    }
    const successCount = results.filter((entry) => entry.success).length
    console.log(`\n✨ 批量生成完成: ${successCount}/${total} 成功`)
    return results
  }

  /**
   * Create a multi-turn editing session that keeps the conversation history
   * (including returned images) so later turns can refine earlier results.
   *
   * @param {object} [options] - `outputDir` plus any `GeminiAPI.generateContent`
   *   options; aspect ratio and image size default to the instance values
   * @returns {{send: Function, getHistory: Function}}
   */
  createChatSession(options = {}) {
    const history = []
    // Resolve these here: inside the returned object `this` would be the
    // session object, not the generator.
    const outputDir = options.outputDir || this.outputDir
    const requestOptions = {
      ...options,
      aspectRatio: options.aspectRatio ?? this.defaultAspectRatio,
      imageSize: options.imageSize ?? this.defaultImageSize
    }

    const send = async (message, inputImages = null) => {
      let imagePaths = []
      if (inputImages) {
        imagePaths = Array.isArray(inputImages) ? inputImages : [inputImages]
      }
      const userTurn = { role: 'user', parts: this.#buildUserParts(message, imagePaths) }
      history.push(userTurn)

      let response
      try {
        response = await GeminiAPI.generateContent(history, requestOptions)
      } catch (error) {
        // Remove the unanswered user turn so a retry does not send it twice.
        const position = history.lastIndexOf(userTurn)
        if (position !== -1) {
          history.splice(position, 1)
        }
        throw error
      }
      const result = GeminiAPI.parseResponse(response)

      // Record the model reply, images included, so follow-up edits can refer to it.
      const modelParts = []
      if (result.text) {
        modelParts.push({ text: result.text })
      }
      for (const img of result.images) {
        modelParts.push({ inlineData: { mime_type: img.mimeType, data: img.data } })
      }
      if (modelParts.length > 0) {
        history.push({ role: 'model', parts: modelParts })
      }

      const savedFiles = this.#saveImages(result.images, outputDir, 'chat')
      return { text: result.text, images: result.images, savedFiles }
    }

    return {
      send,
      getHistory: () => history
    }
  }
}
// ============================================================================
// CLI接口模块
// ============================================================================
/**
 * Command-line interface: argument parsing, help output and command dispatch.
 */
const CLI = {
  /**
   * Parse raw command-line arguments.
   *
   * The first positional argument is the command; the remaining positionals
   * are collected in `params`. Options may be written as `--name value`,
   * `--name=value` or `-x value` (known short aliases are expanded). An
   * option with no value, or followed by another option, is set to `true`.
   * `help` never consumes a value.
   *
   * @param {string[]} args
   * @returns {{command: string, params: string[], options: Object<string, string|boolean>}}
   */
  parseArgs(args) {
    const shortOptions = {
      o: 'output',
      r: 'ratio',
      s: 'size',
      i: 'input',
      t: 'template',
      h: 'help'
    }
    const booleanOptions = new Set(['help'])
    const result = { command: '', params: [], options: {} }

    let i = 0
    while (i < args.length) {
      const arg = args[i]
      // Advance unconditionally: every branch consumes the current argument.
      i++

      if (!arg.startsWith('-')) {
        if (!result.command) {
          result.command = arg
        } else {
          result.params.push(arg)
        }
        continue
      }

      const isLong = arg.startsWith('--')
      let key = arg.substring(isLong ? 2 : 1)
      let value = true
      const equalsAt = key.indexOf('=')
      if (isLong && equalsAt > 0) {
        value = key.substring(equalsAt + 1)
        key = key.substring(0, equalsAt)
      } else {
        if (!isLong && Object.hasOwn(shortOptions, key)) {
          key = shortOptions[key]
        }
        const next = args[i]
        if (!booleanOptions.has(key) && next && !next.startsWith('-')) {
          value = next
          i++
        }
      }
      result.options[key] = value
    }
    return result
  },

  /**
   * Print usage, options, examples and the configured ratios, sizes and templates.
   */
  showHelp() {
    console.log(`
🎨 Gemini Image Generator - 云雾API图片生成工具
📦 模型: ${Config.api.model}
用法:
node gemini-image-generator.js <command> [options]
命令:
generate <prompt> 文生图
edit <prompt> 图生图需要 -i 指定输入图片
template <name> 使用模板生成
batch <file> 批量生成从文件读取提示词
list-templates 列出所有可用模板
选项:
-o, --output <dir> 输出目录 (默认: ./output)
-r, --ratio <ratio> 宽高比 (1:1, 16:9, 9:16, 3:2, 2:3 )
-s, --size <size> 分辨率 (512, 1K, 2K, 4K默认: 2K)
-i, --input <file> 输入图片路径用于edit命令
-t, --template <name> 模板名称
--text <text> Logo文字用于logo模板
--subject <subject> 主题内容
--style <style> 风格
-h, --help 显示帮助信息
示例:
# 基础文生图 16:9 2K分辨率
node gemini-image-generator.js generate "A cute cat wearing a hat" -o ./my-images -r 16:9 -s 2K
# 高分辨率4K图片
node gemini-image-generator.js generate "A landscape photo" -r 16:9 -s 4K
# 图生图编辑
node gemini-image-generator.js edit "Add sunglasses to this person" -i ./photo.jpg
# 使用Logo模板
node gemini-image-generator.js template logo --text "MyBrand" --style minimalist
# 使用产品图模板
node gemini-image-generator.js template product --subject "a minimalist ceramic coffee mug"
# 批量生成
node gemini-image-generator.js batch ./prompts.txt -o ./batch-output
可用宽高比:
${Config.aspectRatios.join(', ')}
可用分辨率:
${Config.imageSizes.join(', ')}
可用模板:
${Object.entries(Templates).map(([k, v]) => `${k} (${v.name})`).join('\n ')}
`)
  },

  /**
   * Print every template id with its display name.
   */
  listTemplates() {
    console.log('\n📋 可用模板:\n')
    for (const [key, template] of Object.entries(Templates)) {
      console.log(` ${key.padEnd(15)} - ${template.name}`)
    }
    console.log('')
  },

  /**
   * Parse `args` and execute the requested command.
   *
   * Usage errors are printed and mark the process as failed through
   * `process.exitCode`; errors thrown by the generator propagate to the caller.
   *
   * @param {string[]} args - arguments after the script name
   * @returns {Promise<void>}
   */
  async run(args) {
    const { command, params, options } = this.parseArgs(args)
    if (options.help || command === 'help' || !command) {
      this.showHelp()
      return
    }

    // Report a usage error and make the process exit with a non-zero status.
    const fail = (message) => {
      console.error(message)
      process.exitCode = 1
    }
    // An option given without a value parses to `true`; treat that as absent.
    const textOption = (name) => (typeof options[name] === 'string' ? options[name] : '')

    const generator = new GeminiImageGenerator({
      outputDir: options.output || Config.output.defaultDir,
      aspectRatio: options.ratio || '1:1',
      imageSize: options.size || Config.defaultImageSize
    })
    const generationOptions = {
      aspectRatio: options.ratio,
      imageSize: options.size,
      outputDir: options.output
    }

    switch (command) {
      case 'generate': {
        const prompt = params.join(' ')
        if (!prompt) {
          fail('❌ 请提供生成提示词')
          return
        }
        await generator.textToImage(prompt, generationOptions)
        break
      }
      case 'edit': {
        const prompt = params.join(' ')
        const inputImages = textOption('input')
          .split(',')
          .map((entry) => entry.trim())
          .filter(Boolean)
        if (!prompt) {
          fail('❌ 请提供编辑指令')
          return
        }
        if (inputImages.length === 0) {
          fail('❌ 请使用 -i 指定输入图片')
          return
        }
        await generator.imageToImage(prompt, inputImages, generationOptions)
        break
      }
      case 'template': {
        const isTemplate = (name) => typeof name === 'string' && Object.hasOwn(Templates, name)
        // The template normally comes first positionally. When it does not,
        // -t/--template names it and every positional belongs to the subject.
        let templateName = params[0]
        let subjectParams = params.slice(1)
        if (!isTemplate(templateName) && typeof options.template === 'string') {
          templateName = options.template
          subjectParams = params
        }
        if (!templateName) {
          this.listTemplates()
          return
        }
        if (!isTemplate(templateName)) {
          fail(`❌ 未知的模板: ${templateName}`)
          this.listTemplates()
          return
        }

        const subjectText = textOption('subject') || subjectParams.join(' ')
        const defaultSubjects = {
          product: 'a product',
          photorealistic: 'a person',
          sticker: 'a cute character'
        }
        let mainText
        let templateSettings = {}
        if (templateName === 'logo') {
          mainText = textOption('text')
          templateSettings = {
            style: textOption('style') || 'modern, minimalist',
            colorScheme: 'black and white'
          }
        } else if (templateName === 'product') {
          mainText = subjectText || defaultSubjects.product
          templateSettings = { surface: 'polished concrete surface' }
        } else if (Object.hasOwn(defaultSubjects, templateName)) {
          mainText = subjectText || defaultSubjects[templateName]
        } else {
          mainText = subjectText
        }
        await generator.generateFromTemplate(templateName, mainText, templateSettings, generationOptions)
        break
      }
      case 'batch': {
        const filePath = params[0]
        if (!filePath) {
          fail('❌ 请提供提示词文件路径')
          return
        }
        const prompts = FileUtils.readPromptsFile(filePath)
        await generator.batchGenerate(prompts, generationOptions)
        break
      }
      case 'list-templates': {
        this.listTemplates()
        break
      }
      default:
        fail(`❌ 未知命令: ${command}`)
        this.showHelp()
    }
  }
}
// ============================================================================
// 导出模块
// ============================================================================
module.exports = {
// 核心类
GeminiImageGenerator,
// 模块
Config,
FileUtils,
GeminiAPI,
Templates,
CLI,
// 便捷方法
generate: async (prompt, options) => {
const generator = new GeminiImageGenerator(options)
return generator.textToImage(prompt, options)
},
edit: async (prompt, images, options) => {
const generator = new GeminiImageGenerator(options)
return generator.imageToImage(prompt, images, options)
},
fromTemplate: async (templateName, ...args) => {
const generator = new GeminiImageGenerator(args[args.length - 1] || {})
return generator.generateFromTemplate(templateName, ...args)
}
}
// ============================================================================
// 主入口
// ============================================================================
// Run the CLI only when this file is executed directly, not when it is required.
if (require.main === module) {
  CLI.run(process.argv.slice(2)).catch((err) => {
    console.error(`\n❌ 错误: ${err.message}`)
    process.exit(1)
  })
}