Files
video-create/.claude/skills/video-from-script/scripts/gpt-image-generator.js
sion123 35488beef2 feat(skills): 集成 GPT Image 图片生成和编辑能力
- 新增 gpt-image-generator.js 脚本,支持文生图、图生图/重绘、批量生成
- 更新 pipeline 和 phase-images 支持 GPT Image 模型
- 更新技能文档,添加 GPT Image 使用说明和 API 特点
- 新增配置文件中的 GPT Image API 参数
2026-05-05 23:49:30 +08:00

532 lines
16 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
/**
 * GPT Image Generator - image generation tool for the GPT Image models
 *
 * Supported models: gpt-image-2, gpt-image-1.5, gpt-image-1
 * Requests go through the Yunwu (yunwu.ai) API proxy and follow the OpenAI Images API format
 *
 * Features:
 * - Text-to-image — /v1/images/generations
 * - Image-to-image / redraw — /v1/images/edits (multipart)
 * - First/last frame editing
 * - Batch generation
 *
 * Usage:
 * node gpt-image-generator.js generate "a cute cat" -o ./output -r 16:9
 * node gpt-image-generator.js edit "add sunglasses" -i ./photo.jpg -o ./output
 * node gpt-image-generator.js batch ./prompts.txt -o ./output
 */
const fs = require('fs')
const path = require('path')
const https = require('https')
const http = require('http')
// ============================================================================
// 配置
// ============================================================================
/**
 * Read the skill configuration from `config.json`, located two directories
 * above this script.
 *
 * @returns {*} The parsed JSON value, or an empty object when the file does
 *   not exist.
 * @throws {SyntaxError} When the file exists but does not contain valid JSON.
 */
function loadConfig() {
  const configFile = path.join(__dirname, '..', '..', 'config.json')
  if (!fs.existsSync(configFile)) {
    return {}
  }
  const rawText = fs.readFileSync(configFile, 'utf-8')
  return JSON.parse(rawText)
}
// Raw values read from config.json (empty object when the file is absent).
const cfg = loadConfig()

// Effective runtime settings: each configurable entry falls back to a
// built-in default when the configured value is missing or falsy.
const Config = (() => {
  const orDefault = (value, fallback) => value || fallback
  return {
    baseUrl: orDefault(cfg.gptImageApiBaseUrl, 'https://yunwu.ai'),
    apiKey: orDefault(cfg.gptImageApiKey, ''),
    model: orDefault(cfg.gptImageModel, 'gpt-image-2'),
    // Milliseconds.
    timeout: 120000,
  }
})()
// Aspect ratio → suggested resolution
// (gpt-image-2 constraints: max edge 3840, multiples of 16, ratio ≤ 3:1)
const RATIO_SIZE_MAP = Object.fromEntries([
  ['1:1', '1024x1024'],
  ['3:2', '1536x1024'],
  ['2:3', '1024x1536'],
  ['3:4', '1152x1536'],
  ['4:3', '1536x1152'],
  ['4:5', '1024x1280'],
  ['5:4', '1280x1024'],
  ['9:16', '1088x1920'],
  ['16:9', '1920x1088'],
  ['21:9', '2048x880'],
])
// ============================================================================
// API 调用
// ============================================================================
// File extension → MIME type used for images uploaded to the edits endpoint.
const IMAGE_MIME_TYPES = {
  '.png': 'image/png',
  '.jpg': 'image/jpeg',
  '.jpeg': 'image/jpeg',
  '.webp': 'image/webp',
  '.gif': 'image/gif',
}

/**
 * Build an abort signal that enforces `Config.timeout` on a request.
 *
 * @returns {AbortSignal|undefined} A timeout signal, or undefined when
 *   `Config.timeout` is not a positive number (no timeout is applied then).
 */
function createRequestTimeoutSignal() {
  const timeoutMs = Number(Config.timeout)
  return timeoutMs > 0 ? AbortSignal.timeout(timeoutMs) : undefined
}

const GptImageApi = {
  /**
   * Text-to-image — POST /v1/images/generations with a JSON body.
   *
   * @param {string} prompt - Text prompt describing the image.
   * @param {Object} [options]
   * @param {string} [options.model=Config.model]
   * @param {number} [options.n=1] - Number of images requested.
   * @param {string} [options.size='1024x1024']
   * @param {string} [options.quality='auto']
   * @param {string} [options.format='png']
   * @param {number} [options.outputCompression] - Sent as `output_compression` when defined.
   * @param {string} [options.moderation='auto'] - Only sent when it differs from 'auto'.
   * @returns {Promise<Object>} The parsed JSON response body.
   * @throws {Error} When the HTTP status is not OK; the request also rejects
   *   if it exceeds `Config.timeout`.
   */
  async generate(prompt, options = {}) {
    const {
      model = Config.model,
      n = 1,
      size = '1024x1024',
      quality = 'auto',
      format = 'png',
      outputCompression,
      moderation = 'auto',
    } = options
    const body = {
      model,
      prompt,
      n,
      size,
      quality,
      // NOTE(review): OpenAI's Images API names this field `output_format`;
      // confirm which name the proxy expects before renaming it.
      format,
    }
    if (outputCompression !== undefined) body.output_compression = outputCompression
    if (moderation !== 'auto') body.moderation = moderation
    console.log(`\n📡 GPT Image 文生图请求`)
    console.log(` 模型: ${model}`)
    console.log(` 尺寸: ${size} 质量: ${quality}`)
    const res = await fetch(`${Config.baseUrl}/v1/images/generations`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${Config.apiKey}`,
      },
      body: JSON.stringify(body),
      // Config.timeout was previously declared but never enforced.
      signal: createRequestTimeoutSignal(),
    })
    if (!res.ok) {
      const errText = await res.text()
      throw new Error(`GPT Image 生成失败: ${res.status} - ${errText}`)
    }
    return res.json()
  },

  /**
   * Image-to-image / edit — POST /v1/images/edits (multipart/form-data).
   *
   * @param {string} prompt - Edit instruction.
   * @param {string|string[]} imagePaths - Input image path(s); the first is the
   *   image being edited, the rest are references. A single string is accepted.
   * @param {Object} [options]
   * @param {string} [options.model=Config.model]
   * @param {number} [options.n=1] - Only sent when greater than 1.
   * @param {string} [options.size] - Only sent when provided.
   * @param {string} [options.maskPath] - Optional mask image path (uploaded as image/png).
   * @returns {Promise<Object>} The parsed JSON response body.
   * @throws {Error} When no input image is given or the HTTP status is not OK.
   */
  async edit(prompt, imagePaths, options = {}) {
    const {
      model = Config.model,
      n = 1,
      size,
      maskPath,
    } = options
    // A bare string would otherwise be iterated character by character.
    const inputPaths = Array.isArray(imagePaths) ? imagePaths : [imagePaths]
    if (inputPaths.length === 0 || !inputPaths[0]) {
      throw new Error('GPT Image 编辑失败: 未提供输入图片')
    }
    const fd = new FormData()
    fd.append('model', model)
    fd.append('prompt', prompt)
    if (n > 1) fd.append('n', String(n))
    if (size) fd.append('size', size)
    // Attach every input image under the repeated `image` field.
    for (const imgPath of inputPaths) {
      const buf = fs.readFileSync(imgPath)
      const ext = path.extname(imgPath).toLowerCase()
      const mimeType = IMAGE_MIME_TYPES[ext] || 'image/png'
      fd.append('image', new Blob([buf], { type: mimeType }), path.basename(imgPath))
    }
    if (maskPath) {
      const maskBuf = fs.readFileSync(maskPath)
      fd.append('mask', new Blob([maskBuf], { type: 'image/png' }), path.basename(maskPath))
    }
    console.log(`\n📡 GPT Image 编辑请求`)
    console.log(` 模型: ${model}`)
    console.log(` 输入图片: ${inputPaths.length}${maskPath ? ' + 蒙版' : ''}`)
    if (size) console.log(` 尺寸: ${size}`)
    const res = await fetch(`${Config.baseUrl}/v1/images/edits`, {
      method: 'POST',
      // No Content-Type header: fetch derives the multipart boundary from the FormData body.
      headers: {
        'Authorization': `Bearer ${Config.apiKey}`,
      },
      body: fd,
      signal: createRequestTimeoutSignal(),
    })
    if (!res.ok) {
      const errText = await res.text()
      throw new Error(`GPT Image 编辑失败: ${res.status} - ${errText}`)
    }
    return res.json()
  },

  /**
   * Extract images from an API response. Both base64 (`b64_json`) and URL
   * entries are supported; entries with neither are dropped.
   *
   * @param {Object} response - Parsed response body.
   * @returns {{images: Array<{data?: string, url?: string, revised_prompt?: string}>}}
   *   `images` is empty when the response has no usable `data` array.
   */
  parseResponse(response) {
    const items = Array.isArray(response?.data) ? response.data : []
    const images = []
    for (const item of items) {
      if (!item) continue
      if (item.b64_json) {
        images.push({ data: item.b64_json, url: item.url, revised_prompt: item.revised_prompt })
      } else if (item.url) {
        images.push({ url: item.url, revised_prompt: item.revised_prompt })
      }
    }
    return { images }
  },
}
// ============================================================================
// 文件处理
// ============================================================================
const FileUtils = {
  /**
   * Create a directory (including missing parents) unless the path already exists.
   *
   * @param {string} dirPath
   * @returns {string} The same `dirPath`, for chaining.
   */
  ensureDir(dirPath) {
    if (!fs.existsSync(dirPath)) {
      fs.mkdirSync(dirPath, { recursive: true })
    }
    return dirPath
  },

  /**
   * Build a file name of the form `<prefix>_<ISO timestamp>_<random>.<ext>`.
   * Colons and dots in the timestamp are replaced with dashes.
   *
   * @param {string} [prefix='image']
   * @param {string} [ext='png'] - Extension without the leading dot.
   * @returns {string}
   */
  generateFilename(prefix = 'image', ext = 'png') {
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-')
    const random = Math.random().toString(36).substring(2, 8)
    return `${prefix}_${timestamp}_${random}.${ext}`
  },

  /**
   * Read a prompts file: one prompt per line, trimmed, blank lines skipped.
   *
   * @param {string} filePath
   * @returns {string[]}
   */
  readPromptsFile(filePath) {
    const content = fs.readFileSync(filePath, 'utf-8')
    return content
      .split('\n')
      .map((line) => line.trim())
      .filter(Boolean)
  },

  /**
   * Download `url` to `outputPath`, following HTTP redirects.
   *
   * The output file is only created once a 2xx response arrives, so a failed
   * request never leaves an error page saved under an image name.
   *
   * @param {string} url - Absolute http(s) URL.
   * @param {string} outputPath - Destination file path.
   * @param {number} [maxRedirects=5] - Remaining redirects allowed.
   * @returns {Promise<string>} Resolves with `outputPath` once the file is written.
   * @throws {Error} Rejects on network errors, non-2xx statuses, too many
   *   redirects, or write failures.
   */
  async downloadImage(url, outputPath, maxRedirects = 5) {
    const protocol = new URL(url).protocol === 'https:' ? https : http
    return new Promise((resolve, reject) => {
      const request = protocol.get(url, (response) => {
        const { statusCode, headers } = response
        if (statusCode >= 300 && statusCode < 400 && headers.location) {
          // Drain the redirect body so the socket is released.
          response.resume()
          if (maxRedirects <= 0) {
            reject(new Error(`图片下载失败: 重定向次数过多 (${url})`))
            return
          }
          // Location may be relative; resolve it against the current URL.
          const nextUrl = new URL(headers.location, url).toString()
          FileUtils.downloadImage(nextUrl, outputPath, maxRedirects - 1).then(resolve, reject)
          return
        }
        if (statusCode < 200 || statusCode >= 300) {
          response.resume()
          reject(new Error(`图片下载失败: HTTP ${statusCode} (${url})`))
          return
        }
        const file = fs.createWriteStream(outputPath)
        const fail = (err) => {
          file.destroy()
          // Remove the partial file before reporting the failure.
          fs.rm(outputPath, { force: true }, () => reject(err))
        }
        response.on('error', fail)
        file.on('error', fail)
        file.on('finish', () => {
          file.close()
          resolve(outputPath)
        })
        response.pipe(file)
      })
      request.on('error', reject)
    })
  },
}
// ============================================================================
// 核心生成器
// ============================================================================
/**
 * Persist parsed images to disk: base64 payloads are decoded and written,
 * URL-only entries are downloaded.
 *
 * @param {Array<{data?: string, url?: string}>} images - Parsed image entries.
 * @param {string} outputDir - Target directory (created when missing).
 * @param {(index: number, total: number) => string} nameFor - Returns the file
 *   name for the image at `index`.
 * @returns {Promise<string[]>} Paths of the saved files, in order.
 */
async function saveGeneratedImages(images, outputDir, nameFor) {
  const savedFiles = []
  FileUtils.ensureDir(outputDir)
  for (let index = 0; index < images.length; index++) {
    const img = images[index]
    const outputPath = path.join(outputDir, nameFor(index, images.length))
    if (img.data) {
      fs.writeFileSync(outputPath, Buffer.from(img.data, 'base64'))
    } else if (img.url) {
      await FileUtils.downloadImage(img.url, outputPath)
    }
    savedFiles.push(outputPath)
    console.log(`✅ 已保存: ${outputPath}`)
  }
  return savedFiles
}

/**
 * High-level generator that calls the GPT Image API and saves the results.
 */
class GptImageGenerator {
  /**
   * @param {Object} [options]
   * @param {string} [options.outputDir='./output'] - Default output directory.
   * @param {string} [options.size='1024x1024'] - Default image size.
   * @param {string} [options.quality='auto'] - Default quality.
   */
  constructor(options = {}) {
    this.outputDir = options.outputDir || './output'
    this.defaultSize = options.size || '1024x1024'
    this.defaultQuality = options.quality || 'auto'
    if (!Config.apiKey) {
      console.warn('警告: 未设置 gptImageApiKey')
    }
  }

  /**
   * Text-to-image — generate image(s) from a text prompt and save them.
   *
   * @param {string} prompt
   * @param {Object} [options]
   * @param {string} [options.size] - Defaults to the instance default size.
   * @param {string} [options.quality] - Defaults to the instance default quality.
   * @param {string} [options.format='png'] - 'jpeg' is saved with a `.jpg` extension.
   * @param {number} [options.n=1] - Number of images requested.
   * @param {string} [options.outputDir] - Defaults to the instance output directory.
   * @param {string|null} [options.filename=null] - Explicit file name. When more
   *   than one image is returned, `_1`, `_2`, … is inserted before the
   *   extension so images do not overwrite each other.
   * @returns {Promise<{images: Object[], savedFiles: string[]}>}
   */
  async textToImage(prompt, options = {}) {
    const {
      size = this.defaultSize,
      quality = this.defaultQuality,
      format = 'png',
      n = 1,
      outputDir = this.outputDir,
      filename = null,
    } = options
    console.log(`\n🎨 GPT Image 文生图: "${prompt.substring(0, 80)}..."`)
    console.log(`📐 尺寸: ${size} 🎯 质量: ${quality}`)
    const response = await GptImageApi.generate(prompt, { size, quality, format, n })
    const result = GptImageApi.parseResponse(response)
    const ext = format === 'jpeg' ? 'jpg' : format
    const savedFiles = await saveGeneratedImages(result.images, outputDir, (index, total) => {
      if (!filename) return FileUtils.generateFilename('gpt_gen', ext)
      if (total === 1) return filename
      // Several images share one requested name: disambiguate with an index.
      const parsed = path.parse(filename)
      return path.join(parsed.dir, `${parsed.name}_${index + 1}${parsed.ext}`)
    })
    return { images: result.images, savedFiles }
  }

  /**
   * Image-to-image / redraw — edit using one or more input images.
   *
   * @param {string} prompt - Edit instruction.
   * @param {string|string[]} inputImages - One path or a list of paths.
   * @param {Object} [options]
   * @param {string} [options.size] - Defaults to the instance default size.
   * @param {string} [options.outputDir] - Defaults to the instance output directory.
   * @param {string|null} [options.maskPath=null] - Optional mask image path.
   * @returns {Promise<{images: Object[], savedFiles: string[]}>} Results are
   *   saved with a `.png` extension.
   */
  async imageToImage(prompt, inputImages, options = {}) {
    const {
      size = this.defaultSize,
      outputDir = this.outputDir,
      maskPath = null,
    } = options
    const imgPaths = Array.isArray(inputImages) ? inputImages : [inputImages]
    console.log(`\n🖼️ GPT Image 编辑: "${prompt.substring(0, 80)}..."`)
    console.log(`📁 输入图片: ${imgPaths.length}`)
    const response = await GptImageApi.edit(prompt, imgPaths, { size, maskPath, n: 1 })
    const result = GptImageApi.parseResponse(response)
    const savedFiles = await saveGeneratedImages(result.images, outputDir, () =>
      FileUtils.generateFilename('gpt_edit', 'png')
    )
    return { images: result.images, savedFiles }
  }

  /**
   * Batch text-to-image: prompts are processed one after another, and a
   * failure of one prompt does not stop the rest.
   *
   * @param {string[]} prompts
   * @param {Object} [options] - Forwarded to `textToImage`; files are named
   *   `batch_001.<ext>`, `batch_002.<ext>`, … with the extension derived from
   *   `options.format` (default png).
   * @returns {Promise<Array<{success: boolean, prompt: string, result?: Object, error?: string}>>}
   */
  async batchGenerate(prompts, options = {}) {
    const results = []
    const total = prompts.length
    const format = options.format || 'png'
    // Keep the file extension in line with the format actually requested.
    const ext = format === 'jpeg' ? 'jpg' : format
    console.log(`\n🚀 GPT Image 批量生成,共 ${total} 个任务`)
    for (let i = 0; i < prompts.length; i++) {
      console.log(`\n[${i + 1}/${total}] 处理中...`)
      try {
        const result = await this.textToImage(prompts[i], {
          ...options,
          filename: `batch_${String(i + 1).padStart(3, '0')}.${ext}`,
        })
        results.push({ success: true, prompt: prompts[i], result })
      } catch (error) {
        console.error(`❌ 失败: ${error.message}`)
        results.push({ success: false, prompt: prompts[i], error: error.message })
      }
    }
    const successCount = results.filter(r => r.success).length
    console.log(`\n✨ 批量生成完成: ${successCount}/${total} 成功`)
    return results
  }
}
// ============================================================================
// 便捷函数(供 pipeline 调用)
// ============================================================================
/**
 * Map an aspect-ratio string (e.g. '16:9') to a size string from
 * RATIO_SIZE_MAP.
 *
 * @param {string} ratio - Aspect ratio; surrounding whitespace is ignored.
 * @param {string} [quality='auto'] - Accepted for interface compatibility;
 *   not used by the lookup.
 * @returns {string} The mapped size, or '1024x1024' when the ratio is unknown.
 */
function ratioToSize(ratio, quality = 'auto') {
  const key = typeof ratio === 'string' ? ratio.trim() : ratio
  // Own-property check: a plain lookup would return inherited members such as
  // `toString` for keys that are not real ratios.
  return Object.hasOwn(RATIO_SIZE_MAP, key) ? RATIO_SIZE_MAP[key] : '1024x1024'
}
// ============================================================================
// CLI
// ============================================================================
/**
 * Print CLI usage to stdout: commands, options, examples, and the table of
 * supported aspect ratios with their default sizes (from RATIO_SIZE_MAP).
 */
function showHelp() {
  // Each ratio row is rendered as "<ratio> → <size>" so the two values stay readable.
  const ratioRows = Object.entries(RATIO_SIZE_MAP).map(([k, v]) => `${k} → ${v}`).join('\n ')
  console.log(`
🎨 GPT Image Generator - 云雾API GPT Image 图片生成工具
📦 模型: ${Config.model}
用法:
node gpt-image-generator.js <command> [options]
命令:
generate <prompt> 文生图
edit <prompt> 图生图/重绘(需要 -i 指定输入图片)
batch <file> 批量生成(从文件读取提示词)
选项:
-o, --output <dir> 输出目录 (默认: ./output)
-r, --ratio <ratio> 宽高比 (1:1, 16:9, 9:16, 3:4, 4:3 等)
-s, --size <size> 尺寸 (1024x1024, 1088x1920, auto 等)
-q, --quality <q> 质量 (low, medium, high, auto)
-f, --format <fmt> 格式 (png, jpeg, webp)
-i, --input <files> 输入图片 (edit 模式, 逗号分隔)
--mask <file> 蒙版图片 (edit 模式)
-n <num> 生成数量 (默认: 1)
-h, --help 显示帮助
示例:
# 文生图 9:16
node gpt-image-generator.js generate "A cat wearing a hat" -r 9:16 -q medium
# 图生图/重绘
node gpt-image-generator.js edit "Add sunglasses" -i ./photo.jpg
# 多张参考图编辑
node gpt-image-generator.js edit "Combine these items into a gift basket" -i ./a.jpg,./b.jpg
# 批量生成
node gpt-image-generator.js batch ./prompts.txt -r 9:16 -q low
可用宽高比及默认尺寸:
${ratioRows}
`)
}
/**
 * CLI entry point: parse `process.argv`, then run the `generate` (default),
 * `edit`, or `batch` command.
 *
 * Arguments that are not recognized options are collected as positional
 * parameters (joined with spaces to form the prompt, or used as the prompts
 * file path for `batch`).
 *
 * @returns {Promise<void>}
 * @throws {Error} When an option that requires a value is the last argument.
 */
async function main() {
  const args = process.argv.slice(2)
  if (args.length === 0 || args.includes('-h') || args.includes('--help')) {
    showHelp()
    return
  }
  let command = 'generate'
  const params = []
  const options = { outputDir: './output', size: '1024x1024', quality: 'auto', format: 'png', n: 1 }
  let i = 0
  if (['generate', 'edit', 'batch'].includes(args[0])) {
    command = args[0]
    i = 1
  }

  // Consume the value following the flag at position i, failing loudly when
  // it is missing instead of storing `undefined` or crashing later.
  const takeValue = (flag) => {
    const value = args[i + 1]
    if (value === undefined) {
      throw new Error(`选项 ${flag} 缺少参数值`)
    }
    i += 1
    return value
  }

  while (i < args.length) {
    const arg = args[i]
    switch (arg) {
      case '-o':
      case '--output':
        options.outputDir = takeValue(arg)
        break
      case '-r':
      case '--ratio': {
        const ratio = takeValue(arg)
        if (Object.hasOwn(RATIO_SIZE_MAP, ratio)) {
          options.size = RATIO_SIZE_MAP[ratio]
        } else {
          // Same fallback as before, but no longer silent.
          console.warn(`警告: 不支持的宽高比 "${ratio}",使用默认尺寸 1024x1024`)
          options.size = '1024x1024'
        }
        options._ratio = ratio
        break
      }
      case '-s':
      case '--size':
        options.size = takeValue(arg)
        break
      case '-q':
      case '--quality':
        options.quality = takeValue(arg)
        break
      case '-f':
      case '--format':
        options.format = takeValue(arg)
        break
      case '-i':
      case '--input':
        options.inputImages = takeValue(arg).split(',').map(s => s.trim()).filter(Boolean)
        break
      case '--mask':
        options.maskPath = takeValue(arg)
        break
      case '-n':
        options.n = Number.parseInt(takeValue(arg), 10) || 1
        break
      default:
        params.push(arg)
    }
    i++
  }

  const generator = new GptImageGenerator({
    outputDir: options.outputDir,
    size: options.size,
    quality: options.quality,
  })

  if (command === 'batch') {
    const filePath = params[0]
    if (!filePath || !fs.existsSync(filePath)) {
      console.error('请提供提示词文件路径')
      process.exit(1)
    }
    const prompts = FileUtils.readPromptsFile(filePath)
    await generator.batchGenerate(prompts, options)
  } else if (command === 'edit') {
    const prompt = params.join(' ')
    if (!prompt) { console.error('请提供编辑指令'); process.exit(1) }
    if (!options.inputImages || options.inputImages.length === 0) {
      console.error('请使用 -i 指定输入图片')
      process.exit(1)
    }
    await generator.imageToImage(prompt, options.inputImages, {
      size: options.size,
      outputDir: options.outputDir,
      maskPath: options.maskPath,
    })
  } else {
    const prompt = params.join(' ')
    if (!prompt) { console.error('请提供生成提示词'); process.exit(1) }
    await generator.textToImage(prompt, {
      size: options.size,
      quality: options.quality,
      format: options.format,
      n: options.n,
      outputDir: options.outputDir,
    })
  }
}
// ============================================================================
// 导出
// ============================================================================
module.exports = {
GptImageGenerator,
GptImageApi,
Config,
FileUtils,
RATIO_SIZE_MAP,
ratioToSize,
generate: async (prompt, options) => {
const generator = new GptImageGenerator(options)
return generator.textToImage(prompt, options)
},
edit: async (prompt, imagePaths, options) => {
const generator = new GptImageGenerator(options)
return generator.imageToImage(prompt, imagePaths, options)
},
batchGenerate: async (prompts, options) => {
const generator = new GptImageGenerator(options)
return generator.batchGenerate(prompts, options)
},
}
// Run the CLI only when this file is executed directly (not when required).
if (require.main === module) {
  // Report the failure and exit with a non-zero status.
  const reportFatal = (err) => {
    console.error(`\n❌ 错误: ${err.message}`)
    process.exit(1)
  }
  main().catch(reportFatal)
}