feat(skills): 集成 GPT Image 图片生成和编辑能力

- 新增 gpt-image-generator.js 脚本,支持文生图、图生图/重绘、批量生成
- 更新 pipeline 和 phase-images 支持 GPT Image 模型
- 更新技能文档,添加 GPT Image 使用说明和 API 特点
- 新增配置文件中的 GPT Image API 参数
This commit is contained in:
2026-05-05 23:49:30 +08:00
parent 823519cbf7
commit 35488beef2
8 changed files with 752 additions and 11 deletions

View File

@@ -0,0 +1,531 @@
#!/usr/bin/env node
/**
* GPT Image Generator - GPT Image 模型图片生成工具
*
* 支持模型: gpt-image-2, gpt-image-1.5, gpt-image-1
* 通过云雾 (yunwu.ai) API 代理调用,遵循 OpenAI Images API 格式
*
* 功能:
* - 文生图 (Text-to-Image) — /v1/images/generations
* - 图生图/重绘 (Image-to-Image) — /v1/images/edits (multipart)
* - 首尾帧编辑
* - 批量生成
*
* 用法:
* node gpt-image-generator.js generate "a cute cat" -o ./output -r 16:9
* node gpt-image-generator.js edit "add sunglasses" -i ./photo.jpg -o ./output
* node gpt-image-generator.js batch ./prompts.txt -o ./output
*/
const fs = require('fs')
const path = require('path')
const https = require('https')
const http = require('http')
// ============================================================================
// 配置
// ============================================================================
/**
 * Read the optional config.json located two directories above this script.
 *
 * @returns {object} The parsed JSON content, or an empty object when the
 *   file does not exist.
 * @throws {SyntaxError} When the file exists but does not contain valid JSON.
 */
function loadConfig() {
  const configFile = path.join(__dirname, '..', '..', 'config.json')
  if (!fs.existsSync(configFile)) {
    return {}
  }
  const rawText = fs.readFileSync(configFile, 'utf-8')
  return JSON.parse(rawText)
}
// User overrides are read once, when the module is loaded.
const cfg = loadConfig()

// Runtime settings: start from the built-in defaults, then apply every
// override from config.json that is set to a truthy value.
const Config = {
  baseUrl: 'https://yunwu.ai',
  apiKey: '',
  model: 'gpt-image-2',
  timeout: 120000,
}
if (cfg.gptImageApiBaseUrl) Config.baseUrl = cfg.gptImageApiBaseUrl
if (cfg.gptImageApiKey) Config.apiKey = cfg.gptImageApiKey
if (cfg.gptImageModel) Config.model = cfg.gptImageModel
// 宽高比 → 建议分辨率映射 (gpt-image-2 constraints: max edge 3840, multiples of 16, ratio ≤ 3:1)
// Lookup table from an aspect-ratio label to the pixel size requested for it.
// Entries are listed as [ratio, 'WIDTHxHEIGHT'] pairs; insertion order is kept.
const RATIO_SIZE_MAP = Object.fromEntries([
  ['1:1', '1024x1024'],
  ['3:2', '1536x1024'],
  ['2:3', '1024x1536'],
  ['3:4', '1152x1536'],
  ['4:3', '1536x1152'],
  ['4:5', '1024x1280'],
  ['5:4', '1280x1024'],
  ['9:16', '1088x1920'],
  ['16:9', '1920x1088'],
  ['21:9', '2048x880'],
])
// ============================================================================
// API 调用
// ============================================================================
/**
 * Build the abort signal used to enforce Config.timeout on API requests.
 *
 * @returns {AbortSignal|undefined} A signal that aborts after Config.timeout
 *   milliseconds, or undefined when the timeout is not a positive number
 *   (which disables the client-side deadline).
 */
function createRequestTimeoutSignal() {
  const timeoutMs = Number(Config.timeout)
  return timeoutMs > 0 ? AbortSignal.timeout(timeoutMs) : undefined
}

const GptImageApi = {
  /**
   * Text-to-image: POST {Config.baseUrl}/v1/images/generations with a JSON body.
   *
   * @param {string} prompt - Text prompt sent to the model.
   * @param {object} [options]
   * @param {string} [options.model] - Model name; defaults to Config.model.
   * @param {number} [options.n=1] - Number of images requested.
   * @param {string} [options.size='1024x1024'] - Requested size string.
   * @param {string} [options.quality='auto'] - Requested quality.
   * @param {string} [options.format='png'] - Requested output format.
   * @param {number} [options.outputCompression] - Sent as `output_compression` when provided.
   * @param {string} [options.moderation='auto'] - Sent only when different from 'auto'.
   * @returns {Promise<object>} The parsed JSON response body.
   * @throws {Error} When the HTTP status is not OK, or when the request
   *   exceeds Config.timeout (fetch rejects with the abort reason).
   */
  async generate(prompt, options = {}) {
    const {
      model = Config.model,
      n = 1,
      size = '1024x1024',
      quality = 'auto',
      format = 'png',
      outputCompression,
      moderation = 'auto',
    } = options

    // NOTE(review): the OpenAI Images API names this field `output_format`;
    // `format` is kept as-is because the proxy's accepted name is not visible
    // here — confirm against the proxy documentation.
    const body = { model, prompt, n, size, quality, format }
    if (outputCompression !== undefined) body.output_compression = outputCompression
    if (moderation !== 'auto') body.moderation = moderation

    console.log(`\n📡 GPT Image 文生图请求`)
    console.log(` 模型: ${model}`)
    console.log(` 尺寸: ${size} 质量: ${quality}`)

    const res = await fetch(`${Config.baseUrl}/v1/images/generations`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${Config.apiKey}`,
      },
      body: JSON.stringify(body),
      // Apply the configured deadline so a stalled request cannot hang forever.
      signal: createRequestTimeoutSignal(),
    })
    if (!res.ok) {
      const errText = await res.text()
      throw new Error(`GPT Image 生成失败: ${res.status} - ${errText}`)
    }
    return res.json()
  },

  /**
   * Image-to-image / edit: POST {Config.baseUrl}/v1/images/edits as multipart/form-data.
   *
   * @param {string} prompt - Edit instruction.
   * @param {string[]} imagePaths - Input image paths; each is appended as an `image` part.
   * @param {object} [options]
   * @param {string} [options.model] - Model name; defaults to Config.model.
   * @param {number} [options.n=1] - Sent only when greater than 1.
   * @param {string} [options.size] - Sent only when provided.
   * @param {string} [options.maskPath] - Optional mask file, sent as `mask` with type image/png.
   * @returns {Promise<object>} The parsed JSON response body.
   * @throws {Error} When a file cannot be read, the HTTP status is not OK,
   *   or the request exceeds Config.timeout.
   */
  async edit(prompt, imagePaths, options = {}) {
    const {
      model = Config.model,
      n = 1,
      size,
      maskPath,
    } = options

    // Extension → MIME type; unknown extensions fall back to image/png.
    const mimeByExtension = {
      '.png': 'image/png',
      '.jpg': 'image/jpeg',
      '.jpeg': 'image/jpeg',
      '.webp': 'image/webp',
      '.gif': 'image/gif',
    }

    const fd = new globalThis.FormData()
    fd.append('model', model)
    fd.append('prompt', prompt)
    if (n > 1) fd.append('n', String(n))
    if (size) fd.append('size', size)

    // Attach every input image under the same `image` field name.
    for (const imgPath of imagePaths) {
      const buf = fs.readFileSync(imgPath)
      const ext = path.extname(imgPath).toLowerCase()
      const mimeType = mimeByExtension[ext] || 'image/png'
      fd.append('image', new Blob([buf], { type: mimeType }), path.basename(imgPath))
    }
    if (maskPath) {
      const maskBuf = fs.readFileSync(maskPath)
      fd.append('mask', new Blob([maskBuf], { type: 'image/png' }), path.basename(maskPath))
    }

    console.log(`\n📡 GPT Image 编辑请求`)
    console.log(` 模型: ${model}`)
    console.log(` 输入图片: ${imagePaths.length}${maskPath ? ' + 蒙版' : ''}`)
    if (size) console.log(` 尺寸: ${size}`)

    // No Content-Type header: fetch derives the multipart boundary from the FormData body.
    const res = await fetch(`${Config.baseUrl}/v1/images/edits`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${Config.apiKey}`,
      },
      body: fd,
      signal: createRequestTimeoutSignal(),
    })
    if (!res.ok) {
      const errText = await res.text()
      throw new Error(`GPT Image 编辑失败: ${res.status} - ${errText}`)
    }
    return res.json()
  },

  /**
   * Extract images from an API response.
   *
   * Items carrying `b64_json` become `{ data, url, revised_prompt }`; items
   * carrying only `url` become `{ url, revised_prompt }`; other items are dropped.
   *
   * @param {object} response - Parsed response body.
   * @returns {{images: object[]}} Extracted images; empty when `response.data`
   *   is missing or is not an array.
   */
  parseResponse(response) {
    if (!response || !Array.isArray(response.data)) {
      return { images: [] }
    }
    const images = []
    for (const item of response.data) {
      if (item.b64_json) {
        images.push({ data: item.b64_json, url: item.url, revised_prompt: item.revised_prompt })
      } else if (item.url) {
        images.push({ url: item.url, revised_prompt: item.revised_prompt })
      }
    }
    return { images }
  },
}
// ============================================================================
// 文件处理
// ============================================================================
const FileUtils = {
  /**
   * Create the directory (and any missing parents) when it does not exist yet.
   *
   * @param {string} dirPath - Directory to ensure.
   * @returns {string} The same path, for chaining.
   */
  ensureDir(dirPath) {
    if (!fs.existsSync(dirPath)) {
      fs.mkdirSync(dirPath, { recursive: true })
    }
    return dirPath
  },

  /**
   * Build a file name of the form `<prefix>_<ISO timestamp>_<random>.<ext>`.
   * Colons and dots in the timestamp are replaced with dashes.
   *
   * @param {string} [prefix='image'] - Leading part of the name.
   * @param {string} [ext='png'] - Extension without the dot.
   * @returns {string} The generated file name.
   */
  generateFilename(prefix = 'image', ext = 'png') {
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-')
    const random = Math.random().toString(36).substring(2, 8)
    return `${prefix}_${timestamp}_${random}.${ext}`
  },

  /**
   * Read a prompts file: one prompt per line, trimmed, blank lines dropped.
   *
   * @param {string} filePath - Path of the UTF-8 text file.
   * @returns {string[]} The prompts in file order.
   */
  readPromptsFile(filePath) {
    const content = fs.readFileSync(filePath, 'utf-8')
    return content.split('\n').map((line) => line.trim()).filter(Boolean)
  },

  /**
   * Download a URL to a local file, following HTTP redirects.
   *
   * The output file is only created once a 2xx response arrives, so failed
   * requests never leave an error page on disk under an image name.
   *
   * @param {string} url - Absolute http(s) URL.
   * @param {string} outputPath - Destination file path.
   * @param {number} [redirectsLeft=5] - Remaining redirects allowed before giving up.
   * @returns {Promise<string>} Resolves with outputPath once the file is fully written.
   * @throws {Error} Rejects on network errors, non-2xx statuses, too many
   *   redirects, or write failures (a partial file is removed first).
   */
  async downloadImage(url, outputPath, redirectsLeft = 5) {
    const client = url.startsWith('https') ? https : http
    return new Promise((resolve, reject) => {
      const request = client.get(url, (response) => {
        const { statusCode, headers } = response

        if (statusCode >= 300 && statusCode < 400 && headers.location) {
          response.resume() // discard the redirect body so the socket is released
          if (redirectsLeft <= 0) {
            reject(new Error(`图片下载失败: 重定向次数过多 (${url})`))
            return
          }
          // Location may be relative; resolve it against the current URL.
          const nextUrl = new URL(headers.location, url).toString()
          FileUtils.downloadImage(nextUrl, outputPath, redirectsLeft - 1).then(resolve, reject)
          return
        }

        if (statusCode < 200 || statusCode >= 300) {
          response.resume()
          reject(new Error(`图片下载失败: HTTP ${statusCode} (${url})`))
          return
        }

        const file = fs.createWriteStream(outputPath)
        // On any stream failure, drop the partial file before reporting the error.
        const fail = (err) => {
          file.destroy()
          fs.rm(outputPath, { force: true }, () => reject(err))
        }
        response.on('error', fail)
        file.on('error', fail)
        file.on('finish', () => resolve(outputPath))
        response.pipe(file)
      })
      request.on('error', reject)
    })
  },
}
// ============================================================================
// 核心生成器
// ============================================================================
/**
 * High-level generator: calls the API, then stores the returned images on disk.
 */
class GptImageGenerator {
  /**
   * @param {object} [options]
   * @param {string} [options.outputDir='./output'] - Default output directory.
   * @param {string} [options.size='1024x1024'] - Default size.
   * @param {string} [options.quality='auto'] - Default quality.
   */
  constructor(options = {}) {
    this.outputDir = options.outputDir || './output'
    this.defaultSize = options.size || '1024x1024'
    this.defaultQuality = options.quality || 'auto'
    if (!Config.apiKey) {
      console.warn('警告: 未设置 gptImageApiKey')
    }
  }

  /**
   * Derive a distinct name for the index-th image from one caller-supplied
   * file name. The first image keeps the name unchanged; later images get
   * `_2`, `_3`, ... inserted before the extension so they do not overwrite it.
   */
  static #numberedFilename(filename, index) {
    if (index === 0) return filename
    const { dir, name, ext } = path.parse(filename)
    return path.join(dir, `${name}_${index + 1}${ext}`)
  }

  /**
   * Persist every image into outputDir: base64 payloads are decoded and
   * written directly, URL-only images are downloaded. Returns the saved paths.
   */
  async #saveImages(images, outputDir, nameForIndex) {
    const savedFiles = []
    FileUtils.ensureDir(outputDir)
    for (const [index, img] of images.entries()) {
      const outputPath = path.join(outputDir, nameForIndex(index))
      if (img.data) {
        fs.writeFileSync(outputPath, Buffer.from(img.data, 'base64'))
      } else if (img.url) {
        await FileUtils.downloadImage(img.url, outputPath)
      }
      savedFiles.push(outputPath)
      console.log(`✅ 已保存: ${outputPath}`)
    }
    return savedFiles
  }

  /**
   * Text-to-image: generate images from a prompt and save them.
   *
   * @param {string} prompt - Text prompt.
   * @param {object} [options]
   * @param {string} [options.size] - Defaults to the constructor size.
   * @param {string} [options.quality] - Defaults to the constructor quality.
   * @param {string} [options.format='png'] - Output format; 'jpeg' is saved with a .jpg extension.
   * @param {number} [options.n=1] - Number of images requested.
   * @param {string} [options.outputDir] - Defaults to the constructor output directory.
   * @param {string|null} [options.filename=null] - Fixed file name. When several
   *   images come back, the first keeps this name and the rest get a numeric suffix.
   * @returns {Promise<{images: object[], savedFiles: string[]}>}
   */
  async textToImage(prompt, options = {}) {
    const {
      size = this.defaultSize,
      quality = this.defaultQuality,
      format = 'png',
      n = 1,
      outputDir = this.outputDir,
      filename = null,
    } = options

    console.log(`\n🎨 GPT Image 文生图: "${prompt.substring(0, 80)}..."`)
    console.log(`📐 尺寸: ${size} 🎯 质量: ${quality}`)

    const response = await GptImageApi.generate(prompt, { size, quality, format, n })
    const { images } = GptImageApi.parseResponse(response)

    const ext = format === 'jpeg' ? 'jpg' : format
    const savedFiles = await this.#saveImages(images, outputDir, (index) => (
      filename
        ? GptImageGenerator.#numberedFilename(filename, index)
        : FileUtils.generateFilename('gpt_gen', ext)
    ))
    return { images, savedFiles }
  }

  /**
   * Image-to-image / repaint: edit using one or more input images.
   *
   * @param {string} prompt - Edit instruction.
   * @param {string|string[]} inputImages - One path or a list of paths.
   * @param {object} [options]
   * @param {string} [options.size] - Defaults to the constructor size.
   * @param {string} [options.outputDir] - Defaults to the constructor output directory.
   * @param {string|null} [options.maskPath=null] - Optional mask file.
   * @returns {Promise<{images: object[], savedFiles: string[]}>}
   */
  async imageToImage(prompt, inputImages, options = {}) {
    const {
      size = this.defaultSize,
      outputDir = this.outputDir,
      maskPath = null,
    } = options
    const imgPaths = Array.isArray(inputImages) ? inputImages : [inputImages]

    console.log(`\n🖼️ GPT Image 编辑: "${prompt.substring(0, 80)}..."`)
    console.log(`📁 输入图片: ${imgPaths.length}`)

    const response = await GptImageApi.edit(prompt, imgPaths, { size, maskPath, n: 1 })
    const { images } = GptImageApi.parseResponse(response)

    const savedFiles = await this.#saveImages(
      images,
      outputDir,
      () => FileUtils.generateFilename('gpt_edit', 'png'),
    )
    return { images, savedFiles }
  }

  /**
   * Batch text-to-image: process prompts one after another. A failing prompt
   * is recorded and does not stop the remaining ones.
   *
   * @param {string[]} prompts - Prompts to generate.
   * @param {object} [options] - Forwarded to textToImage; `filename` is overridden per item.
   * @returns {Promise<object[]>} One `{ success, prompt, result | error }` entry per prompt.
   */
  async batchGenerate(prompts, options = {}) {
    const results = []
    const total = prompts.length
    // Name files after the requested format so the extension matches the content.
    const ext = options.format === 'jpeg' ? 'jpg' : (options.format || 'png')

    console.log(`\n🚀 GPT Image 批量生成,共 ${total} 个任务`)
    for (let i = 0; i < prompts.length; i++) {
      console.log(`\n[${i + 1}/${total}] 处理中...`)
      try {
        const result = await this.textToImage(prompts[i], {
          ...options,
          filename: `batch_${String(i + 1).padStart(3, '0')}.${ext}`,
        })
        results.push({ success: true, prompt: prompts[i], result })
      } catch (error) {
        console.error(`❌ 失败: ${error.message}`)
        results.push({ success: false, prompt: prompts[i], error: error.message })
      }
    }

    const successCount = results.filter((r) => r.success).length
    console.log(`\n✨ 批量生成完成: ${successCount}/${total} 成功`)
    return results
  }
}
// ============================================================================
// 便捷函数(供 pipeline 调用)
// ============================================================================
/**
* 解析 CLI 参数中的宽高比,返回合适的 size 字符串
*/
/**
 * Translate an aspect-ratio label into a size string via RATIO_SIZE_MAP.
 *
 * @param {string} ratio - Aspect-ratio label such as '16:9'.
 * @param {string} [quality='auto'] - Accepted but not used by the lookup.
 * @returns {string} The mapped size, or '1024x1024' when there is no truthy mapping.
 */
function ratioToSize(ratio, quality = 'auto') {
  const mappedSize = RATIO_SIZE_MAP[ratio]
  if (mappedSize) {
    return mappedSize
  }
  return '1024x1024'
}
// ============================================================================
// CLI
// ============================================================================
/**
 * Print the CLI usage text: commands, options, examples, and the table of
 * supported aspect ratios with the size each one maps to.
 */
function showHelp() {
  // Separate ratio and size with an arrow; without a separator the two run
  // together on screen (e.g. "1:11024x1024").
  const ratioTable = Object.entries(RATIO_SIZE_MAP)
    .map(([ratio, size]) => `${ratio} → ${size}`)
    .join('\n ')

  console.log(`
🎨 GPT Image Generator - 云雾API GPT Image 图片生成工具
📦 模型: ${Config.model}
用法:
node gpt-image-generator.js <command> [options]
命令:
generate <prompt> 文生图
edit <prompt> 图生图/重绘(需要 -i 指定输入图片)
batch <file> 批量生成(从文件读取提示词)
选项:
-o, --output <dir> 输出目录 (默认: ./output)
-r, --ratio <ratio> 宽高比 (1:1, 16:9, 9:16, 3:4, 4:3 等)
-s, --size <size> 尺寸 (1024x1024, 1088x1920, auto 等)
-q, --quality <q> 质量 (low, medium, high, auto)
-f, --format <fmt> 格式 (png, jpeg, webp)
-i, --input <files> 输入图片(edit 模式,逗号分隔)
--mask <file> 蒙版图片(edit 模式)
-n <num> 生成数量 (默认: 1)
-h, --help 显示帮助
示例:
# 文生图 9:16
node gpt-image-generator.js generate "A cat wearing a hat" -r 9:16 -q medium
# 图生图/重绘
node gpt-image-generator.js edit "Add sunglasses" -i ./photo.jpg
# 多张参考图编辑
node gpt-image-generator.js edit "Combine these items into a gift basket" -i ./a.jpg,./b.jpg
# 批量生成
node gpt-image-generator.js batch ./prompts.txt -r 9:16 -q low
可用宽高比及默认尺寸:
${ratioTable}
`)
}
/**
 * CLI entry point: parse process.argv, then run `generate` (the default),
 * `edit`, or `batch`.
 *
 * Arguments that are not recognised flags are collected as positional
 * parameters: joined with spaces they form the prompt, and for `batch` the
 * first one is the prompts file path.
 *
 * Exits with code 1 when a flag is missing its value, when the prompt or
 * prompts file is missing, or when `edit` is used without input images.
 */
async function main() {
  const args = process.argv.slice(2)
  if (args.length === 0 || args.includes('-h') || args.includes('--help')) {
    showHelp()
    return
  }

  let command = 'generate'
  let i = 0
  if (['generate', 'edit', 'batch'].includes(args[0])) {
    command = args[0]
    i = 1
  }

  const params = []
  const options = { outputDir: './output', size: '1024x1024', quality: 'auto', format: 'png', n: 1 }

  // Consume the value that follows a flag. A flag given as the last argument
  // has no value; report that clearly instead of storing undefined or
  // crashing later on a method call.
  const takeValue = (flag) => {
    const value = args[i + 1]
    if (value === undefined) {
      console.error(`选项 ${flag} 缺少参数值`)
      process.exit(1)
    }
    i += 1
    return value
  }

  while (i < args.length) {
    const arg = args[i]
    switch (arg) {
      case '-o':
      case '--output':
        options.outputDir = takeValue(arg)
        break
      case '-r':
      case '--ratio': {
        const ratio = takeValue(arg)
        const mappedSize = RATIO_SIZE_MAP[ratio]
        if (!mappedSize) {
          // Keep the fallback, but make the substitution visible to the user.
          console.warn(`警告: 未知宽高比 "${ratio}",使用默认尺寸 1024x1024`)
        }
        options.size = mappedSize || '1024x1024'
        options._ratio = ratio
        break
      }
      case '-s':
      case '--size':
        options.size = takeValue(arg)
        break
      case '-q':
      case '--quality':
        options.quality = takeValue(arg)
        break
      case '-f':
      case '--format':
        options.format = takeValue(arg)
        break
      case '-i':
      case '--input':
        options.inputImages = takeValue(arg).split(',').map((s) => s.trim()).filter(Boolean)
        break
      case '--mask':
        options.maskPath = takeValue(arg)
        break
      case '-n':
        options.n = Number.parseInt(takeValue(arg), 10) || 1
        break
      default:
        params.push(arg)
    }
    i++
  }

  const generator = new GptImageGenerator({
    outputDir: options.outputDir,
    size: options.size,
    quality: options.quality,
  })

  if (command === 'batch') {
    const filePath = params[0]
    if (!filePath || !fs.existsSync(filePath)) {
      console.error('请提供提示词文件路径')
      process.exit(1)
    }
    const prompts = FileUtils.readPromptsFile(filePath)
    await generator.batchGenerate(prompts, options)
    return
  }

  const prompt = params.join(' ')

  if (command === 'edit') {
    if (!prompt) { console.error('请提供编辑指令'); process.exit(1) }
    if (!options.inputImages || options.inputImages.length === 0) {
      console.error('请使用 -i 指定输入图片')
      process.exit(1)
    }
    await generator.imageToImage(prompt, options.inputImages, {
      size: options.size,
      outputDir: options.outputDir,
      maskPath: options.maskPath,
    })
    return
  }

  if (!prompt) { console.error('请提供生成提示词'); process.exit(1) }
  await generator.textToImage(prompt, {
    size: options.size,
    quality: options.quality,
    format: options.format,
    n: options.n,
    outputDir: options.outputDir,
  })
}
// ============================================================================
// 导出
// ============================================================================
module.exports = {
GptImageGenerator,
GptImageApi,
Config,
FileUtils,
RATIO_SIZE_MAP,
ratioToSize,
generate: async (prompt, options) => {
const generator = new GptImageGenerator(options)
return generator.textToImage(prompt, options)
},
edit: async (prompt, imagePaths, options) => {
const generator = new GptImageGenerator(options)
return generator.imageToImage(prompt, imagePaths, options)
},
batchGenerate: async (prompts, options) => {
const generator = new GptImageGenerator(options)
return generator.batchGenerate(prompts, options)
},
}
if (require.main === module) {
main().catch(err => {
console.error(`\n❌ 错误: ${err.message}`)
process.exit(1)
})
}

View File

@@ -24,7 +24,7 @@ function validateManifest(manifestPath) {
}
if (!manifest.account) issues.push('缺少顶层 account')
if (!manifest.imageModel) issues.push('缺少顶层 imageModel可选: gemini, mj')
if (!manifest.imageModel) issues.push('缺少顶层 imageModel可选: gemini, gpt-image, mj')
if (!manifest.format) issues.push('缺少顶层 format如 9:16')
if (!manifest.items || !Array.isArray(manifest.items)) issues.push('缺少顶层 items 数组')
if (!manifest.mode) issues.push('缺少顶层 modesingle 或 framePair')

View File

@@ -1,7 +1,7 @@
/**
* Phase: images — 图片生成
*
* 支持 Gemini / MJ / Kling 种模型,含首尾帧模式
* 支持 Gemini / GPT Image / MJ / Kling 种模型,含首尾帧模式
* 并发生成,支持 task ID 恢复MJ
*/
@@ -130,6 +130,32 @@ async function generateMJ(item, idx, dir, imagesDir, ratio, refs, manifestPath,
return harvestMJ(item, idx, dir, imagesDir, ratio, refs, manifestPath, manifest)
}
async function generateGptImage(item, idx, dir, imagesDir, ratio, refs) {
const { generate: gptGen, edit: gptEdit, ratioToSize } = require('../gpt-image-generator')
const size = ratioToSize(ratio)
let result
if (refs.localPaths.length > 0) {
log('images', `[${idx}] GPT Image 图生图: ${item.imagePrompt.substring(0, 60)}...`)
result = await gptEdit(item.imagePrompt, refs.localPaths, {
outputDir: imagesDir,
size,
})
} else {
log('images', `[${idx}] GPT Image 文生图: ${item.imagePrompt.substring(0, 60)}...`)
result = await gptGen(item.imagePrompt, {
outputDir: imagesDir, size,
quality: 'auto',
})
}
const file = (result.savedFiles && result.savedFiles.length > 0)
? renameGeneratedFile(
path.relative(dir, result.savedFiles[0]).replace(/\\/g, '/'),
dir, idx, item.script || item.shotDesc, ''
)
: null
return { file }
}
async function generateKling(item, idx, dir, imagesDir, ratio, refs) {
const { generate: klingGen } = require('../kling-image-generator')
const klingOpts = { outputDir: imagesDir, aspectRatio: ratio }
@@ -158,6 +184,12 @@ async function generateLastFrame(item, idx, manifest, dir, imagesDir, model, rat
outputDir: imagesDir,
aspectRatio: ratio,
})
} else if (model === 'gpt-image') {
const { edit: gptEdit, ratioToSize } = require('../gpt-image-generator')
lastResult = await gptEdit(item.lastFramePrompt, [firstFramePath], {
outputDir: imagesDir,
size: ratioToSize(ratio),
})
} else if (model === 'kling') {
const { generate: klingGen } = require('../kling-image-generator')
lastResult = await klingGen(item.lastFramePrompt, {
@@ -273,10 +305,12 @@ async function processItem(item, manifest, manifestPath, dir, imagesDir, model,
let result
if (model === 'gemini') {
result = await generateGemini(item, idx, dir, imagesDir, ratio, refs)
} else if (model === 'gpt-image') {
result = await generateGptImage(item, idx, dir, imagesDir, ratio, refs)
} else if (model === 'kling') {
result = await generateKling(item, idx, dir, imagesDir, ratio, refs)
} else {
throw new Error(`不支持的模型: ${model}(支持: gemini, mj, kling`)
throw new Error(`不支持的模型: ${model}(支持: gemini, gpt-image, mj, kling`)
}
if (result.file) {

View File

@@ -225,7 +225,7 @@ async function main() {
console.log('用法:')
console.log(' pipeline.js create-account --id <id> --name <名称> [--desc ...] [--references file1,file2]')
console.log(' pipeline.js validate-account --account <id>')
console.log(' pipeline.js init --account <id> --mode <single|framePair> --items <JSON> [--items-file <path>] [--image-model gemini|mj] [--video-model veo3-fast|grok|kling] [--format 9:16]')
console.log(' pipeline.js init --account <id> --mode <single|framePair> --items <JSON> [--items-file <path>] [--image-model gemini|gpt-image|mj] [--video-model veo3-fast|grok|kling] [--format 9:16]')
console.log(' pipeline.js validate --manifest <path>')
console.log(' pipeline.js confirm --manifest <path> --all')
console.log(' pipeline.js confirm --manifest <path> --items 1,3,5')