commit dadddc7aece01dea5059b69bf81a1d9a0590ba36 Author: sion123 <450702724@qq.com> Date: Wed Apr 29 21:04:43 2026 +0800 init: video-create project with skills and accounts diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 0000000..7006e88 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,123 @@ +{ + "permissions": { + "allow": [ + "Edit(/.claude/skills/video-from-script/**)", + "Bash(node setup.js)", + "Edit(/.claude/skills/capcut/**)", + "mcp__zai-mcp-server__extract_text_from_screenshot", + "Bash(docker --version)", + "Bash(docker build *)", + "Bash(docker pull *)", + "Bash(node mj-image-generator.js \"a cute orange cat sitting on a windowsill, soft morning light, photorealistic\" -o ./test_output)", + "Bash(node -e \"const sharp=require\\('sharp'\\); sharp\\('test_output/2026-04-25T16-26-09-275Z_grid.png'\\).metadata\\(\\).then\\(m=>console.log\\(JSON.stringify\\(m,null,2\\)\\)\\)\")", + "Bash(node -e ' *)", + "Edit(/.claude/skills/image-generator/**)", + "Edit(/.claude/skills/capcut-assembly/**)", + "Edit(/.claude/skills/remotion-video/**)", + "Bash(cp \"d:/美图/.claude/skills/video-from-script/references/remotion-mode.md\" \"d:/美图/.claude/skills/remotion-video/references/remotion-guide.md\")", + "Bash(cp \"d:/美图/.claude/skills/video-from-script/references/templates.md\" \"d:/美图/.claude/skills/remotion-video/references/templates.md\")", + "Bash(node .claude/skills/video-from-script/scripts/mj-image-generator.js 'Medium close-up of a divine arbiter deity with heterochromatic eyes, left eye burning molten gold and right eye gleaming cold silver, yin-yang dual pupils radiating absolute cosmic authority, an austere perfectly symmetrical face with refined aristocratic features, expression of transcendent detachment and cold omniscient judgment, a sharp dividing line of light and shadow splitting the face precisely along the bridge of the nose, left half bathed in warm divine golden radiance revealing every pore and noble contour, 
right half dissolving into deep cool shadow with only a silver edge light defining the jaw and cheekbone, faint celestial scale motifs etched along the temples in alternating gold and silver inlay, catchlight in the golden eye shaped like a flame and in the silver eye shaped like a crescent moon, lips sealed in absolute neutrality. Against a backdrop of a narrow bridge at the boundary between heaven and earth, one side blazing with holy light and the other swallowed in absolute darkness, both realms blurred into atmospheric contrast, silver white and dark gold and stark chiaroscuro palette, cinematic split lighting, dramatic Rembrandt contrast, volumetric light colliding with shadow at the center of the frame. Depth of field, bokeh background. Style: Unreal Engine 5 render, hyper-detailed facial features, traditional Chinese celestial judge aesthetics, 8k, masterpiece' -o ./output/forbidden-emperor_20260426/images -a 9:16 --mj-params '--style raw --s 750 --q 2')", + "Bash(mv \"2026-04-26T07-48-45-836Z_1.png\" \"v2_leo_sun-emperor_1.png\")", + "Bash(mv \"2026-04-26T07-48-45-836Z_2.png\" \"v2_leo_sun-emperor_2.png\")", + "Bash(mv \"2026-04-26T07-48-45-836Z_3.png\" \"v2_leo_sun-emperor_3.png\")", + "Bash(mv \"2026-04-26T07-48-45-836Z_4.png\" \"v2_leo_sun-emperor_4.png\")", + "Bash(mv \"2026-04-26T07-48-49-659Z_1.png\" \"v2_libra_arbiter_1.png\")", + "Bash(node .claude/skills/video-from-script/scripts/mj-image-generator.js 'Close-up portrait of an ethereal water spirit with half-closed dreamy eyes shimmering between lavender and pale aquamarine, irises like fractured moonlight on still water, long flowing hair drifting gently as if in a celestial breeze, soft delicate porcelain face with a melancholic transcendent expression caught between waking and dreaming, translucent teardrop-shaped pearl ornaments trailing along cheekbones and temples like crystallized divine tears, a faint bioluminescent sheen across ethereal skin in shifting lilac and moonlit blue tones, lips 
slightly parted as if whispering to the cosmos, delicate catchlight of scattered moonbeams mirrored in each pupil. Against a backdrop of a moonlit celestial lotus garden with drifting luminous petals and floating motes of pale light dissolving into misty lavender haze, lavender purple and water blue and moonlight silver palette, cinematic soft diffused lighting, caustic light patterns dancing gently across her face, rim glow in pale silver tracing every strand of flowing hair. Depth of field, bokeh background, dreamlike atmosphere. Style: Unreal Engine 5 render, hyper-detailed facial features, traditional Chinese water deity aesthetics, 8k, masterpiece' -o ./output/forbidden-emperor_20260426/images -a 9:16 --mj-params '--style raw --s 750 --q 2')", + "Bash(mv \"2026-04-26T07-52-20-429Z_1.png\" \"v2_pisces_water-spirit_1.png\")", + "Bash(mv \"2026-04-26T07-52-20-429Z_2.png\" \"v2_pisces_water-spirit_2.png\")", + "Bash(mv \"2026-04-26T07-52-20-429Z_3.png\" \"v2_pisces_water-spirit_3.png\")", + "Bash(mv \"2026-04-26T07-52-20-429Z_4.png\" \"v2_pisces_water-spirit_4.png\")", + "Bash(node .claude/skills/video-from-script/scripts/imgbb_upload.js upload output/forbidden-emperor_20260426/images/__TRACKED_VAR__)", + "Bash(node .claude/skills/video-from-script/scripts/grok-video-generator.js batch \"./output/forbidden-emperor_20260426/manifest_grok.json\" -o \"./output/forbidden-emperor_20260426/videos\" --concurrency 3)", + "Bash(node .claude/skills/video-from-script/scripts/grok-video-generator.js batch \"./output/forbidden-emperor_20260426/manifest_grok_v2.json\" -o \"./output/forbidden-emperor_20260426/videos_v2\" --concurrency 3)", + "Bash(mv \"2026-04-26T08-14-13-499Z_grok.mp4\" \"leo_1_resolve.mp4\")", + "Bash(mv \"2026-04-26T08-14-13-499Z_thumb.jpg\" \"leo_1_resolve_thumb.jpg\")", + "Bash(mv \"2026-04-26T08-14-03-079Z_grok.mp4\" \"leo_2_lonely-pride.mp4\")", + "Bash(mv \"2026-04-26T08-14-03-079Z_thumb.jpg\" \"leo_2_lonely-pride_thumb.jpg\")", + "Bash(mv 
\"2026-04-26T08-14-02-389Z_grok.mp4\" \"leo_3_suppressed-anger.mp4\")", + "Bash(node .claude/skills/video-from-script/scripts/veo-video-generator.js batch \"./output/forbidden-emperor_20260426/manifest_pisces_veo.json\" -o \"./output/forbidden-emperor_20260426/videos_v2\" --concurrency 3)", + "Bash(mv \"2026-04-26T08-24-33-349Z_veo.mp4\" \"pisces_1_awakening.mp4\")", + "Bash(mv \"2026-04-26T08-25-39-877Z_veo.mp4\" \"pisces_2_sorrow.mp4\")", + "Bash(mv \"2026-04-26T08-26-13-337Z_veo.mp4\" \"pisces_3_tenderness.mp4\")", + "Bash(curl -s \"http://capcut.muyetools.cn/openapi/capcut-mate/v1/health\")", + "Bash(node sync-to-jianying.js \"http://capcut.muyetools.cn/openapi/capcut-mate/v1/get_draft?draft_id=20260426163122642c7fcf\" --name \"星座面部特写×9\")", + "Bash(node .claude/skills/video-from-script/scripts/veo-video-generator.js --help)", + "Bash(node .claude/skills/video-from-script/scripts/veo-video-generator.js batch \"./output/forbidden-emperor_20260426/manifest_pisces_veo.json\" -o \"./output/forbidden-emperor_20260426/videos_v2\" --concurrency 3 -a 9:16)", + "Bash(node sync-to-jianying.js \"http://capcut.muyetools.cn/openapi/capcut-mate/v1/get_draft?draft_id=2026042616474679ac9cb2\" --name \"星座面部特写×9-顺序版\")", + "Bash(xxd)", + "Bash(grep -E \"\\\\.mp4$\")", + "Bash(node .claude/skills/video-from-script/scripts/capcut_assemble.js --input ./output/forbidden-emperor_20260426 --manifest ./output/forbidden-emperor_20260426/manifest_assemble.json --mode videos --subtitles false --format 9:16 --output ./output/forbidden-emperor_20260426/final/)", + "Bash(node sync-to-jianying.js \"http://capcut.muyetools.cn/openapi/capcut-mate/v1/get_draft?draft_id=2026042617032405872d2b\" --name \"星座面部特写-成片版\")", + "Bash(node imgbb_upload.js upload \"d:/美图/.claude/skills/video-from-script/accounts/forbidden-emperor/references/微信图片_20260426135329_7359_105.png\")", + "Bash(node imgbb_upload.js upload 
\"d:/美图/.claude/skills/video-from-script/accounts/forbidden-emperor/references/微信图片_20260426135333_7361_105.png\")", + "Bash(node imgbb_upload.js upload \"d:/美图/.claude/skills/video-from-script/accounts/forbidden-emperor/references/微信图片_20260426135335_7362_105.png\")", + "Bash(node mj-image-generator.js batch \"d:/美图/output/forbidden-emperor_20260426/prompts.txt\" -o \"d:/美图/output/forbidden-emperor_20260426/images\" -r \"https://i.ibb.co/zW4tYYrN/047b6fcbf511.png,https://i.ibb.co/9HfJHQBW/6f8681e6a8e0.png,https://i.ibb.co/bjpg4TbD/ed6460f15d13.png\" -c 3)", + "Bash(node mj-image-generator.js batch \"d:/美图/output/forbidden-emperor_20260426/retry.txt\" -o \"d:/美图/output/forbidden-emperor_20260426/images\" -r \"https://i.ibb.co/zW4tYYrN/047b6fcbf511.png,https://i.ibb.co/9HfJHQBW/6f8681e6a8e0.png,https://i.ibb.co/bjpg4TbD/ed6460f15d13.png\" -c 3)", + "Bash(node mj-image-generator.js \"Cinematic portrait of a majestic young emperor with golden amber eyes and flowing auburn hair, wearing ornate dark gold imperial hanfu with subtle dragon patterns, standing on the highest terrace of an ancient celestial palace rooftop. He gazes upward with quiet determination and regal pride, a faint knowing smile on his lips. Behind him a vast starry night sky fills the frame with the milky way stretching across, constellations glowing softly, ancient jade palace rooftops and pagodas visible far below in the deep background. Gentle celestial wind catching his hair and robes. Warm gold and deep midnight blue palette. Lighting: Starlight and warm golden ambient glow from palace lanterns below, cinematic lighting, soft rim light on his silhouette. 
Style: Unreal Engine 5 render, highly detailed face and expression, traditional Chinese aesthetics, 8k, masterpiece --ar 9:16 --style raw --s 750 --q 2\" -o \"d:/美图/output/forbidden-emperor_20260426/images\" -r \"https://i.ibb.co/zW4tYYrN/047b6fcbf511.png,https://i.ibb.co/9HfJHQBW/6f8681e6a8e0.png,https://i.ibb.co/bjpg4TbD/ed6460f15d13.png\")", + "Bash(node mj-image-generator.js \"Cinematic portrait of a majestic figure with golden amber eyes and flowing auburn hair, wearing ornate dark gold traditional Chinese robes with subtle patterns, standing on the highest terrace of an ancient celestial palace rooftop. Gazing upward with quiet determination and regal pride, a faint knowing smile on his lips. Behind him a vast starry night sky fills the frame with the milky way stretching across, constellations glowing softly, ancient jade palace rooftops and pagodas visible far below in the deep background. Gentle wind catching his hair and robes. Warm gold and deep midnight blue palette. Lighting: Starlight and warm golden ambient glow from lanterns below, cinematic lighting, soft rim light on his silhouette. Style: Unreal Engine 5 render, highly detailed face and expression, traditional Chinese aesthetics, 8k, masterpiece --ar 9:16 --style raw --s 750 --q 2\" -o \"d:/美图/output/forbidden-emperor_20260426/images\")", + "Bash(node mj-image-generator.js \"Cinematic medium close-up of a majestic young figure with golden amber eyes and flowing auburn hair, wearing ornate dark gold traditional Chinese robes with subtle dragon patterns, standing on the highest terrace of an ancient celestial palace rooftop. He gazes upward with quiet determination and regal pride, a faint knowing smile on his lips. 
Sharp in-focus background: vast starry night sky with the milky way stretching across in brilliant detail, constellation patterns clearly visible, shooting star streaking across, ancient jade palace rooftops with curved tiles and pagodas visible far below, misty mountain peaks on the horizon. Gentle celestial wind catching his hair and robes. Warm gold and deep midnight blue palette. Lighting: Starlight and warm golden ambient glow from palace lanterns below, cinematic lighting, soft rim light on his silhouette, sharp details throughout. Style: Unreal Engine 5 render, deep depth of field, highly detailed face and expression, highly detailed sky and architecture, traditional Chinese aesthetics, 8k, masterpiece --ar 9:16 --style raw --s 750 --q 2\" -o \"d:/美图/output/forbidden-emperor_20260426/images\")", + "Bash(rm -f \"d:/美图/output/forbidden-emperor_20260426/images/\"*.png)", + "Bash(node mj-image-generator.js \"3/4 body shot of a majestic deity with golden amber eyes and flowing auburn hair in ornate dark gold celestial hanfu, standing on a floating jade terrace railing. He looks upward with serene confidence. The upper two-thirds of frame: magnificent celestial night sky with vivid milky way, golden constellations mapped across the heavens, three floating jade islands with cascading starlight waterfalls plunging into a luminous cloud sea, mythical white cranes soaring between floating peaks, thousands of golden lotus lanterns drifting upward, distant celestial palace pagodas with curved golden roofs emerging from the cloud ocean. Bioluminescent lotus petals and golden dust particles floating through the entire scene. Warm gold and deep midnight blue palette with celestial silver. Lighting: Divine golden god rays piercing cloud layers from above, starlight illumination, bioluminescent glow from cloud sea, cinematic lighting, deep depth of field, everything in sharp focus. 
Style: Unreal Engine 5 render, highly detailed face and hanfu embroidery, highly detailed clouds and floating architecture, celestial fairyland, Chinese mythology, 8k, masterpiece --ar 9:16 --style raw --s 750 --q 2\" -o \"d:/美图/output/forbidden-emperor_20260426/images\")", + "Bash(rm -f \"d:/美图/output/forbidden-emperor_20260426/images/\"scene_*.png)", + "Bash(node mj-image-generator.js \"3/4 body shot of a majestic deity with golden amber eyes and flowing auburn hair in ornate dark gold celestial hanfu, standing on a floating jade terrace railing. He looks upward with serene confidence. The upper two-thirds of frame: magnificent celestial night sky with vivid milky way, golden constellations mapped across the heavens, three floating jade islands with cascading starlight waterfalls plunging into a luminous cloud sea, mythical white cranes soaring between floating peaks, thousands of golden lotus lanterns drifting upward, distant celestial palace pagodas with curved golden roofs emerging from the cloud ocean. Bioluminescent lotus petals and golden dust particles floating through the entire scene. Warm gold and deep midnight blue palette with celestial silver. Lighting: Divine golden god rays piercing cloud layers from above, starlight illumination, bioluminescent glow from cloud sea, cinematic lighting, deep depth of field, everything in sharp focus. 
Style: Unreal Engine 5 render, highly detailed face and hanfu embroidery, highly detailed clouds and floating architecture, celestial fairyland, Chinese mythology, 8k, masterpiece --ar 9:16 --style raw --s 750 --q 2\" -o \"d:/美图/output/forbidden-emperor_20260426/images\" -r \"https://i.ibb.co/zW4tYYrN/047b6fcbf511.png,https://i.ibb.co/9HfJHQBW/6f8681e6a8e0.png,https://i.ibb.co/bjpg4TbD/ed6460f15d13.png\")", + "Bash(node mj-image-generator.js \"3/4 body shot of a golden-haired deity with warm amber eyes and a calm confident expression, wearing ornate dark gold celestial hanfu with glowing thread dragon patterns on the sleeves, standing on a floating jade terrace with carved railings. He gazes forward with serene authority, a faint knowing smile on his refined face. Near ground: the jade terrace edge crumbles into luminous mist, bioluminescent golden lotus flowers blooming from cracks in the ancient stone, orbs of warm light drifting up from the railing. Mid ground: three floating jade palaces with curved golden rooftops connected by rainbow stone bridges, starlight waterfalls cascading from island edges into the cloud sea below, a flock of white cranes soaring between floating peaks trailing golden ribbons of light. Far distance: an endless mountain range of cloud-wrapped peaks fading into silvery atmospheric haze, colossal ancient statues visible on distant summits, the cloud sea stretching to the horizon. Sky above: the entire upper third filled with a vivid milky way stretching diagonally, Leo constellation glowing brilliant gold with connecting lines visible, a single shooting star streaking across, aurora-like ribbons of gold and amber light rippling near the horizon. Warm gold and deep midnight blue palette with celestial amber. Lighting: Divine golden god rays piercing through cloud layers from above, starlight and bioluminescent glow from below, cinematic lighting, deep depth of field, everything in sharp focus. 
Style: Unreal Engine 5 render, highly detailed face and expression, highly detailed architecture and sky, celestial fairyland atmosphere, traditional Chinese mythology aesthetics, 8k, masterpiece --style raw --s 750 --q 2\" -o \"d:/美图/output/forbidden-emperor_20260426/images\" -r \"https://i.ibb.co/zW4tYYrN/047b6fcbf511.png,https://i.ibb.co/9HfJHQBW/6f8681e6a8e0.png,https://i.ibb.co/bjpg4TbD/ed6460f15d13.png\")", + "Bash(node mj-image-generator.js \"3/4 body shot of a strikingly handsome golden-haired celestial guardian with large bright amber eyes, high nose bridge, defined cheekbones, strong jawline and a calm confident expression, wearing ornate dark gold celestial hanfu with glowing thread dragon patterns on the sleeves, standing on a floating jade terrace with carved railings. He gazes forward with serene authority, warm amber eyes holding quiet power, a faint knowing smile on his refined face. Near ground: the jade terrace edge crumbles into luminous mist, bioluminescent golden lotus flowers blooming from cracks in the ancient stone, orbs of warm light drifting up from the railing. Mid ground: three floating jade palaces with curved golden rooftops connected by rainbow stone bridges, starlight waterfalls cascading from island edges into the cloud sea below, a flock of white cranes soaring between floating peaks trailing golden ribbons of light. Far distance: an endless mountain range of cloud-wrapped peaks fading into silvery atmospheric haze, colossal ancient statues visible on distant summits, the cloud sea stretching to the horizon. Sky above: the entire upper third filled with a vivid milky way stretching diagonally, Leo constellation glowing brilliant gold with connecting lines visible, a single shooting star streaking across, aurora-like ribbons of gold and amber light rippling near the horizon. Warm gold and deep midnight blue palette with celestial amber. 
Lighting: Divine golden god rays piercing through cloud layers from above, starlight and bioluminescent glow from below, cinematic lighting, deep depth of field, everything in sharp focus. Style: Unreal Engine 5 render, highly detailed face and expression, highly detailed architecture and sky, celestial fairyland atmosphere, traditional Chinese mythology aesthetics, 8k, masterpiece --style raw --s 750 --q 2\" -o \"d:/美图/output/forbidden-emperor_20260426/images\" -r \"https://i.ibb.co/zW4tYYrN/047b6fcbf511.png,https://i.ibb.co/9HfJHQBW/6f8681e6a8e0.png,https://i.ibb.co/bjpg4TbD/ed6460f15d13.png\")", + "Bash(node mj-image-generator.js \"3/4 body shot of a strikingly handsome golden-haired celestial guardian with large bright amber eyes, high nose bridge, defined cheekbones, strong jawline and a calm confident expression, wearing ornate dark gold celestial hanfu with glowing thread dragon patterns on the sleeves, standing at the edge of a colossal floating jade terrace thousands of meters above an infinite cloud ocean. He gazes forward with serene authority, a faint knowing smile on his refined face. Near ground: the vast jade terrace stretching far behind him with carved railings, bioluminescent golden lotus flowers blooming from cracks in the ancient stone, orbs of warm light drifting up from the crumbling edge where the platform dissolves into luminous mist thousands of meters down. Mid ground: three colossal floating jade palaces with towering golden rooftops each the size of a mountain, connected by sprawling rainbow stone bridges spanning vast cloud canyons, thousand-meter starlight waterfalls cascading from island edges into the boundless cloud sea below, a flock of white cranes soaring between floating peaks appearing as tiny specks against the immense palaces. 
Far distance: an endless mountain range of cloud-wrapped peaks stretching to the horizon and vanishing into atmospheric haze, colossal ancient guardian statues hundreds of meters tall visible on distant summits, the boundless cloud ocean filling the entire background like a silver sea meeting the sky at the vanishing point. Sky above: the entire upper half of the frame dominated by a vivid milky way stretching from horizon to horizon, Leo constellation blazing brilliant gold with connecting lines visible across the heavens, a shooting star streaking across the galactic core, aurora-like ribbons of gold and amber light rippling across millions of stars near the horizon. Warm gold and deep midnight blue palette with celestial amber. Lighting: Volumetric golden god rays flooding the entire scene from above, light scattering through clouds stretching for miles, starlight and bioluminescent glow illuminating the vast cloud canyons below, atmospheric haze creating depth layers, cinematic lighting, deep depth of field, everything in sharp focus. Style: Unreal Engine 5 render, epic cinematic scale, monumental architecture, vast celestial landscape, breathtaking scope, highly detailed face and expression, traditional Chinese mythology aesthetics, 8k, masterpiece. --ar 9:16 --style raw --s 750 --q 2\" -o \"d:/美图/output/forbidden-emperor_20260426/images\" -r \"https://i.ibb.co/zW4tYYrN/047b6fcbf511.png,https://i.ibb.co/9HfJHQBW/6f8681e6a8e0.png,https://i.ibb.co/bjpg4TbD/ed6460f15d13.png\")", + "Bash(node mj-image-generator.js \"3/4 body shot of a strikingly handsome golden-haired celestial lord with large bright amber eyes, high nose bridge, defined cheekbones, strong jawline and a calm confident expression, wearing ornate dark gold celestial hanfu with glowing thread patterns on the sleeves, standing at the edge of a colossal floating jade terrace thousands of meters above an infinite cloud ocean. 
He gazes forward with serene authority, a faint knowing smile on his refined face. Near ground: the vast jade terrace stretching far behind him with carved railings, bioluminescent golden lotus flowers blooming from cracks in the ancient stone, orbs of warm light drifting up from the crumbling edge where the platform dissolves into luminous mist thousands of meters down. Mid ground: three colossal floating jade palaces with towering golden rooftops each the size of a mountain, connected by sprawling rainbow stone bridges spanning vast cloud canyons, thousand-meter starlight waterfalls cascading from island edges into the boundless cloud sea below, a flock of white cranes soaring between floating peaks appearing as tiny specks against the immense palaces. Far distance: an endless mountain range of cloud-wrapped peaks stretching to the horizon and vanishing into atmospheric haze, colossal ancient guardian statues hundreds of meters tall visible on distant summits, the boundless cloud ocean filling the entire background like a silver sea meeting the sky at the vanishing point. Sky above: the entire upper half of the frame dominated by a vivid milky way stretching from horizon to horizon, Leo constellation blazing brilliant gold with connecting lines visible across the heavens, a shooting star streaking across the galactic core, aurora-like ribbons of gold and amber light rippling across millions of stars near the horizon. Warm gold and deep midnight blue palette with celestial amber. Lighting: Volumetric golden god rays flooding the entire scene from above, light scattering through clouds stretching for miles, starlight and bioluminescent glow illuminating the vast cloud canyons below, atmospheric haze creating depth layers, cinematic lighting, deep depth of field, everything in sharp focus. 
Style: Unreal Engine 5 render, epic cinematic scale, monumental architecture, vast celestial landscape, breathtaking scope, highly detailed face and expression, traditional Chinese mythology aesthetics, 8k, masterpiece. --ar 9:16 --style raw --s 750 --q 2\" -o \"d:/美图/output/forbidden-emperor_20260426/images\" -r \"https://i.ibb.co/zW4tYYrN/047b6fcbf511.png,https://i.ibb.co/9HfJHQBW/6f8681e6a8e0.png,https://i.ibb.co/bjpg4TbD/ed6460f15d13.png\")", + "Bash(node imgbb_upload.js \"d:/美图/.claude/skills/video-from-script/accounts/military/references/popart_br.png\")", + "Bash(node imgbb_upload.js \"d:/美图/.claude/skills/video-from-script/accounts/military/references/manga_tl.png\")", + "Bash(node imgbb_upload.js \"d:/美图/.claude/skills/video-from-script/accounts/military/references/manga_tr.png\")", + "Bash(node imgbb_upload.js \"d:/美图/.claude/skills/video-from-script/accounts/military/references/manga_bl.png\")", + "Bash(node imgbb_upload.js \"d:/美图/.claude/skills/video-from-script/accounts/military/references/manga_br.png\")", + "Bash(node imgbb_upload.js upload \"d:/美图/.claude/skills/video-from-script/accounts/military/references/grunge_tl.png\")", + "Bash(node imgbb_upload.js upload \"d:/美图/.claude/skills/video-from-script/accounts/military/references/grunge_tr.png\")", + "Bash(node imgbb_upload.js upload \"d:/美图/.claude/skills/video-from-script/accounts/military/references/grunge_bl.png\")", + "Bash(node imgbb_upload.js upload \"d:/美图/.claude/skills/video-from-script/accounts/military/references/grunge_br.png\")", + "Bash(node imgbb_upload.js upload \"d:/美图/.claude/skills/video-from-script/accounts/military/references/popart_tl.png\")", + "Bash(node imgbb_upload.js upload \"d:/美图/.claude/skills/video-from-script/accounts/military/references/popart_tr.png\")", + "Bash(node imgbb_upload.js upload \"d:/美图/.claude/skills/video-from-script/accounts/military/references/popart_bl.png\")", + "Bash(node imgbb_upload.js upload 
\"d:/美图/.claude/skills/video-from-script/accounts/military/references/popart_br.png\")", + "Bash(node imgbb_upload.js upload \"d:/美图/.claude/skills/video-from-script/accounts/military/references/manga_tl.png\")", + "Bash(node imgbb_upload.js upload \"d:/美图/.claude/skills/video-from-script/accounts/military/references/manga_tr.png\")", + "Bash(node imgbb_upload.js upload \"d:/美图/.claude/skills/video-from-script/accounts/military/references/manga_bl.png\")", + "Bash(node imgbb_upload.js upload \"d:/美图/.claude/skills/video-from-script/accounts/military/references/manga_br.png\")", + "Bash(node gemini-image-generator.js edit \"calculating military officer in dark uniform, eyes slowly narrowing toward camera, jaw tightening imperceptibly, face partially obscured by shadows dissolving into darkness behind, oppressive atmosphere closing in, single cold rim light from above, extreme close-up, 暗黑漫画风格,深紫色与焦橙色双色调,纯黑背景,半调网点纹理,做旧丝网印刷质感,强戏剧性侧光,大面积阴影剪影,都市悬疑电影构图,无文字,无水印,竖版构图 9:16,dark noir illustration, deep purple and burnt orange duotone, halftone dot grain, gritty risograph print, high contrast chiaroscuro, bold black shadows, urban thriller aesthetic, editorial graphic novel style, no text, no watermark\" -i \"../accounts/military/references/popart_tl.png,../accounts/military/references/grunge_tl.png\" -o \"d:/美图/output/military_test\" -r 9:16)", + "Bash(node gemini-image-generator.js edit \"calculating military officer, eyes narrowing, jaw tightening, dark noir comic style, deep purple and burnt orange duotone on black background, halftone dot texture, high contrast chiaroscuro, editorial graphic novel style, no text, no watermark\" -i \"d:/美图/.claude/skills/video-from-script/accounts/military/references/popart_tl.png\" -o \"d:/美图/output/military_test\" -r 9:16)", + "Bash(node mj-image-generator.js \"calculating military officer in dark uniform, eyes slowly narrowing toward camera, jaw tightening imperceptibly, blurred silhouettes dissolving in background, single cold rim 
light from above, extreme close-up composition, dark noir comic style, limited color palette of deep purple and burnt orange on black background, halftone dot texture, gritty screen print effect, high contrast dramatic lighting, bold graphic shadows, cinematic close-up composition, editorial illustration, urban thriller aesthetic, no text, no watermark --ar 3:4 --style raw --q 2 --v 6.1 --sref https://i.ibb.co/Hf6wmL7c/af516be67570.png https://i.ibb.co/RGYcNj7z/578b290e10cf.png --sw 200\" -o \"d:/美图/output/military_test\" -a 9:16)", + "Bash(node mj-image-generator.js \"a military general standing in a war room, studying a strategic map, hand slowly clenching into fist, shoulders tensing with suppressed rage, shadows lengthening across the wall behind, dark noir comic style, limited color palette of deep purple and burnt orange on black background, halftone dot texture, gritty screen print effect, high contrast dramatic lighting, bold graphic shadows, cinematic composition, editorial illustration, urban thriller aesthetic, no text, no watermark --ar 3:4 --style raw --q 2 --v 6.1 --sref https://i.ibb.co/Hf6wmL7c/af516be67570.png --sw 80\" -o \"d:/美图/output/military_test\" -a 9:16)", + "Bash(node gemini-image-generator.js edit \"军事指挥官站在战争指挥室,俯瞰战略地图,手指缓缓收紧握拳,肩膀因压抑的怒火而紧绷,墙上的阴影在拉长,暗黑漫画风格,深紫色与焦橙色双色调,纯黑背景,半调网点纹理,做旧丝网印刷质感,强戏剧性侧光,大面积阴影剪影,都市悬疑电影构图,无文字,无水印,竖版构图 9:16,dark noir illustration, deep purple and burnt orange duotone, halftone dot grain, gritty risograph print, high contrast chiaroscuro, bold black shadows, urban thriller aesthetic, editorial graphic novel style, no text, no watermark\" -i \"d:/美图/.claude/skills/video-from-script/accounts/military/references/grunge_tr.png\" -o \"d:/美图/output/military_test\" -r 9:16)", + "Bash(python generate_audio.py --help)", + "Bash(mkdir -p \"d:/美图/output/military_20260427/audio\")", + "Bash(python generate_audio.py \"d:/美图/output/military_20260427/tts_input.json\")", + "Bash(grep -E \"\\\\.jpeg$|\\\\.png$\")", + "Bash(node 
imgbb_upload.js \"d:/美图/output/military_20260427/edited_2026-04-27T12-18-27-257Z_d837oh.jpeg\")", + "Bash(node imgbb_upload.js upload \"d:/美图/output/military_20260427/edited_2026-04-27T12-18-27-257Z_d837oh.jpeg\")", + "Bash(python -m edge_tts --text \"测试一下中文语音\" --voice zh-CN-XiaoxiaoNeural --write-media \"d:/美图/output/military_20260427/audio/test.mp3\")", + "Bash(node imgbb_upload.js batch \"d:/美图/output/military_20260427\" --pattern \"edited_*.jpeg,edited_*.png\" --output \"d:/美图/output/military_20260427/urls.json\")", + "Bash(node capcut_assemble.js --help)", + "Bash(node capcut_assemble.js --input d:/美图/output/military_20260427 --manifest d:/美图/output/military_20260427/manifest.json --mode images --format 9:16 --duration 8 --voiceover false --subtitles true --effects '录制边框 III' --filter 电影感:40)", + "Bash(node capcut_assemble.js --input \"d:/美图/output/military_20260427\" --manifest \"d:/美图/output/military_20260427/manifest.json\" --mode images --format 9:16 --duration 8 --voiceover false --subtitles true --effects \"录制边框 III\" --filter \"电影感:40\")", + "Bash(node sync-to-jianying.js \"http://capcut.muyetools.cn/openapi/capcut-mate/v1/get_draft?draft_id=2026042721024314a64e43\" --name \"军事账号_男性意识\")", + "Bash(node capcut_assemble.js --input \"d:/美图/output/military_20260427\" --manifest \"d:/美图/output/military_20260427/manifest.json\" --mode images --format 9:16 --duration 8 --voiceover false --subtitles true --effects \"录制边框 III\")", + "Bash(curl -s -X POST \"https://capcut-mate.jcaigc.cn/openapi/capcut-mate/v1/create_draft\" -H \"Content-Type: application/json\" -d '{\"width\":1080,\"height\":1920}')", + "Bash(python3 -m json.tool)", + "Bash(curl -s -X POST https://capcut-mate.jcaigc.cn/openapi/capcut-mate/v1/add_images -H 'Content-Type: application/json' -d '{ *)", + "Bash(curl -s -X POST \"http://localhost:30000/openapi/capcut-mate/v1/create_draft\" -H \"Content-Type: application/json\" -d '{\"width\":1080,\"height\":1920}')", + "Bash(curl -s -X POST 
http://localhost:30000/openapi/capcut-mate/v1/add_images -H 'Content-Type: application/json' -d '{ *)", + "Bash(curl -s \"http://localhost:30000/openapi/capcut-mate/v1/get_draft?draft_id=20260427231607ffced79b\")", + "Bash(curl -s \"http://localhost:30000/output/draft/20260427231607ffced79b/draft_content.json\")", + "Bash(python3 -c \"import sys,json; d=json.load\\(sys.stdin\\); [print\\(m.get\\('path',''\\)\\) for m in d.get\\('materials',{}\\).get\\('videos',[]\\)]\")", + "Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); [print\\(m.get\\('path',''\\)\\) for m in d.get\\('materials',{}\\).get\\('videos',[]\\)]\")", + "Bash(curl -s \"http://localhost:30000/output/draft/2026042723205726f7bf82/draft_content.json\")", + "Bash(curl -s \"http://localhost:30000/output/draft/2026042723242157ccad6b/draft_content.json\")" + ], + "additionalDirectories": [ + "D:\\美图\\.claude\\skills\\video-from-script\\scripts", + "d:\\美图\\.claude\\skills\\image-generator", + "d:\\美图\\.claude\\skills\\video-from-script\\references", + "d:\\美图\\output\\forbidden-emperor_20260426\\images", + "d:\\美图\\output\\military_20260427", + "D:\\电脑管家迁移文件\\JianyingPro\\User Data\\Projects\\com.lveditor.draft\\20260427230746cef99bae" + ] + } +} diff --git a/.claude/skills/capcut/SKILL.md b/.claude/skills/capcut/SKILL.md new file mode 100644 index 0000000..8e8eff0 --- /dev/null +++ b/.claude/skills/capcut/SKILL.md @@ -0,0 +1,226 @@ +--- +name: capcut +description: 剪映/CapCut 自动化。通过 CapCut Mate API 实现草稿创建、素材导入、字幕添加、特效动画及云端渲染。支持两种模式:(1) API模式 — 单个接口调用;(2) 成片模式 — 素材目录一键组装(TTS配音+字幕+特效+BGM+云渲染)。触发词:剪映、CapCut、视频编辑、图片成片、视频合成、成片、CapCut组装、图片轮播、配音+成片。 +--- + +# CapCut Mate 剪映自动化 + +通过 CapCut Mate API 自动化剪映草稿创建、素材导入及云端渲染。 + +## 前置条件 + +使用远程 CapCut Mate API(`config.capcutMateApiBase`),无需本地部署。 +路径和密钥统一配置在 `skills/config.json`。 + +视频/音频素材通过 OSS 上传获取公网 URL(API 需要可下载的 URL)。 +上传工具:`node .claude/skills/video-from-script/scripts/oss-upload.js` + +**成片模式额外依赖:** + +``` +1. 
npm 依赖 → cd .claude/skills/video-from-script/scripts && npm install +2. TTS 配音 → 阿里云 Qwen-TTS(config.json 中配置 ttsApiKey) +``` + +--- + +## 两种使用模式 + +### 模式一:API 模式(单接口调用) + +用户给具体指令(加字幕、加特效等),Claude 直接调单个 API。 + +典型工作流: + +``` +1. create_draft(width, height) → draft_url +2. add_videos / add_images → 添加素材 +3. add_audios → 添加配音/音乐 +4. add_captions → 添加字幕(支持关键词高亮) +5. add_effects / add_keyframes → 特效和动画 +6. save_draft → 保存 +7. gen_video → 提交云端渲染 +8. gen_video_status → 轮询进度 +``` + +### 模式二:成片模式(一键组装) + +用户给素材目录 + manifest.json,走 CLI 自动流水线。 + +```bash +node .claude/skills/video-from-script/scripts/capcut_assemble.js \ + --input ./output/batch_xxx \ + --manifest ./output/batch_xxx/manifest.json \ + --mode images|videos \ + --subtitles true \ + --bgm "https://example.com/bgm.mp3" \ + --effects "录制边框 III,雪花" \ + --filter "电影感:50" \ + --format 9:16 \ + --output ./output/final/ +``` + +| 参数 | 必填 | 说明 | +|------|------|------| +| `--input` | 是 | 素材目录 | +| `--manifest` | 是 | manifest.json 路径 | +| `--mode` | 是 | `images` 或 `videos` | +| `--subtitles` | 否 | 字幕(默认 true) | +| `--bgm` | 否 | 背景音乐 URL | +| `--effects` | 否 | 特效,逗号分隔 | +| `--filter` | 否 | 滤镜名称:强度 | +| `--format` | 否 | 画幅(默认账号配置) | + +**完整流程:** + +```dot +digraph capcut_assembly { + rankdir=LR + node [shape=box, style=filled, fillcolor="#f5f5f5", fontsize=11] + edge [fontsize=10] + + input [label="素材 + manifest.json", shape=folder, fillcolor="#e3f2fd"] + + step1 [label="1. TTS 配音(可选)\nnode qwen-tts.js\n→ WAV + 时长"] + step2 [label="2. 上传图片到 OSS\n本地图片 → 公网 URL"] + step3 [label="3. 创建草稿\ncreate_draft\n→ draft_url"] + step4 [label="4. 导入素材+音频+字幕+特效\nadd_images / add_videos\nadd_audios / add_captions\nadd_effects"] + step5 [label="5. 保存草稿\nsave_draft"] + step6 [label="6. 同步本地剪映\nsync-to-jianying.js\n下载→重写→注册→扫描"] + step7 [label="7.
云渲染(可选)\ngen_video → 成片 MP4", fillcolor="#e8f5e9"] + + input -> step1 -> step2 -> step3 -> step4 -> step5 -> step6 -> step7 +} +``` + +**模式选择规则:** +- 用户提到"成片"、"组装"、"图片轮播"、"配音+成片" → 成片模式 +- 用户给具体操作指令(加字幕、加特效等) → API 模式 +- 不确定时 → 询问用户 + +--- + +## 通用约定 + +- **时间单位**: 微秒(1秒 = 1,000,000) +- **xxx_infos 参数**: JSON **字符串**(需 `JSON.stringify`),非对象 +- **draft_url**: 创建草稿后返回,后续所有操作都需要传入 + +--- + +## 核心接口速查 + +| 接口 | 用途 | 关键参数 | +|------|------|----------| +| `create_draft` | 创建草稿 | width, height | +| `add_videos` | 添加视频 | video_infos (URL+时长+转场) | +| `add_images` | 添加图片 | image_infos (URL+动画+转场) | +| `add_audios` | 添加音频 | audio_infos (URL+时长) | +| `add_captions` | 添加字幕 | captions (文本+关键词高亮+时间线) | +| `add_effects` | 添加特效 | effect_infos (名称+时间线) | +| `add_keyframes` | 关键帧动画 | keyframes (segment_id+属性+值) | +| `easy_create_material` | 一站式添加 | video/img/audio/text | +| `gen_video` | 云端渲染 | draft_url + apiKey | + +## 工具类接口 + +| 接口 | 入参 | 产出 | +|------|------|------| +| `audio_timelines` | `links: [mp3 URLs]` | 自动算时间线 | +| `audio_infos` | mp3_urls + timelines | add_audios 的 audio_infos | +| `caption_infos` | texts + timelines + keywords | add_captions 的 captions | +| `effect_infos` | effects[] + timelines | add_effects 的 effect_infos | +| `video_infos` | URLs + timelines | add_videos 的 video_infos | +| `imgs_infos` | URLs + timelines | add_images 的 image_infos | +| `get_audio_duration` | mp3_url | 音频时长(微秒) | +| `get_text_animations` | 无 | 可用文本动画列表 | +| `get_image_animations` | 无 | 可用图片动画列表 | +| `search_sticker` | 关键词 | 贴纸列表(含 sticker_id) | +| `get_url` | 文本 | 提取 URL | + +## 完整 API 参数文档 + +详细的请求参数、响应格式、字段说明见 [api-reference.md](references/api-reference.md) + +--- + +## TTS 配音(成片模式用) + +使用阿里云 Qwen-TTS(Node.js),替代原 Edge-TTS。 + +```bash +# 准备输入 +echo '{"segments":[{"id":1,"text":"文案"}],"voice":"Cherry","output_dir":"./audio"}' > input.json + +# 生成 +node .claude/skills/video-from-script/scripts/qwen-tts.js input.json +# → stdout: 
{"segments":[{"id":1,"audio":"./audio/seg_001.wav","duration":3.456}]} +``` + +配置在 `skills/config.json`:`ttsApiKey`(必填)、`ttsModel`、`ttsVoice`、`ttsLanguage`。 + +推荐音色: + +| 音色 | 风格 | 适用 | +|------|------|------| +| `Cherry` | 阳光积极小姐姐 | 通用、生活 | +| `Ethan` | 阳光温暖男声 | 科技、教程 | +| `Vincent` | 沙哑烟嗓 | 军事、纪录 | +| `Neil` | 新闻主持人 | 新闻、财经 | +| `Moon` | 率性帅气男声 | 解说、潮流 | + +--- + +## 账号配置 + +`account.json` 中 CapCut 相关字段: + +```json +{ + "capcut": { + "effects": ["录制边框 III"], + "filter": "电影感:40", + "subtitleStyle": { + "fontSize": 36, + "color": "#FFFFFF", + "highlightColor": "#FF6B35", + "bold": true + }, + "defaultBGM": "https://example.com/bgm.mp3" + } +} +``` + +--- + +## 图片动画预设 + +| 动画 | 说明 | 适用 | +|------|------|------| +| Ken Burns (zoom-in) | 1.0→1.1 缓慢放大 | 默认 | +| Ken Burns (pan-left) | 右→左平移 | 风景 | +| Ken Burns (pan-right) | 左→右平移 | 风景 | +| 缩放弹出 | 0.8→1.0 | 强调 | + +--- + +## 质量要求 + +- 字幕与文案对应正确,关键词高亮醒目 +- 图片动画流畅(Ken Burns 幅度 1.0→1.1) +- BGM 音量不盖过配音(配音为主、BGM 为辅) +- 转场自然(无黑帧、无跳帧) +- 底部字幕区不被遮挡 +- 总时长 30-90 秒(保证完播率) +- 9:16 竖屏适配抖音/快手,16:9 横屏适配 B 站 + +--- + +## 成片模式详细参考 + +完整步骤说明、API 调用细节见 [assembly-guide.md](references/assembly-guide.md) + +## index.js 封装 + +当前 `index.js` 只封装了 3 个接口(createDraft, addVideos, genVideo)。 +调用其他接口时,直接用 axios POST 对应的 API 路径即可。 diff --git a/.claude/skills/capcut/references/api-reference.md b/.claude/skills/capcut/references/api-reference.md new file mode 100644 index 0000000..c728315 --- /dev/null +++ b/.claude/skills/capcut/references/api-reference.md @@ -0,0 +1,459 @@ +# CapCut Mate API 完整参数参考 + +所有接口基础 URL:`config.capcutMateApiBase`(如 `http://capcut.muyetools.cn/openapi/capcut-mate/v1`) +时间单位:**微秒**(1秒 = 1,000,000) +`xxx_infos` 参数均为 **JSON 字符串**(需 `JSON.stringify`) + +--- + +## 目录 + +- [一、草稿管理](#一草稿管理) +- [二、视频素材](#二视频素材) +- [三、音频处理](#三音频处理) +- [四、文本字幕](#四文本字幕) +- [五、特效与动画](#五特效与动画) +- [六、视频渲染](#六视频渲染) +- [七、快速工具](#七快速工具) + +--- + +## 一、草稿管理 + +### create_draft + +```json +// POST /create_draft +{ "width": 1080, "height": 1920 } + +// Response 
+{ + "draft_url": "http://capcut.muyetools.cn/openapi/capcut-mate/v1/get_draft?draft_id=20251230xxx", + "tip_url": "" +} +``` + +### save_draft + +```json +// POST /save_draft +{ "draft_url": "..." } + +// Response +{ "draft_url": "...", "message": "成功" } +``` + +### get_draft + +```json +// GET /get_draft?draft_id=xxx +// Response: 返回草稿文件列表和详细信息 +``` + +--- + +## 二、视频素材 + +### add_videos + +```json +// POST /add_videos +{ + "draft_url": "...", + "video_infos": "[{\"video_url\":\"https://...\",\"width\":1920,\"height\":1080,\"start\":0,\"end\":3000000,\"duration\":6000000,\"mask\":\"\",\"transition\":\"\",\"transition_duration\":500000,\"volume\":1}]", + "alpha": 1, + "scale_x": 1, + "scale_y": 1, + "transform_x": 0, + "transform_y": 0, + "scene_timelines": [] +} + +// Response +{ + "draft_url": "...", + "video_ids": ["9b9f47126f384a6f95434cdc3fed5b7c"], + "segment_ids": ["20285056b4194e2c8944b587ceb592a7"], + "track_id": "720274203a11442280b64570caf362c4" +} +``` + +**全局参数:** + +| 参数 | 类型 | 必填 | 说明 | +|------|------|------|------| +| draft_url | string | 是 | 草稿 URL | +| video_infos | string(JSON) | 是 | 视频信息数组(JSON 字符串) | +| alpha | number | 否 | 透明度 0-1,默认 1 | +| scale_x | number | 否 | 水平缩放,默认 1 | +| scale_y | number | 否 | 垂直缩放,默认 1 | +| transform_x | number | 否 | 水平位移,默认 0 | +| transform_y | number | 否 | 垂直位移,默认 0 | + +**video_infos 元素字段:** + +| 字段 | 类型 | 说明 | +|------|------|------| +| video_url | string | 视频 URL | +| width | number | 宽度(像素) | +| height | number | 高度(像素) | +| start | number | 起始时间(微秒) | +| end | number | 结束时间(微秒) | +| duration | number | 总时长(微秒) | +| mask | string | 遮罩名称 | +| transition | string | 转场名称 | +| transition_duration | number | 转场时长(微秒) | +| volume | number | 音量 0-1 | + +### add_images + +```json +// POST /add_images +{ + "draft_url": "...", + "image_infos": 
"[{\"image_url\":\"https://...\",\"width\":1920,\"height\":1080,\"start\":0,\"end\":5000000,\"duration\":5000000,\"animation\":\"淡入淡出\",\"transition\":\"溶解\",\"transition_duration\":500000}]", + "alpha": 1, + "scale_x": 1, + "scale_y": 1, + "transform_x": 0, + "transform_y": 0 +} + +// Response +{ + "draft_url": "...", + "image_ids": ["fa67760440104ca3adacdfd694fc25da"], + "segment_ids": ["2d19f38d5f544eecad18847e5eac1745"], + "segment_infos": [{"id": "xxx", "start": 0, "end": 5000000}], + "track_id": "38c8dfee92374137998715329494a8f9" +} +``` + +**image_infos 元素字段:** + +| 字段 | 类型 | 说明 | +|------|------|------| +| image_url | string | 图片 URL | +| width | number | 宽度(像素) | +| height | number | 高度(像素) | +| start | number | 起始时间(微秒) | +| end | number | 结束时间(微秒) | +| duration | number | 展示时长(微秒) | +| animation | string | 动画名称(如 "淡入淡出") | +| transition | string | 转场名称(如 "溶解") | +| transition_duration | number | 转场时长(微秒) | + +### add_sticker + +```json +// POST /add_sticker +{ + "draft_url": "...", + "sticker_id": "7326810673609018675", + "start": 0, + "end": 5000000, + "scale": 1, + "transform_x": 0, + "transform_y": 0 +} + +// Response +{ + "draft_url": "...", + "sticker_id": "7326810673609018675", + "segment_id": "7902e009fcfb44768f6f73e9432b5d5b", + "track_id": "10b612824bcd4dd882ab764d145dd7ce", + "duration": 5000000 +} +``` + +--- + +## 三、音频处理 + +### add_audios + +```json +// POST /add_audios +{ + "draft_url": "...", + "audio_infos": "[{\"audio_url\":\"https://...\",\"duration\":23184000,\"end\":23184000,\"start\":0}]" +} + +// Response +{ + "draft_url": "...", + "audio_ids": ["9000bc9efb744c3196fb1d225993f43d"], + "track_id": "a2d21e5a343c4f1eba3c9234bea16658" +} +``` + +**audio_infos 元素字段:** + +| 字段 | 类型 | 说明 | +|------|------|------| +| audio_url | string | 音频文件 URL | +| start | number | 起始时间(微秒) | +| end | number | 结束时间(微秒) | +| duration | number | 音频总时长(微秒) | + +### get_audio_duration + +```json +// POST /get_audio_duration +{ "mp3_url": "https://..." 
} + +// Response +{ "duration": 284891429, "message": "成功" } +``` + +--- + +## 四、文本字幕 + +### add_captions + +```json +// POST /add_captions +{ + "draft_url": "...", + "captions": "[{\"start\":0,\"end\":10000000,\"text\":\"你好,剪映\",\"keyword\":\"好\",\"keyword_color\":\"#457616\",\"keyword_font_size\":15}]", + "font_size": 15, + "text_color": "#ffffff", + "alignment": 1, + "bold": false, + "italic": false, + "underline": false, + "has_shadow": false, + "letter_spacing": 0, + "line_spacing": 0, + "alpha": 1, + "scale_x": 1, + "scale_y": 1, + "transform_x": 0, + "transform_y": 0, + "style_text": 0 +} + +// Response +{ + "draft_url": "...", + "text_ids": ["52ae035e01584adf8d052533d83948ed"], + "segment_ids": ["1b1102e3d2d14b9eabac064bbe64d2bf"], + "segment_infos": [{"id": "xxx", "start": 0, "end": 10000000}], + "track_id": "7ffdf0f7d67e4d8caff5531f5873d26f" +} +``` + +**captions 元素字段:** + +| 字段 | 类型 | 说明 | +|------|------|------| +| start | number | 起始时间(微秒) | +| end | number | 结束时间(微秒) | +| text | string | 字幕文本 | +| keyword | string | 高亮关键词 | +| keyword_color | string | 关键词颜色(如 "#ff7100") | +| keyword_font_size | number | 关键词字号 | + +**全局样式参数:** + +| 参数 | 类型 | 说明 | +|------|------|------| +| font_size | number | 字号 | +| text_color | string | 文字颜色 | +| alignment | number | 对齐(1=居中) | +| bold | boolean | 粗体 | +| italic | boolean | 斜体 | +| underline | boolean | 下划线 | +| has_shadow | boolean | 文字阴影 | +| letter_spacing | number | 字间距 | +| line_spacing | number | 行间距 | +| style_text | number | 样式模式 | + +### add_text_style + +```json +// POST /add_text_style +{ + "text": "快乐|顶级思维", + "keyword": "五个快乐到死的顶级思维", + "keyword_color": "#ff7100", + "keyword_font_size": 15, + "font_size": 15 +} + +// Response +{ + "text_style": "{\"text\":\"快乐|顶级思维\",\"styles\":[{\"fill\":{\"content\":{\"solid\":{\"color\":[1.0,1.0,1.0]}}},\"range\":[0,7],\"size\":15}]}" +} +``` + +--- + +## 五、特效与动画 + +### add_effects + +```json +// POST /add_effects +{ + "draft_url": "...", + "effect_infos": 
"[{\"effect_title\":\"录制边框 III\",\"start\":0,\"end\":5000000}]" +} + +// Response +{ + "draft_url": "...", + "effect_ids": ["34c70fba3619444fa897110b40b39386"], + "segment_ids": ["23e0c0900c69427f82e44243183706c9"], + "track_id": "2ffea3839d304f83ae29dadc43a18227" +} +``` + +**effect_infos 元素字段:** + +| 字段 | 类型 | 说明 | +|------|------|------| +| effect_title | string | 特效名称(如 "录制边框 III"、"雪花"、"红包来了") | +| start | number | 起始时间(微秒) | +| end | number | 结束时间(微秒) | + +### add_filters + +```json +// POST /add_filters +{ + "draft_url": "...", + "filter_infos": "[{\"filter_title\":\"清透感\",\"start\":0,\"end\":5000000,\"intensity\":100.0}]" +} +``` + +| 字段 | 类型 | 说明 | +|------|------|------| +| filter_title | string | 滤镜名称 | +| start | number | 起始时间(微秒) | +| end | number | 结束时间(微秒) | +| intensity | number | 强度 0-100 | + +### add_keyframes + +```json +// POST /add_keyframes +{ + "draft_url": "...", + "keyframes": "[{\"segment_id\":\"e1933f126437421bb52abdc56f062266\",\"property\":\"KFTypePositionX\",\"offset\":0.5,\"value\":-0.1}]" +} + +// Response +{ "draft_url": "..." } +``` + +**keyframes 元素字段:** + +| 字段 | 类型 | 说明 | +|------|------|------| +| segment_id | string | 目标素材 segment ID | +| property | string | 动画属性 | +| offset | number | 关键帧位置(0-1,相对素材时长比例) | +| value | number | 属性值 | + +**property 可选值:** + +| 值 | 说明 | +|------|------| +| KFTypePositionX | 水平位移 | +| KFTypePositionY | 垂直位移 | +| KFTypeScaleX | 水平缩放 | +| KFTypeScaleY | 垂直缩放 | +| KFTypeRotation | 旋转角度 | + +### add_masks + +```json +// POST /add_masks +{ + "draft_url": "...", + "name": "线性", + "segment_ids": ["beb4adbea83d41d08021cf4e8f219206"] +} + +// Response +{} +``` + +### get_effects — 获取特效列表 + +```json +// POST /get_effects +{ "mode": 0 } + +// Response +{ + "effects": [ + { "effect_id": "1183068", "name": "1998", "has_params": true, "is_vip": false }, + ... 
+ ] +} +``` + +--- + +## 六、视频渲染 + +### gen_video + +```json +// POST /gen_video +{ + "draft_url": "...", + "apiKey": "559a84a5-a9e9-4ade-a26b-e80b732754d6" +} + +// Response +{ "code": 0, "message": "视频生成任务已提交,请使用draft_url查询进度" } +``` + +### gen_video_status + +```json +// POST /gen_video_status +{ "draft_url": "..." } + +// Response +{ + "code": 0, + "status": "failed", + "progress": 0, + "video_url": "", + "error_message": "导出草稿失败: ...", + "created_at": "2026-01-31T21:09:15.104249", + "started_at": "2026-01-31T21:09:15.549183", + "completed_at": "2026-01-31T21:10:10.398727" +} +``` + +**status 值:** `processing` / `success` / `failed` + +--- + +## 七、快速工具 + +### easy_create_material — 一站式添加 + +```json +// POST /easy_create_material +{ + "draft_url": "...", + "video_url": "https://...", + "img_url": "https://...", + "audio_url": "https://...", + "text": "字幕内容", + "text_color": "#ffffff", + "font_size": 15, + "text_transform_y": 0 +} + +// Response +{ "draft_url": "...", "message": "成功" } +``` \ No newline at end of file diff --git a/.claude/skills/capcut/references/assembly-guide.md b/.claude/skills/capcut/references/assembly-guide.md new file mode 100644 index 0000000..90629ed --- /dev/null +++ b/.claude/skills/capcut/references/assembly-guide.md @@ -0,0 +1,257 @@ +# CapCut 成片组装 + +> 将图片/视频素材通过 CapCut 组装为成品视频。支持配音、字幕、特效、音乐、云渲染。 + +- **触发词**: 图片成片、视频合成、剪映成片、CapCut 渲染 + +--- + +## 前置条件 + +``` +1. CapCut Mate API 可达 → curl {config.capcutMateApiBase}/../docs + - 部署在 capcut.muyetools.cn(配置在 skills/config.json) +2. npm 依赖 → cd scripts && npm install +3. TTS 配音 → 阿里云 Qwen-TTS(配置在 config.json 的 ttsApiKey) +4. 
同步到本地剪映 → 纯 Node.js(sync-to-jianying.js),无需 Python/uv +``` + +所有路径和 API 地址统一从 `skills/config.json` 读取。 + +--- + +## CLI 接口(快速开始) + +```bash +node scripts/capcut_assemble.js \ + --input ./output/batch_xxx \ + --manifest ./output/batch_xxx/manifest.json \ + --mode images|videos \ + --subtitles true \ + --bgm "https://example.com/bgm.mp3" \ + --effects "录制边框 III,雪花" \ + --filter "电影感:50" \ + --format 9:16 \ + --output ./output/final/ +``` + +| 参数 | 必填 | 说明 | +|------|------|------| +| `--input` | 是 | 素材目录 | +| `--manifest` | 是 | manifest.json 路径 | +| `--mode` | 是 | `images` 或 `videos` | +| `--subtitles` | 否 | 是否添加字幕(默认 true) | +| `--bgm` | 否 | 背景音乐 URL(默认取账号配置) | +| `--effects` | 否 | 特效名称,逗号分隔 | +| `--filter` | 否 | 滤镜名称:强度 | +| `--format` | 否 | 画幅(默认取账号配置) | +| `--output` | 否 | 输出目录 | + +--- + +## 完整流程 + +```dot +digraph assembly_flow { + rankdir=LR + node [shape=box, style=filled, fillcolor="#f5f5f5", fontsize=11] + + input [label="素材 + manifest.json", shape=folder, fillcolor="#e3f2fd"] + step1 [label="1. TTS 配音(可选)\nnode qwen-tts.js\n→ WAV + 时长"] + step2 [label="2. 上传图片到 OSS\n本地图片 → 公网 URL"] + step3 [label="3. 创建草稿\ncreate_draft → draft_url"] + step4 [label="4. 导入素材+音频+字幕+特效"] + step5 [label="5. 保存草稿\nsave_draft"] + step6 [label="6. 同步本地剪映\n下载→重写→注册→扫描"] + step7 [label="7. 云渲染(可选)\ngen_video → 成片 MP4", fillcolor="#e8f5e9"] + + input -> step1 -> step2 -> step3 -> step4 -> step5 -> step6 -> step7 +} +``` + +--- + +## 详细步骤 + +### 1.
TTS 配音(可选) + +使用阿里云 Qwen-TTS 进行语音合成(Node.js,无需 Python)。 + +```bash +# 准备输入 JSON +echo '{"segments":[{"id":1,"text":"第一段文案"},{"id":2,"text":"第二段文案"}],"voice":"Cherry","output_dir":"./audio"}' > input.json + +# 批量生成 +node scripts/qwen-tts.js input.json +# → stdout: {"segments":[{"id":1,"text":"...","audio":"./audio/seg_001.wav","duration":3.456}]} +``` + +配置在 `skills/config.json`: + +| 字段 | 说明 | 默认值 | +|------|------|--------| +| `ttsApiKey` | 阿里云百炼 API Key | (必填) | +| `ttsModel` | 模型名称 | `qwen-tts` | +| `ttsVoice` | 音色名称 | `Cherry` | +| `ttsLanguage` | 语言类型 | `Chinese` | + +推荐音色: + +| voice 参数 | 音色名 | 风格 | 适用 | +|------|------|------|------| +| `Cherry` | 芊悦 | 阳光积极小姐姐 | 通用、生活 | +| `Serena` | 苏瑶 | 温柔小姐姐 | 情感、故事 | +| `Ethan` | 晨煦 | 阳光温暖男声 | 科技、教程 | +| `Moon` | 月白 | 率性帅气男声 | 潮流、解说 | +| `Vincent` | 田叔 | 沙哑烟嗓男声 | 军事、纪录 | +| `Kai` | 凯 | 温柔磁性男声 | 情感、配音 | +| `Neil` | 阿闻 | 新闻主持人 | 新闻、财经 | +| `Bellona` | 燕铮莺 | 洪亮有力女声 | 热血、武侠 | + +所有音色均支持中英文,输出 WAV 格式(24kHz),URL 有效期 24 小时。 + +**作为模块调用**: + +```js +const { synthesize, synthesizeBatch } = require('./qwen-tts') + +// 单段 +const { filePath, duration } = await synthesize('你好世界', { voice: 'Cherry' }) + +// 批量 +const results = await synthesizeBatch( + [{ id: 1, text: '文案' }, { id: 2, text: '文案2' }], + { voice: 'Cherry', outputDir: './audio' } +) +``` + +### 2. 图片上传 + +CapCut API 需要公网 URL,本地图片通过 OSS 上传: + +```bash +node scripts/oss-upload.js ./image.png +# → https://i.ibb.co/xxx/image.png +``` + +OSS 配置在 `skills/config.json` 的 `ossRegion/ossAccessKeyId/ossAccessKeySecret/ossBucket` 中。 + +`capcut_assemble.js` 自动处理上传,无需手动调用。 + +### 3. 创建草稿 + +``` +POST /create_draft { width: 1080, height: 1920 } +→ 返回 draft_url(后续所有操作都需要传入) +``` + +### 4. 导入素材 + +**图片模式** (`--mode images`): + +``` +POST /add_images +每张图片 3-5 秒,附带 Ken Burns 动画(缩放 1.0→1.1) +``` + +**视频模式** (`--mode videos`): + +``` +POST /add_videos +逐个添加视频片段,自动计算时长 +``` + +### 5. 添加音频 + +``` +POST /add_audios +- BGM: 账号默认音乐 或 用户指定 URL +- 配音: Step 1 生成的 TTS 音频 +``` + +### 6. 
添加字幕 + +``` +POST /add_captions +- 文案来自 manifest.json +- 关键词高亮(account.json 中 subtitleStyle.highlightColor) +- 字体大小、颜色从账号配置读取 +``` + +### 7. 添加特效/滤镜 + +``` +POST /add_effects ← account.json 中 capcut.effects +POST /add_filters ← account.json 中 capcut.filter +``` + +### 8. 保存 + 同步 + 渲染 + +**保存草稿(服务器端):** + +``` +POST /save_draft → 保存到 CapCut Mate 服务器 +``` + +**同步到本地剪映(纯 Node.js):** + +``` +sync-to-jianying.js → 下载素材文件 → 路径重写为本地 → 写入剪映草稿目录 → 注册 root_meta_info.json → 触发目录扫描 +``` + +无需 Python/uv,`capcut_assemble.js` 自动调用。 + +**504 超时回退:** + +add_videos 提交 9+ 视频时可能触发网关 504。脚本自动降级: +1. 先尝试全量提交 +2. 504 时自动分批(每批 3 个),保持绝对时间不变 + +**云渲染(可选):** + +``` +POST /gen_video → 提交云渲染 +POST /gen_video_status → 轮询直到 success/failed +``` + +--- + +## 账号配置(account.json) + +```json +{ + "capcut": { + "effects": ["录制边框 III"], + "filter": "电影感:40", + "subtitleStyle": { + "fontSize": 36, + "color": "#FFFFFF", + "highlightColor": "#FF6B35", + "bold": true + }, + "defaultBGM": "https://example.com/bgm.mp3" + } +} +``` + +--- + +## 图片动画预设 + +| 动画类型 | 说明 | 适用场景 | +|---------|------|---------| +| Ken Burns (zoom-in) | 从 1.0 缓慢放大到 1.1 | 默认,适合大多数场景 | +| Ken Burns (pan-left) | 画面从右向左平移 | 风景、全景 | +| Ken Burns (pan-right) | 画面从左向右平移 | 风景、全景 | +| 缩放弹出 | 从 0.8 弹到 1.0 | 强调、冲击感 | + +--- + +## 质量检查 + +- [ ] 字幕与文案对应正确 +- [ ] 关键词高亮颜色醒目 +- [ ] 图片动画流畅(无卡顿) +- [ ] BGM 音量与配音平衡 +- [ ] 转场自然(无黑帧) +- [ ] 总时长合理(建议 30-90 秒) diff --git a/.claude/skills/config.example.json b/.claude/skills/config.example.json new file mode 100644 index 0000000..a56d269 --- /dev/null +++ b/.claude/skills/config.example.json @@ -0,0 +1,14 @@ +{ + "jianyingDraftPath": "自动检测,也可手动指定", + "capcutMateDir": "自动检测,也可手动指定", + "capcutMateApiBase": "http://localhost:30000/openapi/capcut-mate/v1", + "ossRegion": "oss-cn-hangzhou", + "ossAccessKeyId": "你的 OSS AccessKeyId", + "ossAccessKeySecret": "你的 OSS AccessKeySecret", + "ossBucket": "你的 OSS Bucket", + "ossFolder": "tmp/", + "ossExpires": 31536000, + "geminiApiBaseUrl": "你的 Gemini API 地址", + 
"geminiModel": "gemini-3.1-flash-image-preview", + "geminiApiKey": "你的 Gemini API Key" +} diff --git a/.claude/skills/image-generator/SKILL.md b/.claude/skills/image-generator/SKILL.md new file mode 100644 index 0000000..99a9376 --- /dev/null +++ b/.claude/skills/image-generator/SKILL.md @@ -0,0 +1,351 @@ +--- +name: image-generator +description: 图片生成技能。支持 Gemini 和 Midjourney (MJ) 两个模型。批量生图、图生图、风格转换、4合1自动拆分。触发词:生图、生成图片、批量出图、图片素材、MJ生图、Gemini生图、图生图、风格转换。 +--- + +# 图片生成 + +Gemini(快速)+ MJ(精品)双模型图片生成。**以参考图为锚点**,确保批量出图风格统一。 + +--- + +## 核心原则:参考图优先 + +**参考图是生图质量的关键**。没有参考图的生图 = 风格不可控、批次不统一。 + +| 有参考图 | 无参考图 | +|---------|---------| +| 风格统一、色彩一致 | 每张图风格随机漂移 | +| 构图/氛围可控 | 构图/氛围全凭模型发挥 | +| 批次之间视觉连贯 | 同一批次看着像不同账号 | +| 提示词可以更简洁 | 需要极长的提示词描述风格 | + +**执行规则**: +1. **有参考图** → 必须用参考图生图(Gemini 图生图 / MJ --sref) +2. **无参考图** → 先让用户提供 1-3 张参考图,或先文生图 1 张让用户确认后再批量 +3. **参考图位置** → `accounts/{account}/references/` 目录 + +--- + +## 生图流程 + +两种模式:**参考风格**(风格锚点)和**锁定人物**(角色一致性)。 + +**启动生图时,必须先询问用户选择模式:** + +> 你要哪种生图模式? +> 1. **参考风格** — 统一批次色调/质感,每张图内容不同但风格一致(Gemini 快速 / MJ 精品) +> 2. 
**锁定人物** — 同一角色在不同场景中保持一致,仅 Gemini 支持 + +```dot +digraph image_gen { + rankdir=TB + node [shape=box, style=filled, fillcolor="#f5f5f5", fontsize=11] + edge [fontsize=10] + + start [label="用户触发生图", shape=oval, fillcolor="#e3f2fd"] + ask [label="询问用户\n选择生图模式", shape=diamond, fillcolor="#fff9c4"] + + read_ref [label="读取参考图(references/)\n+ 风格文件(styles/)"] + gen_prompt [label="为每条文案生成 prompt"] + + gemini_style [label="Gemini edit()\n风格参考", fillcolor="#e8f5e9"] + mj_style [label="MJ --sref\n风格参考", fillcolor="#fff3e0"] + gemini_char [label="Gemini edit()\n锁定人物\n角色一致性", fillcolor="#e1bee7"] + + validate [label="质量校验\n与参考图对比"] + + start -> ask + ask -> read_ref + read_ref -> gen_prompt + gen_prompt -> gemini_style [label="参考风格\n快速/批量"] + gen_prompt -> mj_style [label="参考风格\n精品/写实"] + gen_prompt -> gemini_char [label="锁定人物"] + gemini_style -> validate + mj_style -> validate + gemini_char -> validate +} +``` + +### 参考风格 vs 锁定人物 + +| | 参考风格 | 锁定人物 | +|---|---------|---------| +| 目的 | 统一批次的色调/光影/质感 | 同一角色在不同场景中保持一致 | +| 参考图内容 | 风格样本(光影、色调、质感) | 人物正面照/半身照 | +| 输出 | 每张图可以完全不同的内容,但风格统一 | 每张图同一人物,不同场景/动作/服装 | +| 可用模型 | Gemini + MJ | **仅 Gemini**(edit 图生图) | +| 提示词 | 描述场景内容,风格由参考图锚定 | 描述人物的新场景/动作/服装,角色由参考图锁定 | +| 适用 | 风景叙事、场景插画、背景素材 | 角色连载、人物故事、IP 内容 | + +### 锁定人物用法(Gemini 专用) + +```bash +# 锁定人物:人物参考图 + 新场景描述 +node .claude/skills/video-from-script/scripts/gemini-image-generator.js edit \ + "The same woman warrior standing on a cliff overlooking a burning city, dramatic lighting" \ + -i ./references/character_front.png \ + -o ./output -r 9:16 + +# 多角度锁定(正面 + 侧面) +node .claude/skills/video-from-script/scripts/gemini-image-generator.js edit \ + "The same woman warrior in a dark forest, holding a torch" \ + -i ./references/character_front.png,./references/character_side.png \ + -o ./output -r 9:16 +``` + +**锁定人物要点**: +- 参考图必须是**同一人物的清晰照片**(正面/半身优先) +- 提示词用 "the same [character]" 强调角色延续 +- 多张参考图提供不同角度,一致性更强 +- 仅 Gemini 支持此模式(MJ --sref 只传风格,无法锁定人物特征) + +--- + +## 模型选择 + +| 场景 | 模型 | 参考图用法 
| 原因 | +|------|------|-----------|------| +| 快速出图、批量 | **Gemini** | 本地图文件直传(`-i`) | ~10s,API 直出单张 | +| 精品图、写实/艺术 | **MJ** | 公网 URL(`-r`,`--sref`) | 高质量,4图选1,~60s | +| 参考图融合风格 | Gemini 或 MJ | 见下方详细说明 | 两种都支持 | + +--- + +## 前置条件 + +``` +1. node --version → >= 18 +2. cd .claude/skills/video-from-script/scripts && npm install +3. skills/config.json 中配置 geminiApiKey 和 mjApiKey +4. 参考图放入 accounts/{account}/references/(至少 1 张) +``` + +--- + +## 参考图详细用法 + +### 放置参考图 + +``` +accounts/{account}/ +├── account.json # 模型、画幅配置 +├── styles/ # 风格提示词策略 +│ └── oriental-mythology-ue5.md +└── references/ # 参考图(风格锚点) + ├── ref_style_1.png # 建议重命名为有意义的名字 + ├── ref_style_2.png + └── ref_style_3.png +``` + +**参考图选择标准**: + +| 好的参考图 | 不好的参考图 | +|-----------|------------| +| 代表你想要的最终风格 | 随意找的网图 | +| 光影、色调、构图都满意 | 只有一个维度满意 | +| 3 张以内(太多会冲突) | 10 张堆砌 | +| 同一风格的不同场景 | 不同风格的混合 | + +### Gemini 参考图用法 + +```bash +# 图生图(核心用法:参考图 + 提示词) +node .claude/skills/video-from-script/scripts/gemini-image-generator.js edit \ + "A water deity in flowing hanfu, celestial palace background" \ + -i ./references/ref_style_1.png \ + -o ./output -r 9:16 + +# 多张参考图(Gemini 同时参考多张) +node .claude/skills/video-from-script/scripts/gemini-image-generator.js edit \ + "A water deity in flowing hanfu, celestial palace background" \ + -i ./references/ref_style_1.png,./references/ref_style_2.png \ + -o ./output -r 9:16 + +# 批量带参考图(pipeline init + run) +node .claude/skills/video-from-script/scripts/pipeline.js init \ + --account forbidden-emperor --mode single \ + --items '[{"text":"...","imagePrompt":"...","keyword":"关键词"}]' +node .claude/skills/video-from-script/scripts/pipeline.js run \ + --manifest ./output/forbidden-emperor_XXXXXXXX_001/manifest.json \ + --phase images +``` + +**Gemini 参考图原理**:将参考图作为 Base64 inline data 与文本 prompt 一起发送,模型同时看到参考图和提示词。 + +### MJ 参考图用法 + +```bash +# 单张参考图(--sref 风格参考) +node .claude/skills/video-from-script/scripts/mj-image-generator.js \ + "A water deity in flowing hanfu, celestial palace 
background --sref https://i.ibb.co/xxx/ref.png --sw 200" \ + -o ./output -a 9:16 + +# 多张参考图(--sref 后以空格分隔多个 URL) +node .claude/skills/video-from-script/scripts/mj-image-generator.js \ + "prompt --sref URL1 URL2 --sw 200" \ + -o ./output -a 9:16 +``` + +**MJ 参考图注意**: +- 需要**公网 URL**(本地文件需先上传 OSS) +- `--sref` = 风格参考(Style Reference) +- `--sw 200` = 风格权重(取值 0-1000;Midjourney 官方默认值为 100,本文示例统一使用 200) +- 参考图作为 prompt 尾缀传入,不是独立参数 + +### 上传参考图到公网(MJ 用) + +```bash +# 单张上传 +node .claude/skills/video-from-script/scripts/oss-upload.js ./references/ref_style_1.png +# → https://i.ibb.co/xxx/ref_style_1.png + +# 批量上传 +for f in ./references/*.png; do + node .claude/skills/video-from-script/scripts/oss-upload.js "$f" +done +``` + +--- + +## Gemini 完整用法 + +```bash +# 文生图(无参考图时使用) +node .claude/skills/video-from-script/scripts/gemini-image-generator.js generate "prompt" -o ./output -r 9:16 + +# 图生图(推荐:参考图 + 提示词) +node .claude/skills/video-from-script/scripts/gemini-image-generator.js edit "指令" -i ./ref.jpg -o ./output + +# 批量 +node .claude/skills/video-from-script/scripts/gemini-image-generator.js batch ./prompts.txt -o ./output +``` + +| 参数 | 说明 | +|------|------| +| `-o, --output` | 输出目录 | +| `-r, --ratio` | 宽高比:1:1, 9:16, 16:9, 3:4 等 | +| `-s, --size` | 分辨率:512, 1K, 2K(默认), 4K | +| `-i, --input` | 输入图片(图生图),逗号分隔多张 | + +--- + +## MJ 完整用法 + +```bash +# 文生图(自动拆分4张) +node .claude/skills/video-from-script/scripts/mj-image-generator.js "prompt" -o ./output -a 9:16 + +# 带参考图(--sref) +node .claude/skills/video-from-script/scripts/mj-image-generator.js "prompt --sref URL --sw 200" -o ./output -a 9:16 + +# 批量 +node .claude/skills/video-from-script/scripts/mj-image-generator.js batch ./prompts.txt -o ./output + +# 不拆分(保留原始4合1) +node .claude/skills/video-from-script/scripts/mj-image-generator.js "prompt" --no-split +``` + +| 参数 | 说明 | +|------|------| +| `-o, --output` | 输出目录 | +| `-a, --ar` | 宽高比(通过 --ar 传给 MJ) | +| `-r, --ref` | 参考图 URL(逗号分隔) | +| `--no-split` | 不拆分4合1 | +| `--keep-grid` | 保留原始网格图 | + +MJ 流程:提交 imagine → 轮询 5s/次
→ 下载 4合1 → sharp 拆分为 4 张独立 PNG。 + +--- + +## 账号系统集成 + +当用户指定账号时,从 `accounts/{account}/` 读取三层资源: + +``` +accounts/{account}/ +├── account.json → 默认模型、画幅、风格参考图URL +├── styles/ → 风格文件(提示词模板 + 视觉规则) +└── references/ → 参考图原始文件(风格锚点) +``` + +**读取顺序**: +1. `account.json` → 读取 `styles.{styleName}.references` 中的公网 URL(优先使用,免上传) +2. `references/` → 扫描本地参考图(无 URL 时上传 OSS 获取公网 URL) +3. `styles/*.md` → 读取提示词策略(决定 prompt 结构) +4. `account.json` → 读取默认配置(模型、画幅) + +### account.json 中的参考图 URL + +参考图上传 OSS 后,将 URL 保存到 `account.json` 的 `styles.{styleName}.references` 数组中,避免重复上传: + +```json +{ + "styles": { + "oriental-mythology-ue5": { + "references": [ + { "file": "ref_style_1.png", "url": "https://i.ibb.co/xxx/ref_style_1.png" }, + { "file": "ref_style_2.png", "url": "https://i.ibb.co/yyy/ref_style_2.png" } + ] + } + } +} +``` + +**生图时**:先检查 `account.json` 中是否有对应风格的公网 URL,有则直接用;无则上传 `references/` 下的本地文件到 OSS,上传成功后回写 URL 到 `account.json`。 + +用户可指定风格,如 "用 cyberpunk-character 风格"。不指定时使用 `styles/` 下第一个文件。 + +--- + +## 质量要求(视频素材级) + +为保证后续视频成片质量,图片必须: + +- [ ] 分辨率 >= 1024px(短边) +- [ ] 画幅与目标视频一致(9:16/16:9) +- [ ] 无文字水印、无字幕覆盖 +- [ ] 构图留白(底部 1/4 留给字幕区域) +- [ ] **风格与参考图统一**(同一批次色调/光影/质感一致) +- [ ] MJ 拆分后检查 4 张图质量,丢弃不合格的 +- [ ] 每批次首图与参考图对比,风格偏差大则调整 prompt 重试 + +--- + +## 作为模块调用 + +```js +// Gemini 文生图 +const { generate: geminiGen } = require('./gemini-image-generator') +const r = await geminiGen('prompt', { outputDir: './out', aspectRatio: '9:16' }) + +// Gemini 图生图(带参考图) +const { edit: geminiEdit } = require('./gemini-image-generator') +const r = await geminiEdit('prompt', ['./ref1.png', './ref2.png'], { outputDir: './out', aspectRatio: '9:16' }) + +// MJ +const { generate: mjGen } = require('./mj-image-generator') +const r = await mjGen('prompt', { outputDir: './out', aspectRatio: '9:16' }) +// r.files = ['_1.png', '_2.png', '_3.png', '_4.png'] +``` + +--- + +## 文件命名规则 + +Pipeline 生成的文件统一命名,keyword 来自 manifest item 的 `keyword` 字段(slugify: 保留中文和字母数字,最多 20 字符,其余变 `_`): + +| 模式 | 文件名 | 示例 | 
+|------|--------|------| +| 单图首帧 | `scene_{NN}_{keyword}.jpeg` | `scene_01_崛起.jpeg` | +| 首尾帧首帧 | `scene_{NN}_{keyword}.jpeg` | `scene_01_觉醒.jpeg` | +| 首尾帧尾帧 | `scene_{NN}_{keyword}_last.jpeg` | `scene_01_觉醒_last.jpeg` | +| MJ 候选图 | `scene_{NN}_{keyword}_cand{1-4}.jpeg` | `scene_01_崛起_cand1.jpeg` | + +`{NN}` = 两位场景编号(01, 02, ...),对应 items 数组索引。 + +--- + +## 详细参考 + +批量生产完整流程(账号、文案、提示词生成、输出结构)见 [batch-mode.md](references/batch-mode.md) \ No newline at end of file diff --git a/.claude/skills/image-generator/references/batch-mode.md b/.claude/skills/image-generator/references/batch-mode.md new file mode 100644 index 0000000..9f9a3e5 --- /dev/null +++ b/.claude/skills/image-generator/references/batch-mode.md @@ -0,0 +1,93 @@ +# 批量图片生产 + +## 流程 + +```dot +digraph batch_gen { + rankdir=LR + node [shape=box, style=filled, fillcolor="#f5f5f5", fontsize=11] + + refs [label="参考图 references/\n+ 风格 styles/*.md", shape=folder, fillcolor="#e3f2fd"] + prompts [label="生成提示词\n每条文案→imagePrompt\n+ videoPrompt"] + model_gemini [label="Gemini edit()\n图生图(参考图直传)", fillcolor="#e8f5e9"] + model_mj [label="MJ --sref\n风格参考(URL)", fillcolor="#fff3e0"] + output [label="输出图片\n+ manifest.json"] + pick [label="人工挑选\n删除不合格变体", shape=diamond, fillcolor="#fff9c4"] + + refs -> prompts + prompts -> model_gemini [label="快速/批量"] + prompts -> model_mj [label="精品/写实"] + model_gemini -> output + model_mj -> output + output -> pick +} +``` + +## 提示词生成 + +### 单图模式(默认) + +每条文案生成 2 个 prompt: + +| 字段 | 用途 | 规则 | +|------|------|------| +| `imagePrompt` | 生图 | 英文,描述画面内容 | +| `videoPrompt` | 图生视频 | 描述**运动**(zoom/pan/dolly),不超过 50 词 | + +### 首尾帧模式(用户指定时) + +每条文案生成 3 个 prompt: + +| 字段 | 用途 | 规则 | +|------|------|------| +| `imagePrompt` | 起始帧 | 静止状态 | +| `lastFramePrompt` | 结束帧 | 同一场景的运动状态 | +| `videoPrompt` | 过渡视频 | "from X to Y" 格式 | + +首尾帧原则:同一场景、视角一致、状态对比、光照连贯。 + +## 输出目录 + +``` +output/{account}_{YYYYMMDD}_{NNN}/ +├── manifest.json # 主清单(贯穿全流程) +├── images/ # scene_{NN}_{keyword}.jpeg +├── videos/ # 
scene_{NN}_{keyword}.mp4 +└── audio/ # seg_001.wav +``` + +命名:图片 `scene_01_悬浮.jpeg` → 视频 `scene_01_悬浮.mp4`(keyword 支持中文) + +## manifest.json + +字段规范详见 [manifest-schema.md](../../video-from-script/references/manifest-schema.md)。 + +## 命令速查 + +```bash +# Gemini 图生图(推荐,本地图直传) +node scripts/gemini-image-generator.js edit "prompt" -i ./references/ref1.png -o ./output -r 9:16 + +# Pipeline 批量生图(推荐) +node scripts/pipeline.js init \ + --account {account} --mode single \ + --items '[{"text":"...","imagePrompt":"...","keyword":"关键词"}]' +node scripts/pipeline.js run \ + --manifest ./output/{account}_XXXXXXXX_001/manifest.json \ + --phase images + +# MJ 带参考图(需先上传 OSS) +node scripts/oss-upload.js ./references/ref1.png +node scripts/mj-image-generator.js "prompt --sref URL --sw 200" -o ./output -a 9:16 + +# Gemini 纯文生图(无参考图时) +node scripts/gemini-image-generator.js generate "prompt" -o ./output -r 9:16 +``` + +## 质量检查 + +- 风格与参考图一致 +- 画幅比例正确(9:16/16:9) +- 无文字/水印/字幕覆盖 +- 主体清晰,构图留白(底部 1/4 给字幕) +- manifest.json 与实际文件一一对应 diff --git a/.claude/skills/klingai-1.1.0/README.md b/.claude/skills/klingai-1.1.0/README.md new file mode 100644 index 0000000..e2ac062 --- /dev/null +++ b/.claude/skills/klingai-1.1.0/README.md @@ -0,0 +1,14 @@ +# Kling AI + +Video generation, image generation, and subject management. + +- **Command**: `node scripts/kling.mjs <video|image|element|account> [options]` +- **Subcommands**: + - `video`: video generation (text-to-video, image-to-video, omni-video, multi-shot) + - `image`: image generation (text-to-image, image-to-image, omni-image, 4K/series) + - `element`: subject CRUD + - `account`: quota query and credential bind/import +- Choose by user intent; if ambiguous, ask the user first. + +See [SKILL.md](SKILL.md) for full routing/parameters and [reference.md](reference.md) for endpoint mapping. +Official docs: [CN](https://app.klingai.com/cn/dev/document-api) / [Global](https://kling.ai/document-api/quickStart/productIntroduction/overview).
diff --git a/.claude/skills/klingai-1.1.0/SKILL.md b/.claude/skills/klingai-1.1.0/SKILL.md new file mode 100644 index 0000000..8398a92 --- /dev/null +++ b/.claude/skills/klingai-1.1.0/SKILL.md @@ -0,0 +1,407 @@ +--- +name: klingai +version: "1.1.0" +description: Official Kling AI Skill. Call Kling AI for video generation, image generation, subject management, and account quota inquiry. Use subcommand video / image / element / account by user intent. Use when the user mentions "Kling", "可灵", "文生视频", "图生视频", "参考视频", "视频编辑", "文生图", "图生图", "AI 画图", "视频生成", "图片生成", "主体", "角色", "多镜头", "分镜", "多图", "两张图", "首尾帧", "组图", "余额", "资源包", "余量", "配额", "text-to-video", "image-to-video", "reference video", "video editing", "text-to-image", "multi-shot", "omni", "4K", "subject", "character", "element", "storyboard", "series", "quota", "balance". +metadata: {"openclaw":{"emoji":"🎬","requires":{"bins":["node"]},"primaryEnv":"KLING_TOKEN","homepage":"https://app.klingai.com/cn/dev/document-api"}} +--- + +> **Language**: Respond in the user's language (detect from their message). Use it for explanations, confirmations, errors, and follow-ups. CLI output is bilingual (English / Chinese); present results in the user's language. + +# Kling AI + +Video generation, image generation, subject management, and (read-only) account resource/quota inquiry. +Invoke with subcommand `video` | `image` | `element` | `account` by user intent. +Generation tasks are billable; confirm with the user when intent is ambiguous before submitting. + +## Invocation + +From repository root: + +```bash +node skills/klingai/scripts/kling.mjs [options] +``` + +In examples below, `{baseDir}` means the skill directory (for example `skills/klingai`). + +## Routing priority (OpenClaw) + +- For Kling/可灵 requests with complex generation requirements, default to this skill (`node {baseDir}/scripts/kling.mjs ...`). 
+- Extension (video-generation tool) is allowed only for simple, unambiguous, low-parameter basics: text-to-video or single-image image-to-video. +- Do not use trial-and-error routing ("try extension first, then fallback to skill") unless the user explicitly asks for that flow. + +## Intent routing (required) + +Choose subcommand from user intent first. HTTP API path and default `model_name` are determined by **Route & model**. + +| User intent | Subcommand | +| --- | --- | +| Video (t2v, i2v, multi-shot, Omni ref/edit clip via `feature`/`base`, subject-in-video, animation) | `video` | +| Image (text-to-image, image-to-image, 4K, series, AI drawing) | `image` | +| Subject / element (create, manage, list, presets, delete) | `element` | +| Account resources, packs, remaining quota / balance (read-only) | `account` (`--costs`, default) | +| Credential setup (bind/import) | `account` with `--bind-url` / `--import-env` / `--import-credentials` | + +Selection rules: +- Video-related -> `video` +- For simple, unambiguous basic t2v or single-image i2v, extension may be used; for other video cases, prefer this skill. +- Image-only -> `image` +- Subject CRUD -> `element` +- Quota/balance/resource packs -> `account` (default `--costs`) +- Use existing subject in generation -> `video`/`image` + `--element_ids` +- Create subject first -> `element` + +Force skill conditions (any hit -> use this skill): +- multi-image input (>=2 images) +- Omni/frame control (`first_frame`/`end_frame`/`image_types`) +- reference video (`--video` + `feature`/`base`) or video editing +- subject/element reuse (`--element_ids`) +- storyboard/multi-shot (`--multi_shot`, 分镜) +- image series (`--result_type series`/`--series_amount`, 组图) +- extension parameter gaps or ambiguous/unclear parameter intent + +Model name strict rule: +- `--model` must be canonical lowercase/hyphen names only: `kling-v3`, `kling-v3-omni`, `kling-video-o1`, `kling-image-o1`. +- Do not pass aliases as CLI values. 
+- Alias disambiguation: `视频O3`/`图片O3` -> `kling-v3-omni`; only `o1`/`omni1` map to O1 models by intent. + +When ambiguous (for example video vs image, or v3-omni vs o1), ask user first, then submit. + +## Preflight checklist (mandatory before submit) + +Before any billable submit, pass all checks below. If any check fails, stop and ask user or run `--help`. + +1. Subcommand is confirmed: `video` / `image` / `element` / `account`. +2. Route is confirmed by flags: basic vs Omni (from **Route & model**). +3. `--model` is canonical (no alias values like `o3`, `omni3`). +4. All params come from this SKILL.md or subcommand `--help`; no undocumented flags. +5. No conflicting combinations (for example `--multi_shot` + `--image_tail`, `--video` + `--sound on`). +6. Query mode and submit mode are not mixed. + +## Anti-fabrication policy (no guessing) + +- Do not invent model names, enums, ranges, defaults, request fields, or hidden flags. +- Do not infer unsupported values from older/other skills. +- If value is uncertain, verify with `node {baseDir}/scripts/kling.mjs --help`. +- If user intent is uncertain, ask first; do not submit trial jobs. +- If user uses alias words, map to canonical names and pass canonical only. + +## Cost and submission rules + +- Every submit is charged; do not submit speculatively. +- Confirm intent first when unclear. +- On timeout/failure/unexpected result, ask user whether to wait or retry. +- Do not auto-resubmit or silently change intent/parameters. + +## Agent loop & results + +- Entry: only `node {baseDir}/scripts/kling.mjs` with `video`/`image`/`element`/`account`. +- Default flow: submit -> poll (~10s interval) -> download to `--output_dir`. +- Keep user updated on long runs (`submitted -> processing -> succeed/failed`). +- `--no-wait` flow (video/image): submit -> get `task_id` -> query by same subcommand `--task_id ` -> add `--download` when succeeded. 
+- Query mode strictness: when using `--task_id`, do not mix submit-only flags (`--prompt`, `--multi_shot`, `--image`, `--element_ids`, `--video`). +- Never print secrets (`KLING_TOKEN`, `access_key_id`, `secret_access_key`). + +Presenting results: +- Always return task id + local path(s). +- If stdout includes an URL, include markdown link as fallback. + +## Prerequisites + +- Runtime: Node.js 18+, no extra packages. +- Credential priority: `KLING_TOKEN` (session only) -> stored AK/SK in `.credentials` (JWT per request). +- `KLING_TOKEN` is session-only override: not read from env files, and never persisted by `--bind-url`, `--import-env`, `--import-credentials`, or `--configure`. +- Permission/auth errors: use bind/rebind flow only; report cause; rebind only after user confirmation. +- Storage root: default `~/.config/kling`, optional `KLING_STORAGE_ROOT`. +- No token and no AK/SK: CLI auto-starts bind flow. +- `account --bind-url`: init -> verify -> print URL (manual open) -> poll. +- Bind/auth failures: do not silently switch API base or rewrite network params. +- Forced rebind (requires user confirmation): + - `node {baseDir}/scripts/kling.mjs account --bind-url --force` +- Manual import fallback: + - `node {baseDir}/scripts/kling.mjs account --import-env` + - `node {baseDir}/scripts/kling.mjs account --import-credentials --access_key_id "" --secret_access_key ""` + - `node {baseDir}/scripts/kling.mjs account --configure` +- Mask secret values in user-facing text. +- Optional behavior (API base, media roots): check subcommand `--help`. + +## Quick start + +```bash +# Show help +node {baseDir}/scripts/kling.mjs --help + +# Video +node {baseDir}/scripts/kling.mjs video --prompt "A cat running on the grass" --output_dir ./output +node {baseDir}/scripts/kling.mjs video --image ./photo.jpg --prompt "Wind blowing hair" +node {baseDir}/scripts/kling.mjs video --prompt "Match motion of <<>>" --video "https://..." 
--video_refer_type feature +node {baseDir}/scripts/kling.mjs video --prompt "Change background to ..." --video "https://..." --video_refer_type base +node {baseDir}/scripts/kling.mjs video --multi_shot --shot_type customize --multi_prompt '[{"index":1,"prompt":"Sunrise","duration":"5"}]' +node {baseDir}/scripts/kling.mjs video --multi_shot --shot_type intelligence --prompt "A story in three beats: arrival, conflict, resolution" + +# Image +node {baseDir}/scripts/kling.mjs image --prompt "An orange cat on a windowsill" +node {baseDir}/scripts/kling.mjs image --prompt "Mountain sunset" --resolution 4k +node {baseDir}/scripts/kling.mjs image --prompt "<<>> on the beach" --element_ids 123456 + +# Subject / element +node {baseDir}/scripts/kling.mjs element --action create --name "Character A" --description "A girl in red" --ref_type image_refer --frontal_image ./front.jpg +node {baseDir}/scripts/kling.mjs element --action list +node {baseDir}/scripts/kling.mjs element --action query --task_id + +# Account +node {baseDir}/scripts/kling.mjs account --help +node {baseDir}/scripts/kling.mjs account +node {baseDir}/scripts/kling.mjs account --days 90 +node {baseDir}/scripts/kling.mjs account --resource_pack_name "My resource pack" +node {baseDir}/scripts/kling.mjs account --bind-url +node {baseDir}/scripts/kling.mjs account --bind-url --force +node {baseDir}/scripts/kling.mjs account --import-env +node {baseDir}/scripts/kling.mjs account --import-credentials --access_key_id "" --secret_access_key "" +node {baseDir}/scripts/kling.mjs account --configure + +# Query existing task +node {baseDir}/scripts/kling.mjs video --task_id --download +node {baseDir}/scripts/kling.mjs image --task_id --download +``` + +## Core parameters by subcommand + +Do not invent values/ranges/enums/defaults. 
If unsure, check: +`node {baseDir}/scripts/kling.mjs --help` + +### video (video generation) + +| Parameter | Description | Default | +| --- | --- | --- | +| `--prompt` | Non-multi-shot text2video/Omni requires non-empty prompt. With `--multi_shot`, follow `--shot_type` rules. | — | +| `--image` | Basic i2v: single image. Omni: image list (comma-separated). With `--aspect_ratio`, route to Omni video. | — | +| `--image_types` | Omni only. Per-image type list aligned with `--image`: `first_frame` / `end_frame` / empty. | — | +| `--duration` | 3–15 seconds. | 5 | +| `--model` | `model_name`; see **Route & model** and **Model catalog**. | route default | +| `--mode` | `pro` (1080P) / `std` (720P). | pro | +| `--aspect_ratio` | `16:9` / `9:16` / `1:1`. With `--image`, routes to Omni. | 16:9 | +| `--sound` | `on` / `off`. `kling-v3` and `kling-v3-omni` support sound; `kling-video-o1` does not. With `--video`, must be `off`. | off | +| `--image_tail` | Last-frame image. | — | +| `--element_ids` | Subject IDs (comma-separated, Omni). | — | +| `--video` | Omni reference clip: public http(s) URL only. | — | +| `--video_refer_type` | `feature` (reference) / `base` (edit clip). | base | +| `--keep_original_sound` | Omni-only, with `--video`: `yes` / `no`. | — | +| `--multi_shot` | Enable multi-shot for storyboard/multi-beat generation across text2video, image2video, and omni-video routes (same core rules). | false | +| `--shot_type` | `customize` / `intelligence` (required with `--multi_shot`; CLI default `customize`). | — | +| `--multi_prompt` | For `shot_type=customize` only. | — | +| `--output_dir` | Output directory. | `./output` | +| `--task_id` | Query task id; pair with `--download` for download. 
| — | + +Model alias reminder: +- `omni3`/`omni v3`/`o3`/`video o3`/`image o3`/`视频O3`/`图片O3` -> `kling-v3-omni` +- `o1`/`omni1` -> `kling-video-o1` or `kling-image-o1` by intent + +Multi-shot (`--multi_shot`) rules (text2video / image2video / omni-video share the same request semantics): +- `multi_shot=false`: `shot_type` and `multi_prompt` ignored. +- `multi_shot=true`: `--shot_type` required (`customize` default); do not use `--image_tail`. +- `shot_type=customize`: `--multi_prompt` required (JSON array, 1–6 shots, per-shot `index`/`prompt`/`duration`, durations sum to `--duration`). +- `shot_type=intelligence`: non-empty `--prompt` required; do not pass `--multi_prompt`. + +Omni `image_list` rules (video): +- `image_url` cannot be empty (URL or Base64). +- `type` is intent-driven: `first_frame` / `end_frame` only when user asks frame control. +- `--image_tail` requires `--image`. +- With `--video`: max 4 images. Without `--video`: max 7. +- `kling-video-o1`: when image count > 2, no `end_frame`. +- Frame generation cannot combine with `--video_refer_type base`. + +Omni `element_list` rules (video): +- `element_id` cannot be empty. +- Frame generation supports up to 3 subjects. +- First+last frame with `kling-video-o1`: subjects unsupported. +- With `--video`: `image_count + element_count <= 4`; otherwise `<= 7`. +- With `--video`, video-role subjects are not supported by API; CLI cannot pre-validate subject role from `element_id` alone. + +Omni `video_list` rules (video): +- Max one video URL. +- `--video_refer_type`: `feature` / `base` (default `base`). +- `--keep_original_sound`: `yes` / `no`. +- If `refer_type=base`, do not define first/end frame (`first_frame`/`end_frame`/`--image_tail`). +- When `--video` is used, `--sound` must be `off`. + +Compact examples: + +```bash +# explicit frame marking by intent +node {baseDir}/scripts/kling.mjs video --model kling-v3-omni --image a.jpg,b.jpg,c.jpg --image_types first_frame,,end_frame --prompt "..." 
+ +# with reference video: image count <= 4 +node {baseDir}/scripts/kling.mjs video --video "https://..." --video_refer_type feature --image a.jpg,b.jpg --prompt "..." +``` + +### image (image generation) + +| Parameter | Description | Default | +| --- | --- | --- | +| `--model` | `model_name`; see **Route & model** and **Model catalog**. | route default | +| `--prompt` | Image prompt (required). | — | +| `--image` | Basic: single image. Omni: image list (comma-separated). | — | +| `--resolution` | `1k` / `2k` / `4k`; `4k` routes to Omni. | 1k | +| `--aspect_ratio` | `16:9` / `9:16` / `1:1` / `auto` (`auto` Omni only). | basic: `16:9`; Omni: `auto` | +| `--n` | Result count 1–9 (`result_type=single`). | 1 | +| `--negative_prompt` | Basic API only. | — | +| `--result_type` | `single` / `series` (`series` is Omni and i2i-only). | single | +| `--series_amount` | 2–9 for `result_type=series`. | 4 | +| `--element_ids` | Subject IDs (comma-separated, Omni). | — | +| `--output_dir` | Output directory. | `./output` | +| `--task_id` | Query task id; pair with `--download`. | — | + +Notes: +- `n` and `series_amount` apply to different modes. +- `series` is i2i-only, so `--result_type series` requires `--image`. + +Omni refs rules (image): +- `image` cannot be empty (URL or Base64). +- `element_id` cannot be empty. +- `image_count + element_count <= 10`. + +### element (subject management) + +Manage custom subjects: create from image/video, query task, list custom/preset, delete. +Use `element_id` in `video`/`image` with `--element_ids` for reusable subject consistency. 
+ +| Parameter | Description | +| --- | --- | +| `--action create` | Create subject; requires `--name` (<=20), `--description` (<=100), `--ref_type` | +| `--ref_type` | `image_refer` (requires `--frontal_image`) / `video_refer` (requires `--video`) | +| `--frontal_image` | Front reference image (`image_refer`) | +| `--refer_images` | Other reference images (comma-separated, 1–3) | +| `--video` | Reference video (`video_refer`) | +| `--action query --task_id ` | Query creation task | +| `--action list` | List custom subjects | +| `--action list-presets` | List preset subjects | +| `--action delete --element_id ` | Delete subject | + +### account (resource & quota inquiry, optional credential setup) + +| Flag | Purpose | +| --- | --- | +| `--costs` (default) | Read-only quota/resource packs via `GET /account/costs`. | +| `--bind-url` | Device bind with polling; prints URL for manual open; optional `--force`. | +| `--import-env` | Read `KLING_ACCESS_KEY_ID` + `KLING_SECRET_ACCESS_KEY` and persist. | +| `--import-credentials` | Persist keys from `--access_key_id` + `--secret_access_key`. | +| `--configure` | Interactive key input and save credentials. | + +All bind/account files persist under storage root (`~/.config/kling` by default, or `KLING_STORAGE_ROOT`). + +`--costs` query params: + +| Query param (API) | CLI | Default | +| --- | --- | --- | +| `start_time` (required, Unix ms) | `--start_time` | if omitted: `end_time - days` | +| `end_time` (required, Unix ms) | `--end_time` | if omitted: now | +| — | `--days` | 30 (only when `--start_time` omitted) | +| `resource_pack_name` (optional) | `--resource_pack_name` | — | + +Run `node {baseDir}/scripts/kling.mjs account --help` for details. +Run `node {baseDir}/scripts/kling.mjs video --help`, `image --help`, or `element --help` for full params. + +## Route & model (CLI: `kling.mjs` + flags -> default `model_name`) + +Agents call `node {baseDir}/scripts/kling.mjs ` with flags. 
+`--model` sets `model_name` for selected route and must be exact canonical spelling. +If `--model` is omitted, route defaults apply. +CLI guardrails reject incompatible model/route and invalid `sound` combinations before submit. + +### Routing decision tree (must follow) + +1. Choose subcommand from intent: `video` / `image` / `element` / `account`. +2. Determine route triggers: + - any Omni trigger -> Omni route + - otherwise -> basic route +3. Validate model-route compatibility: + - Omni route accepts only Omni-capable canonical models + - basic route rejects Omni-only models +4. Validate strict parameter combos (`sound`, `multi_shot`, frame rules, ref limits). +5. If uncertain, run `--help` or ask user; never guess-submit. + +### Video (`video` subcommand) + +Omni routing triggers (any of these -> omni-video API route): +- `--element_ids` +- `--video` +- comma in `--image` +- `--image` + `--aspect_ratio` +- explicit `--model kling-v3-omni` or `--model kling-video-o1` + +Otherwise: +- basic text2video (T2V): no `--image` +- basic image2video (I2V): single `--image` (optional `--image_tail`) + +`--multi_shot` does not force Omni; storyboard mode still follows the same routing triggers above. + +| Video routing (CLI) | Default if `--model` omitted | Allowed `--model` (examples) | +| --- | --- | --- | +| Basic T2V | `kling-v3` | `kling-v2-6`, `kling-v3` | +| Basic I2V | `kling-v3` | `kling-v2-6`, `kling-v3` | +| Omni | `kling-v3-omni` | `kling-v3-omni` (default), `kling-video-o1` (explicit) | + +### Image (`image` subcommand) + +Omni routing triggers (any of these -> omni-image API route): +- explicit `--model kling-v3-omni` or `--model kling-image-o1` +- `--element_ids` +- `--result_type series` +- `--resolution 4k` +- `--aspect_ratio auto` +- comma in `--image` + +Else -> basic generations route (text-to-image / image-to-image). 
+ +| Image routing (CLI) | Default if `--model` omitted | Allowed `--model` (examples) | +| --- | --- | --- | +| Basic | `kling-v3` | `kling-v3` by default; use canonical basic-route models supported by current CLI (`image --help`) | +| Omni | `kling-v3-omni` | `kling-v3-omni` (default), `kling-image-o1` (explicit) | + +### Model catalog (by name) + +Common aliases (understanding only; do not pass aliases to `--model`): +- `omni3`, `omni v3`, `视频O3`, `O3`, `o3`, `图片O3` -> `kling-v3-omni` +- `o1`, `omni1` -> `kling-video-o1` or `kling-image-o1` by intent + +`--model` input rule: pass only canonical names from this table. + +| Model | Valid on | Notes | +| --- | --- | --- | +| `kling-v2-6` | Basic T2V / I2V only | Not Omni video. | +| `kling-v3` | Basic video / basic image | Default for basic routes. | +| `kling-v3-omni` | Omni video / Omni image | Default for Omni routes. With `--video`, `sound` must be `off`. | +| `kling-video-o1` | Omni video only | No `sound`. | +| `kling-image-o1` | Omni image only | Optional explicit Omni-image model. | + +Principle: +- Set task flags first (`--image`, `--element_ids`, `--video`, `--multi_shot`, ...). +- Omit `--model` to use route defaults. +- If `--model` is explicit, it must match route implied by flags. + +## When to use Omni; element vs image reference + +Which route: follow **Route & model** triggers. +Prefer Omni when you need multi-image composition, images + elements, 4K/series modes, or edit-style instructions. +Use prompt placeholders `<<<...>>>` for Omni media/subject references. + +Prefer plain image reference for simple tasks. +Create element first only when user explicitly wants reusable subject consistency across outputs. + +## Prompt template syntax (video / image Omni) + +In Omni, pass media/subjects by flags; reference in `--prompt` with placeholders: +- `<<>>` -> first `--image` (`<<>>`, ...) +- `<<>>` -> first `--element_ids` (`<<>>`, ...) 
+- `<<>>` -> `--video` clip (`video` subcommand only) + +## Notes + +- Timing: video ~1–5+ min; image ~20–60 s; subject creation ~30 s–2 min. +- Retention: platform may remove assets after ~30 days; save outputs locally. + +## Reference + +- Official developer docs (CN): https://app.klingai.com/cn/dev/document-api +- Official developer docs (Global): https://kling.ai/document-api/quickStart/productIntroduction/overview +- API endpoint quick map in this package: `reference.md` diff --git a/.claude/skills/klingai-1.1.0/_meta.json b/.claude/skills/klingai-1.1.0/_meta.json new file mode 100644 index 0000000..7cb539c --- /dev/null +++ b/.claude/skills/klingai-1.1.0/_meta.json @@ -0,0 +1,6 @@ +{ + "ownerId": "kn7bczybw3dwrwf452ghdtzty582nxj0", + "slug": "klingai", + "version": "1.1.0", + "publishedAt": 1775744696609 +} \ No newline at end of file diff --git a/.claude/skills/klingai-1.1.0/reference.md b/.claude/skills/klingai-1.1.0/reference.md new file mode 100644 index 0000000..7afe3dc --- /dev/null +++ b/.claude/skills/klingai-1.1.0/reference.md @@ -0,0 +1,31 @@ +# Kling AI — API reference + + +| Subcommand | Endpoints | +| ---------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `video` | `POST/GET /v1/videos/text2video`, `POST/GET /v1/videos/image2video`, `POST/GET /v1/videos/omni-video` | +| `image` | `POST/GET /v1/images/generations`, `POST/GET /v1/images/omni-image` | +| `element` | `POST/GET /v1/general/advanced-custom-elements`, `GET /v1/general/advanced-presets-elements`, `POST /v1/general/delete-elements` | +| `account` | `GET /account/costs` (quota/resource packs); bind flow (no Bearer, bind base): `POST /console/api/auth/skill/init-sessions`, `POST /console/api/auth/skill/exchange` | + + +Auth and polling notes: + +- Business APIs (`/v1/...` + `/account/costs`) use Bearer token (JWT from `~/.config/kling/.credentials`, or session 
/**
 * Produce a log-safe rendering of a secret key.
 *
 * @param {*} secret - Secret value; falsy values are treated as empty.
 * @returns {string} '' for empty input, '***' for values of 6 characters or
 *   fewer, otherwise the first 3 and last 2 characters joined by '***'.
 */
function maskSecret(secret) {
  const text = String(secret || '');
  if (text.length === 0) {
    return '';
  }
  // Short secrets are hidden entirely so the visible part never covers them.
  if (text.length <= 6) {
    return '***';
  }
  const head = text.slice(0, 3);
  const tail = text.slice(-2);
  return [head, tail].join('***');
}
/**
 * Print one "<label>: <url>" line to stderr for every entry of
 * KLING_CONSOLE_URLS. The 'cn' and 'global' regions get bilingual labels;
 * any other region key is printed as-is.
 */
function printConsoleUrls() {
  const regionLabels = { cn: 'China / 国内', global: 'Global / 国际' };
  Object.entries(KLING_CONSOLE_URLS || {}).forEach(([regionKey, consoleUrl]) => {
    // Own-property check so keys such as "constructor" fall through to the raw key.
    const displayName = Object.hasOwn(regionLabels, regionKey)
      ? regionLabels[regionKey]
      : regionKey;
    console.error(`${displayName}: ${consoleUrl}`);
  });
}
/**
 * Parse a CLI value as an integer millisecond timestamp.
 *
 * The whole trimmed value must be a signed decimal integer. `parseInt` alone
 * would accept a numeric prefix ("1700abc" -> 1700, "12.5" -> 12, "1e3" -> 1)
 * and let a malformed timestamp silently select the wrong time window.
 *
 * @param {string} name - Flag name used in the error message (e.g. "--end_time").
 * @param {*} raw - Raw flag value; it is stringified and trimmed before parsing.
 * @returns {number} The parsed integer.
 * On invalid input the function prints a bilingual error to stderr and calls
 * `process.exit(1)` instead of returning.
 */
function parseMs(name, raw) {
  const text = String(raw).trim();
  // Accept only digits with an optional sign; anything else becomes NaN.
  const value = /^[+-]?\d+$/.test(text) ? Number(text) : Number.NaN;
  // isSafeInteger also rejects NaN and digit strings too large to be exact.
  if (!Number.isSafeInteger(value)) {
    console.error(`Error / 错误: ${name} must be a valid integer (ms) / 须为有效整数(毫秒)`);
    process.exit(1);
  }
  return value;
}
/**
 * Decide whether an error message looks like an auth/permission failure or a
 * server-side failure, using case-insensitive substring matching.
 *
 * @param {*} [errorMessage=''] - Error text; falsy values are treated as empty.
 * @returns {boolean} true when any known marker occurs in the message.
 */
function isPermissionOrServerIssue(errorMessage = '') {
  const haystack = String(errorMessage || '').toLowerCase();
  const markers = [
    'http 401',
    'http 403',
    'code=1000',
    'code=1002',
    'permission',
    'forbidden',
    'unauthorized',
    'api service error',
    'http 500',
    'http 502',
    'http 503',
    'http 504',
    'server error',
  ];
  return markers.some((marker) => haystack.includes(marker));
}
/**
 * Entry point of the `account` subcommand.
 *
 * Parses `process.argv`, resolves at most one mode flag (`--costs`, `--bind`,
 * `--bind-url`, `--configure`, `--import-env`, `--import-credentials`) and
 * dispatches to it. With no mode flag the default is `costs`, which obtains a
 * token, queries the costs endpoint and prints the JSON payload to stdout.
 * Status and error lines go to stderr; failures end with `process.exit(1)`.
 *
 * @returns {Promise<void>}
 */
export async function main() {
  // NOTE(review): presumably parseArgs returns a plain object keyed by flag name,
  // including hyphenated names such as 'bind-url' — confirm in shared/args.mjs.
  const args = parseArgs(process.argv);
  if (args.help) {
    printHelp();
    return;
  }
  // The legacy --action flag is rejected explicitly rather than ignored.
  if (args.action != null) {
    console.error('Error / 错误: --action has been removed. Use one flag: --costs | --bind-url (or alias --bind) | --import-env | --import-credentials | --configure');
    process.exit(1);
  }

  // Collect every mode flag that is set; more than one is a usage error.
  const modes = ['costs', 'bind', 'bind-url', 'configure', 'import-env', 'import-credentials'];
  const selected = modes.filter((m) => args[m]);
  if (selected.length > 1) {
    console.error(`Error / 错误: account mode flags are mutually exclusive / account 模式参数互斥: ${selected.map((s) => `--${s}`).join(', ')}`);
    process.exit(1);
  }
  // No mode flag at all means the read-only costs query.
  const action = selected[0] || 'costs';

  // --bind runs the same flow as --bind-url, flagged so the alias notice is printed.
  if (action === 'bind') {
    await runBindUrlAction(args, { viaAliasBind: true });
    return;
  }

  if (action === 'bind-url') {
    await runBindUrlAction(args);
    return;
  }

  if (action === 'import-env') {
    try {
      importCredentialsFromEnv();
    } catch (e) {
      console.error(`Error / 错误: ${e?.message || e}`);
      process.exit(1);
    }
    return;
  }

  if (action === 'import-credentials') {
    try {
      importCredentialsFromArgs(args.access_key_id, args.secret_access_key);
    } catch (e) {
      console.error(`Error / 错误: ${e?.message || e}`);
      process.exit(1);
    }
    return;
  }

  if (action === 'configure') {
    try {
      // Show where keys can be created before prompting for them.
      console.error('Get keys / 获取密钥:');
      printConsoleUrls();
      await promptInteractiveCredentialsFile();
    } catch (e) {
      console.error(`Error / 错误: ${e?.message || e}`);
      process.exit(1);
    }
    return;
  }

  // Default mode (costs): a token is required before the API call.
  let token;
  try {
    token = await getTokenOrExit();
  } catch (e) {
    const msg = e?.message || String(e);
    printAccountStateNoAccount(msg);
    console.error(`Error / 错误: ${msg}`);
    console.error('Get keys / 获取密钥:');
    printConsoleUrls();
    process.exit(1);
  }
  const pathWithQuery = buildCostsQueryPath(args);

  try {
    const data = await klingGet(pathWithQuery, token, { contentType: 'application/json' });
    // A missing or non-array field is treated as zero resource packs.
    const infos = Array.isArray(data?.resource_pack_subscribe_infos) ? data.resource_pack_subscribe_infos : [];
    console.error(`Account State / 账号状态: ACCOUNT_OK / 账号正常(资源包 ${infos.length})`);
    // The payload goes to stdout; the status lines above and below go to stderr.
    console.log('Account / 账户资源 (API data):');
    console.log(JSON.stringify(data, null, 2));
    return;
  } catch (e) {
    const msg = e?.message || String(e);
    // Add an account-state line only when the message matches a permission/server marker.
    if (isPermissionOrServerIssue(msg)) {
      console.error('Account State / 账号状态: BOUND_BUT_PERMISSION_OR_SERVER_ERROR / 已绑定但权限或服务异常');
    }
    console.error(`Error / 错误: ${msg}`);
    process.exit(1);
  }
}
/**
 * Return the reference type recorded on a subject (element) object.
 *
 * The type is looked up under `reference_type`, then `element_type`, then
 * `ref_type`; the first truthy value wins.
 *
 * @param {object|null|undefined} el Subject object from an API response.
 * @returns {string} The first truthy type field, or 'unknown' when none is set.
 */
function getElementType(el) {
  const candidates = [el?.reference_type, el?.element_type, el?.ref_type];
  for (const candidate of candidates) {
    if (candidate) return candidate;
  }
  return 'unknown';
}
"o_102,o_108") + --no-wait Submit only, do not wait + +Query: + --task_id Task ID + +List: + --page_num Page 1-1000 (default: 1) + --page_size Page size 1-500 (default: 30) + +Delete: + --element_id Subject ID to delete + +Env: + credentials file ~/.config/kling/.credentials (access_key_id, secret_access_key) + KLING_TOKEN Session-only Bearer (optional override)`); +} + +async function actionCreate(args, token) { + if (!args.name) { console.error('Error / 错误: --name required'); process.exit(1); } + if (!args.description) { console.error('Error / 错误: --description required'); process.exit(1); } + if (!args.ref_type) { console.error('Error / 错误: --ref_type required (image_refer / video_refer)'); process.exit(1); } + + const payload = { + element_name: args.name, + element_description: args.description, + reference_type: args.ref_type, + callback_url: '', + }; + + if (args.ref_type === 'image_refer') { + if (!args.frontal_image) { + console.error('Error / 错误: image_refer requires --frontal_image'); process.exit(1); + } + const imageList = { + frontal_image: await readMediaAsValue(args.frontal_image), + }; + if (args.refer_images) { + const imgs = args.refer_images.split(','); + imageList.refer_images = []; + for (const img of imgs) { + imageList.refer_images.push({ image_url: await readMediaAsValue(img.trim()) }); + } + } + payload.element_image_list = imageList; + } else if (args.ref_type === 'video_refer') { + if (!args.video) { + console.error('Error / 错误: video_refer requires --video'); process.exit(1); + } + payload.element_video_list = { + refer_videos: [{ video_url: await readMediaAsValue(args.video) }], + }; + } else { + console.error('Error / 错误: --ref_type must be image_refer or video_refer'); + process.exit(1); + } + + if (args.voice_id) { + payload.element_voice_id = args.voice_id; + } + + if (args.tags) { + payload.tag_list = args.tags.split(',').map(id => ({ tag_id: id.trim() })); + } + + const result = await submitTask(API_PATH, payload, token); + 
/**
 * Fetch one page of subjects and print a one-line summary per entry.
 *
 * @param {object} args Parsed CLI arguments; reads `page_num` (default '1')
 *   and `page_size` (default '30').
 * @param {string} token Bearer token forwarded to `klingGet`.
 * @param {boolean} presets When truthy, list preset subjects instead of
 *   custom ones.
 * @returns {Promise<void>}
 */
async function actionList(args, token, presets) {
  const path = presets ? API_PATH_PRESETS : API_PATH;
  const pageNum = args.page_num || '1';
  const pageSize = args.page_size || '30';

  // Values come straight from the command line; encode them so characters
  // such as `&` or `=` cannot smuggle extra query parameters into the request.
  const query = new URLSearchParams({
    pageNum: String(pageNum),
    pageSize: String(pageSize),
  });
  const data = await klingGet(`${path}?${query}`, token);

  // The response may be a list of task records or a single record.
  const items = Array.isArray(data) ? data : [data];
  const label = presets ? 'Preset / 预设主体' : 'Custom / 自定义主体';
  console.log(`${label} (page ${pageNum}):\n`);

  for (const item of items) {
    const elements = item?.task_result?.elements || [];
    // A task without elements yet: report its status instead of an empty row.
    if (elements.length === 0 && item?.task_id) {
      console.log(` Task / 任务 ${item.task_id}: ${item.task_status || 'unknown'}`);
      continue;
    }
    for (const el of elements) {
      console.log(` [${el.element_id}] ${el.element_name} — ${el.element_description} (${getElementType(el)})`);
    }
  }
}
/**
 * Shorthand model names mapped to their canonical image model.
 * Keys are written in the form produced by `normalizeAliasKey`
 * (lower-case, whitespace/underscore runs replaced by `-`), so no
 * upper-case variants are needed. Built once at module load.
 */
const IMAGE_MODEL_ALIASES = new Map([
  ['omni3', 'kling-v3-omni'],
  ['omni-3', 'kling-v3-omni'],
  ['omni-v3', 'kling-v3-omni'],
  ['v3-omni', 'kling-v3-omni'],
  ['o3', 'kling-v3-omni'],
  ['kling-image-o3', 'kling-v3-omni'],
  ['kling-o3', 'kling-v3-omni'],
  ['omni1', 'kling-image-o1'],
  ['omni-1', 'kling-image-o1'],
  ['o1', 'kling-image-o1'],
  ['kling-o1', 'kling-image-o1'],
]);

/**
 * Resolve a user-supplied model alias to its canonical model name.
 *
 * @param {*} v Raw `--model` value.
 * @returns {string} Canonical model name, or '' when `v` is not a known alias
 *   (canonical names themselves are not aliases and also yield '').
 */
function getImageModelAliasTarget(v) {
  return IMAGE_MODEL_ALIASES.get(normalizeAliasKey(v)) || '';
}
/**
 * Split the `--image` argument into a list of trimmed paths/URLs.
 *
 * @param {string|boolean|undefined} rawImageArg Comma-separated references.
 *   `parseArgs` stores boolean `true` when the flag is given without a value.
 * @returns {string[]} Trimmed entries; empty array when the flag is absent.
 * @throws {Error} When the flag has no value or the list contains an empty item.
 */
function parseImageInputs(rawImageArg) {
  if (!rawImageArg) return [];
  // A bare `--image` would otherwise be stringified to the bogus path "true".
  if (rawImageArg === true) {
    throw new Error(
      'Invalid --image / --image 缺少取值;请提供图片路径或 URL。',
    );
  }
  const parts = String(rawImageArg).split(',').map((s) => s.trim());
  if (parts.some((p) => !p)) {
    throw new Error(
      'Invalid --image list / --image 列表中存在空值;请移除空项并确保每个 image 非空。',
    );
  }
  return parts;
}
/**
 * Decide whether an image request must be sent to the omni-image endpoint.
 *
 * Omni is selected when any of these hold: an Omni model is named explicitly,
 * subject IDs are supplied, a series is requested, the resolution is 4k, the
 * aspect ratio is `auto`, or more than one reference image is given.
 *
 * @param {object} args Parsed CLI arguments.
 * @returns {boolean} true for omni-image, false for the basic generations API.
 */
function useOmniApi(args) {
  // A flag passed without a value is parsed as boolean `true`; coerce before
  // calling string methods so such input does not raise a TypeError here.
  const lower = (value) => String(value || '').toLowerCase();

  // Match video.mjs chooseApiPath: explicit Omni image models → omni-image (incl. plain text-to-image).
  const m = normalizeModelName(args.model).toLowerCase();
  if (m === 'kling-v3-omni' || m === 'kling-image-o1') return true;
  if (args.element_ids) return true;
  if (args.result_type === 'series') return true;
  if (lower(args.resolution) === '4k') return true;
  if (lower(args.aspect_ratio) === 'auto') return true;
  // A comma means several reference images, which only Omni accepts.
  if (String(args.image || '').includes(',')) return true;
  return false;
}
`${taskId}.png` : `${taskId}_${i}.png`); + await downloadFile(urls[i], outPath); + outPaths.push(outPath); + } + return outPaths; +} + +export async function main() { + const args = parseArgs(process.argv); + if (args.help) { printHelp(); return; } + validateModelAliasInput(args.model); + + const token = await getTokenOrExit(); + const outputDir = resolveAllowedOutputDir(args.output_dir || './output'); + const queryHint = `node kling.mjs image --task_id`; + + if (args.task_id && !args.prompt) { + try { + const { apiPath, data } = await queryTaskAnyPath(args.task_id, token); + console.log(`Task ID / 任务 ID: ${args.task_id}`); + console.log(`Status / 状态: ${data?.task_status || 'unknown'}`); + const result = data?.task_result || {}; + const imageUrls = collectImageUrls(result); + imageUrls.forEach((url, i) => { + console.log(`Image / 图片[${i}]: ${url}`); + }); + if (args.download && imageUrls.length > 0) { + const { mkdir } = await import('node:fs/promises'); + await mkdir(outputDir, { recursive: true }); + for (let i = 0; i < imageUrls.length; i++) { + const outPath = join(outputDir, imageUrls.length === 1 ? `${args.task_id}.png` : `${args.task_id}_${i}.png`); + await downloadFile(imageUrls[i], outPath); + } + } + } catch (e) { + console.error(`Error / 错误: ${e.message}`); + process.exit(1); + } + return; + } + + if (!args.prompt) { + console.error('Error / 错误: --prompt or --task_id required'); + console.error('Use --help / 使用 --help 查看帮助'); + process.exit(1); + } + + const apiPath = useOmniApi(args) ? 
API_OMNI : API_GEN; + const imageInputs = parseImageInputs(args.image); + const elementIds = parseElementIds(args.element_ids); + + try { + validateModelForRoute(apiPath, args); + + if (apiPath === API_GEN) { + const payload = { + model_name: args.model || 'kling-v3', + prompt: args.prompt, + negative_prompt: args.negative_prompt || '', + n: parseInt(args.n || '1', 10), + aspect_ratio: args.aspect_ratio || '16:9', + resolution: args.resolution || '1k', + callback_url: '', + }; + if (imageInputs.length > 0) { + payload.image = await readMediaAsValue(imageInputs[0]); + } + const result = await submitTask(API_GEN, payload, token); + console.log(`\nTask ID / 任务 ID: ${result.taskId}`); + console.log(`Query / 查询: ${queryHint} ${result.taskId} [--download]`); + if (args.wait !== false) { + console.log(); + const outPaths = await pollAndDownloadImages(API_GEN, result.taskId, outputDir, { token }); + console.log(`\n✓ Done / 完成: ${outPaths.length} image(s)`); + outPaths.forEach((p) => console.log(` - ${p}`)); + } + return; + } + + const payload = { + model_name: args.model || 'kling-v3-omni', + prompt: args.prompt, + resolution: (args.resolution || '1k').toLowerCase(), + aspect_ratio: (args.aspect_ratio || 'auto').toLowerCase(), + result_type: args.result_type || 'single', + callback_url: '', + }; + if (payload.result_type === 'series') { + if (imageInputs.length === 0) { + throw new Error( + 'Invalid --result_type series without --image / 组图仅支持 i2i,请提供 --image(t2i 不支持 series)。', + ); + } + payload.series_amount = parseInt(args.series_amount || '4', 10); + } else { + payload.n = parseInt(args.n || '1', 10); + } + validateOmniRefCount(imageInputs, elementIds); + if (imageInputs.length > 0) { + payload.image_list = []; + for (const img of imageInputs) { + payload.image_list.push({ image: await readMediaAsValue(img) }); + } + } + if (elementIds.length > 0) { + payload.element_list = elementIds.map(id => ({ element_id: id })); + } + + const result = await submitTask(API_OMNI, 
/**
 * Read the skill version from the front matter of `../SKILL.md`
 * (relative to this script's directory).
 *
 * The file must start with a `---` delimited block; the version is the value
 * of the first `version:` line inside it, with optional surrounding quotes
 * removed.
 *
 * @returns {string|null} The version string, or null when the file cannot be
 *   read, has no front matter, or has no `version:` entry.
 */
function getVersionFromSkillMd() {
  let markdown;
  try {
    markdown = readFileSync(join(__dirname, '..', 'SKILL.md'), 'utf-8');
  } catch {
    // Missing or unreadable SKILL.md: the caller falls back to a default.
    return null;
  }

  const frontMatter = /^---\r?\n([\s\S]*?)\r?\n---/.exec(markdown);
  if (frontMatter === null) return null;

  const versionLine = /^version:\s*["']?([^"'\s\n]+)["']?/m.exec(frontMatter[1]);
  if (versionLine === null) return null;

  return versionLine[1].trim();
}
/**
 * Report whether a path lies at or below one of the allowed roots
 * (used for local file reads and output-directory writes).
 *
 * Always true when `allowAbsolutePaths()` is enabled.
 *
 * @param {string} absPath Path to check; it is resolved again defensively.
 * @returns {boolean} true when the path is a root itself or nested inside one.
 */
export function isAllowedLocalPath(absPath) {
  if (allowAbsolutePaths()) return true;
  const normalized = resolve(absPath);
  for (const root of allAllowedRoots()) {
    if (!sameDriveRoot(root, normalized)) continue;
    const rel = relative(root, normalized);
    if (rel === '') return true;
    // Both inputs are resolved, so an escape from `root` can only show up as
    // a leading `..` segment. Compare the whole first segment rather than a
    // string prefix so in-root names such as `..cache` or `a/..b` are accepted.
    // NOTE(review): on Windows `relative()` may return an absolute path when
    // the roots differ in a way sameDriveRoot does not detect (e.g. UNC) —
    // confirm whether an isAbsolute(rel) guard is wanted here.
    if (rel.split(sep)[0] !== '..') return true;
  }
  return false;
}
/**
 * Obtain a Bearer token, running the device-bind flow when no credentials exist.
 *
 * `getBearerToken()` is tried first. If it fails with a
 * `CredentialsMissingError`, `runDeviceBindFlow` is started and the token is
 * requested again afterwards.
 *
 * @returns {Promise<string>} Whatever `getBearerToken()` returns.
 * @throws {Error} "Auth error" for any non-missing-credentials failure, or a
 *   multi-line "Bind failed" message listing manual fallbacks when binding
 *   (or the second token lookup) fails.
 */
export async function getTokenOrExit() {
  try {
    return getBearerToken();
  } catch (authErr) {
    const credentialsMissing = authErr instanceof CredentialsMissingError
      || authErr?.name === 'CredentialsMissingError';
    if (!credentialsMissing) {
      throw new Error(`Auth error / 鉴权错误: ${authErr?.message || authErr}`);
    }
    // Credentials are simply absent: continue with the bind flow below.
  }

  try {
    console.error('\n── No credentials / 无可用凭证,启动设备绑定 bind ────\n');
    await runDeviceBindFlow({});
    return getBearerToken();
  } catch (bindErr) {
    // The manual-bind link is only present on some bind errors.
    const manualLink = bindErr?.bindAuthorizeUrl
      ? [`Bind URL / 手动绑定链接: ${bindErr.bindAuthorizeUrl}`]
      : [];
    const report = [
      `Bind failed / 绑定失败: ${bindErr?.message || bindErr}`,
      ...manualLink,
      'Fallback / 备选:',
      ' node skills/klingai/scripts/kling.mjs account --bind-url',
      ' set KLING_ACCESS_KEY_ID + KLING_SECRET_ACCESS_KEY, then',
      ' node skills/klingai/scripts/kling.mjs account --import-env',
      ' or pass args: --import-credentials --access_key_id --secret_access_key ',
      ' or set KLING_TOKEN for this session / 或设置 KLING_TOKEN',
    ];
    throw new Error(report.join('\n'));
  }
}
/**
 * Validate the Omni-Video reference (`video_list[].video_url`).
 * Only `http://` or `https://` links are accepted; local paths and Base64
 * payloads are rejected.
 *
 * @param {string} pathOrUrl Value supplied for the reference video.
 * @returns {string|undefined} The trimmed URL, or undefined for empty input.
 * @throws {Error} When the value is not an http(s) URL.
 */
export function readOmniVideoRefUrl(pathOrUrl) {
  if (!pathOrUrl) return undefined;

  const candidate = pathOrUrl.trim();
  let protocol = null;
  try {
    ({ protocol } = new URL(candidate));
  } catch {
    // Not parseable as a URL: handled by the shared error below.
  }
  if (['http:', 'https:'].includes(protocol)) return candidate;

  throw new Error(
    'Omni --video must be a public http(s) URL / Omni --video 须为公网 http(s) 链接(不接受本地路径或 Base64)。\n'
      + 'Upload the file and pass the URL / 请先上传视频再传入 URL。',
  );
}
/**
 * Load `KEY=value` lines from dotenv-style text into `process.env`.
 *
 * Blank lines, `#` comments, lines without a key, and keys listed in
 * `CREDENTIAL_ENV_DENYLIST` are skipped. A value wrapped in matching single
 * or double quotes has the quotes removed.
 *
 * @param {string} content Raw file text.
 * @param {{ shellKeys: Set<string> }} opts `shellKeys` holds the names that
 *   were already exported before start-up; they are never overwritten.
 */
function parseEnvContent(content, opts) {
  const { shellKeys } = opts;
  const isWrappedIn = (text, quote) => text.startsWith(quote) && text.endsWith(quote);

  content.split('\n').forEach((rawLine) => {
    const entry = rawLine.trim();
    if (entry === '' || entry.startsWith('#')) return;

    const separatorAt = entry.indexOf('=');
    if (separatorAt <= 0) return;

    const name = entry.slice(0, separatorAt).trim();
    if (CREDENTIAL_ENV_DENYLIST.has(name)) return;

    const rawValue = entry.slice(separatorAt + 1).trim();
    const value = isWrappedIn(rawValue, '"') || isWrappedIn(rawValue, "'")
      ? rawValue.slice(1, -1)
      : rawValue;

    // Variables already present in the environment take precedence over the file.
    if (shellKeys.has(name) || name in process.env) return;
    process.env[name] = value;
  });
}
/**
 * Return `content` with `key=value` set: the first existing `key=` line is
 * replaced, otherwise the entry is appended after the last non-blank line.
 * The result always ends with a single newline when an entry is appended.
 *
 * @param {string} content Current dotenv-style text (may be empty).
 * @param {string} key Variable name; regex metacharacters are escaped.
 * @param {string} value Value written verbatim after `=`.
 * @returns {string} Updated text.
 */
function upsertEnvFileKey(content, key, value) {
  const line = `${key}=${value}`;
  const escaped = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const re = new RegExp(`^${escaped}=.*$`, 'm');
  // Use a replacer function: a plain replacement string would interpret
  // `$&`, `$1`, `$$` … inside `value` as substitution patterns and corrupt it.
  if (re.test(content)) return content.replace(re, () => line);
  const trimmed = content.replace(/\s+$/, '');
  if (!trimmed) return `${line}\n`;
  return `${trimmed}\n${line}\n`;
}
/**
 * Write the access key / secret key pair under `[profile]` in the credentials
 * INI file, keeping every other profile and any extra fields already stored.
 *
 * The file is created with mode 0600 so the secret is never readable by other
 * users, not even briefly. Because the creation mode does not apply to a file
 * that already exists, a best-effort chmod follows on non-Windows platforms.
 *
 * @param {string} profile Section name to create or update.
 * @param {string} accessKey Stored as `access_key_id` (trimmed).
 * @param {string} secretKey Stored as `secret_access_key` (trimmed).
 * @param {Record<string, unknown>} [extra] Additional fields for the section;
 *   null, undefined and empty values are not written.
 * @returns {string} Path of the credentials file.
 */
export function writeCredentialsProfile(profile, accessKey, secretKey, extra = {}) {
  const path = getCredentialsFilePath();
  mkdirSync(dirname(path), { recursive: true });

  let all = {};
  try {
    all = parseCredentialsIni(readFileSync(path, 'utf-8'));
  } catch {
    // Missing or unreadable file: start from an empty set of profiles.
  }

  all[profile] = {
    ...all[profile],
    access_key_id: String(accessKey || '').trim(),
    secret_access_key: String(secretKey || '').trim(),
    ...extra,
  };

  const lines = [];
  for (const [name, fields] of Object.entries(all)) {
    lines.push(`[${name}]`);
    for (const [k, v] of Object.entries(fields)) {
      if (v == null || String(v) === '') continue;
      lines.push(`${k} = ${String(v)}`);
    }
    lines.push('');
  }

  // Restrictive mode at creation time closes the window in which a freshly
  // created secrets file would otherwise carry the default permissions.
  writeFileSync(path, `${lines.join('\n').trimEnd()}\n`, { mode: 0o600 });
  try {
    // Tighten a pre-existing file too; failure here is deliberately ignored.
    if (process.platform !== 'win32') chmodSync(path, 0o600);
  } catch {
    // Best effort only.
  }
  return path;
}
/**
 * Return the API base configured through the environment.
 *
 * `KLING_API_BASE_TEST` is consulted first, then `KLING_API_BASE`; a value
 * that is empty after trimming counts as unset.
 *
 * @returns {string|null} Trimmed base URL, or null when neither is set.
 */
export function getConfiguredApiBase() {
  for (const name of ['KLING_API_BASE_TEST', 'KLING_API_BASE']) {
    const candidate = String(process.env[name] ?? '').trim();
    if (candidate !== '') return candidate;
  }
  return null;
}
/**
 * Persist the identity object as pretty-printed JSON (2-space indent,
 * trailing newline), creating the config directory when needed.
 *
 * @param {object} obj Identity record to store.
 * @returns {void}
 */
function writeIdentity(obj) {
  const target = getIdentityFilePath();
  mkdirSync(getKlingConfigDir(), { recursive: true });
  const serialized = JSON.stringify(obj, null, 2);
  writeFileSync(target, `${serialized}\n`);
}
/**
 * Prompt on stderr and read one line from stdin without echoing it.
 *
 * When stdin is not a TTY the line is read through readline and returned
 * trimmed. When it is a TTY, stdin is switched to raw mode and the input is
 * collected chunk by chunk; nothing typed is written back, and the collected
 * text is returned as-is (not trimmed).
 *
 * @param {string} prompt Text shown before reading.
 * @returns {Promise<string>} The entered line.
 */
function readHiddenLine(prompt) {
  // Remove bracketed-paste markers (ESC[200~ / ESC[201~) first, then every
  // remaining control character, so only printable text is kept.
  function sanitizeChunk(chunk) {
    return String(chunk || '')
      .replace(/\u001b\[200~/g, '')
      .replace(/\u001b\[201~/g, '')
      .replace(/[\u0000-\u001f\u007f]/g, '');
  }

  const stdin = process.stdin;
  // Prompts go to stderr so stdout stays free for program output.
  const stdout = process.stderr;
  if (!stdin.isTTY) {
    // Piped input: raw mode is unavailable, fall back to readline.
    return new Promise((r) => {
      const rl = createInterface({ input: stdin, output: stdout });
      rl.question(prompt, (a) => {
        rl.close();
        r(a.trim());
      });
    });
  }
  stdout.write(prompt);
  return new Promise((resolveLine) => {
    stdin.setRawMode(true);
    stdin.resume();
    stdin.setEncoding('utf8');
    let s = '';
    // Each comparison below is against the WHOLE chunk. A chunk that bundles
    // a control key with other characters (e.g. pasted text ending in a
    // newline) falls through to sanitizeChunk, which drops the control
    // character; the user then still has to press Enter to submit.
    const onData = (key) => {
      const k = String(key);
      if (k === '\u0003') {
        // Ctrl-C: restore the terminal, then exit with status 1.
        stdin.setRawMode(false);
        stdin.removeListener('data', onData);
        stdin.pause();
        process.exit(1);
      }
      if (k === '\r' || k === '\n') {
        // Enter: restore the terminal, emit the newline that raw mode
        // suppressed, and hand back what was collected.
        stdin.setRawMode(false);
        stdin.removeListener('data', onData);
        stdin.pause();
        stdout.write('\n');
        resolveLine(s);
        return;
      }
      if (k === '\u007f' || k === '\b') {
        // DEL / Backspace: drop the last UTF-16 code unit.
        s = s.slice(0, -1);
        return;
      }
      s += sanitizeChunk(k);
    };
    stdin.on('data', onData);
  });
}
/**
 * Canonicalise an API base URL: coerce to string, trim surrounding
 * whitespace, then drop every trailing `/`.
 *
 * @param {unknown} base Base URL; falsy values yield an empty string.
 * @returns {string} Normalised base.
 */
function normalizeApiBase(base) {
  let cleaned = String(base || '').trim();
  while (cleaned.endsWith('/')) {
    cleaned = cleaned.slice(0, -1);
  }
  return cleaned;
}
/**
 * Narrow the exported console-URL map to the region that owns `base`.
 * When the base matches no known endpoint, the full map is restored.
 *
 * @param {string} base API or bind base URL.
 * @returns {void}
 */
function setConsoleUrlsForBase(base) {
  const match = findEndpointByBase(base);
  KLING_CONSOLE_URLS = match
    ? Object.freeze({ [match.key]: match.consoleUrl })
    : ALL_KLING_CONSOLE_URLS;
}
/**
 * Check whether `base` answers an authenticated list request successfully.
 *
 * Sends a one-item text2video listing with an 8 second timeout. Any network
 * error, non-2xx status or unparsable body counts as a failed probe.
 *
 * @param {string} base Candidate API base URL.
 * @param {string} token Bearer token used for the probe.
 * @returns {Promise<boolean>} True when the body's `code` is 0 or 200.
 */
async function probeBase(base, token) {
  const probeUrl = `${base}/v1/videos/text2video?pageNum=1&pageSize=1`;
  try {
    const response = await fetch(probeUrl, {
      method: 'GET',
      headers: makeKlingHeaders(token, null),
      signal: AbortSignal.timeout(8000),
    });
    if (!response.ok) return false;

    const payload = await response.json().catch(() => null);
    if (payload == null) return false;
    return [0, 200].includes(payload.code);
  } catch {
    return false;
  }
}
/**
 * Quote long integer ids in raw JSON text before it is parsed, so they reach
 * the caller as strings instead of losing precision as Numbers.
 *
 * Only the fields `element_id`, `task_id`, `elementId` and `taskId` are
 * touched, and only when the value is a bare run of 15 or more digits.
 *
 * @param {string} text Raw JSON text.
 * @returns {string} Text with the matching ids wrapped in double quotes.
 */
function protectBigInts(text) {
  const bigIdField = /"(element_id|task_id|elementId|taskId)":\s*(\d{15,})/g;
  return text.replace(bigIdField, (_match, field, digits) => `"${field}":"${digits}"`);
}
/**
 * `fetch` wrapper that turns a transport failure into a descriptive error.
 *
 * The thrown error names the request, shows the configured `KLING_API_BASE`
 * and carries the original exception as `cause`, so the underlying reason
 * (DNS failure, refused connection, abort, ...) stays inspectable.
 *
 * @param {string} url Absolute request URL.
 * @param {object} init Options passed straight to `fetch`.
 * @param {{ method?: string }} [context] Request description for the message;
 *   the method falls back to `init.method`, then `GET`.
 * @returns {Promise<Response>} The fetch response (any HTTP status).
 * @throws {Error} When the request could not be performed at all.
 */
async function safeFetch(url, init, context) {
  try {
    return await fetch(url, init);
  } catch (e) {
    const baseHint = getConfiguredApiBase() || '';
    const msg = e?.message || String(e);
    // Never let a missing context raise inside the handler and hide `e`.
    const method = context?.method ?? init?.method ?? 'GET';
    throw new Error(
      `Network error / 网络错误: ${msg}\n` +
      `Request / 请求: ${method} ${url}\n` +
      `KLING_API_BASE: ${baseHint}\n` +
      'Hint / 提示: check KLING_API_BASE and network/DNS/proxy, or remove KLING_API_BASE to auto-probe official endpoints / ' +
      '请检查 KLING_API_BASE 与网络(DNS/代理),或移除 KLING_API_BASE 让脚本自动探测官方节点。',
      { cause: e },
    );
  }
}
/**
 * Encode a string or byte sequence as unpadded URL-safe Base64
 * (`-` and `_` instead of `+` and `/`, no `=`).
 *
 * @param {string|Uint8Array|ArrayLike<number>} input Data to encode.
 * @returns {string} Base64url text.
 */
function base64url(input) {
  return Buffer.from(input).toString('base64url');
}
/**
 * Decide which host the device-bind endpoints are called on.
 *
 * Resolution order:
 *   1. `bindBaseOverride`, mapped to the matching endpoint's bind host when it
 *      is a known base, otherwise used as given;
 *   2. the bind base configured through the environment;
 *   3. the configured API base, the already resolved API base, or the default
 *      API base, again mapped to the endpoint's bind host when known.
 *
 * @param {string} [bindBaseOverride] Explicit base supplied by the caller.
 * @returns {string} Bind base without trailing slashes.
 */
function resolveBindBase(bindBaseOverride) {
  const explicit = normalizeBindBase(bindBaseOverride);
  if (explicit) {
    const explicitEndpoint = findEndpointByBase(explicit);
    return explicitEndpoint?.bindBase
      ? normalizeBindBase(explicitEndpoint.bindBase)
      : explicit;
  }

  const fromEnv = getConfiguredBindBase();
  if (fromEnv) return normalizeBindBase(fromEnv);

  const apiBase = getConfiguredApiBase() || _resolvedBase || API_BASE;
  const apiEndpoint = findEndpointByBase(apiBase);
  return normalizeBindBase(apiEndpoint?.bindBase ? apiEndpoint.bindBase : apiBase);
}
/**
 * Pick the authorization URL (or path) out of an init-sessions payload.
 *
 * The known field spellings are tried in priority order and the first truthy
 * value wins.
 *
 * @param {object|null|undefined} data Payload returned by init-sessions.
 * @returns {*} The first truthy candidate, or null when none is present or
 *   `data` is not an object.
 */
function pickBindAuthorizeHint(data) {
  if (!data || typeof data !== 'object') return null;

  const candidates = [
    'verificationUriComplete',
    'verification_uri_complete',
    'verificationUri',
    'verification_uri',
    'authorize_url',
    'authorization_url',
    'qr_url',
  ];
  for (const field of candidates) {
    const value = data[field];
    if (value) return value;
  }
  return null;
}
/**
 * Extract the bind status from an exchange payload as an upper-case string.
 *
 * The status is read from `status`, `state`, `bind_status` or `phase` (first
 * truthy one). When the payload is not an object, or carries no usable
 * status, `'PENDING'` is returned. The default is upper-case like every other
 * result so that callers comparing against `'PENDING'` keep polling instead
 * of treating "no status yet" as an unknown state.
 *
 * @param {object|null|undefined} data Payload returned by the exchange call.
 * @returns {string} Upper-cased status, `'PENDING'` when absent.
 */
function normalizeBindStatus(data) {
  if (!data || typeof data !== 'object') return 'PENDING';
  const raw = data.status || data.state || data.bind_status || data.phase;
  const text = String(raw ?? '').trim();
  return text ? text.toUpperCase() : 'PENDING';
}
/**
 * Produce a log-safe rendering of a secret key.
 *
 * Empty input gives an empty string, secrets of up to six characters are
 * fully hidden, longer ones keep their first three and last two characters.
 *
 * @param {unknown} secret Secret value; falsy values count as empty.
 * @returns {string} Masked text.
 */
function maskSecret(secret) {
  const raw = String(secret || '');
  if (raw.length === 0) return '';
  if (raw.length < 7) return '***';

  const head = raw.substring(0, 3);
  const tail = raw.substring(raw.length - 2);
  return [head, tail].join('***');
}
/**
 * First half of the account bind: create a bind session and obtain the URL
 * the user must open to authorize it. No bearer token is sent.
 *
 * A fresh PKCE pair is generated; the challenge goes to init-sessions and the
 * verifier is returned for the later exchange step.
 *
 * @param {object} options
 * @param {string} [options.bindBase] Bind host; resolved automatically when omitted.
 * @param {string} [options.initPath] Path of the init-sessions endpoint.
 * @param {{clientInstanceId: string, deviceName?: string, platform?: string, hostname?: string}} options.identity
 * @param {string} [options.userAgent]
 * @param {string} [options.skillVersion]
 * @param {function} [options.onLog] Receives `{ step, message, url? }` progress events.
 * @param {function} [options.onInitSession] Awaited with the session id once known.
 * @returns {Promise<{sessionId: string, deviceCode: string, codeVerifier: string,
 *   authorizeHint: string, interval: number, expiresIn: number}>}
 *   `interval` and `expiresIn` are NaN when the response omits them.
 * @throws {Error} BindFlowError when the client instance id, session id,
 *   device code or authorize URL is missing.
 */
export async function runAccountBindInitVerify(options) {
  const bindBase = options.bindBase
    ? normalizeBindBase(options.bindBase)
    : resolveBindBase();
  const initPath = options.initPath || DEFAULT_BIND_INIT;

  const device = options.identity || {};
  const { clientInstanceId } = device;
  if (!clientInstanceId) {
    throw makeBindFlowError(
      'identity.clientInstanceId is required / 缺少 identity.clientInstanceId',
      { code: 'MISSING_CLIENT_INSTANCE_ID' },
    );
  }

  const userAgent = String(options.userAgent || 'Kling-Provider-Skill/unknown');
  const skillVersion = String(options.skillVersion || getSkillVersion());
  const emit = typeof options.onLog === 'function' ? options.onLog : () => {};
  const notifySession = options.onInitSession;
  const pkce = createPkcePair();
  const textOrUnknown = (value) => String(value || '').trim() || 'unknown';

  emit({ step: 'base', message: 'Using bind base / 当前 bind 基址:', url: bindBase });
  emit({ step: 'init', message: 'Calling init-sessions / 调用 init-sessions …' });

  const requestBody = {
    skillId: DEFAULT_BIND_SKILL_ID,
    skillVersion,
    clientInstanceId,
    deviceName: textOrUnknown(device.deviceName),
    platform: textOrUnknown(device.platform),
    hostname: textOrUnknown(device.hostname),
    requestedScopes: [DEFAULT_BIND_SCOPE],
    codeChallenge: pkce.codeChallenge,
    codeChallengeMethod: 'S256',
  };
  const initData = await skillBindHttpJson(userAgent, bindBase, initPath, requestBody);

  const sessionId = pickBindSessionId(initData);
  if (!sessionId) {
    throw makeBindFlowError(
      'init-sessions response missing sessionId / init-sessions 响应缺少 sessionId',
      { code: 'MISSING_SESSION_ID' },
    );
  }
  // Let the caller record the session before any later step can fail.
  if (notifySession) await notifySession(sessionId);

  const deviceCode = String(initData.deviceCode || initData.device_code || '').trim();
  if (!deviceCode) {
    throw makeBindFlowError(
      'init-sessions response missing deviceCode / init-sessions 响应缺少 deviceCode',
      { code: 'MISSING_DEVICE_CODE', sessionId },
    );
  }

  const authorizeHint = resolveAuthorizationUrl(bindBase, pickBindAuthorizeHint(initData));
  if (!authorizeHint) {
    throw makeBindFlowError(
      'init-sessions response missing authorize url / init-sessions 响应缺少授权链接',
      { code: 'MISSING_AUTHORIZE_URL', sessionId },
    );
  }

  emit({ step: 'authorize', message: 'Open in browser / 请在浏览器完成授权:', url: authorizeHint });
  return {
    sessionId,
    deviceCode,
    codeVerifier: pkce.codeVerifier,
    authorizeHint,
    interval: Number(initData.interval),
    expiresIn: Number(initData.expiresIn),
  };
}
options.exchangePath || DEFAULT_BIND_EXCHANGE; + const timeoutMs = Math.max(1000, Number(options.timeoutMs ?? DEFAULT_BIND_TIMEOUT_MS)); + const userAgent = String(options.userAgent || 'Kling-Provider-Skill/unknown'); + const onLog = typeof options.onLog === 'function' ? options.onLog : () => {}; + const { + sessionId, + deviceCode, + codeVerifier, + authorizeHint, + expiresIn, + } = await runAccountBindInitVerify({ + ...options, + bindBase, + userAgent, + onLog, + }); + + const deadline = Date.now() + timeoutMs; + + // 服务端已返回 ttl,优先取较小值避免本地等待过长。 + let remainingTtlSec = Number.isFinite(Number(expiresIn)) + ? Number(expiresIn) + : null; + + while (Date.now() < deadline) { + if (remainingTtlSec != null && remainingTtlSec <= 0) { + throw makeBindFlowError('Bind expired / 绑定已过期', { + code: 'BIND_EXPIRED', + authorizeUrl: authorizeHint, + sessionId, + status: 'EXPIRED', + }); + } + onLog({ step: 'exchange', message: 'Polling exchange / 轮询 exchange …' }); + const exchangeData = await skillBindHttpJson(userAgent, bindBase, exchangePath, { + sessionId, + deviceCode, + codeVerifier, + }, 'POST'); + const status = normalizeBindStatus(exchangeData); + if (status === 'ISSUED' || status === 'ALREADY_EXCHANGED') { + const { + ak, sk, credentialId, accountId, + } = pickBindAccessSecretKeys(exchangeData); + if (!ak || !sk) { + throw makeBindFlowError(`${status} without credential / ${status} 但缺少 credential`, { + code: 'MISSING_CREDENTIAL', + authorizeUrl: authorizeHint, + sessionId, + status, + responseData: exchangeData, + }); + } + return { + sessionId, + authorizeHint, + accessKey: ak, + secretKey: sk, + credentialId, + accountId, + status, + }; + } + if (status !== 'PENDING') { + throw makeBindFlowError(`Bind status: ${status}`, { + code: 'BIND_STATUS', + authorizeUrl: authorizeHint, + sessionId, + status, + responseData: exchangeData, + }); + } + const waitSec = Number(exchangeData?.pollAfterSeconds); + const nextExpiresSec = Number(exchangeData?.expiresIn); + if 
/**
 * Submit a generation task and report its id and initial status on stdout.
 *
 * @param {string} apiPath Endpoint path, e.g. /v1/videos/image2video.
 * @param {object} payload Request body.
 * @param {string} [token] Bearer token; resolved automatically when omitted.
 * @returns {Promise<{taskId: string, status: string, data: object}>} `status`
 *   falls back to 'submitted' when the API does not report one.
 * @throws {Error} When the response carries no `task_id`.
 */
export async function submitTask(apiPath, payload, token) {
  const response = await klingPost(apiPath, payload, token);
  if (!response?.task_id) {
    throw new Error('API did not return task_id / API 未返回 task_id');
  }

  const { task_id: taskId } = response;
  const status = response.task_status || 'submitted';
  console.log(`Task submitted / 任务已提交: ${taskId}`);
  console.log(`Status / 状态: ${status}`);
  return { taskId, status, data: response };
}
/**
 * Poll a task until it reaches a terminal state.
 *
 * @param {string} apiPath  API route the task was submitted to
 * @param {string} taskId
 * @param {object} [opts]
 * @param {number} [opts.interval=10000]  Delay between polls in ms. Values that are
 *   not a finite positive number fall back to the default, so a bad value can no
 *   longer turn the loop into a near-zero-delay request storm.
 * @param {number} [opts.timeout]  Optional overall budget in ms. When omitted (or
 *   not a finite positive number) polling continues until the task succeeds or fails.
 * @param {string} [opts.token]  Optional bearer token forwarded to `queryTask`
 * @returns {Promise<object>} Task data of the succeeded task
 * @throws {Error} When the task reports `failed`, or when `opts.timeout` elapses first.
 */
export async function pollTask(apiPath, taskId, opts = {}) {
  const DEFAULT_INTERVAL_MS = 10000;
  const requestedInterval = Number(opts.interval);
  const interval = Number.isFinite(requestedInterval) && requestedInterval > 0
    ? requestedInterval
    : DEFAULT_INTERVAL_MS;

  const budget = Number(opts.timeout);
  const deadline = Number.isFinite(budget) && budget > 0 ? Date.now() + budget : Infinity;
  const { token } = opts;

  console.log('Waiting for task... / 等待任务完成...');
  while (true) {
    const data = await queryTask(apiPath, taskId, token);
    const status = data?.task_status;
    console.log(`Status / 状态: ${status}`);
    if (status === 'succeed') return data;
    if (status === 'failed') {
      throw new Error(`Task failed / 任务失败: ${data?.task_status_msg || 'Unknown error'}`);
    }
    // Give up before sleeping when the next poll would land past the deadline.
    if (Date.now() + interval > deadline) {
      throw new Error(
        `Task polling timed out / 任务轮询超时: ${taskId} (last status / 最后状态: ${status})`,
      );
    }
    await new Promise((resolveSleep) => setTimeout(resolveSleep, interval));
  }
}
/**
 * Poll a task until it succeeds, then download its result file.
 *
 * The result URL is looked up in `task_result` in this order:
 *   1. `task_result` itself when it is a plain string
 *   2. `task_result[urlField]`
 *   3. `task_result.videos[0][urlField]`
 *   4. `task_result.images[0][urlField]`, then `task_result.images[0].url`
 * Only non-empty strings are accepted as a URL.
 *
 * @param {string} apiPath
 * @param {string} taskId
 * @param {string} outputDir  Directory the file is written to (created if missing)
 * @param {object} [opts]
 * @param {string} [opts.urlField='url']  Name of the URL field inside the output
 * @param {string} [opts.ext='.mp4']  File extension appended to the task id
 * @param {number} [opts.interval]  Forwarded to `pollTask`
 * @param {string} [opts.token]  Forwarded to `pollTask`
 * @returns {Promise<string>} Path of the downloaded file
 * @throws {Error} When the task succeeded but no usable URL was returned.
 */
export async function pollAndDownload(apiPath, taskId, outputDir, opts = {}) {
  const data = await pollTask(apiPath, taskId, opts);
  const urlField = opts.urlField || 'url';
  const ext = opts.ext || '.mp4';
  const output = data?.task_result || {};

  // A string result IS the URL; never index into it (e.g. urlField 'length').
  const candidates = typeof output === 'string'
    ? [output]
    : [
      output[urlField],
      output.videos?.[0]?.[urlField],
      // Honour urlField for images too, keeping the plain `url` as fallback.
      output.images?.[0]?.[urlField],
      output.images?.[0]?.url,
    ];
  const url = candidates.find((candidate) => typeof candidate === 'string' && candidate.length > 0);
  if (!url) throw new Error(`Task succeeded but missing ${urlField} / 任务成功但未返回 ${urlField}`);

  await mkdir(outputDir, { recursive: true });
  const outPath = join(outputDir, `${taskId}${ext}`);
  await downloadFile(url, outPath);
  return outPath;
}
/**
 * Shorthand model names mapped to their canonical API `model_name`.
 * Keys must already be in `normalizeAliasKey` form (lowercase, whitespace and
 * underscores collapsed to '-'); an upper-case key could never match.
 */
const VIDEO_MODEL_ALIASES = new Map([
  ['omni3', 'kling-v3-omni'],
  ['omni-3', 'kling-v3-omni'],
  ['omni-v3', 'kling-v3-omni'],
  ['kling-video-o3', 'kling-v3-omni'],
  ['v3-omni', 'kling-v3-omni'],
  ['o3', 'kling-v3-omni'],
  ['kling-o3', 'kling-v3-omni'],
  ['omni1', 'kling-video-o1'],
  ['omni-1', 'kling-video-o1'],
  ['o1', 'kling-video-o1'],
  ['kling-o1', 'kling-video-o1'],
]);

/**
 * Resolve a user-supplied model alias to its canonical model name.
 *
 * @param {unknown} v  Raw `--model` value
 * @returns {string} Canonical model name, or '' when `v` is not a known alias
 */
function getVideoModelAliasTarget(v) {
  return VIDEO_MODEL_ALIASES.get(normalizeAliasKey(v)) || '';
}
/**
 * Sets `multi_shot`, `shot_type`, and `prompt` / `multi_prompt` on payload
 * (text2video / image2video / omni-video share these rules).
 *
 * - customize (default when --shot_type is empty): requires --multi_prompt holding a
 *   non-empty JSON array of shots; the top-level prompt is sent as ''.
 * - intelligence: requires a non-empty --prompt and rejects --multi_prompt.
 *
 * Prints an error and exits the process with code 1 on any validation error.
 *
 * @param {object} payload  Request body, mutated in place
 * @param {object} args  Parsed CLI arguments
 */
function mergeMultiShotIntoPayload(payload, args) {
  const exitWithError = (message) => {
    console.error(message);
    process.exit(1);
  };

  const shotType = normalizeShotType(args.shot_type) || 'customize';
  if (shotType !== 'customize' && shotType !== 'intelligence') {
    exitWithError(
      'Error / 错误: --shot_type must be customize or intelligence / 须为 customize 或 intelligence',
    );
  }
  payload.multi_shot = true;
  payload.shot_type = shotType;

  const rawMultiPrompt = args.multi_prompt ? String(args.multi_prompt).trim() : '';

  if (shotType === 'customize') {
    if (!rawMultiPrompt) {
      exitWithError(
        'Error / 错误: customize multi-shot requires --multi_prompt / 自定义分镜须提供 --multi_prompt',
      );
    }
    let shots;
    try {
      shots = JSON.parse(rawMultiPrompt);
    } catch {
      exitWithError('Error / 错误: --multi_prompt must be valid JSON / 必须是合法 JSON');
    }
    // Valid JSON is not enough: a bare string, number, object or empty list is not a shot list.
    if (!Array.isArray(shots) || shots.length === 0) {
      exitWithError(
        'Error / 错误: --multi_prompt must be a non-empty JSON array / --multi_prompt 须为非空 JSON 数组',
      );
    }
    payload.multi_prompt = shots;
    payload.prompt = '';
    return;
  }

  const prompt = String(args.prompt || '').trim();
  if (!prompt) {
    exitWithError(
      'Error / 错误: intelligence multi-shot requires non-empty --prompt / 智能分镜须提供非空 --prompt',
    );
  }
  if (rawMultiPrompt) {
    exitWithError(
      'Error / 错误: intelligence multi-shot does not use --multi_prompt / 智能分镜请勿传 --multi_prompt',
    );
  }
  payload.prompt = prompt;
}
/**
 * Reject `--sound on` in the two situations where it is not supported:
 * an Omni request that carries a reference video, and the kling-video-o1 model.
 * Any sound value other than 'on' (after normalization) passes.
 *
 * @param {string} apiPath  Route chosen for the request
 * @param {object} args  Parsed CLI arguments (`sound`, `model`, `video` are read)
 * @throws {Error} When sound is 'on' in an unsupported combination.
 */
function validateSoundConstraints(apiPath, args) {
  const soundSetting = normalizeSound(args.sound || 'off') || 'off';
  const modelKey = normalizeModelKey(args.model);
  if (soundSetting !== 'on') return;

  const isOmniWithReferenceVideo = apiPath === API_OMNI && Boolean(args.video);
  if (isOmniWithReferenceVideo) {
    const omniVideoMessage = 'Invalid --sound with Omni --video / Omni 参考视频时 sound 仅支持 off。\n'
      + 'Fix / 修复: remove --sound or set --sound off';
    throw new Error(omniVideoMessage);
  }

  if (modelKey === 'kling-video-o1') {
    const o1Message = 'Invalid --sound for kling-video-o1 / kling-video-o1 不支持 sound。\n'
      + 'Fix / 修复: set --sound off or omit it';
    throw new Error(o1Message);
  }
}
/**
 * Split a comma-separated CLI value into trimmed items.
 * Shared by parseImageInputs and parseElementIds, which only differ in their error text.
 *
 * @param {unknown} rawValue  Raw CLI value; any falsy value yields []
 * @param {string} emptyItemMessage  Error message used when an item is empty after trimming
 * @returns {string[]}
 * @throws {Error} When any item is empty (e.g. "a,,b" or a trailing comma).
 */
function splitStrictCommaList(rawValue, emptyItemMessage) {
  if (!rawValue) return [];
  const items = String(rawValue).split(',').map((item) => item.trim());
  if (items.includes('')) throw new Error(emptyItemMessage);
  return items;
}

/**
 * Parse `--image` into a list of image paths / URLs.
 *
 * @param {unknown} rawImageArg
 * @returns {string[]} Trimmed entries; [] when the argument is absent
 * @throws {Error} When the list contains an empty entry.
 */
function parseImageInputs(rawImageArg) {
  return splitStrictCommaList(
    rawImageArg,
    'Invalid --image list / --image 列表中存在空值;请移除空项并确保每个 image_url 非空。',
  );
}

/**
 * Parse `--image_types` into one lower-cased type per image.
 * Each entry is 'first_frame', 'end_frame' or '' (no type).
 *
 * @param {unknown} rawImageTypesArg
 * @param {number} imageCount  Number of entries parsed from `--image`
 * @returns {string[]} Array of length `imageCount`; all '' when the argument is absent
 * @throws {Error} When the number of types differs from `imageCount` or a type is unknown.
 */
function parseImageTypes(rawImageTypesArg, imageCount) {
  if (!rawImageTypesArg) return new Array(imageCount).fill('');
  const types = String(rawImageTypesArg).split(',').map((item) => item.trim().toLowerCase());
  if (types.length !== imageCount) {
    throw new Error(
      `Invalid --image_types / --image_types 数量需与 --image 一致: expected ${imageCount}, got ${types.length}`,
    );
  }
  // Empty entries are allowed: they mean "no explicit type" for that image.
  const unknownType = types.find((type) => type !== '' && type !== 'first_frame' && type !== 'end_frame');
  if (unknownType !== undefined) {
    throw new Error(
      `Invalid image type / 无效图片 type: ${unknownType}. Allowed / 允许: first_frame, end_frame, empty`,
    );
  }
  return types;
}

/**
 * Parse `--element_ids` into a list of subject ids.
 *
 * @param {unknown} rawElementIdsArg
 * @returns {string[]} Trimmed ids; [] when the argument is absent
 * @throws {Error} When the list contains an empty entry.
 */
function parseElementIds(rawElementIdsArg) {
  return splitStrictCommaList(
    rawElementIdsArg,
    'Invalid --element_ids list / --element_ids 列表中存在空值;请移除空项并确保每个 element_id 非空。',
  );
}
/**
 * Validate Omni subject references (`--element_ids`) against the image setup.
 * Does nothing when no subjects were supplied.
 *
 * Rules enforced, in order:
 *   1. With a first or end frame image, at most 3 subjects.
 *   2. kling-video-o1 with both first and end frame accepts no subjects.
 *   3. Images plus subjects may not exceed 4 with a reference video, 7 without.
 *
 * @param {object} args  Parsed CLI arguments (`model` and `video` are read)
 * @param {string[]} elementIds  Parsed subject ids
 * @param {{totalImages: number, hasFirstFrame: boolean, hasEndFrame: boolean}} imageState
 * @throws {Error} When any rule is violated.
 */
function validateOmniElementListRules(args, elementIds, imageState) {
  const subjectCount = elementIds.length;
  if (!subjectCount) return;

  const modelKey = normalizeModelKey(args.model);
  const { hasFirstFrame, hasEndFrame, totalImages } = imageState;

  const usesFrameImages = hasFirstFrame || hasEndFrame;
  if (usesFrameImages && subjectCount > 3) {
    throw new Error(
      `Too many subjects with frame generation / 首帧或尾帧生视频时主体最多 3 个: current ${subjectCount}`,
    );
  }

  if (hasFirstFrame && hasEndFrame && modelKey === 'kling-video-o1') {
    throw new Error(
      'Invalid element_list for kling-video-o1 / kling-video-o1 在首尾帧生视频场景不支持主体。',
    );
  }

  // Images and subjects share one reference budget.
  const refLimit = args.video ? 4 : 7;
  const refCount = totalImages + subjectCount;
  if (refCount > refLimit) {
    throw new Error(
      `Too many refs for omni-video / omni-video 参考图与主体总数超限: max ${refLimit} (current ${refCount})`,
    );
  }
}
v3/omni support; with --video only off; o1 no sound + --negative_prompt Negative prompt + --output_dir Output dir (default: ./output) + --no-wait Submit only, do not wait + --wait Wait for completion (default) + +Image-to-video / Omni: + --image Image list path or URL (comma-separated for Omni) + --image_types Optional type list aligned with --image (comma-separated): first_frame/end_frame/empty + --image_tail Last-frame image + --element_ids Subject IDs, comma-separated (Omni; combined limits with images) + --video Omni reference video: public http(s) URL only (video_list[].video_url) + --video_refer_type feature / base (default: base) + --keep_original_sound yes / no (optional; works for feature/base) + +Multi-shot (text2video / image2video / omni-video; same rules; see SKILL.md): + --multi_shot Enable multi-shot (with customize, top-level --prompt unused; not with --image_tail) + --shot_type customize | intelligence (required when multi_shot; default: customize) + --multi_prompt customize only: JSON array, max 6 shots, durations sum to --duration + --prompt intelligence: required (model splits shots); customize: ignored if set + +Query/download: + --task_id Task ID + --download Download if task succeeded + +Watermark: + --watermark Generate with watermark (adds watermark_info: {enabled: true}) + +Env: + credentials file ~/.config/kling/.credentials (access_key_id, secret_access_key) + KLING_TOKEN Session-only Bearer (optional override) + KLING_MEDIA_ROOTS Comma-separated extra dirs for local media / --output_dir (default: cwd only) + KLING_ALLOW_ABSOLUTE_PATHS=1 Allow any local path (e.g. 
/**
 * Look a task up on every video route, since a bare task id does not say which
 * route created it. Routes are tried in the order omni-video, image2video, text2video.
 *
 * A route counts as a hit when it returns data whose `task_status` is one of
 * succeed / failed / processing / submitted. Errors from individual routes do not
 * stop the search, but the last one is kept so that a real failure (bad token,
 * network outage) is not silently reported as a missing task.
 *
 * @param {string} taskId
 * @param {string} [token]  Optional bearer token forwarded to `queryTask`
 * @returns {Promise<{apiPath: string, data: object}>} The matching route and its task data
 * @throws {Error} When no route recognises the task; `cause` holds the last route
 *   error, if any, and its message is appended to the error message.
 */
async function queryTaskAnyPath(taskId, token) {
  const knownStatuses = new Set(['succeed', 'failed', 'processing', 'submitted']);
  let lastError;

  for (const apiPath of [API_OMNI, API_I2V, API_T2V]) {
    try {
      const data = await queryTask(apiPath, taskId, token);
      if (data && knownStatuses.has(data.task_status)) {
        return { apiPath, data };
      }
    } catch (err) {
      // Expected for the routes the task does not belong to; keep looking.
      lastError = err;
    }
  }

  const detail = lastError ? ` (last error / 最近错误: ${lastError?.message || lastError})` : '';
  throw new Error(
    `Task not found / 未找到任务: ${taskId}${detail}`,
    lastError ? { cause: lastError } : undefined,
  );
}
${videos[0].watermark_url}`); + } + if (args.download) { + const { mkdir } = await import('node:fs/promises'); + const { join } = await import('node:path'); + await mkdir(outputDir, { recursive: true }); + await downloadFile(videos[0].url, join(outputDir, `${args.task_id}.mp4`)); + } + } + } catch (e) { + console.error(`Error / 错误: ${e.message}`); + process.exit(1); + } + return; + } + + const imageInputs = parseImageInputs(args.image); + const imageTypes = parseImageTypes(args.image_types, imageInputs.length); + const elementIds = parseElementIds(args.element_ids); + const aspectForcesOmni = Boolean(args.image && args.aspect_ratio && imageInputs.length > 0); + const videoState = validateOmniVideoListRules(args); + const hasImage = imageInputs.length > 0; + if (!args.prompt && !hasImage && !args.multi_shot) { + console.error('Error / 错误: --prompt, --image, or --multi_shot required'); + console.error('Use --help / 使用 --help 查看帮助'); + process.exit(1); + } + + if (args.image_tail && !hasImage) { + console.error('Error / 错误: --image_tail requires --image (first frame) / 首尾帧需要首帧 --image'); + process.exit(1); + } + + if (args.multi_shot && args.image_tail) { + console.error( + 'Error / 错误: multi-shot does not support first+last frame (--image_tail) / 多镜头不支持首尾帧生视频,请去掉 --image_tail', + ); + process.exit(1); + } + + if (hasImage) { + const firstInput = imageInputs[0]; + const isUrl = firstInput.startsWith('http://') || firstInput.startsWith('https://'); + if (!isUrl && !existsSync(resolve(firstInput))) { + console.error(`Error / 错误: image not found / 图片不存在: ${firstInput}`); + process.exit(1); + } + } + + const apiPath = chooseApiPath(args); + const queryHint = `node kling.mjs video --task_id`; + if (apiPath === API_OMNI && aspectForcesOmni && args.model) { + const model = normalizeModelKey(args.model); + const isOmniModel = model === 'kling-v3-omni' || model === 'kling-video-o1'; + if (!isOmniModel) { + console.error( + `Error / 错误: --model ${model} does not support 
--aspect_ratio with --image.\n` + + 'Use omni model / 请使用 Omni 模型: kling-v3-omni or kling-video-o1', + ); + process.exit(1); + } + } + if (apiPath === API_OMNI && aspectForcesOmni && args.negative_prompt) { + console.error( + 'Info / 提示: omni-video does not support --negative_prompt; this parameter will be ignored', + ); + } + + try { + validateModelForRoute(apiPath, args); + validateSoundConstraints(apiPath, args); + + if (apiPath === API_T2V) { + const payload = { + model_name: args.model ? normalizeModelKey(args.model) : 'kling-v3', + negative_prompt: args.negative_prompt || '', + duration: String(args.duration || '5'), + mode: args.mode || 'pro', + aspect_ratio: args.aspect_ratio || '16:9', + sound: args.sound || 'off', + callback_url: '', + external_task_id: '', + }; + if (args.watermark) payload.watermark_info = { enabled: true }; + if (args.multi_shot) { + mergeMultiShotIntoPayload(payload, args); + } else { + const p = String(args.prompt || '').trim(); + if (!p) { + console.error( + 'Error / 错误: text-to-video requires --prompt when not using --multi_shot / 文生视频非多镜头须提供 --prompt', + ); + process.exit(1); + } + payload.prompt = args.prompt; + } + const result = await submitTask(API_T2V, payload, token); + console.log(`\nTask ID / 任务 ID: ${result.taskId}`); + console.log(`Query / 查询: ${queryHint} ${result.taskId} [--download]`); + if (args.wait !== false) { + console.log(); + const outPath = await pollAndDownload(API_T2V, result.taskId, outputDir, { token }); + console.log(`\n✓ Done / 完成: ${outPath}`); + } + return; + } + + if (apiPath === API_I2V) { + const payload = { + model_name: args.model ? normalizeModelKey(args.model) : 'kling-v3', + image: await readMediaAsValue(args.image), + image_tail: args.image_tail ? 
await readMediaAsValue(args.image_tail) : '', + negative_prompt: args.negative_prompt || '', + duration: String(args.duration || '5'), + mode: args.mode || 'pro', + sound: args.sound || 'off', + callback_url: '', + external_task_id: '', + }; + if (args.watermark) payload.watermark_info = { enabled: true }; + if (args.multi_shot) { + mergeMultiShotIntoPayload(payload, args); + } else { + payload.prompt = args.prompt || ''; + } + const result = await submitTask(API_I2V, payload, token); + console.log(`\nTask ID / 任务 ID: ${result.taskId}`); + console.log(`Query / 查询: ${queryHint} ${result.taskId} [--download]`); + if (args.wait !== false) { + console.log(); + const outPath = await pollAndDownload(API_I2V, result.taskId, outputDir, { token }); + console.log(`\n✓ Done / 完成: ${outPath}`); + } + return; + } + + const payload = { + model_name: args.model ? normalizeModelKey(args.model) : 'kling-v3-omni', + duration: String(args.duration || '5'), + mode: args.mode || 'pro', + sound: args.sound || 'off', + callback_url: '', + }; + const hasFirstFrameRef = imageTypes.includes('first_frame'); + const usesVideoEdit = Boolean(args.video && normalizeReferType(args.video_refer_type) === 'base'); + const requireAspectRatio = !hasFirstFrameRef && !usesVideoEdit; + if (args.aspect_ratio) { + payload.aspect_ratio = args.aspect_ratio; + } else if (requireAspectRatio) { + payload.aspect_ratio = '16:9'; + } + if (args.watermark) payload.watermark_info = { enabled: true }; + + if (args.multi_shot) { + mergeMultiShotIntoPayload(payload, args); + } else { + const p = String(args.prompt || '').trim(); + if (!p) { + console.error( + 'Error / 错误: Omni (non-multi-shot) requires non-empty --prompt / 非多镜头 Omni 须提供非空 --prompt', + ); + process.exit(1); + } + payload.multi_shot = false; + payload.prompt = args.prompt; + } + + const imageList = []; + let imageState = { totalImages: 0, hasFirstFrame: false, hasEndFrame: false }; + if (imageInputs.length > 0 || args.image_tail) { + imageState = 
validateOmniImageListRules(args, imageInputs, imageTypes, Boolean(args.image_tail)); + } + validateOmniElementListRules(args, elementIds, imageState); + if (imageInputs.length > 0) { + for (let i = 0; i < imageInputs.length; i++) { + const item = { image_url: await readMediaAsValue(imageInputs[i]) }; + if (imageTypes[i]) item.type = imageTypes[i]; + imageList.push(item); + } + } + if (args.image_tail) { + imageList.push({ image_url: await readMediaAsValue(args.image_tail), type: 'end_frame' }); + } + if (imageList.length > 0) payload.image_list = imageList; + + if (elementIds.length > 0) { + payload.element_list = elementIds.map(id => { + return { element_id: String(id.trim()) }; + }); + } + + if (args.video) { + const videoUrl = readOmniVideoRefUrl(args.video); + const videoItem = { video_url: videoUrl, refer_type: videoState.referType }; + if (videoState.keepOriginalSound) videoItem.keep_original_sound = videoState.keepOriginalSound; + payload.video_list = [videoItem]; + } + + const result = await submitTask(API_OMNI, payload, token); + console.log(`\nTask ID / 任务 ID: ${result.taskId}`); + console.log(`Query / 查询: ${queryHint} ${result.taskId} [--download]`); + if (args.wait !== false) { + console.log(); + const outPath = await pollAndDownload(API_OMNI, result.taskId, outputDir, { token }); + console.log(`\n✓ Done / 完成: ${outPath}`); + } + } catch (e) { + console.error(`Error / 错误: ${e.message}`); + process.exit(1); + } +} + +const __filename = fileURLToPath(import.meta.url); +if (process.argv[1] && resolve(__filename) === resolve(process.argv[1])) { + main().catch((e) => { + console.error(`Error / 错误: ${e?.message || e}`); + process.exit(1); + }); +} diff --git a/.claude/skills/setup.js b/.claude/skills/setup.js new file mode 100644 index 0000000..f0649c5 --- /dev/null +++ b/.claude/skills/setup.js @@ -0,0 +1,98 @@ +#!/usr/bin/env node + +/** + * 素材生产系统 - 环境配置工具 + * + * 自动检测本地路径,生成 config.json。 + * 新机器首次使用时运行一次即可。 + * + * 用法: node setup.js + */ + +const fs = 
/**
 * Locate the Jianying / CapCut draft directory.
 *
 * Candidates are probed in this order, JianyingPro before CapCut:
 *   1. C:/Users/<username>/AppData/Local   (the original hard-coded location)
 *   2. %LOCALAPPDATA%                      (when the variable is set)
 *   3. <home>/AppData/Local                (covers profiles that are not under C:/Users/<username>)
 *
 * @returns {string} The first candidate that exists; when none exists, the
 *   JianyingPro path under C:/Users/<username> is returned as the default.
 */
function detectJianyingDraftPath() {
  let username
  try {
    username = os.userInfo().username
  } catch (_) {
    // os.userInfo() throws when the account has no user database entry; fall back instead of crashing setup.
    username = process.env.USERNAME || process.env.USER || path.basename(os.homedir())
  }

  const legacyRoot = `C:/Users/${username}/AppData/Local`
  const extraRoots = []
  if (process.env.LOCALAPPDATA) extraRoots.push(process.env.LOCALAPPDATA)
  extraRoots.push(path.join(os.homedir(), 'AppData', 'Local'))

  const candidates = []
  for (const product of ['JianyingPro', 'CapCut']) {
    candidates.push(`${legacyRoot}/${product}/User Data/Projects/com.lveditor.draft`)
    for (const root of extraRoots) {
      candidates.push(path.join(root, product, 'User Data', 'Projects', 'com.lveditor.draft'))
    }
  }

  const found = candidates.find(candidate => fs.existsSync(candidate))
  return found || candidates[0] // default to the Jianying path even when it does not exist
}
? '[存在]' : '[未找到,请确认剪映已安装]'}`) + console.log(` CapCut Mate: ${config.capcutMateDir} ${capcutExists ? '[存在]' : '[未找到,需安装]'}`) + console.log(` API 地址: ${config.capcutMateApiBase}`) + console.log('') + + fs.writeFileSync(CONFIG_PATH, JSON.stringify(config, null, 2), 'utf-8') + console.log(`已生成: ${CONFIG_PATH}`) + console.log('') + console.log('请补充以下必填项:') + console.log(' ossRegion - 阿里云 OSS Region') + console.log(' ossAccessKeyId - 阿里云 OSS AccessKeyId') + console.log(' ossAccessKeySecret - 阿里云 OSS AccessKeySecret') + console.log(' ossBucket - 阿里云 OSS Bucket') + console.log(' geminiApiBaseUrl - Gemini API 地址') + console.log(' geminiApiKey - Gemini API Key') + console.log('') + console.log('编辑 config.json 后即可使用。') +} + +main() diff --git a/.claude/skills/skill-creator/SKILL.md b/.claude/skills/skill-creator/SKILL.md new file mode 100644 index 0000000..b7f8659 --- /dev/null +++ b/.claude/skills/skill-creator/SKILL.md @@ -0,0 +1,356 @@ +--- +name: skill-creator +description: Guide for creating effective skills. This skill should be used when users want to create a new skill (or update an existing skill) that extends Claude's capabilities with specialized knowledge, workflows, or tool integrations. +license: Complete terms in LICENSE.txt +--- + +# Skill Creator + +This skill provides guidance for creating effective skills. + +## About Skills + +Skills are modular, self-contained packages that extend Claude's capabilities by providing +specialized knowledge, workflows, and tools. Think of them as "onboarding guides" for specific +domains or tasks—they transform Claude from a general-purpose agent into a specialized agent +equipped with procedural knowledge that no model can fully possess. + +### What Skills Provide + +1. Specialized workflows - Multi-step procedures for specific domains +2. Tool integrations - Instructions for working with specific file formats or APIs +3. Domain expertise - Company-specific knowledge, schemas, business logic +4. 
Bundled resources - Scripts, references, and assets for complex and repetitive tasks + +## Core Principles + +### Concise is Key + +The context window is a public good. Skills share the context window with everything else Claude needs: system prompt, conversation history, other Skills' metadata, and the actual user request. + +**Default assumption: Claude is already very smart.** Only add context Claude doesn't already have. Challenge each piece of information: "Does Claude really need this explanation?" and "Does this paragraph justify its token cost?" + +Prefer concise examples over verbose explanations. + +### Set Appropriate Degrees of Freedom + +Match the level of specificity to the task's fragility and variability: + +**High freedom (text-based instructions)**: Use when multiple approaches are valid, decisions depend on context, or heuristics guide the approach. + +**Medium freedom (pseudocode or scripts with parameters)**: Use when a preferred pattern exists, some variation is acceptable, or configuration affects behavior. + +**Low freedom (specific scripts, few parameters)**: Use when operations are fragile and error-prone, consistency is critical, or a specific sequence must be followed. + +Think of Claude as exploring a path: a narrow bridge with cliffs needs specific guardrails (low freedom), while an open field allows many routes (high freedom). + +### Anatomy of a Skill + +Every skill consists of a required SKILL.md file and optional bundled resources: + +``` +skill-name/ +├── SKILL.md (required) +│ ├── YAML frontmatter metadata (required) +│ │ ├── name: (required) +│ │ └── description: (required) +│ └── Markdown instructions (required) +└── Bundled Resources (optional) + ├── scripts/ - Executable code (Python/Bash/etc.) + ├── references/ - Documentation intended to be loaded into context as needed + └── assets/ - Files used in output (templates, icons, fonts, etc.) 
+``` + +#### SKILL.md (required) + +Every SKILL.md consists of: + +- **Frontmatter** (YAML): Contains `name` and `description` fields. These are the only fields that Claude reads to determine when the skill gets used, thus it is very important to be clear and comprehensive in describing what the skill is, and when it should be used. +- **Body** (Markdown): Instructions and guidance for using the skill. Only loaded AFTER the skill triggers (if at all). + +#### Bundled Resources (optional) + +##### Scripts (`scripts/`) + +Executable code (Python/Bash/etc.) for tasks that require deterministic reliability or are repeatedly rewritten. + +- **When to include**: When the same code is being rewritten repeatedly or deterministic reliability is needed +- **Example**: `scripts/rotate_pdf.py` for PDF rotation tasks +- **Benefits**: Token efficient, deterministic, may be executed without loading into context +- **Note**: Scripts may still need to be read by Claude for patching or environment-specific adjustments + +##### References (`references/`) + +Documentation and reference material intended to be loaded as needed into context to inform Claude's process and thinking. + +- **When to include**: For documentation that Claude should reference while working +- **Examples**: `references/finance.md` for financial schemas, `references/mnda.md` for company NDA template, `references/policies.md` for company policies, `references/api_docs.md` for API specifications +- **Use cases**: Database schemas, API documentation, domain knowledge, company policies, detailed workflow guides +- **Benefits**: Keeps SKILL.md lean, loaded only when Claude determines it's needed +- **Best practice**: If files are large (>10k words), include grep search patterns in SKILL.md +- **Avoid duplication**: Information should live in either SKILL.md or references files, not both. 
Prefer references files for detailed information unless it's truly core to the skill—this keeps SKILL.md lean while making information discoverable without hogging the context window. Keep only essential procedural instructions and workflow guidance in SKILL.md; move detailed reference material, schemas, and examples to references files. + +##### Assets (`assets/`) + +Files not intended to be loaded into context, but rather used within the output Claude produces. + +- **When to include**: When the skill needs files that will be used in the final output +- **Examples**: `assets/logo.png` for brand assets, `assets/slides.pptx` for PowerPoint templates, `assets/frontend-template/` for HTML/React boilerplate, `assets/font.ttf` for typography +- **Use cases**: Templates, images, icons, boilerplate code, fonts, sample documents that get copied or modified +- **Benefits**: Separates output resources from documentation, enables Claude to use files without loading them into context + +#### What to Not Include in a Skill + +A skill should only contain essential files that directly support its functionality. Do NOT create extraneous documentation or auxiliary files, including: + +- README.md +- INSTALLATION_GUIDE.md +- QUICK_REFERENCE.md +- CHANGELOG.md +- etc. + +The skill should only contain the information needed for an AI agent to do the job at hand. It should not contain auxiliary context about the process that went into creating it, setup and testing procedures, user-facing documentation, etc. Creating additional documentation files just adds clutter and confusion. + +### Progressive Disclosure Design Principle + +Skills use a three-level loading system to manage context efficiently: + +1. **Metadata (name + description)** - Always in context (~100 words) +2. **SKILL.md body** - When skill triggers (<5k words) +3. 
**Bundled resources** - As needed by Claude (Unlimited because scripts can be executed without reading into context window) + +#### Progressive Disclosure Patterns + +Keep SKILL.md body to the essentials and under 500 lines to minimize context bloat. Split content into separate files when approaching this limit. When splitting out content into other files, it is very important to reference them from SKILL.md and describe clearly when to read them, to ensure the reader of the skill knows they exist and when to use them. + +**Key principle:** When a skill supports multiple variations, frameworks, or options, keep only the core workflow and selection guidance in SKILL.md. Move variant-specific details (patterns, examples, configuration) into separate reference files. + +**Pattern 1: High-level guide with references** + +```markdown +# PDF Processing + +## Quick start + +Extract text with pdfplumber: +[code example] + +## Advanced features + +- **Form filling**: See [FORMS.md](FORMS.md) for complete guide +- **API reference**: See [REFERENCE.md](REFERENCE.md) for all methods +- **Examples**: See [EXAMPLES.md](EXAMPLES.md) for common patterns +``` + +Claude loads FORMS.md, REFERENCE.md, or EXAMPLES.md only when needed. + +**Pattern 2: Domain-specific organization** + +For Skills with multiple domains, organize content by domain to avoid loading irrelevant context: + +``` +bigquery-skill/ +├── SKILL.md (overview and navigation) +└── reference/ + ├── finance.md (revenue, billing metrics) + ├── sales.md (opportunities, pipeline) + ├── product.md (API usage, features) + └── marketing.md (campaigns, attribution) +``` + +When a user asks about sales metrics, Claude only reads sales.md. 
+ +Similarly, for skills supporting multiple frameworks or variants, organize by variant: + +``` +cloud-deploy/ +├── SKILL.md (workflow + provider selection) +└── references/ + ├── aws.md (AWS deployment patterns) + ├── gcp.md (GCP deployment patterns) + └── azure.md (Azure deployment patterns) +``` + +When the user chooses AWS, Claude only reads aws.md. + +**Pattern 3: Conditional details** + +Show basic content, link to advanced content: + +```markdown +# DOCX Processing + +## Creating documents + +Use docx-js for new documents. See [DOCX-JS.md](DOCX-JS.md). + +## Editing documents + +For simple edits, modify the XML directly. + +**For tracked changes**: See [REDLINING.md](REDLINING.md) +**For OOXML details**: See [OOXML.md](OOXML.md) +``` + +Claude reads REDLINING.md or OOXML.md only when the user needs those features. + +**Important guidelines:** + +- **Avoid deeply nested references** - Keep references one level deep from SKILL.md. All reference files should link directly from SKILL.md. +- **Structure longer reference files** - For files longer than 100 lines, include a table of contents at the top so Claude can see the full scope when previewing. + +## Skill Creation Process + +Skill creation involves these steps: + +1. Understand the skill with concrete examples +2. Plan reusable skill contents (scripts, references, assets) +3. Initialize the skill (run init_skill.py) +4. Edit the skill (implement resources and write SKILL.md) +5. Package the skill (run package_skill.py) +6. Iterate based on real usage + +Follow these steps in order, skipping only if there is a clear reason why they are not applicable. + +### Step 1: Understanding the Skill with Concrete Examples + +Skip this step only when the skill's usage patterns are already clearly understood. It remains valuable even when working with an existing skill. + +To create an effective skill, clearly understand concrete examples of how the skill will be used. 
This understanding can come from either direct user examples or generated examples that are validated with user feedback. + +For example, when building an image-editor skill, relevant questions include: + +- "What functionality should the image-editor skill support? Editing, rotating, anything else?" +- "Can you give some examples of how this skill would be used?" +- "I can imagine users asking for things like 'Remove the red-eye from this image' or 'Rotate this image'. Are there other ways you imagine this skill being used?" +- "What would a user say that should trigger this skill?" + +To avoid overwhelming users, avoid asking too many questions in a single message. Start with the most important questions and follow up as needed for better effectiveness. + +Conclude this step when there is a clear sense of the functionality the skill should support. + +### Step 2: Planning the Reusable Skill Contents + +To turn concrete examples into an effective skill, analyze each example by: + +1. Considering how to execute on the example from scratch +2. Identifying what scripts, references, and assets would be helpful when executing these workflows repeatedly + +Example: When building a `pdf-editor` skill to handle queries like "Help me rotate this PDF," the analysis shows: + +1. Rotating a PDF requires re-writing the same code each time +2. A `scripts/rotate_pdf.py` script would be helpful to store in the skill + +Example: When designing a `frontend-webapp-builder` skill for queries like "Build me a todo app" or "Build me a dashboard to track my steps," the analysis shows: + +1. Writing a frontend webapp requires the same boilerplate HTML/React each time +2. An `assets/hello-world/` template containing the boilerplate HTML/React project files would be helpful to store in the skill + +Example: When building a `big-query` skill to handle queries like "How many users have logged in today?" the analysis shows: + +1. 
Querying BigQuery requires re-discovering the table schemas and relationships each time +2. A `references/schema.md` file documenting the table schemas would be helpful to store in the skill + +To establish the skill's contents, analyze each concrete example to create a list of the reusable resources to include: scripts, references, and assets. + +### Step 3: Initializing the Skill + +At this point, it is time to actually create the skill. + +Skip this step only if the skill being developed already exists, and iteration or packaging is needed. In this case, continue to the next step. + +When creating a new skill from scratch, always run the `init_skill.py` script. The script conveniently generates a new template skill directory that automatically includes everything a skill requires, making the skill creation process much more efficient and reliable. + +Usage: + +```bash +scripts/init_skill.py <skill-name> --path <output-directory> +``` + +The script: + +- Creates the skill directory at the specified path +- Generates a SKILL.md template with proper frontmatter and TODO placeholders +- Creates example resource directories: `scripts/`, `references/`, and `assets/` +- Adds example files in each directory that can be customized or deleted + +After initialization, customize or remove the generated SKILL.md and example files as needed. + +### Step 4: Edit the Skill + +When editing the (newly-generated or existing) skill, remember that the skill is being created for another instance of Claude to use. Include information that would be beneficial and non-obvious to Claude. Consider what procedural knowledge, domain-specific details, or reusable assets would help another Claude instance execute these tasks more effectively. 
+ +#### Learn Proven Design Patterns + +Consult these helpful guides based on your skill's needs: + +- **Multi-step processes**: See references/workflows.md for sequential workflows and conditional logic +- **Specific output formats or quality standards**: See references/output-patterns.md for template and example patterns + +These files contain established best practices for effective skill design. + +#### Start with Reusable Skill Contents + +To begin implementation, start with the reusable resources identified above: `scripts/`, `references/`, and `assets/` files. Note that this step may require user input. For example, when implementing a `brand-guidelines` skill, the user may need to provide brand assets or templates to store in `assets/`, or documentation to store in `references/`. + +Added scripts must be tested by actually running them to ensure there are no bugs and that the output matches what is expected. If there are many similar scripts, only a representative sample needs to be tested to ensure confidence that they all work while balancing time to completion. + +Any example files and directories not needed for the skill should be deleted. The initialization script creates example files in `scripts/`, `references/`, and `assets/` to demonstrate structure, but most skills won't need all of them. + +#### Update SKILL.md + +**Writing Guidelines:** Always use imperative/infinitive form. + +##### Frontmatter + +Write the YAML frontmatter with `name` and `description`: + +- `name`: The skill name +- `description`: This is the primary triggering mechanism for your skill, and helps Claude understand when to use the skill. + - Include both what the Skill does and specific triggers/contexts for when to use it. + - Include all "when to use" information here - Not in the body. The body is only loaded after triggering, so "When to Use This Skill" sections in the body are not helpful to Claude. 
+ - Example description for a `docx` skill: "Comprehensive document creation, editing, and analysis with support for tracked changes, comments, formatting preservation, and text extraction. Use when Claude needs to work with professional documents (.docx files) for: (1) Creating new documents, (2) Modifying or editing content, (3) Working with tracked changes, (4) Adding comments, or any other document tasks" + +Do not include any other fields in YAML frontmatter. + +##### Body + +Write instructions for using the skill and its bundled resources. + +### Step 5: Packaging a Skill + +Once development of the skill is complete, it must be packaged into a distributable .skill file that gets shared with the user. The packaging process automatically validates the skill first to ensure it meets all requirements: + +```bash +scripts/package_skill.py <path/to/skill-folder> +``` + +Optional output directory specification: + +```bash +scripts/package_skill.py <path/to/skill-folder> ./dist +``` + +The packaging script will: + +1. **Validate** the skill automatically, checking: + + - YAML frontmatter format and required fields + - Skill naming conventions and directory structure + - Description completeness and quality + - File organization and resource references + +2. **Package** the skill if validation passes, creating a .skill file named after the skill (e.g., `my-skill.skill`) that includes all files and maintains the proper directory structure for distribution. The .skill file is a zip file with a .skill extension. + +If validation fails, the script will report the errors and exit without creating a package. Fix any validation errors and run the packaging command again. + +### Step 6: Iterate + +After testing the skill, users may request improvements. Often this happens right after using the skill, with fresh context of how the skill performed. + +**Iteration workflow:** + +1. Use the skill on real tasks +2. Notice struggles or inefficiencies +3. Identify how SKILL.md or bundled resources should be updated +4. 
Implement changes and test again diff --git a/.claude/skills/video-from-script/SKILL.md b/.claude/skills/video-from-script/SKILL.md new file mode 100644 index 0000000..f49503f --- /dev/null +++ b/.claude/skills/video-from-script/SKILL.md @@ -0,0 +1,565 @@ +--- +name: video-from-script +description: 素材生产路由。根据用户意图分发到对应子技能:image-generator(生图)、capcut(成片)。支持单图和首尾帧两种视频模式。触发词:做视频、视频素材、生图+成片、图生视频、首尾帧。 +--- + +# 素材生产路由 + +## 强制规则 + +1. **工作流不可跳步**:分镜(纯叙事)→ Prompt 生成(分镜+风格)→ Pipeline 执行。每阶段之间必须审查结果 +2. **manifest.json 是唯一状态源**:任何操作(生图、上传、替换素材)完成后必须立即回写 manifest +3. **禁止 curl 调用生图/生视频 API**:必须通过 `pipeline.js` 或对应 generator 脚本执行 +4. **并行优先**:多个独立子任务必须用子 agent 并行,不要在主对话中串行完成 + +**禁止**:跳过分镜 / 分镜阶段读风格 / 不更新 manifest 就继续 / 一口气跑完 pipeline 不审查 + +--- + +**你(主 Agent)是整个流程的导演。** 子 Agent 是执行者,你负责:理解意图、编排调度、质量卡点、用户沟通、错误恢复。 + +## 主 Agent 职责 + +| 职责 | 说明 | +|------|------| +| 意图理解 | 分析用户需求,选择正确的模式、视频模型和帧模式 | +| 编排调度 | 决定 Agent 串行/并行、传递参数、收集结果 | +| 质量卡点 | 每个阶段完成后校验结果,不合格则要求子 Agent 重做 | +| 用户沟通 | 汇报进度、请求用户决策(挑选图片、确认风格) | +| 错误恢复 | API 失败时重试或换模型,质量不达标时补生成 | + +--- + +## 路由规则 + +| 用户意图 | 执行流程 | 子技能 | +|---------|---------|--------| +| "生图"、"批量图片" | 生图 | `image-generator` | +| "图片成片"、"图片轮播" | 已有图片 → 组装 | `capcut` | +| "图文成片"、"生图+成片" | 生图 → TTS+字幕+组装 | `image-generator` → `capcut` | +| "图生视频"、"图片转视频" | 生图 → AI视频 → 组装 | `image-generator` → Grok/VEO → `capcut` | +| "首尾帧"、"帧动画"、"关键帧" | 生图(成对) → VEO视频 → 组装 | `image-generator`(帧对) → VEO → `capcut` | +| "文案转视频"、"配音视频" | 生图 → TTS+字幕+组装 | `image-generator` → `capcut` | +| 只说"做视频" | **询问**:图文成片 / 图生视频(单图/首尾帧)? 
| — | + +**"图生视频"的后续追问**:用户说"图生视频"时,追问视频模式: +- **单图模式**:一张图 → 一段视频(Grok 或 VEO) +- **首尾帧模式**:起始帧+结束帧 → 一段过渡视频(仅 VEO) + +--- + +## Pipeline 执行流程 + +Agent 创建 manifest.json 后,用 `pipeline.js` 分阶段执行。**不要一口气跑完,必须在阶段之间审查结果。** + +### 分工 + +| 角色 | 职责 | +|------|------| +| **Agent**(你) | 读取 account.json + style.md → **分镜规划** → 从分镜生成 imagePrompt/videoPrompt → 写出 manifest.json → 审查每阶段结果 | +| **Pipeline** | 机械执行:生图 → 上传 → 生视频 → TTS → 成片。每完成一个 item 写盘,支持断点续跑 | + +### 执行步骤 + +``` +Step -1: 意图确认(进入任何步骤前必须完成,逐项确认,缺一不可) + + 1. 内容意图:用户要做什么? + - 生图 / 图生视频 / 图片成片 / 配音视频 / 首尾帧 + - 模糊时追问到明确,不要自己猜 + + 2. 素材来源: + - 有现成文案/图片?还是需要 AI 生成文案? + - 有参考图/风格参考? + + 3. 视频模式(涉及视频时必问): + - 单图模式:1 张图 → 1 段视频(Grok 或 VEO) + - 首尾帧模式:2 张图 → 过渡视频(仅 VEO) + + 4. 账号确认: + - 扫描 accounts/*/account.json 获取最新账号列表 + - 展示:ID、名称、风格、画幅 + - 未指定 → 让用户选 + - 指定了但不匹配 → 告知可用账号,问是否新建 + - 确认后记住 account ID + + 5. 参数确认: + - 画幅(9:16 / 16:9)、生图模型(Gemini / MJ)、视频模型(VEO / Grok) + - 有账号时从 account.json 继承默认值,只问是否覆盖 + + → 以上 5 项全部确认后,agent 写出完整执行计划,让用户最终确认: + + 执行计划示例(根据实际任务调整): + 1. 读取 {account} 账号配置 + 风格文件(style.md) + 2. 根据用户文案生成分镜表(N shot) + 3. 分镜 + 风格 → 生成英文 prompts(imagePrompt + videoPrompt) + 4. pipeline.js init → 创建 manifest.json + 输出目录 + 5. pipeline.js run --phase images → 生图 → 人工审查 + 6. pipeline.js run --phase upload,videos → 上传 + 生成视频 + 7. 
pipeline.js run --phase tts,assemble → TTS + 成片 + + 用户确认 "开始" → 进入 Step 0 + 用户修改 → 调整计划后重新输出 + → 禁止在用户未确认执行计划的情况下进入 Step 0 + +Step 0: 前置检查(账号+风格校验) + - 读取 accounts/{account}/account.json,检查 styles 字段是否配置了风格文件 + - 如果账号不存在或没有风格: + → 暂停流程,通过 CLI 创建:`pipeline.js create-account --id <id> --name <名称> --references ./ref.png` + → 然后编辑 `styles/*.md` 完善提示词策略 + - 校验账号完整性:`pipeline.js validate-account --account <id>` + - 有风格则继续 Step 1 + +Step 1: 分镜规划(纯叙事,不读风格) + - 输入:用户文案 + - 分析文案语义和节奏,拆成 N 个 shot + - 为每个 shot 规划:景别、镜头运动、画面内容(中文)、与下一 shot 的转场 + - 输出分镜表(见「分镜规划规则」章节) + - 分镜与风格无关,同一分镜可换不同风格复用 + +Step 2: Prompt 生成 + Manifest 初始化(分镜 + 风格 → 英文 prompts → pipeline.js init) + - 输入:分镜表 + style.md + account.json + - 子 Agent 将每个 shot 的中文画面描述结合风格文件,生成: + · imagePrompt(英文画面描述,给 Gemini/MJ) + · videoPrompt(英文运动描述,给 Grok/VEO) + · keyword, keywordColor + - **禁止 AI 手写 manifest.json**,必须通过脚本初始化: + ```bash + node pipeline.js init --account <id> --mode <mode> \ + --items '[{"text":"文案","imagePrompt":"...","videoPrompt":"...","keyword":"关键词","keywordColor":"#FF6B35"}]' + ``` + - 脚本自动从 account.json 继承:imageModel、videoModel、format、references + - 脚本自动创建目录、校验必填字段、设置 status=pending + - AI 只负责创意内容(text、imagePrompt、videoPrompt、keyword),不碰结构字段 + - 首尾帧模式额外要求:每个 item 必须有 `lastFramePrompt`(`imagePrompt` 作为第一帧,不需要单独的 `firstFramePrompt`) + - init 返回 manifest 路径,后续命令使用该路径 + +Step 3: 生图 → 人工审查 + 跑 images 阶段。完成后审查:分辨率≥1024、风格一致性、构图、无水印。 + 不合格则删除/调 prompt 重跑,不进入下一步。 + +Step 4: 上传 + 生视频(可选,图文成片跳过此步) + 跑 upload + videos 阶段。首尾帧模式检查过渡连贯性。 + +Step 5: TTS + 成片 + 跑 tts + assemble 阶段。检查字幕准确、BGM 不盖配音。 +``` + +> 命令语法见下方「CLI 参考」,不在此处重复。 + +### CLI 参考 + +```bash +# 创建账号(Step 0:首次使用时) +node pipeline.js create-account --id <id> --name <名称> \ + --desc <描述> --video-model veo3-fast --references ./ref1.png,./ref2.png + +# 校验账号完整性 +node pipeline.js validate-account --account <id> + +# 初始化 manifest(Step 2 使用,AI 只提供创意内容) +node pipeline.js init --account <id> --mode <mode> \ + --items '[{"text":"...","imagePrompt":"...","videoPrompt":"...","keyword":"...","keywordColor":"..."}]' 
+# 也可从文件读取 items(适合大量数据) +node pipeline.js init --account <id> --mode single --items-file ./items.json + +# 校验 manifest 完整性 +node pipeline.js validate --manifest <manifest-path> + +# 跑指定阶段 +node pipeline.js run --manifest <manifest-path> --phase images +node pipeline.js run --manifest <manifest-path> --phase upload,videos + +# 断点续跑(跳过已完成阶段和 item) +node pipeline.js run --manifest <manifest-path> --resume + +# 查看进度 +node pipeline.js status --manifest <manifest-path> +``` + +**阶段**: `images` → `upload` → `videos` → `tts` → `assemble` + +**Manifest item 状态**: `pending` → `generating` → `done` / `failed`。无 status 字段视为 pending。 + +--- + +## 视频模式对比 + +### 单图模式 + +```dot +digraph single_image { + rankdir=LR + node [shape=box, style=filled, fillcolor="#f5f5f5", fontsize=11] + + img [label="一张图", shape=oval] + prompt [label="videoPrompt"] + grok [label="Grok\n6s 视频", fillcolor="#fff3e0"] + veo [label="VEO\n6-8s 视频", fillcolor="#e8f5e9"] + result [label="视频输出", shape=oval, fillcolor="#e3f2fd"] + + img -> prompt + prompt -> grok + prompt -> veo + grok -> result + veo -> result +} +``` + +- 每条文案生成 1 张图 + 1 个 videoPrompt +- Grok 和 VEO 都支持 +- 提示词描述运动:"slow zoom in on subject" + +### 首尾帧模式 + +```dot +digraph frame_pair { + rankdir=LR + node [shape=box, style=filled, fillcolor="#f5f5f5", fontsize=11] + + first [label="起始帧"] + last [label="结束帧"] + prompt [label="videoPrompt"] + veo [label="VEO\n6-8s 过渡视频", fillcolor="#e8f5e9"] + result [label="视频输出", shape=oval, fillcolor="#e3f2fd"] + + first -> veo + last -> veo + prompt -> veo + veo -> result +} +``` + +- 每条文案生成 **2 张图**(firstFrame + lastFrame)+ 1 个 videoPrompt +- **仅 VEO 支持**(images 数组传两张图) +- 起始帧和结束帧必须是**同一场景的不同状态** +- 提示词描述过渡:"transition from idle machines to active production" + +| 对比 | 单图模式 | 首尾帧模式 | +|------|---------|-----------| +| 图片数量 | N 张 | 2N 张 | +| 生图耗时 | 标准 | ~2 倍(可并行) | +| 视频连贯性 | 仅运动 | 场景变化(更强) | +| 可用模型 | Grok + VEO | 仅 VEO | +| 适用场景 | 风景、人物展示 | 状态变化、叙事过渡 | + +--- + +## 多阶段执行策略 + +用 Agent 工具串行或并行执行子技能,**阶段间必须通过质量卡点**: + +**生图+成片(串行+人工卡点)**: +```dot +digraph image_then_assemble { + rankdir=LR + 
node [shape=box, style=filled, fillcolor="#f5f5f5", fontsize=11] + + agent1 [label="Agent 1\nimage-generator\n生成图片到 output/"] + gate1 [label="人工卡点\n用户挑选图片\n删除不合格的", shape=diamond, fillcolor="#fff9c4"] + agent2 [label="Agent 2\ncapcut\n读取精选素材 → 组装"] + + agent1 -> gate1 -> agent2 +} +``` + +**配音+生图(并行+自动校验)**: +```dot +digraph parallel_image_tts { + rankdir=LR + node [shape=box, style=filled, fillcolor="#f5f5f5", fontsize=11] + + agent1 [label="Agent 1\nimage-generator\n生图", fillcolor="#e8f5e9"] + agent2 [label="Agent 2\ncapcut\nTTS 配音", fillcolor="#e8f5e9"] + validate [label="自动校验\n分辨率>=1024\n画幅匹配\n音频时长匹配", shape=diamond, fillcolor="#fff9c4"] + agent3 [label="Agent 3\ncapcut\n组装全部素材 → 成片"] + + agent1 -> validate + agent2 -> validate + validate -> agent3 +} +``` + +**图生视频 - 单图模式**: +```dot +digraph single_image_video { + rankdir=LR + node [shape=box, style=filled, fillcolor="#f5f5f5", fontsize=11] + + agent1 [label="Agent 1\nimage-generator\n生图 + videoPrompt"] + gate1 [label="人工卡点\n用户挑选图片", shape=diamond, fillcolor="#fff9c4"] + agent2 [label="Agent 2\nGrok / VEO\n单图输入,并行生成视频"] + agent3 [label="Agent 3\ncapcut\n视频片段 + 字幕 → 成片"] + + agent1 -> gate1 -> agent2 -> agent3 +} +``` + +**图生视频 - 首尾帧模式**: +```dot +digraph frame_pair_video { + rankdir=LR + node [shape=box, style=filled, fillcolor="#f5f5f5", fontsize=11] + + agent1 [label="Agent 1\nimage-generator\n成对生图\n(firstFrame + lastFrame)\n可并行"] + gate1 [label="人工卡点\n检查首尾帧连贯性\n同一场景/相似视角", shape=diamond, fillcolor="#fff9c4"] + agent2 [label="Agent 2\nVEO\n双图输入\nimages:[first, last]"] + agent3 [label="Agent 3\ncapcut\n视频片段 + 字幕 → 成片"] + + agent1 -> gate1 -> agent2 -> agent3 +} +``` + +**视频模型选择**: + +| 模型 | 时长 | 画幅 | 单图 | 首尾帧 | 特点 | API | +|------|------|------|------|--------|------|-----| +| Grok | 6s | 任意 | ✅ | ❌ | 快、稳定 | yunwu.ai | +| Veo3-fast | ~8s | 16:9, 9:16 | ✅ | ✅ | 超分、中文增强 | jimmyai.cn | +| Veo3-fast-frames | ~8s | 16:9, 9:16 | ✅ | ✅ | 多帧、质量最高 | jimmyai.cn | + +图生视频注意事项: +- **并行执行**:先同时提交所有任务(并发 3),再并行轮询结果 +- 
单个视频生成耗时 60-300 秒 +- 脚本内置 3 次重试,每次自动简化提示词 +- **videoPrompt 在生图阶段一并生成** +- VEO 独有:`enhance_prompt=true` 中文增强,`enable_upsample=true` 超分 +- 配置在 `config.json` + +### 视频大小一致性 + +- **同批次同模型**,不混合 Grok(720P/6s)和 VEO(超分/8s) +- 画幅统一跟随 manifest 顶层 `format`(默认 `9:16`) +- 个别 item 降级到备用模型时,在 manifest 中标记 `"videoModel"` 以便追踪 + +### 视频生成失败降级 + +**降级链**: `Grok ↔ VEO → 可灵(Kling)` + +**触发**: 同一 item 重试 5 次仍失败 → 用备用模型单独补生成 + +```bash +# Grok 失败 → VEO 补 +node veo-video-generator.js --image <image-path> --prompt <prompt> -o ./videos + +# VEO 失败 → Grok 补 +node grok-video-generator.js --image <image-path> --prompt <prompt> -o ./videos +``` + +**规则**: 逐 item 降级,不卡整批次。补完后上传 OSS,回写 `videoUrl`,继续 `tts → assemble`。 + +--- + +## 目录规范 + +所有批次的输出遵循统一目录结构。完整规范见 [batch-mode.md](../image-generator/references/batch-mode.md) 的"目录规范"章节。 + +**核心规则**: + +``` +output/{account}_{YYYYMMDD}_{NNN}/ +├── manifest.json # 主清单(贯穿全流程) +├── prompts.txt # 原始提示词存档 +├── images/ # scene_{NN}_{keyword}.jpeg(首尾帧加 _last 后缀) +├── videos/ # scene_{NN}_{keyword}.mp4(与图片对应) +└── urls.json # OSS 公网 URL 映射 +``` + +**命名对应关系**:图片 `scene_01_觉醒.jpeg` → 视频 `scene_01_觉醒.mp4`;首尾帧尾帧 `scene_01_觉醒_last.jpeg`;MJ 候选 `scene_01_觉醒_cand1.jpeg` + +--- + +## manifest.json 格式 + +完整字段规范见 [manifest-schema.md](references/manifest-schema.md)(字段权重 P0/P1/P2、读写方、流转关系)。 + +**核心规则**: +- 脚本检测 `lastFrameUrl` → 首尾帧模式(传 images:[url, lastFrameUrl]);否则 → 单图模式(传 images:[url]) +- 顶层 `format` 自动传给 VEO/Grok 作为画幅比例 +- `account` 字段驱动 capcut_assemble 读取对应 account.json 的字幕风格配置 + +--- + +## 分镜规划规则 + +**分镜是 Agent 的纯叙事思考,与视觉风格无关。** 拿到文案后、读风格文件之前,先完成分镜。 + +短视频的画面节奏和文案节奏是脱钩的:TTS 配音连续流淌,画面在配音下面切换。分镜规划的是**视觉节拍**,不是文字断句。 + +### 核心原则 + +1. **按视觉节拍切 shot**:每个 shot = 6-8 秒视频片段。不是按文字断句,而是按画面能承载的信息量切 +2. **前 3 秒 hook**:shot 1 必须有强视觉冲击,决定完播率 +3. **景别快速交替**:相邻 shot 景别必须有落差(wide → close-up,close-up → medium),不要连续同一景别 +4. **镜头服务情绪**:每个 cameraMove 对应文案的情绪节拍,不要无意义运动 +5. 
**时长匹配**:先算总时长(shot 数 × 6-8s),再和配音时长对齐 + +### 时长规划 + +分镜前先算数: +- 短视频目标时长:20-60 秒 +- 每个 shot 时长:6-8 秒(由视频模型决定) +- shot 数量 = 目标时长 ÷ 6~8(取整,一般 4-8 个 shot) +- 配音字数 ≈ shot 数 × 12-15 字(按正常语速) + +### 分镜表字段 + +| 字段 | 类型 | 说明 | +|------|------|------| +| `text` | string | 该 shot 覆盖的配音文案(可能不到一句,也可能跨句) | +| `shotType` | enum | `wide` / `medium` / `close-up` / `extreme-close-up` | +| `cameraMove` | enum | `static` / `zoom-in` / `zoom-out` / `pan-left` / `pan-right` / `dolly-in` / `tracking` | +| `visualDesc` | string | 画面描述(中文),只写三件事:**主体是什么、什么状态/动作、视觉焦点在哪**。氛围和构图交给风格层 | +| `hook` | boolean | 仅 shot 1 为 true,标记是否为开场钩子 | + +### 景别节奏 + +``` +shot 1 (hook): close-up 或 extreme-close-up,强主体,抓眼球 +shot 2: wide 或 medium,展开场景,给上下文 +shot 3-N(交替): close-up(压)→ wide(松)→ close-up(压)→ ... +最后一个 shot: medium 或 wide,收束,不过度设计 +``` + +不要用 extreme-close-up 收尾(太紧),不要用 tracking 滥用(信息密度低)。 + +### 镜头运动选择 + +| cameraMove | 情绪 | 典型场景 | +|------------|------|---------| +| `static` | 稳定、庄严 | 建筑、静物、仪式感 | +| `zoom-in` | 聚焦、压迫 | 悬疑、揭秘、强调细节 | +| `zoom-out` | 揭示、震撼 | 从局部拉出全景,揭示真相 | +| `pan-left/right` | 环顾、流动 | 展示空间、物品陈列 | +| `dolly-in` | 沉浸、紧张 | 人物面部、关键物件 | +| `tracking` | 跟随、活力 | 运动场景、行走(少用,AI 生成的 tracking 质量不稳定) | + +短视频默认转场是硬切,不需要单独字段。特殊转场(fade/dissolve)仅在 Agent 判断需要情绪转换时标注在 `visualDesc` 里。 + +--- + +## 提示词生成规则 + +**提示词由子 Agent 生成**:主 Agent 将分镜表 + 风格文件(style.md)交给子 Agent,子 Agent 负责将中文画面描述转化为英文 imagePrompt / videoPrompt。主 Agent 审核提示词质量,不合格则退回重做。 + +**前置条件**:账号必须有风格文件。无风格 → 提醒用户创建,不跳过。 + +### 单图模式提示词 + +每条文案生成: +- `imagePrompt`:画面描述(英文,给 Gemini/MJ) +- `videoPrompt`:运动描述(英文,给 Grok/VEO) + +videoPrompt 规则: +- 描述**运动**而非内容("zoom in" 而非 "a cat") +- 与 imagePrompt 画面内容对应 +- 简洁(1-2 句,不超过 50 词) +- **收敛原则**:基于图片已有内容,仅描述镜头运动和微动效果 +- **禁止**:大幅度环境切换、场景变化、人物位置跳变 +- **推荐写法**:镜头运动(slow zoom/pan/dolly)+ 星座/光效微动 + 保持静止氛围 +- **画幅继承**:manifest.json 顶层 `format` 字段(如 `"9:16"`)会自动传给 VEO,无需命令行 `-a` + +### 首尾帧模式提示词 + +每条文案生成: +- `imagePrompt`:起始帧画面(英文,与 single 模式复用同一字段) +- `lastFramePrompt`:结束帧画面(英文) +- `videoPrompt`:过渡描述(英文,给 
VEO) + +**首尾帧提示词设计原则**: + +| 原则 | 说明 | 示例 | +|------|------|------| +| 同一场景 | 首尾帧是同一地点/主体的不同状态 | 都是工厂,不是两个地方 | +| 视角一致 | 相机角度/高度/距离相同 | 都是 wide shot | +| 状态对比 | imagePrompt"静止/之前",lastFramePrompt"运动/之后" | 空车间 → 生产线运转 | +| 过渡自然 | videoPrompt 描述从首到尾的变化 | "machines start up rhythmically" | +| 光照连贯 | 光源方向一致,可以有渐变 | 冷光 → 暖光可以,不能反转光源 | + +**videoPrompt 规则**(首尾帧): +- 描述**过渡过程**而非单帧状态 +- "from X to Y" 或 "X begins, Y happens" 格式 +- 必须同时呼应 imagePrompt(起始帧)和 lastFramePrompt(结束帧)中的元素 +- 简洁(1-2 句,不超过 50 词) + +--- + +## 质量卡点(跨阶段) + +多阶段任务中,每个阶段完成后必须校验再进入下一阶段: + +### 生图 → 成片 卡点 + +| 检查项 | 标准 | 不通过处理 | +|--------|------|-----------| +| 图片分辨率 | 短边 >= 1024px | 重新生成 | +| 画幅比例 | 与目标视频一致 (9:16/16:9) | 重新生成 | +| 图片内容 | 无水印、无文字、主体清晰 | 删除,人工补选 | +| 风格一致性 | 同批次风格统一 | 替换偏差大的图 | +| 数量 | 至少 3 张(< 3 张无法成片) | 补充生成 | + +**首尾帧额外检查**: + +| 检查项 | 标准 | 不通过处理 | +|--------|------|-----------| +| 场景一致性 | 首尾帧是同一场景 | 重新生成 lastFrame | +| 视角匹配 | 构图、角度、距离一致 | 重新生成不匹配的帧 | +| 状态过渡合理 | 结束帧是起始帧的自然延续 | 调整提示词重新生成 | + +**自动校验脚本**(在 Agent 间插入): +```bash +node .claude/skills/video-from-script/scripts/validate_assets.js \ + --dir ./output/batch_xxx \ + --min-resolution 1024 \ + --expected-ratio 9:16 +``` + +### 配音 → 成片 卡点 + +| 检查项 | 标准 | 不通过处理 | +|--------|------|-----------| +| 音频时长 | 与素材总时长相近(±20%) | 调整语速或素材时长 | +| 音频质量 | 无静音段、无爆音 | 重新生成 | +| 音频数量 | 与素材数量匹配 | 补充或裁剪 | + +### AI视频 → 成片 卡点 + +| 检查项 | 标准 | 不通过处理 | +|--------|------|-----------| +| 视频时长 | 每段 6-8 秒 | 正常,模型固定输出 | +| 视频画质 | 无明显伪影、无黑帧 | 重新生成 | +| 过渡连贯(首尾帧) | 视频从首帧平滑过渡到尾帧 | 优化提示词重试 | +| 视频数量 | 与素材数量匹配 | 补充生成失败的视频 | + +### 成片输出 卡点 + +| 检查项 | 标准 | +|--------|------| +| 字幕准确 | 与原始文案一一对应 | +| 关键词高亮 | 颜色醒目、位置正确 | +| 图片动画 | Ken Burns 流畅无卡顿 | +| BGM 音量 | 不盖过配音(配音为主) | +| 转场 | 无黑帧、无跳帧 | + +**任何卡点不通过,必须修复后再进入下一阶段,不可跳过。** + +--- + +## 共享资源 + +所有子技能共享以下资源(位于本目录): + +- `scripts/` — 共享脚本(gemini-image-generator.js, mj-image-generator.js, grok-video-generator.js, veo-video-generator.js, capcut_assemble.js, sync-to-jianying.js, oss-upload.js) +- `accounts/` — 账号配置(详见 
[account-system.md](references/account-system.md)) +- `references/account-system.md` — 账号系统说明 + +配置统一在 `skills/config.json`(API密钥、路径)。 + +--- + +## 子技能 + +| 技能 | 触发词 | 职责 | +|------|--------|------| +| `image-generator` | 生图、批量出图、MJ、Gemini | 图片生成(双模型、单图/帧对) | +| `capcut` | 成片、组装、剪映、图片轮播 | CapCut 成片组装 | diff --git a/.claude/skills/video-from-script/accounts/_template/account.json b/.claude/skills/video-from-script/accounts/_template/account.json new file mode 100644 index 0000000..d43290d --- /dev/null +++ b/.claude/skills/video-from-script/accounts/_template/account.json @@ -0,0 +1,20 @@ +{ + "id": "", + "name": "", + "description": "", + "defaultFormat": "9:16", + "imageModel": "gemini", + "videoModel": "", + "batchSize": 30, + "capcut": { + "effects": [], + "filter": "", + "subtitleStyle": { + "fontSize": 36, + "color": "#FFFFFF", + "highlightColor": "#FF6B35", + "bold": true + }, + "defaultBGM": "" + } +} diff --git a/.claude/skills/video-from-script/accounts/_template/styles/.gitkeep b/.claude/skills/video-from-script/accounts/_template/styles/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/.claude/skills/video-from-script/accounts/forbidden-emperor/account.json b/.claude/skills/video-from-script/accounts/forbidden-emperor/account.json new file mode 100644 index 0000000..6bbfeb4 --- /dev/null +++ b/.claude/skills/video-from-script/accounts/forbidden-emperor/account.json @@ -0,0 +1,40 @@ +{ + "id": "forbidden-emperor", + "name": "禁忌帝王学", + "description": "禁书档案×东方密室美学×历史权谋。被删除的权力技术,历史课不教的真相。暗调古籍+烛火+朱砂,昭和大正禁书档案风格。", + "pipeline": "image-video", + "defaultFormat": "9:16", + "imageModel": "gemini", + "videoModel": "veo", + "batchSize": 10, + "styles": { + "oriental-mythology-ue5": { + "references": [ + { + "file": "下载 (3).jpg", + "url": "https://i.ibb.co/GQtg388Z/6fcf1869c871.jpg" + } + ] + } + }, + "capcut": { + "effects": [], + "filter": "电影感:30", + "subtitleStyle": { + "font": "SourceHanSerifCN_Regular", + "fontSize": 18, + "color": "#FFFFFF", + 
"bold": false, + "inAnimation": "向右滑动", + "inAnimationDuration": 1000000, + "outAnimation": "向左滑动", + "outAnimationDuration": 1000000, + "alpha": 0.9, + "transformY": 350, + "hasShadow": true, + "shadowColor": "#000000", + "shadowAlpha": 0.5 + }, + "defaultBGM": "" + } +} diff --git a/.claude/skills/video-from-script/accounts/forbidden-emperor/references/下载 (3).jpg b/.claude/skills/video-from-script/accounts/forbidden-emperor/references/下载 (3).jpg new file mode 100644 index 0000000..89c4d46 Binary files /dev/null and b/.claude/skills/video-from-script/accounts/forbidden-emperor/references/下载 (3).jpg differ diff --git a/.claude/skills/video-from-script/accounts/forbidden-emperor/styles/oriental-mythology-ue5.md b/.claude/skills/video-from-script/accounts/forbidden-emperor/styles/oriental-mythology-ue5.md new file mode 100644 index 0000000..19a0461 --- /dev/null +++ b/.claude/skills/video-from-script/accounts/forbidden-emperor/styles/oriental-mythology-ue5.md @@ -0,0 +1,193 @@ +# 东方神话史诗 — 阈限梦核版 + +> Liminal Space × 80s Film Grain × VHS Aesthetic × 传统东方神话 +> 封闭空间 + 静止 + 怀旧 + 超现实 + 十二星座 + +--- + +## 1. 
核心原则 + +| 维度 | 规则 | +|------|------| +| 空间 | 室内外均可,liminal(阈限空间),强调空旷、被遗弃的寂静感 | +| 氛围 | empty, still, silent, frozen moment — 空旷但不是宏大,是"被遗弃的寂静" | +| 美学 | 80s film grain, VHS aesthetic, analog photography, color bleeding, scan lines | +| 情绪 | nostalgic, uncanny, surreal, dreamlike — 怀旧、不安、超现实 | +| 人物 | 用纯尺度/位置词:`a small silhouette in the distance` / `a tiny presence at far end`,体现渺小,禁用外貌/服装/颜色描述 | +| 光照 | dim, tungsten, single light beam, fluorescent — 不是辉煌光芒,是衰弱的余光 | +| 色调 | faded, warm analog tones, desaturated — 褪色、偏暖、低饱和 | + +### ⚠ --sref 参考图规则(必须遵守) + +使用 --sref 时,**提示词中禁止出现任何人物描述词**(figure, person, character, hanfu 等)。MJ 会自动从参考图继承人物形象和风格。 + +| 模式 | 提示词内容 | 结果 | +|------|-----------|------| +| 有 --sref | 纯场景描述(建筑+光照+氛围),**零人物词** | ✅ 通过,参考图自动注入人物 | +| 有 --sref | 场景 + "lone figure" / "hanfu" 等人物词 | ❌ 触发 deepfake 审核 | +| 无 --sref | 场景 + "a lone figure in [color] hanfu" | ✅ 通过,MJ 自行生成人物 | + +参数:`--sref [URL] --sw 50`,最多1张参考图。无 --sref 时不加 --sw。 + +--- + +## 2. Prompt 结构 + +**模式 A:带 --sref 参考图(推荐)** +``` +[空间类型], [建筑 + 超现实元素]. [光照 + 氛围细节]. [星座元素]. +[色调] palette, 80s film grain, VHS aesthetic, analog photography, liminal space, surreal, deep depth of field, everything sharp --ar 9:16 --style raw --s 750 --sref [URL] --sw 50 +``` +⚠ 零人物词。参考图自动提供人物形象和风格。 + +**模式 B:无 --sref** +``` +[空间类型], [建筑 + 超现实元素]. A lone figure in [颜色] hanfu [姿态], still. +[光照 + 氛围细节]. [星座元素]. +[色调] palette, 80s film grain, VHS aesthetic, analog photography, liminal space, surreal, deep depth of field, everything sharp --ar 9:16 --style raw --s 750 +``` + +--- + +## 3. 十二星座提示词 + +> 完整 imagePrompt + videoPrompt 见下方第 6 节。 + +--- + +## 4. MJ 参数规范 + +| 参数 | 规范值 | 说明 | +|------|--------|------| +| --style | raw | 必须,保留 film grain 质感 | +| --s | 750 | stylize值 | +| --sw | ≤50 | ⚠ 仅配合 --sref | +| --sref | 最多1张 | 超过容易触发审核 | +| --ar | 9:16 | 竖版 | + +--- + +## 5. 视频Prompt规则 + +**收敛原则**: +- 基于图片已有内容,仅描述镜头微动 + 光效/星座缓慢变化 +- 禁止大幅度环境切换、场景变化、人物位置跳变 +- 每条不超过 40 词 + +**videoPrompt 模板**: +``` +[镜头运动] of [空间]. 
[光效/微粒微动]. [星座] constellation [缓慢出现/变亮]. [氛围微动]. Cinematic, photorealistic, 4K. +``` + +**镜头运动词表**(选一): +| 运动 | 英文 | 适用 | +|------|------|------| +| 静止 | Static shot | 默认,走廊/殿堂 | +| 前推 | Slow dolly forward | 走廊、隧道 | +| 横摇 | Slow pan | 对称空间 | +| 微推 | Slow push forward | 封闭空间 | +| 漂移 | Slow gentle drift | 水下、悬浮 | + +--- + +## 6. 十二星座 videoPrompt + +### 3.1 白羊座 Aries — 空火走廊 + +```text +imagePrompt: Empty fire temple corridor stretching into shadow, ancient dragon carvings on stone walls, dying embers in braziers casting dim orange glow. Stone pillars receding endlessly, ember particles frozen in warm air. A small silhouette in the distance. Aries constellation faintly glowing on distant ceiling. Warm amber faded palette, tungsten dim glow, 80s film grain, VHS aesthetic, analog photography, liminal space, surreal still atmosphere, deep depth of field, everything sharp --ar 9:16 --style raw --s 750 --sref [URL] --sw 50 + +videoPrompt: Slow dolly forward through fire temple corridor. Dying embers in braziers pulse dim orange. Faint amber glow slowly appears on distant ceiling. Ember particles drift in warm air. Cinematic, photorealistic, 4K. +``` + +### 3.2 金牛座 Taurus — 废弃温室 + +```text +imagePrompt: Abandoned celestial greenhouse, overgrown ancient trees with golden leaves breaking through cracked jade ceiling. Dust floating in single shaft of warm light. Crystal waterways dried up, bioluminescent flowers still glowing faintly in dim corners. A small silhouette among overgrown roots. Taurus constellation in faded emerald on dusty glass. Warm gold faded green palette, 80s film grain, VHS aesthetic, analog photography, liminal space, surreal overgrown stillness, deep depth of field, everything sharp --ar 9:16 --style raw --s 750 --sref [URL] --sw 50 + +videoPrompt: Static shot of abandoned celestial greenhouse. Single shaft of warm light shifts gently. Bioluminescent flowers pulse faintly in dim corners. Faded emerald glow appears on dusty glass ceiling. 
Dust floats in light beam. Cinematic, photorealistic, 4K. +``` + +### 3.3 双子座 Gemini — 镜面长廊 + +```text +imagePrompt: Infinite mirror corridor reflecting twin versions of a celestial observatory interior. Floating constellation maps drifting between mirror walls, starlight reflected infinitely into darkness. Two small silhouettes on opposite ends. Gemini constellation in dual silver-gold on ceiling. Silver gold faded palette, fluorescent light hum, 80s film grain, VHS aesthetic, analog photography, liminal space, surreal mirror infinity, deep depth of field, everything sharp --ar 9:16 --style raw --s 750 --sref [URL] --sw 50 + +videoPrompt: Slow pan across infinite mirror corridor between twin reflections. Floating star maps drift imperceptibly. Silver-gold light shifts on ceiling. Starlight reflects softly into mirror depth. Cinematic, photorealistic, 4K. +``` + +### 3.4 巨蟹座 Cancer — 空月池 + +```text +imagePrompt: Empty moonlit pool room, vast jade chamber with perfectly still water reflecting crescent moon through glass ceiling. Moon jellyfish floating motionless above water surface. Ancient lotus carved into jade walls glowing faintly. A small silhouette kneeling at pool edge. Cancer constellation in silver-blue on water surface. Silver-blue moonlit palette, 80s film grain, VHS aesthetic, analog photography, liminal space, surreal still water, deep depth of field, everything sharp --ar 9:16 --style raw --s 750 --sref [URL] --sw 50 + +videoPrompt: Static shot of empty moonlit pool room. Moonlight slowly shifts across still water. Moon jellyfish drift almost imperceptibly above surface. Silver-blue light ripples faintly on water. Lotus carvings glow faintly on jade walls. Cinematic, photorealistic, 4K. +``` + +### 3.5 狮子座 Leo — 空金殿 + +```text +imagePrompt: Empty golden throne room, vast silent hall with jade pillars receding into haze. Single beam of golden light from ceiling crack illuminating dust particles drifting. 
Dragon carvings on walls fading into shadow. A small silhouette in the distance before the empty throne. Leo constellation blazing gold on distant ceiling mural. Warm gold shadow palette, tungsten dim lighting, 80s film grain, VHS aesthetic, analog photography, liminal space, surreal empty grandeur, deep depth of field, everything sharp --ar 9:16 --style raw --s 750 --sref [URL] --sw 50 + +videoPrompt: Slow dolly forward into empty golden throne room. Single beam of golden light illuminates drifting dust particles. Golden glow slowly brightens on distant ceiling mural. Jade pillars recede into haze. Cinematic, photorealistic, 4K. +``` + +### 3.6 处女座 Virgo — 晶石圣所 + +```text +imagePrompt: Enclosed crystal sanctuary interior, translucent crystal tree trunks growing through jade floor and ceiling. Bioluminescent leaves casting pale jade glow in dim space. Crystal pathways empty, ancient stone lanterns unlit but glowing faintly within. A small silhouette at the tree base. Virgo constellation in green-gold on crystal canopy. Jade green pale gold palette, 80s film grain, VHS aesthetic, analog photography, liminal space, surreal bioluminescent stillness, deep depth of field, everything sharp --ar 9:16 --style raw --s 750 --sref [URL] --sw 50 + +videoPrompt: Static shot of crystal sanctuary interior. Bioluminescent leaves pulse pale jade rhythmically. Crystal tree trunks shimmer faintly. Green-gold light slowly appears on crystal canopy above. Stone lanterns glow softly within. Cinematic, photorealistic, 4K. +``` + +### 3.7 天秤座 Libra — 暮光殿 + +```text +imagePrompt: Twilight balance temple interior, twin massive jade doors on opposite walls, ancient balance scale mechanism filling the center. Floating ritual instruments motionless in air. Fading light from twin windows casting long symmetrical shadows. A small silhouette at the fulcrum. Libra constellation in perfect symmetry on ceiling. 
Twilight gold indigo palette, fading dual light, 80s film grain, VHS aesthetic, analog photography, liminal space, surreal symmetry, deep depth of field, everything sharp --ar 9:16 --style raw --s 750 --sref [URL] --sw 50 + +videoPrompt: Static shot of twilight balance temple. Twin windows cast fading light that slowly dims. Floating ritual instruments drift imperceptibly. Symmetrical twilight glow holds balance on ceiling. Long symmetrical shadows stretch slowly. Cinematic, photorealistic, 4K. +``` + +### 3.8 天蝎座 Scorpio — 地下熔廊 + +```text +imagePrompt: Underground volcanic corridor, ancient stone tunnel with glowing mineral veins in walls. Dim ruby light pulsing slowly from cracks in floor. Guardian reliefs carved into stone walls emanating faint glow. A small silhouette in the corridor center. Scorpio constellation in garnet on tunnel ceiling. Deep ruby dark palette, geological dim glow, 80s film grain, VHS aesthetic, analog photography, liminal space, surreal underground stillness, deep depth of field, everything sharp --ar 9:16 --style raw --s 750 --sref [URL] --sw 50 + +videoPrompt: Slow push forward into underground volcanic corridor. Mineral veins in walls pulse ruby glow slowly. Garnet light brightens on tunnel ceiling. Guardian reliefs emanate faint rhythmic glow. Dim warm light from floor cracks. Cinematic, photorealistic, 4K. +``` + +### 3.9 射手座 Sagittarius — 废弃星阁 + +```text +imagePrompt: Abandoned celestial observatory interior, massive dome ceiling open to void sky. Ancient star-maps on walls faded and peeling, stone instruments covered in dust. Starlight streaming through single dome crack. A small silhouette at the observatory center. Sagittarius constellation in silver on dome ceiling. 
Midnight blue silver faded palette, single starlight beam, 80s film grain, VHS aesthetic, analog photography, liminal space, surreal abandoned cosmos, deep depth of field, everything sharp --ar 9:16 --style raw --s 750 --sref [URL] --sw 50 + +videoPrompt: Static shot inside abandoned celestial observatory. Starlight beam through dome crack shifts slowly. Silver starlight fades in on dome ceiling. Ancient star-maps peel slightly on walls. Dust drifts in single light beam. Cinematic, photorealistic, 4K. +``` + +### 3.10 摩羯座 Capricorn — 冰封长廊 + +```text +imagePrompt: Frozen ice palace corridor, frost crystals covering jade walls and ceiling. Dim blue light filtering through ice-encrusted windows. Black iron chains frozen mid-swing, frozen waterfalls visible through ice walls. A small silhouette at the corridor end. Capricorn constellation in aurora blue on ice ceiling. Deep blue ice silver palette, cold dim light, 80s film grain, VHS aesthetic, analog photography, liminal space, surreal frozen stillness, deep depth of field, everything sharp --ar 9:16 --style raw --s 750 --sref [URL] --sw 50 + +videoPrompt: Slow dolly forward into frozen ice palace corridor. Frost crystals glint on jade walls. Aurora blue glow slowly brightens on ice ceiling. Dim blue light filters gently through ice-encrusted windows. Frozen chains sway almost imperceptibly. Cinematic, photorealistic, 4K. +``` + +### 3.11 水瓶座 Aquarius — 空云浴殿 + +```text +imagePrompt: Empty cloud palace bathhouse, vast jade pool with perfectly still water reflecting nothing. Steam rising from warm water, bioluminescent lotus floating on surface. Empty jade archways receding into mist. A small silhouette at the pool edge. Aquarius constellation in soft cyan on water surface. 
Silver-blue warm mist palette, dim steamy light, 80s film grain, VHS aesthetic, analog photography, liminal space, surreal empty bathhouse, deep depth of field, everything sharp --ar 9:16 --style raw --s 750 --sref [URL] --sw 50 + +videoPrompt: Static shot of empty cloud palace bathhouse. Steam rises slowly from still warm water. Bioluminescent lotus pulses soft cyan. Soft cyan glow appears on water surface. Jade archways recede into gentle mist. Cinematic, photorealistic, 4K. +``` + +### 3.12 双鱼座 Pisces — 沉没水殿 + +```text +imagePrompt: Submerged crystal throne room, jade architecture visible through murky bioluminescent water. Coral growing on throne, pearls scattered on floor, jellyfish floating motionless. Dragon statue coiled around dome visible through crystal ceiling, ocean depths beyond. A small silhouette floating before the throne. Pisces constellation glowing aqua through water. Jade green aqua murky palette, bioluminescent dim glow, 80s film grain, VHS aesthetic, analog photography, liminal space, surreal underwater stillness, deep depth of field, everything sharp --ar 9:16 --style raw --s 750 --sref [URL] --sw 50 + +videoPrompt: Slow gentle drift through submerged crystal throne room. Coral on throne sways softly in current. Aqua light glows through water above. Jellyfish float almost motionless. Bioluminescent particles drift through murky jade-green water. Cinematic, photorealistic, 4K. 
+``` + +--- + +*— End of Framework —* diff --git a/.claude/skills/video-from-script/accounts/military/account.json b/.claude/skills/video-from-script/accounts/military/account.json new file mode 100644 index 0000000..d679020 --- /dev/null +++ b/.claude/skills/video-from-script/accounts/military/account.json @@ -0,0 +1,36 @@ +{ + "id": "military", + "name": "军事账号", + "description": "军事主题短视频账号,暗黑漫画风格,深紫焦橙双色调", + "pipeline": "image-video", + "defaultFormat": "9:16", + "imageModel": "gemini", + "videoModel": "veo3-fast", + "batchSize": 30, + "styles": { + "dark-noir-military": { + "references": [ + { "file": "grunge_br.png", "url": "https://i.ibb.co/SwfD7YM6/e3caf4ad6e8a.png" } + ] + } + }, + "capcut": { + "effects": ["录制边框 III"], + "filter": "电影感:40", + "subtitleStyle": { + "font": "思源黑体 Heavy", + "fontSize": 24, + "color": "#FFFFFF", + "highlightColor": "#FF6B35", + "bold": true, + "hasShadow": true, + "shadowColor": "#000000", + "shadowAlpha": 0.8, + "transformY": -380, + "alignment": 1, + "inAnimation": "淡入", + "outAnimation": "淡出" + }, + "defaultBGM": "" + } +} diff --git a/.claude/skills/video-from-script/accounts/military/references/grunge_bl.png b/.claude/skills/video-from-script/accounts/military/references/grunge_bl.png new file mode 100644 index 0000000..fa3bd54 Binary files /dev/null and b/.claude/skills/video-from-script/accounts/military/references/grunge_bl.png differ diff --git a/.claude/skills/video-from-script/accounts/military/references/grunge_br.png b/.claude/skills/video-from-script/accounts/military/references/grunge_br.png new file mode 100644 index 0000000..3656452 Binary files /dev/null and b/.claude/skills/video-from-script/accounts/military/references/grunge_br.png differ diff --git a/.claude/skills/video-from-script/accounts/military/references/grunge_tl.png b/.claude/skills/video-from-script/accounts/military/references/grunge_tl.png new file mode 100644 index 0000000..52ccc00 Binary files /dev/null and 
b/.claude/skills/video-from-script/accounts/military/references/grunge_tl.png differ diff --git a/.claude/skills/video-from-script/accounts/military/references/grunge_tr.png b/.claude/skills/video-from-script/accounts/military/references/grunge_tr.png new file mode 100644 index 0000000..510201f Binary files /dev/null and b/.claude/skills/video-from-script/accounts/military/references/grunge_tr.png differ diff --git a/.claude/skills/video-from-script/accounts/military/references/manga_bl.png b/.claude/skills/video-from-script/accounts/military/references/manga_bl.png new file mode 100644 index 0000000..e681b74 Binary files /dev/null and b/.claude/skills/video-from-script/accounts/military/references/manga_bl.png differ diff --git a/.claude/skills/video-from-script/accounts/military/references/manga_br.png b/.claude/skills/video-from-script/accounts/military/references/manga_br.png new file mode 100644 index 0000000..6632e96 Binary files /dev/null and b/.claude/skills/video-from-script/accounts/military/references/manga_br.png differ diff --git a/.claude/skills/video-from-script/accounts/military/references/manga_tl.png b/.claude/skills/video-from-script/accounts/military/references/manga_tl.png new file mode 100644 index 0000000..f7e6e5c Binary files /dev/null and b/.claude/skills/video-from-script/accounts/military/references/manga_tl.png differ diff --git a/.claude/skills/video-from-script/accounts/military/references/manga_tr.png b/.claude/skills/video-from-script/accounts/military/references/manga_tr.png new file mode 100644 index 0000000..f12b419 Binary files /dev/null and b/.claude/skills/video-from-script/accounts/military/references/manga_tr.png differ diff --git a/.claude/skills/video-from-script/accounts/military/references/popart_bl.png b/.claude/skills/video-from-script/accounts/military/references/popart_bl.png new file mode 100644 index 0000000..f3dd1cc Binary files /dev/null and 
b/.claude/skills/video-from-script/accounts/military/references/popart_bl.png differ diff --git a/.claude/skills/video-from-script/accounts/military/references/popart_br.png b/.claude/skills/video-from-script/accounts/military/references/popart_br.png new file mode 100644 index 0000000..6ee3a77 Binary files /dev/null and b/.claude/skills/video-from-script/accounts/military/references/popart_br.png differ diff --git a/.claude/skills/video-from-script/accounts/military/references/popart_tl.png b/.claude/skills/video-from-script/accounts/military/references/popart_tl.png new file mode 100644 index 0000000..a691d79 Binary files /dev/null and b/.claude/skills/video-from-script/accounts/military/references/popart_tl.png differ diff --git a/.claude/skills/video-from-script/accounts/military/references/popart_tr.png b/.claude/skills/video-from-script/accounts/military/references/popart_tr.png new file mode 100644 index 0000000..e42c527 Binary files /dev/null and b/.claude/skills/video-from-script/accounts/military/references/popart_tr.png differ diff --git a/.claude/skills/video-from-script/accounts/military/styles/dark-noir-military.md b/.claude/skills/video-from-script/accounts/military/styles/dark-noir-military.md new file mode 100644 index 0000000..cc42623 --- /dev/null +++ b/.claude/skills/video-from-script/accounts/military/styles/dark-noir-military.md @@ -0,0 +1,169 @@ +# dark-noir-military + +暗黑漫画军事风格 — 深紫与焦橙双色调,纯黑背景,半调网点纹理,做旧丝网印刷质感,强戏剧性侧光。基于口播文案分镜导演方法论,注入隐性动势(Implied Motion),使静态图片天然具备运动趋势。 + +参考图: +- `references/ref_001_grunge_portrait.png` — 紫色油彩刮痕变体 +- `references/ref_002_halftone_popart.png` — 波普网点权威人物变体 +- `references/ref_003_warrior_manga.png` — 日式漫画武将变体 + +--- + +## 图片提示词 + +### 核心视觉要素 +- 主体为人物(军事/权力/悬疑角色),表情刚毅/冷峻/压抑 +- 半调网点(halftone dots)纹理贯穿全画面 +- 做旧丝网印刷质感(gritty risograph print) +- 强戏剧性侧光(high contrast chiaroscuro),大面积阴影剪影 +- 深紫 `#4B0082` + 焦橙 `#E07B00` + 纯黑 `#0A0A0A` 严格三色体系 + +### 隐性动势(Implied Motion) + +> 
在图片提示词中,通过描述「动作的进行时态」或「趋势中的瞬间」,使图片隐含运动方向,方便后续视频生成。 + +**人物动作趋势:** +`slowly turning head` / `eyes narrowing` / `jaw tightening` / `lips parting slightly` / +`fingers tightening on glass` / `shoulders slowly rising` / `coat swept by unseen wind` / +`exhaling deeply` / `leaning forward imperceptibly` / `gaze drifting downward` + +**场景变化趋势:** +`smoke curling upward` / `shadows lengthening` / `light fading at edges` / +`rain beginning to blur the glass` / `dust slowly settling` / `candle flame flickering` / +`fog creeping in from the distance` / `city lights blurring into streaks` + +**情绪张力趋势:** +`tension building in stillness` / `a breath held before breaking` / +`the moment before collapse` / `silence stretching thin` / +`the last second of control` / `something about to shatter` + +### 构图模式 +- 竖版 9:16,特写或半身为主 +- 极端面部特写:eye-level 或微仰角 +- 强调明暗对比,人物从阴影中浮现 +- 大面积黑色背景,主体集中在画面中心偏上 + +### 图片 Prompt 模板 + +**模板A — Gemini / Nanobanana(中英混排,最佳响应):** +``` +[中文主体描述],[中文隐性动势],[英文动势强化词],[中文环境/光线],暗黑漫画风格,深紫色与焦橙色双色调,纯黑背景,半调网点纹理,做旧丝网印刷质感,强戏剧性侧光,大面积阴影剪影,都市悬疑电影构图,无文字,无水印,竖版构图 9:16,dark noir illustration, deep purple and burnt orange duotone, halftone dot grain, gritty risograph print, high contrast chiaroscuro, bold black shadows, urban thriller aesthetic, editorial graphic novel style, no text, no watermark +``` + +**模板B — MidJourney(纯英文,MJ参数):** +``` +[情绪词] [主体描述], [隐性动势], [环境], [光线], [构图], dark noir comic style, limited color palette of deep purple and burnt orange on black background, halftone dot texture, gritty screen print effect, high contrast dramatic lighting, bold graphic shadows, cinematic close-up composition, editorial illustration, urban thriller aesthetic, no text, no watermark --ar 3:4 --style raw --q 2 --v 6.1 +``` + +**结构层级对照:** +| 层级 | 中文写法(Nanobanana) | 英文写法(MJ) | +|---|---|---| +| 主体 | `中年西装男子` | `middle-aged suited man` | +| **隐性动势** | `缓缓抬起头,眼神从低沉转向冷峻` | `head slowly lifting, eyes shifting from hollow to cold` | +| 环境 | `背后的人影正在消散` | `silhouettes dissolving 
behind him` | +| 光线 | `单侧冷白边缘光从上方打入` | `cold rim light from above` | +| 构图 | `极端面部特写,低角度仰视` | `extreme close-up, low angle` | + +### 负向提示词(每条附加) +``` +【负向】彩色背景,蓝色调,绿色调,写实照片风格,卡通可爱风,人物姓名,画面文字,logo,水印,过度曝光,模糊 +``` + +### 示例 + +**Nanobanana 示例:** +``` +中年西装男子半身特写,头缓缓低垂,目光从正视转为向下沉落,下颌微微收紧,背景中模糊的人影正在向黑暗消散,压抑氛围弥漫,冷白边缘光从头顶单侧打入,head slowly bowing, gaze sinking downward, jaw tightening, silhouettes dissolving into darkness behind him, cold rim light from above, extreme close-up, 暗黑漫画风格,深紫色与焦橙色双色调,纯黑背景,半调网点纹理,做旧丝网印刷质感,强戏剧性侧光,大面积阴影剪影,都市悬疑电影构图,无文字,无水印,竖版构图 9:16,dark noir illustration, deep purple and burnt orange duotone, halftone dot grain, gritty risograph print, high contrast chiaroscuro, editorial graphic novel style, no text, no watermark +``` + +**MJ 示例:** +``` +calculating middle-aged man in a dark suit, head slowly bowing, gaze sinking downward, jaw tightening imperceptibly, blurred silhouettes of figures dissolving in the background, oppressive atmosphere closing in, single cold rim light from above, extreme close-up composition, dark noir comic style, limited color palette of deep purple and burnt orange on black background, halftone dot texture, gritty screen print effect, high contrast dramatic lighting, bold graphic shadows, cinematic close-up composition, editorial illustration, urban thriller aesthetic, no text, no watermark --ar 3:4 --style raw --q 2 --v 6.1 +``` + +### MJ/Gemini 参数 +- MJ: `--ar 3:4 --style raw --q 2 --v 6.1` +- Gemini: 无额外参数,画幅由提示词中 `竖版构图 9:16` 控制 + +### 图片禁止项 +- 彩色背景、蓝色调、绿色调 +- 写实照片风格、卡通可爱风 +- 真实人名、画面文字、logo、水印 +- 过度曝光、整体模糊 +- 主色调偏离紫橙黑体系 +- 静态描述(standing/sitting/looking)单独使用,必须附加动势词 + +--- + +## 视频提示词 + +### 核心原则 +- **以图为锚,以文为魂**:视频色调/构图/人物状态必须与图片保持一致 +- **动势继承**:图片提示词中的隐性动势必须在视频中被接收并放大 +- **片段自洽**:每条视频首尾可衔接,开头承接图片状态,结尾留有余势 + +### 镜头运动类型 +| 运动名称 | 英文 | 情绪效果 | 适用场景 | +|---|---|---|---| +| 缓慢推进 | slow push in / creeping zoom | 压迫感上升,悬疑收紧 | 人物内心独白,威胁逼近 | +| 缓慢拉远 | slow pull back / creeping zoom out | 孤立感,宏观俯瞰 | 孤独叙事,结局揭示 | +| 环绕运镜 | slow orbit / 
circular dolly | 权力感,人物立体化 | 强权人物出场,对峙 | +| 手持微颤 | subtle handheld shake | 真实感,紧张不安 | 跟踪,暗中观察 | +| 荷兰角倾斜 | dutch angle tilt | 不稳定,道德扭曲 | 阴谋,背叛,失控 | +| 垂直升降 | slow vertical rise / crane up | 格局升级,视角切换 | 从细节到全局 | +| 极慢速度 | ultra slow motion | 强调细节,时间凝固 | 关键动作,情绪爆发前 | +| 定机微动 | static with micro drift | 沉默张力,压迫平静 | 对峙,沉默,等待 | + +### 光线变化动势 +| 光效 | 英文 | 情绪效果 | +|---|---|---| +| 阴影缓缓吞噬画面 | shadows slowly consuming the frame | 危险临近,失控 | +| 橙光从边缘渗入 | warm orange light bleeding from edge | 希望/威胁的暗示 | +| 单光源缓慢摇曳 | single light source gently swaying | 不稳定,脆弱 | +| 逆光轮廓渐清晰 | backlit silhouette slowly sharpening | 人物揭示,权力感 | +| 闪烁的环境光 | flickering ambient light | 危机,系统崩溃感 | + +### 人物微动势 +| 动势 | 英文 | +|---|---| +| 眼神从空洞转冷峻 | eyes shifting from hollow to cold | +| 呼出的气息可见 | breath visible as cold vapor exhaled slowly | +| 嘴角细微下压 | corner of mouth almost imperceptibly tightening | +| 手指缓缓收紧 | fingers slowly tightening around glass | +| 转头停在一半 | head turn arrested mid-motion | +| 眼皮缓缓下垂 | eyelids slowly lowering with exhaustion | + +### 视频 Prompt 模板 +``` +Opening on [镜头起始状态/构图], camera [运镜方式] — [主体动势演绎]. [环境/光线动态]. [情绪氛围收尾]. aspect ratio 9:16, cinematic vertical frame, 24fps film grain, duration [Xs], no text overlay, no subtitles +``` + +### 示例 +``` +Opening on a medium close-up of a calculating middle-aged man in a dark suit, head already bowed, camera beginning an imperceptibly slow creep inward toward his face — a creeping push in that tightens like a closing trap. His jaw tightens by a single degree. His gaze never rises. The blurred silhouettes of figures in the background continue their silent dissolution into darkness, as if the world is emptying itself around him. A cold rim light from above traces the edge of his skull, orange warmth bleeding faintly at the frame's periphery — warmth he has long stopped reaching for. The shot holds in stillness until stillness itself becomes a statement. 
aspect ratio 9:16, cinematic vertical frame, 24fps film grain, duration 4s, no text overlay, no subtitles +``` + +### 叙事连贯性规则 +| 片段关系 | 衔接策略 | +|---|---| +| 情绪递进(A→更强A) | 镜头更近 + 动势更慢 + 光线更暗 | +| 情绪转折(A→B) | 镜头切换视角 + 光线色温转变 | +| 时间跳跃 | 运动方向反转 或 速度突变 | +| 场景切换 | 上一片段以虚焦结尾,下一片段从虚焦拉清 | +| 高潮强调 | 静止定机 + 极慢速度 + 单一光源特写 | + +### 时长与动势匹配 +| 画面类型 | 推荐时长 | 隐性动势强度 | +|---|---|---| +| 人物情绪特写 | 3–4s | 微表情(眼神 / 呼吸 / 嘴角) | +| 动作 + 情绪复合 | 4–5s | 肢体趋势(转身 / 抬头 / 握拳) | +| 场景全景 / 环境叙事 | 5–6s | 环境动势(烟雾 / 光线 / 风) | + +### VEO/Grok 后缀 +- VEO: `enhance_prompt=true, enable_upsample=true` +- Grok: 无额外参数 + +### 视频禁止项 +- 大幅度环境切换、场景变化、人物位置跳变 +- 快速剪辑/闪切效果 +- 任何文字叠加/字幕 diff --git a/.claude/skills/video-from-script/references/account-system.md b/.claude/skills/video-from-script/references/account-system.md new file mode 100644 index 0000000..ff2451f --- /dev/null +++ b/.claude/skills/video-from-script/references/account-system.md @@ -0,0 +1,181 @@ +# 账号系统规范 + +> 每个账号独立管理视觉风格、提示词策略和 CapCut 配置。 +> 一个账号可以有多种视觉风格,每种风格是一个独立的 style 文件。 + +--- + +## 目录结构 + +``` +accounts/ +├── _template/ # 新账号模板(复制此目录创建新账号) +│ ├── account.json +│ ├── references/ # 参考图目录 +│ │ └── .gitkeep +│ └── styles/ # 风格文件目录(可多个) +│ └── .gitkeep +└── {account_id}/ # 用户创建的账号 + ├── account.json + ├── references/ # 参考图(所有风格共用) + │ ├── ref_001.png + │ └── ref_002.png + └── styles/ # 风格文件(一个文件 = 一种视觉风格) + ├── cyberpunk-character.md + ├── dark-archive.md + └── neon-city.md +``` + +--- + +## account.json 字段说明 + +```json +{ + "id": "tech-talk", + "name": "科技解说", + "description": "科技类短视频账号,深色背景,赛博朋克风格", + "pipeline": "image-video", + "imageModel": "gemini", + "videoModel": "kling", + "batchSize": 30, + "capcut": { + "effects": ["录制边框 III"], + "filter": "电影感:40", + "subtitleStyle": { + "fontSize": 36, + "color": "#FFFFFF", + "highlightColor": "#FF6B35", + "bold": true + }, + "defaultBGM": "https://example.com/bgm_tech.mp3" + } +} +``` + +> 注:`pipeline` 字段已废弃,保留不影响(JSON 文件本身不支持注释,请勿在 account.json 中写 `//`)。 + +| 字段 | 类型 | 说明 | +|------|------|------| +| `id` | string | 账号唯一标识(与目录名一致) | +| `name` | string 
| 账号显示名 | +| `description` | string | 一句话描述 | +| `pipeline` | enum | `image-only` / `image-video` | +| `defaultFormat` | string | 默认画幅(9:16 / 16:9 / 1:1 / 4:3) | +| `imageModel` | string | 默认图片模型 | +| `videoModel` | string | 默认视频模型 | +| `batchSize` | number | 默认批量生成数量 | +| `capcut.effects` | string[] | CapCut 特效名称列表 | +| `capcut.filter` | string | CapCut 滤镜,格式 "名称:强度" | +| `capcut.subtitleStyle` | object | 字幕样式(字号、颜色、高亮色、加粗) | +| `capcut.defaultBGM` | string | 默认背景音乐 URL | + +--- + +## 风格文件(styles/) + +每种视觉风格一个文件,文件名即风格名。文件内同时包含图片和视频的提示词策略。 + +### 风格文件结构 + +```markdown +# 风格名称(英文短横线命名) + +一句话描述风格。 + +--- + +## 图片提示词 + +### 核心视觉要素 + + +### 场景/背景规则 + + +### 色调方案 + + +### 构图模式 + + +### 图片 Prompt 模板 + + +### 示例 + + +### MJ/Gemini 参数 + + +### 图片禁止项 + + +--- + +## 视频提示词 + +### 运镜规则 + + +### 动态元素要求 + + +### 视频 Prompt 模板 + + +### 示例 + + +### VEO/Grok 后缀 + + +### 视频禁止项 + +``` + +### 风格文件命名 + +使用英文短横线命名,描述性强: +- `cyberpunk-eastern-character.md` — 赛博东方角色 +- `dark-forbidden-archive.md` — 暗黑禁书档案 +- `neon-cityscape.md` — 霓虹城市 +- `ink-wash-landscape.md` — 水墨山水 + +--- + +## 创建新账号 + +### 一键创建(推荐) + +```bash +node scripts/pipeline.js create-account \ + --id military \ + --name "军事账号" \ + --desc "军事主题短视频,暗黑漫画风格" \ + --video-model veo3-fast \ + --references ./ref1.png,./ref2.png +``` + +自动完成:创建目录 → 生成 account.json → 复制参考图 → 上传 OSS → 回写 URL → 生成风格骨架。 + +### 手动创建 + +1. 复制 `_template/` 目录,重命名为账号 ID +2. 编辑 `account.json` 填写账号信息 +3. 在 `references/` 中放入参考图(所有风格共用) +4. 上传参考图到 OSS,URL 写入 account.json: + - `node scripts/oss-upload.js accounts/{id}/references/{图片文件}` + - 将返回的 URL 写入 `styles.{styleName}.references[].url` +5. 
在 `styles/` 中创建风格文件(至少一个) + +### 校验账号 + +```bash +node scripts/pipeline.js validate-account --account military +``` + +检查:id 匹配、必填字段、参考图完整性、风格文件存在、OSS URL 有效。 + +## 添加新风格 + +在账号的 `styles/` 目录下新建 `{风格名}.md` 文件即可,文件名(不含扩展名)即风格 ID。 +Claude 调用时指定风格名,如 "用 cyberpunk-eastern-character 风格"。 diff --git a/.claude/skills/video-from-script/references/manifest-schema.md b/.claude/skills/video-from-script/references/manifest-schema.md new file mode 100644 index 0000000..90f8559 --- /dev/null +++ b/.claude/skills/video-from-script/references/manifest-schema.md @@ -0,0 +1,108 @@ +# manifest.json 规范 + +> `pipeline.js init` 创建,Pipeline 执行,Agent 审查。 +> +> **禁止 AI 手写 manifest.json**,必须通过 `pipeline.js init` 初始化。脚本从 account.json 自动继承结构字段,AI 只提供创意内容(items 的 text/imagePrompt/videoPrompt/keyword)。 + +--- + +## 创建方式 + +```bash +# AI 生成创意内容后,通过脚本初始化 +node pipeline.js init --account military --mode single \ + --items '[{"text":"中文文案","imagePrompt":"English prompt","videoPrompt":"motion prompt","keyword":"关键词","keywordColor":"#FF6B35"}]' + +# 或从文件读取 +node pipeline.js init --account military --mode single --items-file ./items.json + +# 校验已有 manifest(传入 manifest.json 的路径) +node pipeline.js validate --manifest output/{account}_{YYYYMMDD}_{NNN}/manifest.json +``` + +--- + +## 顶层字段 + +| 字段 | 说明 | 来源 | 谁填充 | +|------|------|------|--------| +| `account` | 账号 ID | account.json | **init 自动** | +| `imageModel` | `gemini` / `mj` | account.json | **init 自动** | +| `videoModel` | `veo3-fast` / `grok-video-3` 等 | account.json | **init 自动** | +| `format` | 画幅:`9:16` / `16:9` | account.json | **init 自动** | +| `mode` | `single` 单图 / `framePair` 首尾帧 | CLI 参数 | **init 自动** | +| `references` | 参考图数组,从 account.json styles.*.references 搬入 | account.json | **init 自动** | +| `items` | 素材数组(AI 提供创意内容) | CLI --items | **AI → init** | + +**init 自动继承的字段不需要 AI 关心,不会出错。** + +--- + +## references 字段 + +从 account.json 搬入,pipeline 直接使用,不再回读 account.json。 + +- **Gemini** → 读 `file`(本地路径,图生图用) +- **MJ** → 读 `url`(公网 URL,`--sref` 用) + +--- + +## items[] 字段 + +### Agent 写入(创建时) + +| 字段 | 说明 | +|------|------| 
+| `status` | 固定写 `"pending"` | +| `text` | 中文字幕文案 | +| `imagePrompt` | 英文画面描述(给 Gemini/MJ) | +| `videoPrompt` | 英文运动描述(给 Grok/VEO),描述镜头运动而非内容 | +| `keyword` | 字幕高亮关键词 | +| `keywordColor` | 高亮颜色 | + +### Pipeline 回写(执行后) + +| 字段 | 说明 | 写入阶段 | +|------|------|---------| +| `status` | `pending` → `generating` → `done` / `failed` | images | +| `file` | 生成的图片路径(相对 manifest) | images | +| `candidates` | MJ 拆分的 4 张候选图路径(Gemini 无此字段) | images | +| `url` | 图片 OSS 公网 URL | upload | +| `video` | 生成的视频路径 | videos | +| `videoDuration` | 视频时长(秒),Grok=6, VEO=8 | videos | +| `videoUrl` | 视频 OSS 公网 URL | videos | +| `audio` | TTS 音频路径 | tts | +| `duration` | 音频时长(秒) | tts | + +### Agent 审查时可操作 + +- MJ 换选:`item.file = item.candidates[2]` +- 删除不合格 item:直接从 items 数组移除,重新跑 `--phase images` +- 调整 prompt 重跑:改 `imagePrompt`,status 改回 `pending` + +--- + +## 首尾帧模式 + +`mode: "framePair"` 时,`imagePrompt` 作为起始帧,每个 item 额外字段: + +| 字段 | 说明 | 谁填充 | +|------|------|--------| +| `imagePrompt` | 起始帧画面描述(与 single 模式复用同一字段) | AI | +| `lastFramePrompt` | 结束帧画面描述 | AI | +| `lastFrame` | 结束帧图片路径 | **pipeline images 回写** | +| `lastFrameUrl` | 结束帧 OSS URL | **pipeline upload 回写** | + +**首尾帧规则**:同一场景、视角一致、状态对比。VEO 检测到 `lastFrameUrl` 自动启用双图模式。 + +--- + +## 目录结构 + +``` +output/{account}_{YYYYMMDD}_{NNN}/ +├── manifest.json # 主清单 +├── images/ # scene_{NN}_{keyword}.jpeg(首尾帧加 _last,MJ 候选加 _cand{1-4}) +├── videos/ # scene_{NN}_{keyword}.mp4 +└── audio/ # seg_001.mp3 +``` diff --git a/.claude/skills/video-from-script/scripts/capcut_assemble.js b/.claude/skills/video-from-script/scripts/capcut_assemble.js new file mode 100644 index 0000000..1f82d69 --- /dev/null +++ b/.claude/skills/video-from-script/scripts/capcut_assemble.js @@ -0,0 +1,730 @@ +#!/usr/bin/env node + +/** + * CapCut 成片组装脚本 + * + * 将图片/视频素材通过 CapCut Mate API 组装为草稿,同步到本地剪映。 + * + * 用法: + * node capcut_assemble.js --input ./output/batch_xxx [选项] + * + * 配置: + * 请运行 node setup.js 生成配置 + * 同步方式: 纯 Node.js(sync-to-jianying.js),无需 Python/uv + */ + 
const axios = require('axios')
const path = require('path')
const fs = require('fs')
const { syncDraft, registerDraft, triggerDirectoryScan } = require('./sync-to-jianying')

// ============================================================================
// Configuration
// ============================================================================

// Memoised config object; filled in by the first successful getConfig() call.
let _config = null

/**
 * Load and cache config.json, located two directories above this script.
 *
 * Prints a hint and exits the process with code 1 when the file does not
 * exist, or when any of jianyingDraftPath / capcutMateDir / capcutMateApiBase
 * is missing or falsy.
 *
 * @returns {object} The parsed configuration object.
 */
function getConfig() {
  if (_config) return _config

  const configPath = path.join(__dirname, '..', '..', 'config.json')
  if (!fs.existsSync(configPath)) {
    console.error('缺少配置文件: skills/config.json')
    console.error('请运行 node setup.js 生成配置')
    process.exit(1)
  }

  const parsed = JSON.parse(fs.readFileSync(configPath, 'utf-8'))
  const requiredKeys = ['jianyingDraftPath', 'capcutMateDir', 'capcutMateApiBase']
  if (requiredKeys.some(key => !parsed[key])) {
    console.error('config.json 需要填写 jianyingDraftPath、capcutMateDir 和 capcutMateApiBase')
    process.exit(1)
  }

  _config = parsed
  return _config
}

// Base URL of the CapCut Mate HTTP API, read once at module load.
const BASE_URL = getConfig().capcutMateApiBase
// Multiplier used to convert seconds to microseconds (1 s = 1_000_000 us).
const US = 1_000_000

// ============================================================================
// CapCut API wrapper
// ============================================================================

/**
 * Call one CapCut Mate endpoint and return the response body.
 *
 * `get_draft` is sent as GET with `data` as query params; every other
 * endpoint is sent as POST with `data` as the request body.
 *
 * @param {string} endpoint - Endpoint name appended to BASE_URL.
 * @param {object} [data={}] - Query params (GET) or request body (POST).
 * @param {number} [timeout=60000] - Timeout value passed to axios.
 * @returns {Promise<object>} The `data` field of the axios response.
 * @throws {Error} When the body carries a `code` that is defined and not 0,
 *   or (re-wrapped with status and body) when the caught error has `.response`.
 *   Any other error is rethrown unchanged.
 */
async function api(endpoint, data = {}, timeout = 60000) {
  const url = `${BASE_URL}/${endpoint}`
  const isGet = endpoint === 'get_draft'
  try {
    const pending = isGet
      ? axios.get(url, { params: data, timeout })
      : axios.post(url, data, { timeout })
    const res = await pending
    const hasErrorCode = res.data.code !== undefined && res.data.code !== 0
    if (hasErrorCode) {
      throw new Error(`API [${endpoint}] 返回错误: ${res.data.message}`)
    }
    return res.data
  } catch (err) {
    // Errors without a `.response` (including the one thrown above) pass through as-is.
    if (!err.response) throw err
    throw new Error(`API [${endpoint}] HTTP ${err.response.status}: ${JSON.stringify(err.response.data)}`)
  }
}

// ============================================================================
// CLI arguments
// ============================================================================

/**
 * Parse `--key value` / `--flag` style arguments into a plain object.
 *
 * A `--key` followed by a non-empty token that does not itself start with
 * `--` takes that token as its string value; otherwise the key is set to
 * `true`. Tokens that do not start with `--` and were not consumed as a
 * value are ignored.
 *
 * @param {string[]} argv - Argument tokens.
 * @returns {Object<string, string|boolean>} Parsed arguments.
 */
function parseArgs(argv) {
  const args = {}
  let i = 0
  while (i < argv.length) {
    const token = argv[i]
    i++
    if (!token.startsWith('--')) continue

    const key = token.slice(2)
    const next = argv[i]
    if (next && !next.startsWith('--')) {
      args[key] = next
      i++ // the value token has been consumed
    } else {
      args[key] = true
    }
  }
  return args
}

/**
 * Map an aspect-ratio string to pixel dimensions.
 *
 * @param {string} format - One of '9:16', '16:9', '1:1', '4:3'.
 * @returns {{width: number, height: number}} Dimensions for the format; the
 *   '9:16' entry is returned when the lookup yields a falsy value.
 */
function getResolution(format) {
  const presets = {
    '9:16': { width: 1080, height: 1920 },
    '16:9': { width: 1920, height: 1080 },
    '1:1': { width: 1080, height: 1080 },
    '4:3': { width: 1440, height: 1080 },
  }
  const match = presets[format]
  return match || presets['9:16']
}

// ============================================================================
// OSS upload
// ============================================================================

const ossUpload = require(path.join(__dirname, 'oss-upload'))

/**
 * Upload one file through the oss-upload module.
 *
 * @param {string} filePath - Path of the file to upload.
 * @returns {Promise<string>} The `url` field of the uploadFile() result.
 */
async function uploadToOSS(filePath) {
  const { url } = await ossUpload.uploadFile(filePath)
  return url
}

/**
 * Upload several files one after another, skipping files that do not exist.
 *
 * A failed upload is logged and left out of the result; it does not abort
 * the remaining uploads.
 *
 * @param {string} inputDir - Directory the file names are resolved against.
 * @param {string[]} files - File names relative to inputDir.
 * @returns {Promise<Object<string, string>>} Map of file name to uploaded URL,
 *   containing only the files that uploaded successfully.
 */
async function batchUploadToOSS(inputDir, files) {
  const urls = {}
  for (const file of files) {
    const filePath = path.join(inputDir, file)
    if (!fs.existsSync(filePath)) continue
    try {
      const uploadedUrl = await uploadToOSS(filePath)
      urls[file] = uploadedUrl
      console.log(` 上传: ${file} -> OK`)
    } catch (err) {
      console.error(` 上传失败: ${file} - ${err.message}`)
    }
  }
  return urls
}

// ============================================================================
// Main flow
// 
/**
 * Lay the manifest items end-to-end on one timeline, in whole microseconds.
 *
 * Audio drives the timeline: when an item has a video duration, its slot is
 * the longer of audio and video so the audio is never cut short; otherwise the
 * slot is the audio duration, falling back to `defaultDurationUs`.
 *
 * Durations are rounded to integers because `seconds * US` is fractional for
 * typical TTS durations (3.4567891 s -> 3456789.1).
 * NOTE(review): the draft API presumably expects integer microseconds — confirm.
 * Missing, non-numeric or non-positive durations count as "unknown" so one bad
 * manifest entry cannot turn every later offset into NaN.
 *
 * @param {Array<{duration?: number, videoDuration?: number}>} items - Manifest items; durations in seconds.
 * @param {number} defaultDurationUs - Slot length (microseconds) for items with no usable duration.
 * @returns {Array<{start: number, end: number, duration: number}>} One slot per item, in order.
 */
function buildTimeline(items, defaultDurationUs) {
  // Seconds -> whole microseconds; 0 means "no usable duration".
  const toMicros = seconds => {
    if (seconds == null) return 0
    const micros = Math.round(Number(seconds) * US)
    return Number.isFinite(micros) && micros > 0 ? micros : 0
  }

  let offset = 0
  return items.map(item => {
    const audioDur = toMicros(item.duration)
    const videoDur = toMicros(item.videoDuration)
    const dur = videoDur > 0
      ? Math.max(audioDur, videoDur)
      : (audioDur || defaultDurationUs)
    const entry = { start: offset, end: offset + dur, duration: dur }
    offset += dur
    return entry
  })
}
timeline[timeline.length - 1].end : 0 + const hasTTS = items.some(item => item.audio && item.duration != null) + + console.log(`\nCapCut 成片组装`) + console.log(` 模式: ${mode} 画幅: ${format} (${width}x${height})`) + console.log(` 时间线: ${hasTTS ? 'TTS音频驱动' : `固定${duration}s/段`} 总时长: ${(totalDurationUs / US).toFixed(1)}s`) + console.log(` 字幕: ${subtitles} 配音: ${voiceover} 动画: ${animation}`) + console.log(` 素材: ${items.length} 个可用\n`) + + const steps = [] + if (mode === 'images') steps.push('upload') + steps.push('draft', 'materials', 'voiceover', 'audio', 'subtitles', 'effects', 'filter', 'save', 'sync') + const totalSteps = steps.length + let step = 0 + + // -- 上传图片到 OSS(优先使用 manifest 中已有的 URL) -- + let imgUrls = {} + if (mode === 'images') { + // 先从 manifest 收集已有 URL + const needUpload = [] + for (const item of items) { + if (item.url && item.url.startsWith('http')) { + imgUrls[item.file] = item.url + } else { + needUpload.push(item.file) + } + } + if (needUpload.length > 0) { + step++; console.log(`[${step}/${totalSteps}] 上传图片到 OSS (${needUpload.length} 张需上传, ${Object.keys(imgUrls).length} 张已有URL)...`) + const uploaded = await batchUploadToOSS(inputDir, needUpload) + imgUrls = { ...imgUrls, ...uploaded } + } else { + step++; console.log(`[${step}/${totalSteps}] 所有图片已有 URL,跳过上传`) + } + if (Object.keys(imgUrls).length === 0) throw new Error('所有图片上传失败') + console.log(` 成功: ${Object.keys(imgUrls).length}/${items.length}\n`) + } + + // -- 创建草稿 -- + step++; console.log(`[${step}/${totalSteps}] 创建草稿...`) + const draftRes = await api('create_draft', { width, height }) + const draftUrl = draftRes.draft_url + const draftId = new URL(draftUrl).searchParams.get('draft_id') + console.log(` draft_id: ${draftId}\n`) + + // -- 导入素材 -- + step++; console.log(`[${step}/${totalSteps}] 导入素材...`) + if (mode === 'images') { + await addImages(draftUrl, items, imgUrls, timeline, width, height, animation) + } else { + // 视频模式:确保所有 item 都有 videoUrl(CapCut API 需要公网 URL) + const missingUrl = 
items.filter(it => it.video && !it.videoUrl) + if (missingUrl.length > 0) { + const { uploadFile } = require('./oss-upload') + console.log(` 上传 ${missingUrl.length} 个视频到 OSS...`) + for (const item of missingUrl) { + const videoPath = path.resolve(inputDir, item.video) + try { + const { url } = await uploadFile(videoPath) + item.videoUrl = url + // 回写 manifest + if (manifestFile) { + try { + const m = JSON.parse(fs.readFileSync(manifestFile, 'utf-8')) + const mi = m.items.find(i => i.text === item.text) + if (mi) { mi.videoUrl = url; fs.writeFileSync(manifestFile, JSON.stringify(m, null, 2)) } + } catch (_) {} + } + } catch (err) { + console.log(` 视频上传失败: ${err.message}`) + } + } + } + await addVideos(draftUrl, inputDir, items, timeline, width, height) + } + + // -- 添加 TTS 配音 -- + step++; console.log(`[${step}/${totalSteps}] 添加 TTS 配音...`) + if (voiceover === 'true' && hasTTS) { + await addVoiceover(draftUrl, inputDir, items, timeline) + } else { + console.log(' 跳过(无 TTS 音频或未启用)') + } + + // -- 添加 BGM -- + step++; console.log(`[${step}/${totalSteps}] 添加背景音乐...`) + if (bgm) { + await addBGM(draftUrl, bgm, totalDurationUs) + } else { + console.log(' 跳过(未指定 --bgm)') + } + + // -- 读取账号字幕风格 -- + const subtitleStyle = loadSubtitleStyle(manifest) + if (Object.keys(subtitleStyle).length > 0) { + console.log(` 字幕风格: ${subtitleStyle.font || '默认'} ${subtitleStyle.inAnimation ? 
subtitleStyle.inAnimation + '→' + subtitleStyle.outAnimation : ''}`) + } + + // -- 添加字幕 -- + step++; console.log(`[${step}/${totalSteps}] 添加字幕...`) + if (subtitles === 'true' && items.some(i => i.text)) { + await addSubtitles(draftUrl, items, timeline, subtitleStyle) + } else { + console.log(' 跳过') + } + + // -- 添加特效 -- + step++; console.log(`[${step}/${totalSteps}] 添加特效...`) + if (effectsStr) { + await addEffects(draftUrl, effectsStr, totalDurationUs) + } else { + console.log(' 跳过(未指定 --effects)') + } + + // -- 添加滤镜 -- + step++; console.log(`[${step}/${totalSteps}] 添加滤镜...`) + if (filterStr) { + await addFilter(draftUrl, filterStr, totalDurationUs) + } else { + console.log(' 跳过(未指定 --filter)') + } + + // -- 保存草稿 -- + step++; console.log(`[${step}/${totalSteps}] 保存草稿...`) + await api('save_draft', { draft_url: draftUrl }) + console.log(' 已保存\n') + + // -- 同步到本地剪映 -- + step++; console.log(`[${step}/${totalSteps}] 同步到本地剪映...`) + await syncToLocalJianying(draftUrl, draftId, totalDurationUs) + console.log(' 同步完成\n') + + // -- 云渲染(可选)-- + if (apiKey) { + console.log('提交云渲染...') + await api('gen_video', { draft_url: draftUrl, apiKey }) + console.log('渲染已提交,使用 gen_video_status 查询进度') + } + + console.log(`\n成片组装完成`) + console.log(` 草稿ID: ${draftId}`) + console.log(` 总时长: ${(totalDurationUs / US).toFixed(1)}s`) + console.log(` 素材数: ${items.length}`) + console.log(` 时间线: ${hasTTS ? 
/**
 * Place every manifest item on the draft's video track via `add_videos`.
 *
 * All clips are first submitted in one request. If that fails with an error
 * whose message mentions `504` or `timeout`, the same clips are resubmitted in
 * batches of three, keeping their absolute timeline positions.
 * NOTE(review): if the server finished the full request despite the timeout,
 * the batched retry may add the clips twice — confirm server behaviour.
 *
 * @param {string} draftUrl - Draft URL returned by `create_draft`.
 * @param {string} inputDir - Directory against which relative `video`/`file` paths are resolved.
 * @param {Array<object>} items - Manifest items; source is `videoUrl`, `video`, `url` or `file`, first present wins.
 * @param {Array<{start: number, end: number, duration: number}>} timeline - Slot for each item, same order.
 * @param {number} width - Canvas width in pixels.
 * @param {number} height - Canvas height in pixels.
 * @returns {Promise<string[]>} Segment ids reported by the API (possibly empty).
 * @throws {Error} When an item has no usable source, or the API fails for a non-timeout reason.
 */
async function addVideos(draftUrl, inputDir, items, timeline, width, height) {
  // Same precedence as before, but a source-less item now fails with a
  // readable message instead of a TypeError from path.resolve(undefined).
  const resolveSource = (item, index) => {
    if (item.videoUrl) return item.videoUrl
    if (item.video) return path.resolve(inputDir, item.video)
    if (item.url) return item.url
    if (item.file) return path.resolve(inputDir, item.file)
    throw new Error(`视频片段 ${index + 1} 缺少 videoUrl / video / url / file,无法添加`)
  }

  const videoInfos = items.map((item, i) => {
    const tl = timeline[i]
    return {
      video_url: resolveSource(item, i),
      width,
      height,
      start: tl.start,
      end: tl.end,
      duration: tl.duration,
      mask: '',
      transition: i > 0 ? '溶解' : '',
      transition_duration: 300000,
      // `??` rather than `||`: an explicit volume of 0 (muted clip) must survive.
      volume: item.volume ?? 1,
    }
  })

  // One request shape, shared by the full submission and the batched fallback.
  const submit = infos => api('add_videos', {
    draft_url: draftUrl,
    video_infos: JSON.stringify(infos),
    alpha: 1, scale_x: 1, scale_y: 1,
    transform_x: 0, transform_y: 0,
    scene_timelines: [],
  })

  // Try everything in one go first.
  try {
    const res = await submit(videoInfos)
    console.log(` 已添加 ${items.length} 个视频片段(全量)`)
    return res.segment_ids || []
  } catch (err) {
    const message = String(err?.message ?? '')
    if (!message.includes('504') && !message.includes('timeout')) throw err
    console.log(` 全量提交超时,降级为分批添加...`)
  }

  // Timeout fallback: small batches, absolute times unchanged.
  const BATCH_SIZE = 3
  const totalBatches = Math.ceil(videoInfos.length / BATCH_SIZE)
  const allSegmentIds = []
  for (let i = 0; i < videoInfos.length; i += BATCH_SIZE) {
    const batch = videoInfos.slice(i, i + BATCH_SIZE)
    const batchNum = Math.floor(i / BATCH_SIZE) + 1
    console.log(` 分批 [${batchNum}/${totalBatches}] 添加 ${batch.length} 个片段...`)
    const res = await submit(batch)
    if (res.segment_ids) allSegmentIds.push(...res.segment_ids)
  }

  console.log(` 已添加 ${items.length} 个视频片段(分批)`)
  return allSegmentIds
}
/**
 * Add the per-item TTS clips to the draft, each occupying its item's slot.
 *
 * Audio references are resolved through `batchUploadAudio`; items without an
 * `audio` field, or whose audio has no resolved URL, are skipped. Nothing is
 * sent when no item has audio or when no URL could be resolved.
 *
 * @param {string} draftUrl - Draft URL returned by `create_draft`.
 * @param {string} inputDir - Directory passed through to `batchUploadAudio`.
 * @param {Array<object>} items - Manifest items, in timeline order.
 * @param {Array<{start: number, end: number, duration: number}>} timeline - Slot for each item.
 * @returns {Promise<void>}
 */
async function addVoiceover(draftUrl, inputDir, items, timeline) {
  const anyAudio = items.some(item => item.audio)
  if (!anyAudio) {
    console.log(' 无 TTS 音频文件,跳过')
    return
  }

  // Entries that are already URLs pass straight through the uploader.
  console.log(' 上传 TTS 音频到 OSS...')
  const resolvedUrls = await batchUploadAudio(inputDir, items)

  const audioInfos = items.flatMap((item, idx) => {
    const remoteUrl = item.audio ? resolvedUrls[item.audio] : undefined
    if (!remoteUrl) return []
    const { start, end, duration } = timeline[idx]
    return [{ audio_url: remoteUrl, start, end, duration, volume: 1.0 }]
  })

  if (audioInfos.length === 0) {
    console.log(' 所有音频上传失败,跳过配音')
    return
  }

  await api('add_audios', {
    draft_url: draftUrl,
    audio_infos: JSON.stringify(audioInfos),
  })
  console.log(` 已添加 ${audioInfos.length} 段 TTS 配音`)
}
/**
 * Look up the subtitle style configured for the manifest's account.
 *
 * Reads `../accounts/<account>/account.json` relative to this script and
 * returns its `capcut.subtitleStyle` object. This is best-effort: a manifest
 * without an account, a missing file, unreadable JSON, or a file without that
 * section all yield an empty object.
 *
 * @param {{account?: string}} manifest - Parsed manifest.json.
 * @returns {object} Subtitle style settings, or `{}` when none are available.
 */
function loadSubtitleStyle(manifest) {
  const { account } = manifest
  if (!account) return {}

  const profilePath = path.join(__dirname, '..', 'accounts', account, 'account.json')
  if (!fs.existsSync(profilePath)) return {}

  try {
    const raw = fs.readFileSync(profilePath, 'utf-8')
    const { capcut } = JSON.parse(raw)
    return capcut?.subtitleStyle || {}
  } catch {
    // A broken account file falls back to default styling.
    return {}
  }
}
/**
 * Apply one or more named effects across the whole draft.
 *
 * @param {string} draftUrl - Draft URL returned by `create_draft`.
 * @param {string} effectsStr - Comma-separated effect names; blanks are dropped.
 * @param {number} totalDurationUs - Draft length in microseconds; every effect spans 0..total.
 * @returns {Promise<void>}
 */
async function addEffects(draftUrl, effectsStr, totalDurationUs) {
  const titles = []
  const payload = []
  for (const piece of effectsStr.split(',')) {
    const title = piece.trim()
    if (!title) continue
    titles.push(title)
    payload.push({ effect_title: title, start: 0, end: totalDurationUs })
  }

  await api('add_effects', {
    draft_url: draftUrl,
    effect_infos: JSON.stringify(payload),
  })

  console.log(` 已添加: ${titles.join(', ')}`)
}
/**
 * Apply a single filter across the whole draft.
 *
 * `filterStr` has the form `"name"` or `"name:intensity"`. The intensity is
 * parsed as a number and defaults to 50 only when it is missing or not
 * numeric, so an explicit `0` is sent as 0. The log line reports the value
 * actually sent.
 *
 * @param {string} draftUrl - Draft URL returned by `create_draft`.
 * @param {string} filterStr - Filter spec, e.g. `"黑白:30"`.
 * @param {number} totalDurationUs - Draft length in microseconds; the filter spans 0..total.
 * @returns {Promise<void>}
 */
async function addFilter(draftUrl, filterStr, totalDurationUs) {
  const [rawName, rawIntensity] = filterStr.split(':')
  const title = (rawName || '').trim()

  // `parseFloat(x) || 50` would also replace a legitimate 0; only NaN falls back.
  const parsed = Number.parseFloat(rawIntensity)
  const intensity = Number.isNaN(parsed) ? 50 : parsed

  await api('add_filters', {
    draft_url: draftUrl,
    filter_infos: JSON.stringify([{
      filter_title: title,
      start: 0,
      end: totalDurationUs,
      intensity,
    }]),
  })
  console.log(` 已添加: ${title} 强度 ${intensity}`)
}
/**
 * Read the shared `config.json` two directories above this script.
 *
 * @returns {object} The parsed configuration, or `{}` when the file does not exist.
 * @throws {SyntaxError} When the file exists but is not valid JSON.
 */
function _loadConfig() {
  const sharedConfig = path.join(__dirname, '..', '..', 'config.json')
  return fs.existsSync(sharedConfig)
    ? JSON.parse(fs.readFileSync(sharedConfig, 'utf-8'))
    : {}
}
const GeminiAPI = {
  /**
   * POST a generateContent request to the configured endpoint.
   *
   * The request is aborted after `timeout` milliseconds; that case is reported
   * as a descriptive timeout error (original error kept as `cause`) instead of
   * a bare AbortError.
   *
   * @param {Array<object>} contents - Conversation turns (`{ role, parts }`).
   * @param {object} [options]
   * @param {string} [options.aspectRatio='1:1'] - Requested aspect ratio.
   * @param {string} [options.imageSize] - Requested resolution; defaults to `Config.defaultImageSize`.
   * @param {string[]} [options.responseModalities] - Defaults to text + image.
   * @param {number} [options.timeout] - Milliseconds; defaults to `Config.timeout.default`.
   * @returns {Promise<object>} The decoded JSON response body.
   * @throws {Error} On a non-2xx response, a timeout, or a network failure.
   */
  async generateContent(contents, options = {}) {
    const {
      aspectRatio = '1:1',
      imageSize = Config.defaultImageSize,
      responseModalities = Config.responseModalities.textAndImage,
      timeout = Config.timeout.default
    } = options

    // Encode the key so characters such as '+', '&' or spaces cannot corrupt the query string.
    const url = `${Config.api.baseUrl}${Config.api.endpoint}?key=${encodeURIComponent(Config.api.key)}`

    const body = {
      contents: contents,
      generationConfig: {
        responseModalities: responseModalities,
        imageConfig: {
          aspectRatio: aspectRatio,
          imageSize: imageSize
        }
      }
    }

    console.log(`\n📡 API请求: ${Config.api.baseUrl}${Config.api.endpoint}`)
    console.log(`📋 模型: ${Config.api.model}`)
    console.log(`⏱️ 超时: ${timeout / 1000}秒`)

    // AbortController enforces the timeout on the whole exchange, body read included.
    const controller = new AbortController()
    const timeoutId = setTimeout(() => controller.abort(), timeout)

    try {
      const response = await fetch(url, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${Config.api.key}`
        },
        body: JSON.stringify(body),
        signal: controller.signal
      })

      if (!response.ok) {
        const error = await response.text()
        throw new Error(`API请求失败: ${response.status} - ${error}`)
      }

      return await response.json()
    } catch (err) {
      // Only our own timer aborts this signal, so "aborted" means "timed out".
      if (controller.signal.aborted) {
        throw new Error(`API请求超时: 超过 ${timeout / 1000} 秒未响应`, { cause: err })
      }
      throw err
    } finally {
      clearTimeout(timeoutId)
    }
  },

  /**
   * Collect the text and inline images from the first candidate of a response.
   *
   * Tolerates a null/empty response and a candidate without parts. Inline
   * image parts are read from `inlineData`.
   * NOTE(review): `inline_data` / `mime_type` are also accepted in case a proxy
   * returns snake_case — confirm whether that ever happens.
   *
   * @param {object|null|undefined} response - Decoded generateContent response.
   * @returns {{text: string, images: Array<{mimeType: string, data: string}>}}
   */
  parseResponse(response) {
    const result = {
      text: '',
      images: []
    }

    const parts = response?.candidates?.[0]?.content?.parts
    if (!Array.isArray(parts)) {
      return result
    }

    for (const part of parts) {
      if (part?.text) {
        result.text += part.text
      }
      const inline = part?.inlineData ?? part?.inline_data
      if (inline) {
        result.images.push({
          mimeType: inline.mimeType ?? inline.mime_type,
          data: inline.data
        })
      }
    }

    return result
  }
}
shotType = 'close-up portrait', + lighting = 'soft, natural golden hour light', + mood = 'serene', + environment = '', + cameraDetails = '85mm lens, shallow depth of field' + } = options + + return `A photorealistic ${shotType} of ${subject}. ${environment ? `Set in ${environment}. ` : ''}The scene is illuminated by ${lighting}, creating a ${mood} atmosphere. Captured with ${cameraDetails}. Ultra-realistic, with sharp focus on key details.` + } + }, + + /** + * 贴纸/图标模板 + */ + sticker: { + name: '贴纸/图标', + generate(subject, options = {}) { + const { + style = 'kawaii', + colorPalette = 'vibrant', + background = 'white' + } = options + + return `A ${style}-style sticker of ${subject}. The design features bold, clean outlines, simple cel-shading, and a ${colorPalette} color palette. The background must be ${background}.` + } + }, + + /** + * Logo设计模板 + */ + logo: { + name: 'Logo设计', + generate(text, options = {}) { + const { + style = 'modern, minimalist', + colorScheme = 'black and white', + shape = 'circle' + } = options + + return `Create a ${style} logo${text ? ` with the text "${text}"` : ''}. The text should be in a clean, bold, sans-serif font. The color scheme is ${colorScheme}. Put the logo in a ${shape}.` + } + }, + + /** + * 产品图模板 + */ + product: { + name: '产品图', + generate(product, options = {}) { + const { + surface = 'polished concrete surface', + lighting = 'three-point softbox setup', + angle = 'slightly elevated 45-degree shot', + background = 'minimalist' + } = options + + return `A high-resolution, studio-lit product photograph of ${product}, presented on a ${surface}. The lighting is a ${lighting} designed to create soft, diffused highlights and eliminate harsh shadows. The camera angle is a ${angle} to showcase key features. Ultra-realistic. 
${background} background.` + } + }, + + /** + * 极简设计模板 + */ + minimalist: { + name: '极简设计', + generate(subject, options = {}) { + const { + position = 'bottom-right', + backgroundColor = 'off-white canvas', + lighting = 'soft, diffused lighting from the top left' + } = options + + return `A minimalist composition featuring a single, ${subject} positioned in the ${position} of the frame. The background is a vast, empty ${backgroundColor}, creating significant negative space for text. ${lighting}.` + } + }, + + /** + * 漫画/故事板模板 + */ + comic: { + name: '漫画/故事板', + generate(scene, options = {}) { + const { + style = 'gritty, noir', + panels = 3 + } = options + + return `Make a ${panels} panel comic in a ${style} art style with high-contrast black and white inks. ${scene}` + } + }, + + /** + * 风格转换模板 + */ + styleTransfer: { + name: '风格转换', + generate(targetStyle, options = {}) { + const { + preserveElements = 'composition and key elements' + } = options + + return `Transform the provided image into the artistic style of ${targetStyle}. Preserve the original ${preserveElements} but render with the new stylistic elements.` + } + }, + + /** + * 图像编辑模板 + */ + edit: { + name: '图像编辑', + generate(instruction, options = {}) { + const { + preserve = 'Keep everything else unchanged, preserving the original style, lighting, and composition' + } = options + + return `${instruction}. ${preserve}.` + } + }, + + /** + * 图像合成模板 + */ + composite: { + name: '图像合成', + generate(description, options = {}) { + return `Create a new image by combining the elements from the provided images. 
/**
 * High-level image generator: text-to-image, image-to-image, template-driven
 * and batch generation, plus multi-turn chat editing. Returned images are
 * written to an output directory and their paths reported.
 */
class GeminiImageGenerator {
  /**
   * @param {object} [options]
   * @param {string} [options.outputDir] - Default output directory (falls back to `Config.output.defaultDir`).
   * @param {string} [options.aspectRatio='1:1'] - Default aspect ratio.
   * @param {string} [options.imageSize] - Default resolution (falls back to `Config.defaultImageSize`).
   */
  constructor(options = {}) {
    this.outputDir = options.outputDir || Config.output.defaultDir
    this.defaultAspectRatio = options.aspectRatio || '1:1'
    this.defaultImageSize = options.imageSize || Config.defaultImageSize

    if (!Config.api.key) {
      console.warn('警告: 未设置API密钥')
    }
  }

  /**
   * Turn one path or a list of paths into inline-image request parts.
   *
   * @param {string|string[]} inputImages - Image file path(s).
   * @returns {Array<object>} One `inlineData` part per image.
   */
  _buildImageParts(inputImages) {
    const imagePaths = Array.isArray(inputImages) ? inputImages : [inputImages]
    return imagePaths.map(imgPath => {
      const { mimeType, data } = FileUtils.imageToBase64(imgPath)
      return {
        inlineData: {
          mime_type: mimeType,
          data: data
        }
      }
    })
  }

  /**
   * Write every returned image to `outputDir` and report where each went.
   *
   * Without `filename`, each image gets a generated unique name. With
   * `filename`, the first image uses it verbatim and later images get a
   * `_2`, `_3`, … suffix so they no longer overwrite one another.
   *
   * @param {Array<{mimeType: string, data: string}>} images - Decoded response images.
   * @param {string} outputDir - Target directory (created if missing).
   * @param {string} prefix - Prefix for generated filenames.
   * @param {string|null} [filename] - Fixed filename requested by the caller.
   * @returns {string[]} Paths of the files written, in response order.
   */
  _saveImages(images, outputDir, prefix, filename = null) {
    FileUtils.ensureDir(outputDir)

    return images.map((img, index) => {
      const ext = (img.mimeType || '').split('/')[1] || 'png'

      let outputFilename
      if (!filename) {
        outputFilename = FileUtils.generateFilename(prefix, ext)
      } else if (index === 0) {
        outputFilename = filename
      } else {
        const { dir, name, ext: fixedExt } = path.parse(filename)
        outputFilename = path.join(dir, `${name}_${index + 1}${fixedExt}`)
      }

      const outputPath = path.join(outputDir, outputFilename)
      FileUtils.base64ToImage(img.data, outputPath)
      console.log(`✅ 已保存: ${outputPath}`)
      return outputPath
    })
  }

  /**
   * Text-to-image generation.
   *
   * @param {string} prompt - Text prompt.
   * @param {object} [options] - `aspectRatio`, `imageSize`, `outputDir`, `filename` overrides.
   * @returns {Promise<{text: string, images: Array<object>, savedFiles: string[]}>}
   */
  async textToImage(prompt, options = {}) {
    const {
      aspectRatio = this.defaultAspectRatio,
      imageSize = this.defaultImageSize,
      outputDir = this.outputDir,
      filename = null
    } = options

    console.log(`\n🎨 生成图片: "${prompt.substring(0, 50)}..."`)
    console.log(`📐 宽高比: ${aspectRatio}`)
    console.log(`📏 分辨率: ${imageSize}`)

    const contents = [{
      role: 'user',
      parts: [{ text: prompt }]
    }]

    const response = await GeminiAPI.generateContent(contents, { aspectRatio, imageSize })
    const result = GeminiAPI.parseResponse(response)

    if (result.text) {
      console.log(`📝 模型回复: ${result.text}`)
    }

    const savedFiles = this._saveImages(result.images, outputDir, 'generated', filename)

    return {
      text: result.text,
      images: result.images,
      savedFiles
    }
  }

  /**
   * Image-to-image generation (editing with reference images).
   *
   * @param {string} prompt - Edit instruction.
   * @param {string|string[]} inputImages - Path(s) of the reference image(s).
   * @param {object} [options] - `aspectRatio`, `imageSize`, `outputDir` overrides.
   * @returns {Promise<{text: string, images: Array<object>, savedFiles: string[]}>}
   */
  async imageToImage(prompt, inputImages, options = {}) {
    const {
      aspectRatio = this.defaultAspectRatio,
      imageSize = this.defaultImageSize,
      outputDir = this.outputDir
    } = options

    console.log(`\n🖼️ 编辑图片: "${prompt.substring(0, 50)}..."`)
    console.log(`📁 输入图片: ${Array.isArray(inputImages) ? inputImages.length : 1} 张`)
    console.log(`📏 分辨率: ${imageSize}`)

    const contents = [{
      role: 'user',
      parts: [{ text: prompt }, ...this._buildImageParts(inputImages)]
    }]

    const response = await GeminiAPI.generateContent(contents, { aspectRatio, imageSize })
    const result = GeminiAPI.parseResponse(response)

    if (result.text) {
      console.log(`📝 模型回复: ${result.text}`)
    }

    const savedFiles = this._saveImages(result.images, outputDir, 'edited')

    return {
      text: result.text,
      images: result.images,
      savedFiles
    }
  }

  /**
   * Build a prompt from a named template and generate an image from it.
   *
   * All extra arguments are forwarded to the template's `generate`. The last
   * one is also used as the `textToImage` options, but only when it is an
   * object, so a lone string subject is no longer mistaken for options.
   *
   * @param {string} templateName - Key in `Templates`.
   * @param {...*} args - Template arguments, optionally ending with an options object.
   * @returns {Promise<{text: string, images: Array<object>, savedFiles: string[]}>}
   * @throws {Error} When the template name is unknown.
   */
  async generateFromTemplate(templateName, ...args) {
    const template = Templates[templateName]
    if (!template) {
      throw new Error(`未知的模板: ${templateName}。可用模板: ${Object.keys(Templates).join(', ')}`)
    }

    const lastArg = args[args.length - 1]
    const options = lastArg !== null && typeof lastArg === 'object' ? lastArg : {}
    const prompt = template.generate(...args)

    console.log(`📋 使用模板: ${template.name}`)
    return this.textToImage(prompt, options)
  }

  /**
   * Generate one image per prompt, sequentially. A failing prompt is recorded
   * and does not stop the batch.
   *
   * @param {string[]} prompts - Prompts to process in order.
   * @param {object} [options] - Options forwarded to `textToImage`.
   * @returns {Promise<Array<object>>} One `{ success, prompt, result | error }` entry per prompt.
   */
  async batchGenerate(prompts, options = {}) {
    const results = []
    const total = prompts.length

    console.log(`\n🚀 开始批量生成,共 ${total} 个任务`)

    for (let i = 0; i < prompts.length; i++) {
      console.log(`\n[${i + 1}/${total}] 处理中...`)

      try {
        const result = await this.textToImage(prompts[i], {
          ...options,
          filename: `batch_${i + 1}.png`
        })
        results.push({ success: true, prompt: prompts[i], result })
      } catch (error) {
        console.error(`❌ 失败: ${error.message}`)
        results.push({ success: false, prompt: prompts[i], error: error.message })
      }
    }

    const successCount = results.filter(r => r.success).length
    console.log(`\n✨ 批量生成完成: ${successCount}/${total} 成功`)

    return results
  }

  /**
   * Start a multi-turn editing session that keeps the conversation history.
   *
   * @param {object} [options] - Forwarded to `GeminiAPI.generateContent`; `outputDir` overrides the generator's directory.
   * @returns {{send: Function, getHistory: Function}} `send(message, inputImages?)` and `getHistory()`.
   */
  createChatSession(options = {}) {
    const history = []
    // Resolve the directory here, where `this` is still the generator. Inside
    // a method of the returned object, `this` would be the session object,
    // which has no `outputDir`.
    const outputDir = options.outputDir || this.outputDir

    return {
      // Arrow functions keep `this` bound to the generator.
      send: async (message, inputImages = null) => {
        const parts = [{ text: message }]

        if (inputImages) {
          parts.push(...this._buildImageParts(inputImages))
        }

        history.push({
          role: 'user',
          parts: parts
        })

        const response = await GeminiAPI.generateContent(history, options)
        const result = GeminiAPI.parseResponse(response)

        // Keep the model's reply, image data included, so later turns can edit it.
        const modelParts = []
        if (result.text) {
          modelParts.push({ text: result.text })
        }
        for (const img of result.images) {
          modelParts.push({
            inlineData: {
              mime_type: img.mimeType,
              data: img.data
            }
          })
        }
        if (modelParts.length > 0) {
          history.push({
            role: 'model',
            parts: modelParts
          })
        }

        const savedFiles = this._saveImages(result.images, outputDir, 'chat')

        return {
          text: result.text,
          images: result.images,
          savedFiles
        }
      },

      getHistory: () => history
    }
  }
}
============================================================================ +// CLI接口模块 +// ============================================================================ + +const CLI = { + /** + * 解析命令行参数 + */ + parseArgs(args) { + const result = { + command: '', + params: [], + options: {} + } + + let i = 0 + while (i < args.length) { + const arg = args[i] + + if (arg.startsWith('--')) { + const key = arg.substring(2) + const nextArg = args[i + 1] + + if (nextArg && !nextArg.startsWith('-')) { + result.options[key] = nextArg + i += 2 + } else { + result.options[key] = true + i++ + } + } else if (arg.startsWith('-')) { + const key = arg.substring(1) + const shortOptions = { + 'o': 'output', + 'r': 'ratio', + 's': 'size', + 'i': 'input', + 't': 'template', + 'h': 'help' + } + + const fullKey = shortOptions[key] || key + const nextArg = args[i + 1] + + if (nextArg && !nextArg.startsWith('-')) { + result.options[fullKey] = nextArg + i += 2 + } else { + result.options[fullKey] = true + i++ + } + } else if (!result.command) { + result.command = arg + i++ + } else { + result.params.push(arg) + i++ + } + } + + return result + }, + + /** + * 显示帮助信息 + */ + showHelp() { + console.log(` +🎨 Gemini Image Generator - 云雾API图片生成工具 +📦 模型: ${Config.api.model} + +用法: + node gemini-image-generator.js [options] + +命令: + generate 文生图 + edit 图生图(需要 -i 指定输入图片) + template 使用模板生成 + batch 批量生成(从文件读取提示词) + list-templates 列出所有可用模板 + +选项: + -o, --output 输出目录 (默认: ./output) + -r, --ratio 宽高比 (1:1, 16:9, 9:16, 3:2, 2:3 等) + -s, --size 分辨率 (512, 1K, 2K, 4K,默认: 2K) + -i, --input 输入图片路径(用于edit命令) + -t, --template 模板名称 + --text Logo文字(用于logo模板) + --subject 主题内容 + --style