From 61b1de273072393e12620530aecdad090a2422cf Mon Sep 17 00:00:00 2001 From: ViperEkura <3081035982@qq.com> Date: Sun, 26 Apr 2026 11:41:57 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=A2=9E=E5=8A=A0qwen=20=E7=94=9F?= =?UTF-8?q?=E5=9B=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- image-generation-minimax/SKILL.md | 176 ++++++++++++ .../mcp_server/minimax_image_mcp.py | 198 +++++++++++++ .../scripts/run.py | 0 image-generation-qwen/SKILL.md | 162 +++++++++++ .../mcp_server/qwen_image_mcp.py | 161 +++++++++++ image-generation-qwen/scripts/run.py | 259 ++++++++++++++++++ image-generation/SKILL.md | 67 ----- 7 files changed, 956 insertions(+), 67 deletions(-) create mode 100644 image-generation-minimax/SKILL.md create mode 100644 image-generation-minimax/mcp_server/minimax_image_mcp.py rename {image-generation => image-generation-minimax}/scripts/run.py (100%) create mode 100644 image-generation-qwen/SKILL.md create mode 100644 image-generation-qwen/mcp_server/qwen_image_mcp.py create mode 100644 image-generation-qwen/scripts/run.py delete mode 100644 image-generation/SKILL.md diff --git a/image-generation-minimax/SKILL.md b/image-generation-minimax/SKILL.md new file mode 100644 index 0000000..80d8213 --- /dev/null +++ b/image-generation-minimax/SKILL.md @@ -0,0 +1,176 @@ +--- +name: image-generation +description: Generate images using MiniMax API with support for custom prompts, aspect ratios, subject references, and multiple image generation. +metadata: {"clawdbot":{"emoji":"🎨","os":["linux","darwin","win32"]}} +--- + +# MiniMax Image Generation SKILL + +## Description + +Generate images using MiniMax API. Supports custom prompts, aspect ratios, generation count, and image-to-image generation. + +## Quick Start (MCP Server) + +### Setup + +1. Set the environment variable: + ```bash + export MINIMAX_API_KEY=your-api-key + ``` + +2. 
Add to your MCP client config (e.g., Claude Desktop, Cursor, etc.): + + ```json + { + "mcpServers": { + "minimax-image": { + "command": "python", + "args": ["path/to/minimax_image_mcp.py"] + } + } + } + ``` + +3. Use the `generate_image` tool: + + ``` + generate_image({ + prompt: "A beautiful mountain landscape at sunset", + aspect_ratio: "16:9" + }) + ``` + +## MCP Tool: generate_image + +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| `prompt` | string | true | - | Image generation prompt | +| `model` | string | false | `image-01` | Image generation model | +| `aspect_ratio` | string | false | `1:1` | Image aspect ratio | +| `n` | integer | false | `1` | Number of images (1-3) | +| `prompt_optimizer` | boolean | false | `false` | Enable prompt optimizer | +| `subject_reference` | string | false | - | Reference image URL or local file path | +| `subject_type` | string | false | `character` | Subject type (character, product, logo, video_subject, other) | +| `seed` | integer | false | - | Random seed for reproducible generation | +| `output_path` | string | false | - | Local path to save image | + +### Aspect Ratio Options + +| Ratio | Description | +|-------|-------------| +| `1:1` | Square (default) | +| `16:9` | Landscape | +| `9:16` | Portrait | +| `4:3` | Standard landscape | +| `3:4` | Standard portrait | + +### Example Usage + +```javascript +// Text-to-Image (t2i) - Pure text generation +{ + "prompt": "A man in a white t-shirt, full-body, standing front view, outdoors, fashion photography" +} + +// Multiple images with custom aspect ratio +{ + "prompt": "A realistic portrait", + "aspect_ratio": "16:9", + "n": 3 +} + +// With subject reference (image-to-image) +{ + "prompt": "Transform to anime style", + "subject_reference": "https://example.com/photo.jpg", + "subject_type": "character" +} + +// With seed for reproducibility +{ + "prompt": "A forest", + "seed": 12345 +} + +// Save to local 
path +{ + "prompt": "A sunset over the ocean", + "output_path": "./output/sunset" +} +``` + +## CLI Usage + +```bash +python scripts/run.py --api-key "your-api-key" --prompt "your-prompt" [options] +``` + +### CLI Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `--api-key` | string | true | MiniMax API key | +| `--prompt` | string | true | Generation prompt | +| `--model` | string | false | Image generation model (default: image-01) | +| `--aspect-ratio` | string | false | Image aspect ratio (default: 1:1) | +| `--response-format` | string | false | Response format (`url` or `base64`) | +| `--n` | integer | false | Number of images (default: 1, max: 3) | +| `--prompt-optimizer` | boolean | false | Enable prompt optimizer (default: false) | +| `--subject-reference` | string | false | Reference image URL or local file path | +| `--subject-type` | string | false | Subject type (default: character) | +| `--seed` | integer | false | Random seed for reproducible generation | +| `--output-dir` | string | false | Output directory (default: ./output) | + +## API Reference + +- **Endpoint**: `POST https://api.minimaxi.com/v1/image_generation` +- **Auth**: Bearer Token +- **Env Var**: `MINIMAX_API_KEY` + +## Examples + +### MCP Server Examples + +``` +generate_image({ prompt: "A sunset over the ocean" }) + +generate_image({ + prompt: "A realistic portrait", + aspect_ratio: "16:9", + n: 3 +}) + +generate_image({ + prompt: "Transform into anime style", + subject_reference: "https://example.com/photo.jpg", + subject_type: "character" +}) + +generate_image({ + prompt: "A forest", + seed: 12345 +}) +``` + +### CLI Examples + +```bash +# Text-to-Image (t2i) - Pure text generation +python scripts/run.py --api-key "sk-xxx" --prompt "A man in a white t-shirt, full-body, fashion photography in 90s documentary style" + +# Multiple images with custom aspect ratio +python scripts/run.py --api-key "sk-xxx" --prompt "A portrait" 
--aspect-ratio "16:9" --n 3 + +# With subject reference (image-to-image) +python scripts/run.py --api-key "sk-xxx" --prompt "Transform to anime style" --subject-reference "https://example.com/photo.jpg" --subject-type "character" + +# With local subject reference +python scripts/run.py --api-key "sk-xxx" --prompt "A beautiful scene" --subject-reference "./my_character.jpg" --subject-type "character" + +# With seed for reproducibility +python scripts/run.py --api-key "sk-xxx" --prompt "A forest" --seed 42 + +# With custom output directory +python scripts/run.py --api-key "sk-xxx" --prompt "A mountain" --output-dir "./my-images" +``` diff --git a/image-generation-minimax/mcp_server/minimax_image_mcp.py b/image-generation-minimax/mcp_server/minimax_image_mcp.py new file mode 100644 index 0000000..20f3087 --- /dev/null +++ b/image-generation-minimax/mcp_server/minimax_image_mcp.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python3 +""" +MiniMax Image Generation MCP Server +Provides image generation via MiniMax API +""" + +import os +import base64 +import requests +from urllib.parse import urlparse +from mcp.server.fastmcp import FastMCP + + +# Initialize MCP server +mcp = FastMCP("minimax-image-generator") + + +def read_local_image(file_path: str) -> str | None: + """ + Read local image file and return as base64 encoded string. + Returns the base64 string or None if failed. + """ + try: + with open(file_path, "rb") as f: + image_data = f.read() + return base64.b64encode(image_data).decode("utf-8") + except Exception as e: + return None + + +def build_subject_reference(subject_ref: str, subject_type: str) -> dict | None: + """ + Build subject_reference object from URL or local file path. + + Args: + subject_ref: URL or local file path + subject_type: Type of subject (character, product, logo, etc.) 
def _guess_extension(img_url: str, default: str = ".jpeg") -> str:
    """Return the file extension found in the URL path, or *default*."""
    ext = os.path.splitext(urlparse(img_url).path)[1]
    # A missing or implausibly long suffix is not a usable extension.
    if not ext or len(ext) > 5:
        return default
    return ext


def _resolve_image_path(output_path: str, url_ext: str, index: int,
                        total: int, timestamp: int) -> str:
    """Compute the file path for image *index* (1-based) out of *total*."""
    if total > 1:
        # os.path.splitext only looks at the final path component, so a
        # dotted directory such as "./output/sunset" is not mistaken for an
        # extension (the old rsplit('.') logic mangled such paths).
        root, user_ext = os.path.splitext(output_path)
        return f"{root}_{index}_{timestamp}{user_ext or url_ext}"
    if output_path.endswith(url_ext):
        return output_path
    return output_path + url_ext


@mcp.tool()
def generate_image(
    prompt: str,
    model: str = "image-01",
    aspect_ratio: str = "1:1",
    n: int = 1,
    prompt_optimizer: bool = False,
    subject_reference: str | None = None,
    subject_type: str = "character",
    seed: int | None = None,
    output_path: str | None = None
) -> str:
    """
    Generate images using MiniMax image generation API.

    Args:
        prompt: The image generation prompt describing what to generate
        model: Image generation model (default: image-01)
        aspect_ratio: Image aspect ratio (default: 1:1, options: 1:1, 16:9, 9:16, 4:3, 3:4)
        n: Number of images to generate (default: 1, max: 3)
        prompt_optimizer: Enable prompt optimizer (default: false)
        subject_reference: Reference image URL or local file path for image-to-image generation
        subject_type: Subject reference type (default: character, options: character, product, logo, video_subject, other)
        seed: Random seed for reproducible generation
        output_path: Optional local path to save the generated image(s). With
            several images, "_<index>_<timestamp>" is inserted before the extension.

    Returns:
        Human-readable text with the generation status, any saved file paths
        and the image URLs, or an error message.
    """
    api_key = os.environ.get("MINIMAX_API_KEY", "")
    api_base = "https://api.minimaxi.com"

    if not api_key:
        return "Error: MINIMAX_API_KEY environment variable is not set"

    # Build request payload
    payload = {
        "model": model,
        "prompt": prompt,
        "aspect_ratio": aspect_ratio,
        "response_format": "url",
        "n": n,
        "prompt_optimizer": prompt_optimizer
    }

    # Add seed if provided
    if seed is not None:
        payload["seed"] = seed

    # Add subject_reference for image-to-image generation
    if subject_reference:
        subject_ref = build_subject_reference(subject_reference, subject_type)
        if not subject_ref:
            # Do not silently fall back to text-to-image: the caller asked
            # for a reference and would get an unrelated picture.
            return f"Error: could not read subject reference image: {subject_reference}"
        payload["subject_reference"] = [subject_ref]

    # Make API request
    url = f"{api_base}/v1/image_generation"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    try:
        response = requests.post(url, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        result = response.json()

        # Check for API errors (a missing base_resp is treated as an error too)
        base_resp = result.get("base_resp") or {}
        if base_resp.get("status_code") != 0:
            error_msg = base_resp.get("status_msg", "Unknown error")
            return f"API Error: {error_msg}"

        # Extract image URLs; "or" guards against explicit nulls in the JSON
        image_urls = (result.get("data") or {}).get("image_urls") or []
        metadata = result.get("metadata") or {}
        request_id = result.get("id", "N/A")
        success_count = metadata.get("success_count", len(image_urls))

        # Save images if output_path provided
        saved_paths = []
        save_error = None
        if output_path and image_urls:
            import time
            timestamp = int(time.time())
            try:
                for i, img_url in enumerate(image_urls, 1):
                    img_response = requests.get(img_url, timeout=30)
                    img_response.raise_for_status()

                    img_path = _resolve_image_path(
                        output_path,
                        _guess_extension(img_url),
                        i,
                        len(image_urls),
                        timestamp,
                    )

                    # Ensure directory exists
                    os.makedirs(os.path.dirname(img_path) or ".", exist_ok=True)

                    with open(img_path, "wb") as f:
                        f.write(img_response.content)
                    saved_paths.append(os.path.abspath(img_path))
            except (requests.exceptions.RequestException, OSError) as e:
                # Keep going: the images were generated, so the URLs must
                # still be reported even though saving failed.
                save_error = str(e)

        # Build response
        parts = [
            f"Successfully generated {success_count} image(s)\n\n",
            f"Request ID: {request_id}\n\n",
        ]

        if saved_paths:
            parts.append("Saved to:\n")
            parts.extend(f" - {path}\n" for path in saved_paths)
            parts.append("\n")

        if save_error is not None:
            parts.append(f"Failed to save image: {save_error}\n\n")

        parts.append("Image URLs:\n")
        parts.extend(f" {i}. {img_url}\n" for i, img_url in enumerate(image_urls, 1))

        return "".join(parts)

    except requests.exceptions.RequestException as e:
        return f"Request Error: {str(e)}"
    except Exception as e:
        return f"Unexpected Error: {str(e)}"
+ +## Quick Start (MCP Server) + +### Setup + +1. Set the environment variable: + ```bash + export DASHSCOPE_API_KEY=your-api-key + ``` + +2. Add to your MCP client config (e.g., Claude Desktop, Cursor, etc.): + + ```json + { + "mcpServers": { + "qwen-image": { + "command": "python", + "args": ["path/to/qwen_image_mcp.py"] + } + } + } + ``` + +3. Use the `generate_image` tool: + + ``` + generate_image({ + prompt: "A beautiful mountain landscape at sunset", + size: "1024*1024" + }) + ``` + +## MCP Tool: generate_image + +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| `prompt` | string | true | - | Image generation prompt (max 800 chars) | +| `negative_prompt` | string | false | - | Negative prompt to avoid elements (max 500 chars) | +| `prompt_extend` | boolean | false | `true` | Enable prompt enhancement | +| `size` | string | false | `1024*1024` | Image resolution | +| `n` | integer | false | `1` | Number of images (1-6) | +| `image_url` | string | false | - | Reference image URL for img2img | +| `output_path` | string | false | - | Local path to save image | + +### Size Options + +| Size | Aspect Ratio | +|------|--------------| +| `1024*1024` | 1:1 (default) | +| `1344*768` | 16:9 | +| `768*1344` | 9:16 | +| `1184*864` | 4:3 | +| `864*1184` | 3:4 | + +### Example Usage + +```javascript +// Basic generation +{ + "prompt": "A beautiful mountain landscape at sunset" +} + +// Multiple images with an explicit size +{ + "prompt": "A realistic portrait", + "size": "1024*1024", + "n": 3 +} + +// With reference image +{ + "prompt": "Transform to oil painting style", + "image_url": "https://example.com/input.jpg" +} + +// With local save +{ + "prompt": "A sunset over the ocean", + "output_path": "./output/sunset" +} + +// With negative prompt +{ + "prompt": "A beautiful garden", + "negative_prompt": "blurry, low quality, distorted" +} +``` + +## CLI Usage + +```bash +python scripts/run.py --api-key 
"your-api-key" --prompt "your-prompt" [options] +``` + +### CLI Arguments + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `--api-key` | string | true | DashScope API key | +| `--prompt` | string | true | Generation prompt | +| `--size` | string | false | Image size (default: 1024*1024) | +| `--n` | integer | false | Number of images (default: 1, max: 6) | +| `--negative-prompt` | string | false | Negative prompt | +| `--prompt-extend` | boolean | false | Enable prompt extend (default: true) | +| `--image-url` | string | false | Reference image URL for img2img | +| `--output-path` | string | false | Local path to save image | + +## API Reference + +- **Endpoint**: `POST https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation` +- **Auth**: Bearer Token +- **Env Var**: `DASHSCOPE_API_KEY` +- **Model**: `qwen-image-2.0-pro` + +## Examples + +### MCP Server Examples + +``` +generate_image({ prompt: "A sunset over the ocean" }) + +generate_image({ + prompt: "A realistic portrait", + size: "1024*1024", + n: 3 +}) + +generate_image({ + prompt: "Transform into anime style", + image_url: "https://example.com/photo.jpg" +}) +``` + +### CLI Examples + +```bash +# Basic generation +python scripts/run.py --api-key "sk-xxx" --prompt "A sunset" + +# Multiple images with custom size +python scripts/run.py --api-key "sk-xxx" --prompt "A portrait" --size "1024*1024" --n 3 + +# With negative prompt +python scripts/run.py --api-key "sk-xxx" --prompt "A garden" --negative-prompt "blurry, low quality" + +# With reference image +python scripts/run.py --api-key "sk-xxx" --prompt "Transform to anime style" --image-url "https://example.com/photo.jpg" + +# Save to local path +python scripts/run.py --api-key "sk-xxx" --prompt "A mountain" --output-path "./output/mountain" +``` diff --git a/image-generation-qwen/mcp_server/qwen_image_mcp.py b/image-generation-qwen/mcp_server/qwen_image_mcp.py new file mode 100644 
def _qwen_image_extension(img_url: str, default: str = ".png") -> str:
    """Return the file extension found in the URL path, or *default*."""
    ext = os.path.splitext(urlparse(img_url).path)[1]
    # A missing or implausibly long suffix is not a usable extension.
    if not ext or len(ext) > 5:
        return default
    return ext


def _qwen_save_path(output_path: str, url_ext: str, index: int,
                    total: int, timestamp: int) -> str:
    """Compute the file path for image *index* (1-based) out of *total*."""
    if total > 1:
        # Insert "_<index>_<timestamp>" before the extension so several
        # images never overwrite each other.
        root, user_ext = os.path.splitext(output_path)
        return f"{root}_{index}_{timestamp}{user_ext or url_ext}"
    if output_path.endswith(url_ext):
        return output_path
    return output_path + url_ext


@mcp.tool()
def generate_image(
    prompt: str,
    negative_prompt: str | None = None,
    prompt_extend: bool = True,
    size: str = "1024*1024",
    n: int = 1,
    image_url: str | None = None,
    output_path: str | None = None
) -> str:
    """
    Generate images using Qwen (通义千问) image generation API.

    Args:
        prompt: The image generation prompt describing what to generate (max 800 chars)
        negative_prompt: Negative prompt to avoid certain elements (max 500 chars)
        prompt_extend: Enable prompt extend to enhance the prompt (default: true)
        size: Image resolution. Options: 1024*1024 (1:1), 1344*768 (16:9), 768*1344 (9:16), 1184*864 (4:3), 864*1184 (3:4)
        n: Number of images to generate, 1-6 (default: 1)
        image_url: Optional reference image URL for image-to-image generation
        output_path: Optional local path to save the generated image(s). With
            several images, "_<index>_<timestamp>" is inserted before the extension.

    Returns:
        Human-readable text with the generation status, any saved file paths
        and the image URLs, or an error message.
    """
    api_key = os.environ.get("DASHSCOPE_API_KEY", "")
    api_base = "https://dashscope.aliyuncs.com"

    if not api_key:
        return "Error: DASHSCOPE_API_KEY environment variable is not set"

    # Build content array
    content = [{"text": prompt}]

    # Add reference image if provided
    if image_url:
        # NOTE(review): the response parsing below reads items keyed "image";
        # confirm against the DashScope docs that the request side really
        # expects the OpenAI-style {"image_url": {"url": ...}} shape used here.
        content.append({"image_url": {"url": image_url}})

    # Build parameters dict
    parameters = {
        "prompt_extend": prompt_extend,
        "size": size,
        "n": n
    }

    # Only add negative_prompt if provided
    if negative_prompt:
        parameters["negative_prompt"] = negative_prompt

    # Build request payload
    payload = {
        "model": "qwen-image-2.0-pro",
        "input": {
            "messages": [
                {
                    "role": "user",
                    "content": content
                }
            ]
        },
        "parameters": parameters
    }

    # Make API request
    url = f"{api_base}/api/v1/services/aigc/multimodal-generation/generation"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    try:
        response = requests.post(url, headers=headers, json=payload, timeout=180)

        if not response.ok:
            # Presumably DashScope error bodies carry a top-level "message"
            # (TODO confirm); surface it when present instead of only the
            # bare HTTP status that raise_for_status() reports.
            try:
                error_body = response.json()
            except ValueError:
                error_body = None
            if isinstance(error_body, dict) and error_body.get("message"):
                return f"API Error: {error_body['message']}"
        response.raise_for_status()
        result = response.json()

        # Check for API errors
        if "error" in result:
            error_msg = result.get("error", {}).get("message", "Unknown error")
            return f"API Error: {error_msg}"

        # Parse response; "or" guards against explicit nulls in the JSON
        choices = (result.get("output") or {}).get("choices") or []
        usage = result.get("usage") or {}

        # Extract image URLs
        image_urls = []
        for choice in choices:
            message = choice.get("message") or {}
            for item in message.get("content") or []:
                if "image" in item:
                    image_urls.append(item["image"])

        width = usage.get("width", 1024)
        height = usage.get("height", 1024)
        request_id = result.get("request_id", "N/A")

        if not image_urls:
            # Nothing to report as a success.
            return f"API Error: no images were returned (Request ID: {request_id})"

        # Save every returned image if output_path provided
        saved_paths = []
        save_error = None
        if output_path:
            import time
            timestamp = int(time.time())
            try:
                for i, img_url in enumerate(image_urls, 1):
                    img_response = requests.get(img_url, timeout=30)
                    img_response.raise_for_status()

                    img_path = _qwen_save_path(
                        output_path,
                        _qwen_image_extension(img_url),
                        i,
                        len(image_urls),
                        timestamp,
                    )

                    # Ensure directory exists
                    os.makedirs(os.path.dirname(img_path) or ".", exist_ok=True)

                    with open(img_path, "wb") as f:
                        f.write(img_response.content)
                    saved_paths.append(os.path.abspath(img_path))
            except (requests.exceptions.RequestException, OSError) as e:
                # The images exist remotely; still report their URLs below.
                save_error = str(e)

        # Build response; report the number of images actually returned,
        # which may differ from the requested n.
        parts = [
            f"Successfully generated {len(image_urls)} image(s) ({width}x{height})\n\n",
            f"Request ID: {request_id}\n\n",
        ]

        if len(saved_paths) == 1:
            parts.append(f"Saved to: {saved_paths[0]}\n\n")
        elif saved_paths:
            parts.append("Saved to:\n")
            parts.extend(f" - {path}\n" for path in saved_paths)
            parts.append("\n")

        if save_error is not None:
            parts.append(f"Failed to save image: {save_error}\n\n")

        parts.append("Image URLs:\n")
        parts.extend(f" {i}. {img_url}\n" for i, img_url in enumerate(image_urls, 1))

        return "".join(parts)

    except requests.exceptions.RequestException as e:
        return f"Request Error: {str(e)}"
    except Exception as e:
        return f"Unexpected Error: {str(e)}"
def _str_to_bool(value: str) -> bool:
    """Parse a command-line boolean; reject anything that is not clearly one."""
    normalized = value.strip().lower()
    if normalized in ("true", "1", "yes", "y", "on"):
        return True
    if normalized in ("false", "0", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(f"expected true or false, got {value!r}")


def parse_args(argv=None):
    """
    Parse command line arguments.

    Args:
        argv: Optional argument list; defaults to sys.argv[1:] (argparse behavior).

    Returns:
        The parsed argparse.Namespace.
    """
    parser = argparse.ArgumentParser(
        description="Generate images using Qwen (DashScope) API",
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    # Not marked required: main() falls back to DASHSCOPE_API_KEY, which a
    # required option would make unreachable.
    parser.add_argument(
        "--api-key",
        type=str,
        default=None,
        help="DashScope API key (can also be set via DASHSCOPE_API_KEY environment variable)"
    )

    parser.add_argument(
        "--prompt",
        type=str,
        required=True,
        help="Image generation prompt (max 800 chars)"
    )

    parser.add_argument(
        "--size",
        type=str,
        default="1024*1024",
        help="Image resolution (default: 1024*1024, options: 1344*768, 768*1344, 1184*864, 864*1184)"
    )

    parser.add_argument(
        "--n",
        type=int,
        default=1,
        choices=range(1, 7),
        help="Number of images to generate (default: 1, max: 6)"
    )

    parser.add_argument(
        "--negative-prompt",
        type=str,
        default=None,
        help="Negative prompt to avoid certain elements (max 500 chars)"
    )

    parser.add_argument(
        "--prompt-extend",
        type=_str_to_bool,
        default=True,
        help="Enable prompt extend to enhance the prompt (default: true)"
    )

    parser.add_argument(
        "--image-url",
        type=str,
        default=None,
        help="Reference image URL for image-to-image generation"
    )

    parser.add_argument(
        "--output-path",
        type=str,
        default=None,
        help="Local path to save the generated image"
    )

    parser.add_argument(
        "--api-base",
        type=str,
        default="https://dashscope.aliyuncs.com",
        help="API base URL (default: https://dashscope.aliyuncs.com)"
    )

    return parser.parse_args(argv)


def download_image(url: str, output_path: str) -> bool:
    """
    Download an image and write it to a local file.

    Args:
        url: Image URL to fetch.
        output_path: File path to write the image bytes to.

    Returns:
        True when the file was written, False on a network or file error
        (the failure is printed, not raised).
    """
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        with open(output_path, "wb") as f:
            f.write(response.content)
    except (requests.exceptions.RequestException, OSError) as e:
        print(f" [FAIL] Download failed: {e}")
        return False

    print(f" [OK] Saved: {output_path}")
    return True


def _build_output_path(base: str, url_ext: str, index: int,
                       total: int, timestamp: int) -> str:
    """Compute the file path for image *index* (1-based) out of *total*."""
    if total > 1:
        # os.path.splitext only inspects the last path component, so
        # "./output/mountain" keeps its directory and gets url_ext appended
        # (the old rsplit('.') logic split on the leading "./").
        root, user_ext = os.path.splitext(base)
        return f"{root}_{index}_{timestamp}{user_ext or url_ext}"
    if base.endswith(url_ext):
        return base
    return f"{base}{url_ext}"


def generate_images(args):
    """
    Call Qwen (DashScope) API to generate images.

    Args:
        args: Parsed arguments from parse_args(); reads api_base, api_key,
            prompt, size, n, prompt_extend, negative_prompt, image_url and
            output_path.

    Returns:
        True when images were generated (and, if output_path is set, at
        least one was saved); False otherwise.
    """
    url = f"{args.api_base}/api/v1/services/aigc/multimodal-generation/generation"

    headers = {
        "Authorization": f"Bearer {args.api_key}",
        "Content-Type": "application/json"
    }

    # Build content array
    content = [{"text": args.prompt}]

    # Add reference image if provided
    if args.image_url:
        content.append({"image_url": {"url": args.image_url}})

    # Build parameters dict
    parameters = {
        "prompt_extend": args.prompt_extend,
        "size": args.size,
        "n": args.n
    }

    # Only add negative_prompt if provided
    if args.negative_prompt:
        parameters["negative_prompt"] = args.negative_prompt

    # Build request payload
    payload = {
        "model": "qwen-image-2.0-pro",
        "input": {
            "messages": [
                {
                    "role": "user",
                    "content": content
                }
            ]
        },
        "parameters": parameters
    }

    separator = "=" * 60
    print(f"\n{separator}")
    print("Qwen Image Generation")
    print(separator)
    print("Model: qwen-image-2.0-pro")
    print(f"Prompt: {args.prompt}")
    print(f"Size: {args.size}")
    print(f"Number: {args.n}")
    print(f"Prompt Extend: {'Enabled' if args.prompt_extend else 'Disabled'}")
    if args.negative_prompt:
        print(f"Negative Prompt: {args.negative_prompt}")
    if args.image_url:
        print(f"Reference Image: {args.image_url}")
    print(f"{separator}\n")

    try:
        print("Generating images...")
        response = requests.post(url, headers=headers, json=payload, timeout=180)
        response.raise_for_status()

        result = response.json()

        # Check for API errors
        if "error" in result:
            error_msg = result.get("error", {}).get("message", "Unknown error")
            print(f"API Error: {error_msg}")
            return False

        # Parse response; "or" guards against explicit nulls in the JSON
        choices = (result.get("output") or {}).get("choices") or []
        usage = result.get("usage") or {}

        # Extract image URLs
        image_urls = []
        for choice in choices:
            message = choice.get("message") or {}
            for item in message.get("content") or []:
                if "image" in item:
                    image_urls.append(item["image"])

        width = usage.get("width", 1024)
        height = usage.get("height", 1024)
        request_id = result.get("request_id", "N/A")

        if not image_urls:
            # Do not announce success when the API returned nothing.
            print(f"API Error: no images were returned (Request ID: {request_id})")
            return False

        print(f"\nSuccessfully generated {len(image_urls)} image(s) ({width}x{height})")
        print(f"Request ID: {request_id}\n")

        saved_count = 0

        # If output_path is provided, save all images
        if args.output_path:
            timestamp = int(time.time())
            for i, img_url in enumerate(image_urls, 1):
                # Determine file extension from URL
                url_ext = os.path.splitext(urlparse(img_url).path)[1]
                if not url_ext or len(url_ext) > 5:
                    url_ext = ".png"

                output_path = _build_output_path(
                    args.output_path, url_ext, i, len(image_urls), timestamp
                )

                # Ensure directory exists
                os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)

                if download_image(img_url, output_path):
                    saved_count += 1
        else:
            # Print URLs
            print("Image URLs:")
            for i, img_url in enumerate(image_urls, 1):
                print(f" {i}. {img_url}")

        print(f"\n{separator}")
        if args.output_path:
            print(f"Done! Successfully saved {saved_count}/{len(image_urls)} images")
        print(f"{separator}\n")

        return saved_count > 0 or not args.output_path

    except requests.exceptions.RequestException as e:
        print(f"\nRequest Error: {e}")
        return False
    except Exception as e:
        print(f"\nUnexpected Error: {e}")
        return False


def main(argv=None):
    """
    Run the command line tool.

    Args:
        argv: Optional argument list; defaults to sys.argv[1:].

    Returns:
        Process exit code: 0 on success, 1 on failure.
    """
    args = parse_args(argv)

    # Get API key from argument or environment variable
    if not args.api_key:
        args.api_key = os.environ.get("DASHSCOPE_API_KEY", "")

    # Still no API key: report it and fail
    if not args.api_key:
        print("Error: API key is required (--api-key or DASHSCOPE_API_KEY)")
        return 1

    return 0 if generate_images(args) else 1


if __name__ == "__main__":
    # Propagate failure to the shell instead of always exiting with 0.
    raise SystemExit(main())
- -## Usage - -```bash -python scripts/run.py --api-key "your-api-key" --prompt "your-prompt" [options] -``` - -## Arguments - -| Argument | Type | Required | Default | Description | -|----------|------|----------|---------|-------------| -| `--api-key` | string | Yes | - | MiniMax API key (can also be set via `MINIMAX_API_KEY` env var) | -| `--prompt` | string | Yes | - | Image generation prompt | -| `--model` | string | No | `image-01` | Image generation model | -| `--aspect-ratio` | string | No | `1:1` | Image aspect ratio (e.g., 16:9, 1:1, 9:16) | -| `--response-format` | string | No | `url` | Response format (`url` or `base64`) | -| `--n` | int | No | `1` | Number of images to generate (1-3) | -| `--prompt-optimizer` | bool | No | `false` | Enable prompt optimizer | -| `--subject-reference` | string | No | - | Reference image URL or local file path for image-to-image generation | -| `--subject-type` | string | No | `character` | Subject reference type (character, product, logo, video_subject, other) | -| `--seed` | int | No | - | Random seed for reproducible generation | -| `--output-dir` | string | No | `./output` | Output directory for images | - -## API Reference - -- **Endpoint**: `POST https://api.minimaxi.com/v1/image_generation` -- **Auth**: Bearer Token - -## Examples - -```bash - -python scripts/run.py --api-key "sk-xxx" --prompt "A sunset over the ocean" - -# Generate multiple images -python scripts/run.py --api-key "sk-xxx" --prompt "A man in a white t-shirt, full-body, standing front view, outdoors" --n 3 --aspect-ratio "16:9" - -# Enable prompt optimizer -python scripts/run.py --api-key "sk-xxx" --prompt "sunset ocean" --prompt-optimizer true - -# Image-to-image generation with reference image URL -python scripts/run.py --api-key "sk-xxx" --prompt "A girl looking into the distance from a library window" --subject-reference "https://example.com/reference.jpg" --subject-type "character" --n 2 - -# Image-to-image with local file -python 
scripts/run.py --api-key "sk-xxx" --prompt "A girl looking into the distance from a library window" --subject-reference "./my_character.jpg" --subject-type "character" - -# Image-to-image with product reference -python scripts/run.py --api-key "sk-xxx" --prompt "A beautiful product shot" --subject-reference "https://example.com/product.jpg" --subject-type "product" - -# Use seed for reproducible generation -python scripts/run.py --api-key "sk-xxx" --prompt "A sunset over the ocean" --seed 42 -``` - -## Output - -Images are saved to the specified output directory with the naming format: `generated_image_{index}_{timestamp}.{ext}`. \ No newline at end of file