# Directory Structure

```
├── .docs
│   ├── llms-full.txt
│   ├── openai images 1.txt
│   └── typescript-sdk mcp README.md
├── .gitignore
├── CHANGELOG.md
├── CONTEXT.md
├── LICENSE
├── logo.png
├── package-lock.json
├── package.json
├── README.md
└── src
    ├── index.ts
    └── tsconfig.json
```

# Files

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
1 | # Dependency directories
2 | node_modules/
3 | jspm_packages/
4 | 
5 | # Build outputs
6 | build/
7 | dist/
8 | lib/
9 | out/
10 | *.tsbuildinfo
11 | 
12 | # Generated images
13 | generated-images/
14 | 
15 | # Environment variables
16 | .env
17 | .env.local
18 | .env.development.local
19 | .env.test.local
20 | .env.production.local
21 | 
22 | # Logs
23 | logs
24 | *.log
25 | npm-debug.log*
26 | yarn-debug.log*
27 | yarn-error.log*
28 | lerna-debug.log*
29 | .pnpm-debug.log*
30 | 
31 | # Coverage directory used by tools like istanbul
32 | coverage/
33 | *.lcov
34 | 
35 | # TypeScript cache
36 | *.tsbuildinfo
37 | 
38 | # Optional npm cache directory
39 | .npm
40 | 
41 | # Optional eslint cache
42 | .eslintcache
43 | 
44 | # Optional stylelint cache
45 | .stylelintcache
46 | 
47 | # Microbundle cache
48 | .rpt2_cache/
49 | .rts2_cache_cjs/
50 | .rts2_cache_es/
51 | .rts2_cache_umd/
52 | 
53 | # Optional REPL history
54 | .node_repl_history
55 | 
56 | # Output of 'npm pack'
57 | *.tgz
58 | 
59 | # Yarn Integrity file
60 | .yarn-integrity
61 | 
62 | # dotenv environment variable files
63 | .env
64 | .env.development.local
65 | .env.test.local
66 | .env.production.local
67 | .env.local
68 | 
69 | # parcel-bundler cache (https://parceljs.org/)
70 | .cache
71 | .parcel-cache
72 | 
73 | # Next.js build output
74 | .next
75 | out
76 | 
77 | # Nuxt.js build / generate output
78 | .nuxt
79 | dist
80 | 
81 | # Gatsby files
82 | .cache/
83 | # Comment in the public line in if your project uses Gatsby and not Next.js
84 | # https://nextjs.org/blog/next-9-1#public-directory-support
85 | # public
86 | 
87 | # vuepress build output
88 | .vuepress/dist
89 | 
90 | # vuepress v2.x temp and cache directory
91 | .temp
92 | .cache
93 | 
94 | # Docusaurus cache and generated files
95 | .docusaurus
96 | 
97 | # Serverless directories
98 | .serverless/
99 | 
100 | # FuseBox cache
101 | .fusebox/
102 | 
103 | # DynamoDB Local files
104 | .dynamodb/
105 | 
106 | # TernJS port file
107 | .tern-port
108 | 
109 | # Stores VSCode versions used for testing VSCode extensions
110 | .vscode-test
111 | 
112 | # yarn v2
113 | .yarn/cache
114 | .yarn/unplugged
115 | .yarn/build-state.yml
116 | .yarn/install-state.gz
117 | .pnp.*
118 | 
119 | # IDE specific files
120 | .idea/
121 | .vscode/
122 | *.swp
123 | *.swo
124 | .DS_Store
125 | 
126 | .docs/
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
1 | <p align="center">
2 | <img src="logo.png" alt="GPT Image 1 MCP Logo" width="200"/>
3 | </p>
4 | 
5 | <h1 align="center">@cloudwerxlab/gpt-image-1-mcp</h1>
6 | 
7 | <p align="center">
8 | <a href="https://www.npmjs.com/package/@cloudwerxlab/gpt-image-1-mcp"><img src="https://img.shields.io/npm/v/@cloudwerxlab/gpt-image-1-mcp.svg" alt="npm version"></a>
9 | <a href="https://www.npmjs.com/package/@cloudwerxlab/gpt-image-1-mcp"><img src="https://img.shields.io/npm/dm/@cloudwerxlab/gpt-image-1-mcp.svg" alt="npm downloads"></a>
10 | <a 
href="https://github.com/CLOUDWERX-DEV/gpt-image-1-mcp/blob/main/LICENSE"><img src="https://img.shields.io/github/license/CLOUDWERX-DEV/gpt-image-1-mcp.svg" alt="license"></a> 11 | <a href="https://nodejs.org/"><img src="https://img.shields.io/node/v/@cloudwerxlab/gpt-image-1-mcp.svg" alt="node version"></a> 12 | <a href="https://cloudwerx.dev"><img src="https://img.shields.io/badge/website-cloudwerx.dev-blue" alt="Website"></a> 13 | </p> 14 | 15 | <p align="center"> 16 | A Model Context Protocol (MCP) server for generating and editing images using the OpenAI <code>gpt-image-1</code> model. 17 | </p> 18 | 19 | <p align="center"> 20 | <img src="https://img.shields.io/badge/OpenAI-GPT--Image--1-6E46AE" alt="OpenAI GPT-Image-1"> 21 | <img src="https://img.shields.io/badge/MCP-Compatible-00A3E0" alt="MCP Compatible"> 22 | </p> 23 | 24 | ## 🚀 Quick Start 25 | 26 | <div align="center"> 27 | <a href="https://www.npmjs.com/package/@cloudwerxlab/gpt-image-1-mcp"><img src="https://img.shields.io/badge/NPX-Ready-red.svg" alt="NPX Ready"></a> 28 | </div> 29 | 30 | <p align="center">Run this MCP server directly using NPX without installing it. <a href="https://www.npmjs.com/package/@cloudwerxlab/gpt-image-1-mcp">View on npm</a>.</p> 31 | 32 | ```bash 33 | npx -y @cloudwerxlab/gpt-image-1-mcp 34 | ``` 35 | 36 | <p align="center">The <code>-y</code> flag automatically answers "yes" to any prompts that might appear during the installation process.</p> 37 | 38 | ### 📋 Prerequisites 39 | 40 | <table> 41 | <tr> 42 | <td width="50%" align="center"> 43 | <img src="https://img.shields.io/badge/Node.js-v14+-339933?logo=node.js&logoColor=white" alt="Node.js v14+"> 44 | <p>Node.js (v14 or higher)</p> 45 | </td> 46 | <td width="50%" align="center"> 47 | <img src="https://img.shields.io/badge/OpenAI-API_Key-412991?logo=openai&logoColor=white" alt="OpenAI API Key"> 48 | <p>OpenAI API key with access to gpt-image-1</p> 49 | </td> 50 | </tr> 51 | </table> 52 | 53 | ### 🔑 Environment Variables 54 | 55 | <table> 56 | <tr> 57 | <th>Variable</th> 58 | <th>Required</th> 59 | <th>Description</th> 60 | </tr> 61 | <tr> 62 | <td><code>OPENAI_API_KEY</code></td> 63 | <td>✅ Yes</td> 64 | <td>Your OpenAI API key with access to the gpt-image-1 model</td> 65 | </tr> 66 | <tr> 67 | <td><code>GPT_IMAGE_OUTPUT_DIR</code></td> 68 | <td>❌ No</td> 69 | <td>Custom directory for saving generated images (defaults to user's Pictures folder under <code>gpt-image-1</code> subfolder)</td> 70 | </tr> 71 | </table> 72 | 73 | ### 💻 Example Usage with NPX 74 | 75 | <table> 76 | <tr> 77 | <th>Operating System</th> 78 | <th>Command Line Example</th> 79 | </tr> 80 | <tr> 81 | <td><strong>Linux/macOS</strong></td> 82 | <td> 83 | 84 | ```bash 85 | # Set your OpenAI API key 86 | export OPENAI_API_KEY=sk-your-openai-api-key 87 | 88 | # Optional: Set custom output directory 89 | export GPT_IMAGE_OUTPUT_DIR=/home/username/Pictures/ai-generated-images 90 | 91 | # Run the server with NPX 92 | npx -y @cloudwerxlab/gpt-image-1-mcp 93 | ``` 94 | </tr> 95 | <tr> 96 | <td><strong>Windows (PowerShell)</strong></td> 97 | <td> 98 | 99 | ```powershell 100 | # Set your OpenAI API key 101 | $env:OPENAI_API_KEY = "sk-your-openai-api-key" 102 | 103 | # Optional: Set custom output directory 104 | $env:GPT_IMAGE_OUTPUT_DIR = "C:\Users\username\Pictures\ai-generated-images" 105 | 106 | # Run the server with NPX 107 | npx -y @cloudwerxlab/gpt-image-1-mcp 108 | ``` 109 | </tr> 110 | <tr> 111 | <td><strong>Windows (Command Prompt)</strong></td> 112 | <td> 113 | 114 | ```cmd 115 
| :: Set your OpenAI API key 116 | set OPENAI_API_KEY=sk-your-openai-api-key 117 | 118 | :: Optional: Set custom output directory 119 | set GPT_IMAGE_OUTPUT_DIR=C:\Users\username\Pictures\ai-generated-images 120 | 121 | :: Run the server with NPX 122 | npx -y @cloudwerxlab/gpt-image-1-mcp 123 | ``` 124 | </tr> 125 | </table> 126 | 127 | ## 🔌 Integration with MCP Clients 128 | 129 | <div align="center"> 130 | <img src="https://img.shields.io/badge/VS_Code-MCP_Extension-007ACC?logo=visual-studio-code&logoColor=white" alt="VS Code MCP Extension"> 131 | <img src="https://img.shields.io/badge/Roo-Compatible-FF6B6B" alt="Roo Compatible"> 132 | <img src="https://img.shields.io/badge/Cursor-Compatible-4C2889" alt="Cursor Compatible"> 133 | <img src="https://img.shields.io/badge/Augment-Compatible-6464FF" alt="Augment Compatible"> 134 | <img src="https://img.shields.io/badge/Windsurf-Compatible-00B4D8" alt="Windsurf Compatible"> 135 | </div> 136 | 137 | ### 🛠️ Setting Up in an MCP Client 138 | 139 | <table> 140 | <tr> 141 | <td> 142 | <h4>Step 1: Locate Settings File</h4> 143 | <ul> 144 | <li>For <strong>Roo</strong>: <code>c:\Users\<username>\AppData\Roaming\Code\User\globalStorage\rooveterinaryinc.roo-cline\settings\mcp_settings.json</code></li> 145 | <li>For <strong>VS Code MCP Extension</strong>: Check your extension documentation for the settings file location</li> 146 | <li>For <strong>Cursor</strong>: <code>~/.config/cursor/mcp_settings.json</code> (Linux/macOS) or <code>%APPDATA%\Cursor\mcp_settings.json</code> (Windows)</li> 147 | <li>For <strong>Augment</strong>: <code>~/.config/augment/mcp_settings.json</code> (Linux/macOS) or <code>%APPDATA%\Augment\mcp_settings.json</code> (Windows)</li> 148 | <li>For <strong>Windsurf</strong>: <code>~/.config/windsurf/mcp_settings.json</code> (Linux/macOS) or <code>%APPDATA%\Windsurf\mcp_settings.json</code> (Windows)</li> 149 | </ul> 150 | </td> 151 | </tr> 152 | <tr> 153 | <td> 154 | <h4>Step 2: Add Configuration</h4> 155 | <p>Add the following configuration to the <code>mcpServers</code> object:</p> 156 | </td> 157 | </tr> 158 | </table> 159 | 160 | ```json 161 | { 162 | "mcpServers": { 163 | "gpt-image-1": { 164 | "command": "npx", 165 | "args": [ 166 | "-y", 167 | "@cloudwerxlab/gpt-image-1-mcp" 168 | ], 169 | "env": { 170 | "OPENAI_API_KEY": "PASTE YOUR OPEN-AI KEY HERE", 171 | "GPT_IMAGE_OUTPUT_DIR": "OPTIONAL: PATH TO SAVE GENERATED IMAGES" 172 | } 173 | } 174 | } 175 | } 176 | ``` 177 | 178 | #### Example Configurations for Different Operating Systems 179 | 180 | <table> 181 | <tr> 182 | <th>Operating System</th> 183 | <th>Example Configuration</th> 184 | </tr> 185 | <tr> 186 | <td><strong>Windows</strong></td> 187 | <td> 188 | 189 | ```json 190 | { 191 | "mcpServers": { 192 | "gpt-image-1": { 193 | "command": "npx", 194 | "args": ["-y", "@cloudwerxlab/gpt-image-1-mcp"], 195 | "env": { 196 | "OPENAI_API_KEY": "sk-your-openai-api-key", 197 | "GPT_IMAGE_OUTPUT_DIR": "C:\\Users\\username\\Pictures\\ai-generated-images" 198 | } 199 | } 200 | } 201 | } 202 | ``` 203 | </tr> 204 | <tr> 205 | <td><strong>Linux/macOS</strong></td> 206 | <td> 207 | 208 | ```json 209 | { 210 | "mcpServers": { 211 | "gpt-image-1": { 212 | "command": "npx", 213 | "args": ["-y", "@cloudwerxlab/gpt-image-1-mcp"], 214 | "env": { 215 | "OPENAI_API_KEY": "sk-your-openai-api-key", 216 | "GPT_IMAGE_OUTPUT_DIR": "/home/username/Pictures/ai-generated-images" 217 | } 218 | } 219 | } 220 | } 221 | ``` 222 | </tr> 223 | </table> 224 | 225 | > **Note**: For Windows paths, use double 
backslashes (`\\`) to escape the backslash character in JSON. For Linux/macOS, use forward slashes (`/`). 226 | 227 | ## ✨ Features 228 | 229 | <div align="center"> 230 | <table> 231 | <tr> 232 | <td align="center"> 233 | <h3>🎨 Core Tools</h3> 234 | <ul> 235 | <li><code>create_image</code>: Generate new images from text prompts</li> 236 | <li><code>create_image_edit</code>: Edit existing images with text prompts and masks</li> 237 | </ul> 238 | </td> 239 | <td align="center"> 240 | <h3>🚀 Key Benefits</h3> 241 | <ul> 242 | <li>Simple integration with MCP clients</li> 243 | <li>Full access to OpenAI's gpt-image-1 capabilities</li> 244 | <li>Streamlined workflow for AI image generation</li> 245 | </ul> 246 | </td> 247 | </tr> 248 | </table> 249 | </div> 250 | 251 | ### 💡 Enhanced Capabilities 252 | 253 | <table> 254 | <tr> 255 | <td> 256 | <h4>📊 Output & Formatting</h4> 257 | <ul> 258 | <li>✅ <strong>Beautifully Formatted Output</strong>: Responses include emojis and detailed information</li> 259 | <li>✅ <strong>Automatic Image Saving</strong>: All generated images saved to disk for easy access</li> 260 | <li>✅ <strong>Detailed Token Usage</strong>: View token consumption for each request</li> 261 | </ul> 262 | </td> 263 | <td> 264 | <h4>⚙️ Configuration & Handling</h4> 265 | <ul> 266 | <li>✅ <strong>Configurable Output Directory</strong>: Customize where images are saved</li> 267 | <li>✅ <strong>File Path Support</strong>: Edit images using file paths instead of base64 encoding</li> 268 | <li>✅ <strong>Comprehensive Error Handling</strong>: Detailed error reporting with specific error codes, descriptions, and troubleshooting suggestions</li> 269 | </ul> 270 | </td> 271 | </tr> 272 | </table> 273 | 274 | ## 🔄 How It Works 275 | 276 | <div align="center"> 277 | <table> 278 | <tr> 279 | <th align="center">🖼️ Image Generation</th> 280 | <th align="center">✏️ Image Editing</th> 281 | </tr> 282 | <tr> 283 | <td> 284 | <ol> 285 | <li>Server receives prompt and parameters</li> 286 | <li>Calls OpenAI API using gpt-image-1 model</li> 287 | <li>API returns base64-encoded images</li> 288 | <li>Server saves images to configured directory</li> 289 | <li>Returns formatted response with paths and metadata</li> 290 | </ol> 291 | </td> 292 | <td> 293 | <ol> 294 | <li>Server receives image, prompt, and optional mask</li> 295 | <li>For file paths, reads and prepares files for API</li> 296 | <li>Uses direct curl command for proper MIME handling</li> 297 | <li>API returns base64-encoded edited images</li> 298 | <li>Server saves images to configured directory</li> 299 | <li>Returns formatted response with paths and metadata</li> 300 | </ol> 301 | </td> 302 | </tr> 303 | </table> 304 | </div> 305 | 306 | ### 📁 Output Directory Behavior 307 | 308 | <table> 309 | <tr> 310 | <td width="50%"> 311 | <h4>📂 Storage Location</h4> 312 | <ul> 313 | <li>🔹 <strong>Default Location</strong>: User's Pictures folder under <code>gpt-image-1</code> subfolder (e.g., <code>C:\Users\username\Pictures\gpt-image-1</code> on Windows)</li> 314 | <li>🔹 <strong>Custom Location</strong>: Set via <code>GPT_IMAGE_OUTPUT_DIR</code> environment variable</li> 315 | <li>🔹 <strong>Fallback Location</strong>: <code>./generated-images</code> (if Pictures folder can't be determined)</li> 316 | </ul> 317 | </td> 318 | <td width="50%"> 319 | <h4>🗂️ File Management</h4> 320 | <ul> 321 | <li>🔹 <strong>Directory Creation</strong>: Automatically creates output directory if it doesn't exist</li> 322 | <li>🔹 <strong>File Naming</strong>: Images saved with 
timestamped filenames (e.g., <code>image-2023-05-05T12-34-56-789Z.png</code>)</li> 323 | <li>🔹 <strong>Cross-Platform</strong>: Works on Windows, macOS, and Linux with appropriate Pictures folder detection</li> 324 | </ul> 325 | </td> 326 | </tr> 327 | </table> 328 | 329 | ## Installation & Usage 330 | 331 | ### NPM Package 332 | 333 | This package is available on npm: [@cloudwerxlab/gpt-image-1-mcp](https://www.npmjs.com/package/@cloudwerxlab/gpt-image-1-mcp) 334 | 335 | You can install it globally: 336 | 337 | ```bash 338 | npm install -g @cloudwerxlab/gpt-image-1-mcp 339 | ``` 340 | 341 | Or run it directly with npx as shown in the Quick Start section. 342 | 343 | ### Tool: `create_image` 344 | 345 | Generates a new image based on a text prompt. 346 | 347 | #### Parameters 348 | 349 | | Parameter | Type | Required | Description | 350 | |-----------|------|----------|-------------| 351 | | `prompt` | string | Yes | The text description of the image to generate (max 32,000 chars) | 352 | | `size` | string | No | Image size: "1024x1024" (default), "1536x1024", or "1024x1536" | 353 | | `quality` | string | No | Image quality: "high" (default), "medium", or "low" | 354 | | `n` | integer | No | Number of images to generate (1-10, default: 1) | 355 | | `background` | string | No | Background style: "transparent", "opaque", or "auto" (default) | 356 | | `output_format` | string | No | Output format: "png" (default), "jpeg", or "webp" | 357 | | `output_compression` | integer | No | Compression level (0-100, default: 0) | 358 | | `user` | string | No | User identifier for OpenAI usage tracking | 359 | | `moderation` | string | No | Moderation level: "low" or "auto" (default) | 360 | 361 | #### Example 362 | 363 | ```xml 364 | <use_mcp_tool> 365 | <server_name>gpt-image-1</server_name> 366 | <tool_name>create_image</tool_name> 367 | <arguments> 368 | { 369 | "prompt": "A futuristic city skyline at sunset, digital art", 370 | "size": "1024x1024", 371 | "quality": "high", 372 | "n": 1, 373 | "background": "auto" 374 | } 375 | </arguments> 376 | </use_mcp_tool> 377 | ``` 378 | 379 | #### Response 380 | 381 | The tool returns: 382 | - A formatted text message with details about the generated image(s) 383 | - The image(s) as base64-encoded data 384 | - Metadata including token usage and file paths 385 | 386 | ### Tool: `create_image_edit` 387 | 388 | Edits an existing image based on a text prompt and optional mask. 
389 | 390 | #### Parameters 391 | 392 | | Parameter | Type | Required | Description | 393 | |-----------|------|----------|-------------| 394 | | `image` | string, object, or array | Yes | The image(s) to edit (base64 string or file path object) | 395 | | `prompt` | string | Yes | The text description of the desired edit (max 32,000 chars) | 396 | | `mask` | string or object | No | The mask that defines areas to edit (base64 string or file path object) | 397 | | `size` | string | No | Image size: "1024x1024" (default), "1536x1024", or "1024x1536" | 398 | | `quality` | string | No | Image quality: "high" (default), "medium", or "low" | 399 | | `n` | integer | No | Number of images to generate (1-10, default: 1) | 400 | | `background` | string | No | Background style: "transparent", "opaque", or "auto" (default) | 401 | | `user` | string | No | User identifier for OpenAI usage tracking | 402 | 403 | #### Example with Base64 Encoded Image 404 | 405 | ```xml 406 | <use_mcp_tool> 407 | <server_name>gpt-image-1</server_name> 408 | <tool_name>create_image_edit</tool_name> 409 | <arguments> 410 | { 411 | "image": "BASE64_ENCODED_IMAGE_STRING", 412 | "prompt": "Add a small robot in the corner", 413 | "mask": "BASE64_ENCODED_MASK_STRING", 414 | "quality": "high" 415 | } 416 | </arguments> 417 | </use_mcp_tool> 418 | ``` 419 | 420 | #### Example with File Path 421 | 422 | ```xml 423 | <use_mcp_tool> 424 | <server_name>gpt-image-1</server_name> 425 | <tool_name>create_image_edit</tool_name> 426 | <arguments> 427 | { 428 | "image": { 429 | "filePath": "C:/path/to/your/image.png" 430 | }, 431 | "prompt": "Add a small robot in the corner", 432 | "mask": { 433 | "filePath": "C:/path/to/your/mask.png" 434 | }, 435 | "quality": "high" 436 | } 437 | </arguments> 438 | </use_mcp_tool> 439 | ``` 440 | 441 | #### Response 442 | 443 | The tool returns: 444 | - A formatted text message with details about the edited image(s) 445 | - The edited image(s) as base64-encoded data 446 | - Metadata including token usage and file paths 447 | 448 | ## 🔧 Troubleshooting 449 | 450 | <div align="center"> 451 | <img src="https://img.shields.io/badge/Support-Available-brightgreen" alt="Support Available"> 452 | </div> 453 | 454 | ### 🚨 Common Issues 455 | 456 | <table> 457 | <tr> 458 | <th align="center">Issue</th> 459 | <th align="center">Solution</th> 460 | </tr> 461 | <tr> 462 | <td> 463 | <h4>🖼️ MIME Type Errors</h4> 464 | <p>Errors related to image format or MIME type handling</p> 465 | </td> 466 | <td> 467 | <p>Ensure image files have the correct extension (.png, .jpg, etc.) that matches their actual format. The server uses file extensions to determine MIME types.</p> 468 | </td> 469 | </tr> 470 | <tr> 471 | <td> 472 | <h4>🔑 API Key Issues</h4> 473 | <p>Authentication errors with OpenAI API</p> 474 | </td> 475 | <td> 476 | <p>Verify your OpenAI API key is correct and has access to the gpt-image-1 model. Check for any spaces or special characters that might have been accidentally included.</p> 477 | </td> 478 | </tr> 479 | <tr> 480 | <td> 481 | <h4>🛠️ Build Errors</h4> 482 | <p>Issues when building from source</p> 483 | </td> 484 | <td> 485 | <p>Ensure you have the correct TypeScript version installed (v5.3.3 or compatible) and that your <code>tsconfig.json</code> is properly configured. 
Run <code>npm install</code> to ensure all dependencies are installed.</p> 486 | </td> 487 | </tr> 488 | <tr> 489 | <td> 490 | <h4>📁 Output Directory Issues</h4> 491 | <p>Problems with saving generated images</p> 492 | </td> 493 | <td> 494 | <p>Check if the process has write permissions to the configured output directory. Try using an absolute path for <code>GPT_IMAGE_OUTPUT_DIR</code> if relative paths aren't working.</p> 495 | </td> 496 | </tr> 497 | </table> 498 | 499 | ### 🔍 Error Handling and Reporting 500 | 501 | The MCP server includes comprehensive error handling that provides detailed information when something goes wrong. When an error occurs: 502 | 503 | 1. **Error Format**: All errors are returned with: 504 | - A clear error message describing what went wrong 505 | - The specific error code or type 506 | - Additional context about the error when available 507 | 508 | 2. **AI Assistant Behavior**: When using this MCP server with AI assistants: 509 | - The AI will always report the full error message to help with troubleshooting 510 | - The AI will explain the likely cause of the error in plain language 511 | - The AI will suggest specific steps to resolve the issue 512 | 513 | ## 📄 License 514 | 515 | <div align="center"> 516 | <a href="LICENSE"><img src="https://img.shields.io/badge/License-MIT-blue.svg" alt="MIT License"></a> 517 | </div> 518 | 519 | <p align="center"> 520 | This project is licensed under the MIT License - see the <a href="LICENSE">LICENSE</a> file for details. 521 | </p> 522 | 523 | <details> 524 | <summary>License Summary</summary> 525 | 526 | <p>The MIT License is a permissive license that is short and to the point. It lets people do anything with your code with proper attribution and without warranty.</p> 527 | 528 | <p><strong>You are free to:</strong></p> 529 | <ul> 530 | <li>Use the software commercially</li> 531 | <li>Modify the software</li> 532 | <li>Distribute the software</li> 533 | <li>Use and modify the software privately</li> 534 | </ul> 535 | 536 | <p><strong>Under the following terms:</strong></p> 537 | <ul> 538 | <li>Include the original copyright notice and the license notice in all copies or substantial uses of the work</li> 539 | </ul> 540 | 541 | <p><strong>Limitations:</strong></p> 542 | <ul> 543 | <li>The authors provide no warranty with the software and are not liable for any damages</li> 544 | </ul> 545 | </details> 546 | 547 | ## 🙏 Acknowledgments 548 | 549 | <div align="center"> 550 | <table> 551 | <tr> 552 | <td align="center"> 553 | <a href="https://openai.com/"> 554 | <img src="https://img.shields.io/badge/OpenAI-412991?logo=openai&logoColor=white" alt="OpenAI"> 555 | <p>For providing the gpt-image-1 model</p> 556 | </a> 557 | </td> 558 | <td align="center"> 559 | <a href="https://github.com/model-context-protocol/mcp"> 560 | <img src="https://img.shields.io/badge/MCP-Protocol-00A3E0" alt="MCP Protocol"> 561 | <p>For the protocol specification</p> 562 | </a> 563 | </td> 564 | </tr> 565 | </table> 566 | </div> 567 | 568 | <div align="center"> 569 | <p> 570 | <a href="https://github.com/CLOUDWERX-DEV/gpt-image-1-mcp/issues">Report Bug</a> • 571 | <a href="https://github.com/CLOUDWERX-DEV/gpt-image-1-mcp/issues">Request Feature</a> • 572 | <a href="https://cloudwerx.dev">Visit Our Website</a> 573 | </p> 574 | </div> 575 | 576 | <div align="center"> 577 | <p> 578 | Developed with ❤️ by <a href="https://cloudwerx.dev">CLOUDWERX</a> 579 | </p> 580 | </div> 581 | ``` 
-------------------------------------------------------------------------------- /src/tsconfig.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "Node16", 5 | "moduleResolution": "Node16", 6 | "outDir": "../build", 7 | "rootDir": ".", 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true, 12 | "paths": { 13 | "@modelcontextprotocol/sdk/*": ["./node_modules/@modelcontextprotocol/sdk/*"] 14 | } 15 | }, 16 | "include": ["./**/*.ts"], 17 | "exclude": ["node_modules", "build"] 18 | } 19 | ``` -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "name": "@cloudwerxlab/gpt-image-1-mcp", 3 | "version": "1.1.7", 4 | "description": "A Model Context Protocol server for OpenAI's gpt-image-1 model", 5 | "type": "module", 6 | "bin": { 7 | "@cloudwerxlab/gpt-image-1-mcp": "build/index.js" 8 | }, 9 | "files": [ 10 | "build", 11 | "README.md", 12 | "CHANGELOG.md", 13 | "LICENSE", 14 | "package.json", 15 | "tsconfig.json", 16 | "logo.png" 17 | ], 18 | "scripts": { 19 | "build": "cd src && tsc && node -e \"require('fs').chmodSync('../build/index.js', '755')\"", 20 | "watch": "cd src && tsc --watch", 21 | "test": "node test-mcp-server.js", 22 | "test:npx": "node test-npx.js", 23 | "prepare": "npm run build", 24 | "inspector": "npx @modelcontextprotocol/inspector ./build/index.js" 25 | }, 26 | "dependencies": { 27 | "@modelcontextprotocol/sdk": "^1.11.0", 28 | "node-fetch": "^3.3.2", 29 | "openai": "^4.97.0", 30 | "zod": "^3.24.4", 31 | "form-data": "^4.0.0" 32 | }, 33 | "devDependencies": { 34 | "@types/node": "^20.11.24", 35 | "typescript": "^5.3.3" 36 | }, 37 | "keywords": [ 38 | "mcp", 39 | "openai", 40 | "gpt-image-1", 41 | "image-generation", 42 | "model-context-protocol" 43 | ], 44 | "author": "", 45 | "license": "MIT", 46 | "engines": { 47 | "node": ">=14.0.0" 48 | } 49 | } 50 | 51 | ``` -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- ```markdown 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 
4 | 5 | ## 1.1.7 - 2025-05-07 6 | 7 | ### Fixed 8 | 9 | - **Documentation**: Fixed formatting issues in README.md 10 | - **Documentation**: Restored enhanced README with centered logo and improved layout 11 | 12 | ## 1.1.6 - 2025-05-07 13 | 14 | ### Changed 15 | 16 | - **Default Output Directory**: Changed default image save location to user's Pictures folder under `gpt-image-1` subfolder 17 | - **Cross-Platform Support**: Added detection of Pictures folder location on Windows, macOS, and Linux 18 | - **Documentation**: Updated README with new default output directory information 19 | 20 | ## 1.1.0 - 2025-05-05 21 | 22 | ### Added 23 | 24 | - **File Path Support**: Added ability to use file paths for images and masks in the `create_image_edit` tool 25 | - **Configurable Output Directory**: Added support for customizing the output directory via the `GPT_IMAGE_OUTPUT_DIR` environment variable 26 | - **Enhanced Output Formatting**: Improved response formatting with emojis and detailed information 27 | - **Detailed Token Usage**: Added token usage information to the response metadata 28 | - **Comprehensive Documentation**: Completely rewrote the README.md with detailed usage examples and configuration options 29 | - **Proper .gitignore**: Added a comprehensive .gitignore file to exclude build artifacts and generated images 30 | 31 | ### Fixed 32 | 33 | - **Build Structure**: Fixed the build process to output to the root build directory instead of inside the src folder 34 | - **MIME Type Handling**: Improved MIME type handling for image uploads in the `create_image_edit` tool 35 | - **Error Handling**: Enhanced error handling with more informative error messages 36 | - **Cleanup Process**: Improved the cleanup process for temporary files 37 | 38 | ### Changed 39 | 40 | - **API Implementation**: Changed the image editing implementation to use a direct curl command for better MIME type handling 41 | - **Response Structure**: Updated the response structure to include more detailed information about generated images 42 | - **File Naming**: Improved the file naming convention for saved images with timestamps 43 | - **Dependencies**: Added node-fetch and form-data dependencies for improved HTTP requests 44 | 45 | ## 1.0.0 - 2025-05-04 46 | 47 | ### Added 48 | 49 | - Initial release of the GPT-Image-1 MCP Server. 50 | - Implemented `create_image` tool for generating images using OpenAI `gpt-image-1`. 51 | - Implemented `create_image_edit` tool for editing images using OpenAI `gpt-image-1`. 52 | - Added support for all `gpt-image-1` specific parameters in both tools (`background`, `output_compression`, `output_format`, `quality`, `size`). 53 | - Included basic error handling for OpenAI API calls. 54 | - Created `README.md` with installation and configuration instructions. 55 | - Created `gpt-image-1-mcp.md` with a detailed architecture and tool overview. ``` -------------------------------------------------------------------------------- /.docs/openai images 1.txt: -------------------------------------------------------------------------------- ``` 1 | 2 | 3 | Create image 4 | post 5 | 6 | https://api.openai.com/v1/images/generations 7 | Creates an image given a prompt. Learn more. 8 | 9 | Request body 10 | prompt 11 | string 12 | 13 | Required 14 | A text description of the desired image(s). The maximum length is 32000 characters for gpt-image-1, 1000 characters for dall-e-2 and 4000 characters for dall-e-3. 
15 | 16 | background 17 | string or null 18 | 19 | Optional 20 | Defaults to auto 21 | Allows to set transparency for the background of the generated image(s). This parameter is only supported for gpt-image-1. Must be one of transparent, opaque or auto (default value). When auto is used, the model will automatically determine the best background for the image. 22 | 23 | If transparent, the output format needs to support transparency, so it should be set to either png (default value) or webp. 24 | 25 | model 26 | string 27 | 28 | Optional 29 | Defaults to dall-e-2 30 | The model to use for image generation. One of dall-e-2, dall-e-3, or gpt-image-1. Defaults to dall-e-2 unless a parameter specific to gpt-image-1 is used. 31 | 32 | moderation 33 | string or null 34 | 35 | Optional 36 | Defaults to auto 37 | Control the content-moderation level for images generated by gpt-image-1. Must be either low for less restrictive filtering or auto (default value). 38 | 39 | n 40 | integer or null 41 | 42 | Optional 43 | Defaults to 1 44 | The number of images to generate. Must be between 1 and 10. For dall-e-3, only n=1 is supported. 45 | 46 | output_compression 47 | integer or null 48 | 49 | Optional 50 | Defaults to 100 51 | The compression level (0-100%) for the generated images. This parameter is only supported for gpt-image-1 with the webp or jpeg output formats, and defaults to 100. 52 | 53 | output_format 54 | string or null 55 | 56 | Optional 57 | Defaults to png 58 | The format in which the generated images are returned. This parameter is only supported for gpt-image-1. Must be one of png, jpeg, or webp. 59 | 60 | quality 61 | string or null 62 | 63 | Optional 64 | Defaults to auto 65 | The quality of the image that will be generated. 66 | 67 | auto (default value) will automatically select the best quality for the given model. 68 | high, medium and low are supported for gpt-image-1. 69 | hd and standard are supported for dall-e-3. 70 | standard is the only option for dall-e-2. 71 | response_format 72 | string or null 73 | 74 | Optional 75 | Defaults to url 76 | The format in which generated images with dall-e-2 and dall-e-3 are returned. Must be one of url or b64_json. URLs are only valid for 60 minutes after the image has been generated. This parameter isn't supported for gpt-image-1 which will always return base64-encoded images. 77 | 78 | size 79 | string or null 80 | 81 | Optional 82 | Defaults to auto 83 | The size of the generated images. Must be one of 1024x1024, 1536x1024 (landscape), 1024x1536 (portrait), or auto (default value) for gpt-image-1, one of 256x256, 512x512, or 1024x1024 for dall-e-2, and one of 1024x1024, 1792x1024, or 1024x1792 for dall-e-3. 84 | 85 | style 86 | string or null 87 | 88 | Optional 89 | Defaults to vivid 90 | The style of the generated images. This parameter is only supported for dall-e-3. Must be one of vivid or natural. Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images. 91 | 92 | user 93 | string 94 | 95 | Optional 96 | A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more. 97 | 98 | Returns 99 | Returns a list of image objects. 
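Aside (not part of the upstream API reference): the server in this repository calls this endpoint through the official openai npm package rather than raw HTTP (see CONTEXT.md). A minimal Node sketch of the equivalent call, assuming openai v4 and OPENAI_API_KEY set in the environment:

import OpenAI from "openai";

const openai = new OpenAI(); // reads OPENAI_API_KEY from the environment

const result = await openai.images.generate({
  model: "gpt-image-1",
  prompt: "A cute baby sea otter",
  n: 1,
  size: "1024x1024"
});
// gpt-image-1 always returns base64-encoded images
const b64 = result.data[0].b64_json;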
100 | 101 | Example request 102 | curl https://api.openai.com/v1/images/generations \ 103 | -H "Content-Type: application/json" \ 104 | -H "Authorization: Bearer $OPENAI_API_KEY" \ 105 | -d '{ 106 | "model": "gpt-image-1", 107 | "prompt": "A cute baby sea otter", 108 | "n": 1, 109 | "size": "1024x1024" 110 | }' 111 | Response 112 | { 113 | "created": 1713833628, 114 | "data": [ 115 | { 116 | "b64_json": "..." 117 | } 118 | ], 119 | "usage": { 120 | "total_tokens": 100, 121 | "input_tokens": 50, 122 | "output_tokens": 50, 123 | "input_tokens_details": { 124 | "text_tokens": 10, 125 | "image_tokens": 40 126 | } 127 | } 128 | } 129 | Create image edit 130 | post 131 | 132 | https://api.openai.com/v1/images/edits 133 | Creates an edited or extended image given one or more source images and a prompt. This endpoint only supports gpt-image-1 and dall-e-2. 134 | 135 | Request body 136 | image 137 | string or array 138 | 139 | Required 140 | The image(s) to edit. Must be a supported image file or an array of images. 141 | 142 | For gpt-image-1, each image should be a png, webp, or jpg file less than 25MB. You can provide up to 16 images. 143 | 144 | For dall-e-2, you can only provide one image, and it should be a square png file less than 4MB. 145 | 146 | prompt 147 | string 148 | 149 | Required 150 | A text description of the desired image(s). The maximum length is 1000 characters for dall-e-2, and 32000 characters for gpt-image-1. 151 | 152 | background 153 | string or null 154 | 155 | Optional 156 | Defaults to auto 157 | Allows to set transparency for the background of the generated image(s). This parameter is only supported for gpt-image-1. Must be one of transparent, opaque or auto (default value). When auto is used, the model will automatically determine the best background for the image. 158 | 159 | If transparent, the output format needs to support transparency, so it should be set to either png (default value) or webp. 160 | 161 | mask 162 | file 163 | 164 | Optional 165 | An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where image should be edited. If there are multiple images provided, the mask will be applied on the first image. Must be a valid PNG file, less than 4MB, and have the same dimensions as image. 166 | 167 | model 168 | string 169 | 170 | Optional 171 | Defaults to dall-e-2 172 | The model to use for image generation. Only dall-e-2 and gpt-image-1 are supported. Defaults to dall-e-2 unless a parameter specific to gpt-image-1 is used. 173 | 174 | n 175 | integer or null 176 | 177 | Optional 178 | Defaults to 1 179 | The number of images to generate. Must be between 1 and 10. 180 | 181 | quality 182 | string or null 183 | 184 | Optional 185 | Defaults to auto 186 | The quality of the image that will be generated. high, medium and low are only supported for gpt-image-1. dall-e-2 only supports standard quality. Defaults to auto. 187 | 188 | response_format 189 | string or null 190 | 191 | Optional 192 | Defaults to url 193 | The format in which the generated images are returned. Must be one of url or b64_json. URLs are only valid for 60 minutes after the image has been generated. This parameter is only supported for dall-e-2, as gpt-image-1 will always return base64-encoded images. 194 | 195 | size 196 | string or null 197 | 198 | Optional 199 | Defaults to 1024x1024 200 | The size of the generated images. 
Must be one of 1024x1024, 1536x1024 (landscape), 1024x1536 (portrait), or auto (default value) for gpt-image-1, and one of 256x256, 512x512, or 1024x1024 for dall-e-2. 201 | 202 | user 203 | string 204 | 205 | Optional 206 | A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more. 207 | 208 | Returns 209 | Returns a list of image objects. 210 | 211 | Example request 212 | curl -s -D >(grep -i x-request-id >&2) \ 213 | -o >(jq -r '.data[0].b64_json' | base64 --decode > gift-basket.png) \ 214 | -X POST "https://api.openai.com/v1/images/edits" \ 215 | -H "Authorization: Bearer $OPENAI_API_KEY" \ 216 | -F "model=gpt-image-1" \ 217 | -F "image[][email protected]" \ 218 | -F "image[][email protected]" \ 219 | -F "image[][email protected]" \ 220 | -F "image[][email protected]" \ 221 | -F 'prompt=Create a lovely gift basket with these four items in it' 222 | Response 223 | { 224 | "created": 1713833628, 225 | "data": [ 226 | { 227 | "b64_json": "..." 228 | } 229 | ], 230 | "usage": { 231 | "total_tokens": 100, 232 | "input_tokens": 50, 233 | "output_tokens": 50, 234 | "input_tokens_details": { 235 | "text_tokens": 10, 236 | "image_tokens": 40 237 | } 238 | } 239 | } 240 | Create image variation 241 | post 242 | 243 | https://api.openai.com/v1/images/variations 244 | Creates a variation of a given image. This endpoint only supports dall-e-2. 245 | 246 | Request body 247 | image 248 | file 249 | 250 | Required 251 | The image to use as the basis for the variation(s). Must be a valid PNG file, less than 4MB, and square. 252 | 253 | model 254 | string or "dall-e-2" 255 | 256 | Optional 257 | Defaults to dall-e-2 258 | The model to use for image generation. Only dall-e-2 is supported at this time. 259 | 260 | n 261 | integer or null 262 | 263 | Optional 264 | Defaults to 1 265 | The number of images to generate. Must be between 1 and 10. 266 | 267 | response_format 268 | string or null 269 | 270 | Optional 271 | Defaults to url 272 | The format in which the generated images are returned. Must be one of url or b64_json. URLs are only valid for 60 minutes after the image has been generated. 273 | 274 | size 275 | string or null 276 | 277 | Optional 278 | Defaults to 1024x1024 279 | The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024. 280 | 281 | user 282 | string 283 | 284 | Optional 285 | A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more. 286 | 287 | Returns 288 | Returns a list of image objects. 289 | 290 | Example request 291 | curl https://api.openai.com/v1/images/variations \ 292 | -H "Authorization: Bearer $OPENAI_API_KEY" \ 293 | -F image="@otter.png" \ 294 | -F n=2 \ 295 | -F size="1024x1024" 296 | Response 297 | { 298 | "created": 1589478378, 299 | "data": [ 300 | { 301 | "url": "https://..." 302 | }, 303 | { 304 | "url": "https://..." 305 | } 306 | ] 307 | } 308 | The image generation response 309 | The response from the image generation endpoint. 310 | 311 | created 312 | integer 313 | 314 | The Unix timestamp (in seconds) of when the image was created. 315 | 316 | data 317 | array 318 | 319 | The list of generated images. 320 | 321 | 322 | Show properties 323 | usage 324 | object 325 | 326 | For gpt-image-1 only, the token usage information for the image generation. 327 | 328 | 329 | Show properties 330 | OBJECT The image generation response 331 | { 332 | "created": 1713833628, 333 | "data": [ 334 | { 335 | "b64_json": "..." 
336 | } 337 | ], 338 | "usage": { 339 | "total_tokens": 100, 340 | "input_tokens": 50, 341 | "output_tokens": 50, 342 | "input_tokens_details": { 343 | "text_tokens": 10, 344 | "image_tokens": 40 345 | } 346 | } 347 | } 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | ``` -------------------------------------------------------------------------------- /CONTEXT.md: -------------------------------------------------------------------------------- ```markdown 1 | # GPT-Image-1 MCP Server: Project Context 2 | 3 | This document provides a comprehensive overview of the GPT-Image-1 MCP Server project, including its architecture, functionality, implementation details, and development history. It's designed to quickly bring developers and AI assistants up to speed on all aspects of the project. 4 | 5 | ## Project Overview 6 | 7 | The GPT-Image-1 MCP Server is a Node.js application that implements the Model Context Protocol (MCP) to provide image generation and editing capabilities using OpenAI's gpt-image-1 model. It serves as a bridge between MCP clients (like Roo or VS Code extensions) and the OpenAI API, allowing users to generate and edit images using natural language prompts. 8 | 9 | ## Core Functionality 10 | 11 | ### Image Generation 12 | 13 | The server provides the `create_image` tool, which: 14 | 1. Accepts a text prompt and optional parameters 15 | 2. Validates the input using Zod schemas 16 | 3. Calls the OpenAI API's images.generate endpoint 17 | 4. Saves the generated images to a configurable output directory 18 | 5. Returns a formatted response with image paths, base64 data, and metadata 19 | 20 | ### Image Editing 21 | 22 | The server provides the `create_image_edit` tool, which: 23 | 1. Accepts an image (as base64 or file path), a text prompt, and an optional mask 24 | 2. Supports both base64-encoded images and file paths 25 | 3. Uses a direct curl command to ensure proper MIME type handling 26 | 4. Calls the OpenAI API's images.edit endpoint 27 | 5. Saves the edited images to the configured output directory 28 | 6. Returns a formatted response with image paths, base64 data, and metadata 29 | 30 | ## Technical Architecture 31 | 32 | ### Project Structure 33 | 34 | ``` 35 | gpt-image-1-server/ 36 | ├── src/ # TypeScript source code 37 | │ └── index.ts # Main server implementation 38 | ├── build/ # Compiled JavaScript (output of build process) 39 | ├── generated-images/ # Default location for saved images (created at runtime) 40 | ├── node_modules/ # Dependencies (not in version control) 41 | ├── .gitignore # Git ignore configuration 42 | ├── package.json # Project configuration and dependencies 43 | ├── tsconfig.json # TypeScript compiler configuration 44 | ├── README.md # User documentation 45 | ├── CHANGELOG.md # Version history and changes 46 | └── CONTEXT.md # This comprehensive project overview 47 | ``` 48 | 49 | ### Dependencies 50 | 51 | The server relies on several key dependencies: 52 | - `@modelcontextprotocol/sdk`: For implementing the MCP protocol 53 | - `openai`: The official OpenAI SDK for API access 54 | - `zod`: For input validation and type safety 55 | - `node-fetch`: For making HTTP requests 56 | - `form-data`: For handling multipart/form-data requests 57 | - `child_process`: For executing curl commands 58 | 59 | ### Implementation Details 60 | 61 | #### MCP Server Setup 62 | 63 | The server is implemented using the MCP SDK's `McpServer` class. It registers two tools: 64 | 1. `create_image`: For generating images 65 | 2. 
`create_image_edit`: For editing images 66 | 67 | Each tool has a defined schema for its parameters and a handler function that processes requests. 68 | 69 | #### Image Generation Implementation 70 | 71 | The image generation functionality uses the OpenAI SDK directly: 72 | 73 | ```typescript 74 | const response = await openai.images.generate({ 75 | model: "gpt-image-1", 76 | prompt: args.prompt, 77 | n: args.n || 1, 78 | size: args.size || "1024x1024", 79 | quality: args.quality || "high", 80 | // ... other parameters 81 | }); 82 | ``` 83 | 84 | The server then processes the response, saves the images to disk, and returns a formatted response. 85 | 86 | #### Image Editing Implementation 87 | 88 | The image editing functionality uses a direct curl command for better MIME type handling: 89 | 90 | ```typescript 91 | // Build the curl command 92 | let curlCommand = `curl -s -X POST "https://api.openai.com/v1/images/edits" -H "Authorization: Bearer ${process.env.OPENAI_API_KEY}"`; 93 | 94 | // Add parameters 95 | curlCommand += ` -F "model=gpt-image-1"`; 96 | curlCommand += ` -F "prompt=${args.prompt}"`; 97 | curlCommand += ` -F "image[]=@${imageFile}"`; 98 | // ... other parameters 99 | 100 | // Execute the command 101 | execSync(curlCommand, { stdio: ['pipe', 'pipe', 'inherit'] }); 102 | ``` 103 | 104 | This approach ensures proper handling of file uploads with correct MIME types. 105 | 106 | #### Image Saving 107 | 108 | Images are saved to a configurable output directory: 109 | 110 | ```typescript 111 | function saveImageToDisk(base64Data: string, format: string = 'png'): string { 112 | // Determine the output directory 113 | const outputDir = process.env.GPT_IMAGE_OUTPUT_DIR || path.join(process.cwd(), 'generated-images'); 114 | 115 | // Create the directory if it doesn't exist 116 | if (!fs.existsSync(outputDir)) { 117 | fs.mkdirSync(outputDir, { recursive: true }); 118 | } 119 | 120 | // Generate a filename with timestamp 121 | const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); 122 | const filename = `image-${timestamp}.${format}`; 123 | const outputPath = path.join(outputDir, filename); 124 | 125 | // Save the image 126 | fs.writeFileSync(outputPath, Buffer.from(base64Data, 'base64')); 127 | 128 | return outputPath; 129 | } 130 | ``` 131 | 132 | #### Response Formatting 133 | 134 | The server provides beautifully formatted responses with emojis and detailed information: 135 | 136 | ``` 137 | 🎨 **Image Generated Successfully!** 138 | 139 | 📝 **Prompt**: A futuristic city skyline at sunset, digital art 140 | 141 | 📁 **Saved 1 Image**: 142 | 1. 
C:\Users\username\project\generated-images\image-2025-05-05T12-34-56-789Z.png 143 | 144 | ⚡ **Token Usage**: 145 | • Total Tokens: 123 146 | • Input Tokens: 45 147 | • Output Tokens: 78 148 | ``` 149 | 150 | ## Configuration 151 | 152 | ### Environment Variables 153 | 154 | The server uses the following environment variables: 155 | 156 | | Variable | Required | Description | 157 | |----------|----------|-------------| 158 | | `OPENAI_API_KEY` | Yes | OpenAI API key with access to the gpt-image-1 model | 159 | | `GPT_IMAGE_OUTPUT_DIR` | No | Custom directory for saving generated images (defaults to `./generated-images`) | 160 | 161 | ### MCP Client Configuration 162 | 163 | To use the server with an MCP client, the following configuration is needed: 164 | 165 | ```json 166 | { 167 | "mcpServers": { 168 | "gpt-image-1": { 169 | "command": "node", 170 | "args": ["<path-to-project>/build/index.js"], 171 | "env": { 172 | "OPENAI_API_KEY": "sk-your-openai-api-key", 173 | "GPT_IMAGE_OUTPUT_DIR": "C:/path/to/output/directory" // Optional 174 | }, 175 | "disabled": false, 176 | "alwaysAllow": [] 177 | } 178 | } 179 | } 180 | ``` 181 | 182 | ## Development History 183 | 184 | ### Version 1.0.0 (May 4, 2025) 185 | 186 | The initial release included: 187 | - Basic implementation of the `create_image` and `create_image_edit` tools 188 | - Support for all gpt-image-1 specific parameters 189 | - Basic error handling 190 | - Initial documentation 191 | 192 | ### Version 1.1.0 (May 5, 2025) 193 | 194 | Major improvements included: 195 | - Added file path support for the `create_image_edit` tool 196 | - Fixed the build structure to output to the root build directory 197 | - Enhanced output formatting with emojis and detailed information 198 | - Added configurable output directory via environment variable 199 | - Improved MIME type handling for image uploads 200 | - Enhanced error handling and cleanup processes 201 | - Added comprehensive documentation 202 | - Added proper .gitignore file 203 | 204 | ## Key Challenges and Solutions 205 | 206 | ### MIME Type Handling 207 | 208 | **Challenge**: The OpenAI SDK didn't properly handle MIME types for file uploads in the image edit endpoint. 209 | 210 | **Solution**: Implemented a direct curl command approach that ensures proper MIME type handling: 211 | ```typescript 212 | curlCommand += ` -F "image[]=@${imageFile}"`; 213 | ``` 214 | 215 | ### File Path Support 216 | 217 | **Challenge**: The original implementation only supported base64-encoded images. 218 | 219 | **Solution**: Added support for file paths by: 220 | 1. Detecting if the input is a file path object 221 | 2. Reading the file from disk 222 | 3. Handling the file appropriately based on whether using the SDK or curl approach 223 | 224 | ### Build Structure 225 | 226 | **Challenge**: The build process was outputting to a directory inside the src folder. 227 | 228 | **Solution**: Updated the tsconfig.json to output to the root build directory: 229 | ```json 230 | { 231 | "compilerOptions": { 232 | "outDir": "./build", 233 | // other options... 
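    // (Aside: the shipped src/tsconfig.json expresses this as
    // "outDir": "../build", since the config file lives inside src/ —
    // both point at the same root-level build directory.)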
234 | } 235 | } 236 | ``` 237 | 238 | ## Usage Examples 239 | 240 | ### Generating an Image 241 | 242 | ```xml 243 | <use_mcp_tool> 244 | <server_name>gpt-image-1</server_name> 245 | <tool_name>create_image</tool_name> 246 | <arguments> 247 | { 248 | "prompt": "A futuristic city skyline at sunset, digital art", 249 | "size": "1024x1024", 250 | "quality": "high" 251 | } 252 | </arguments> 253 | </use_mcp_tool> 254 | ``` 255 | 256 | ### Editing an Image with File Path 257 | 258 | ```xml 259 | <use_mcp_tool> 260 | <server_name>gpt-image-1</server_name> 261 | <tool_name>create_image_edit</tool_name> 262 | <arguments> 263 | { 264 | "image": { 265 | "filePath": "C:/path/to/your/image.png" 266 | }, 267 | "prompt": "Add a small robot in the corner", 268 | "quality": "high" 269 | } 270 | </arguments> 271 | </use_mcp_tool> 272 | ``` 273 | 274 | ## Future Improvements 275 | 276 | Potential areas for future development: 277 | 1. Add support for the DALL-E 3 model 278 | 2. Implement image variation functionality 279 | 3. Add batch processing capabilities 280 | 4. Create a web interface for easier testing 281 | 5. Add support for more image formats 282 | 6. Implement caching to reduce API calls 283 | 7. Add unit and integration tests 284 | 285 | ## Troubleshooting Guide 286 | 287 | ### Common Issues 288 | 289 | 1. **MIME Type Errors**: Ensure image files have the correct extension (.png, .jpg, etc.) that matches their actual format. 290 | 291 | 2. **API Key Issues**: Verify your OpenAI API key is correct and has access to the gpt-image-1 model. 292 | 293 | 3. **Build Errors**: Ensure you have the correct TypeScript version installed and that your tsconfig.json is properly configured. 294 | 295 | 4. **File Path Issues**: Make sure file paths are absolute or correctly relative to the current working directory. 296 | 297 | 5. **Output Directory Issues**: Check if the process has write permissions to the configured output directory. 298 | 299 | ## Conclusion 300 | 301 | The GPT-Image-1 MCP Server provides a robust and user-friendly interface to OpenAI's image generation capabilities. With features like file path support, configurable output directories, and detailed response formatting, it enhances the image generation experience for users of MCP-compatible clients. 302 | 303 | This document should provide a comprehensive understanding of the project's architecture, functionality, and development history, enabling developers and AI assistants to quickly get up to speed and contribute effectively. 
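
## Appendix: Minimal Client Smoke Test

The testing work listed under Future Improvements could start from a small programmatic MCP client. The sketch below is illustrative and untested; it assumes the `Client` and `StdioClientTransport` exports of `@modelcontextprotocol/sdk` 1.x and a completed `npm run build`:

```typescript
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";

// Spawn the built server over stdio, exactly as an MCP client host would.
const transport = new StdioClientTransport({
  command: "node",
  args: ["build/index.js"],
  env: { OPENAI_API_KEY: process.env.OPENAI_API_KEY ?? "" }
});

const client = new Client({ name: "smoke-test", version: "0.0.1" });
await client.connect(transport);

// Invoke the create_image tool and print the returned content blocks.
const result = await client.callTool({
  name: "create_image",
  arguments: { prompt: "A red square on a white background", n: 1 }
});
console.log(result.content);
```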
304 | ``` -------------------------------------------------------------------------------- /.docs/typescript-sdk mcp README.md: -------------------------------------------------------------------------------- ```markdown 1 | # MCP TypeScript SDK   2 | 3 | ## Table of Contents 4 | - [Overview](#overview) 5 | - [Installation](#installation) 6 | - [Quickstart](#quickstart) 7 | - [What is MCP?](#what-is-mcp) 8 | - [Core Concepts](#core-concepts) 9 | - [Server](#server) 10 | - [Resources](#resources) 11 | - [Tools](#tools) 12 | - [Prompts](#prompts) 13 | - [Running Your Server](#running-your-server) 14 | - [stdio](#stdio) 15 | - [Streamable HTTP](#streamable-http) 16 | - [Testing and Debugging](#testing-and-debugging) 17 | - [Examples](#examples) 18 | - [Echo Server](#echo-server) 19 | - [SQLite Explorer](#sqlite-explorer) 20 | - [Advanced Usage](#advanced-usage) 21 | - [Low-Level Server](#low-level-server) 22 | - [Writing MCP Clients](#writing-mcp-clients) 23 | - [Server Capabilities](#server-capabilities) 24 | - [Proxy OAuth Server](#proxy-authorization-requests-upstream) 25 | - [Backwards Compatibility](#backwards-compatibility) 26 | 27 | ## Overview 28 | 29 | The Model Context Protocol allows applications to provide context for LLMs in a standardized way, separating the concerns of providing context from the actual LLM interaction. This TypeScript SDK implements the full MCP specification, making it easy to: 30 | 31 | - Build MCP clients that can connect to any MCP server 32 | - Create MCP servers that expose resources, prompts and tools 33 | - Use standard transports like stdio and Streamable HTTP 34 | - Handle all MCP protocol messages and lifecycle events 35 | 36 | ## Installation 37 | 38 | ```bash 39 | npm install @modelcontextprotocol/sdk 40 | ``` 41 | 42 | ## Quick Start 43 | 44 | Let's create a simple MCP server that exposes a calculator tool and some data: 45 | 46 | ```typescript 47 | import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js"; 48 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; 49 | import { z } from "zod"; 50 | 51 | // Create an MCP server 52 | const server = new McpServer({ 53 | name: "Demo", 54 | version: "1.0.0" 55 | }); 56 | 57 | // Add an addition tool 58 | server.tool("add", 59 | { a: z.number(), b: z.number() }, 60 | async ({ a, b }) => ({ 61 | content: [{ type: "text", text: String(a + b) }] 62 | }) 63 | ); 64 | 65 | // Add a dynamic greeting resource 66 | server.resource( 67 | "greeting", 68 | new ResourceTemplate("greeting://{name}", { list: undefined }), 69 | async (uri, { name }) => ({ 70 | contents: [{ 71 | uri: uri.href, 72 | text: `Hello, ${name}!` 73 | }] 74 | }) 75 | ); 76 | 77 | // Start receiving messages on stdin and sending messages on stdout 78 | const transport = new StdioServerTransport(); 79 | await server.connect(transport); 80 | ``` 81 | 82 | ## What is MCP? 83 | 84 | The [Model Context Protocol (MCP)](https://modelcontextprotocol.io) lets you build servers that expose data and functionality to LLM applications in a secure, standardized way. Think of it like a web API, but specifically designed for LLM interactions. 
MCP servers can: 85 | 86 | - Expose data through **Resources** (think of these sort of like GET endpoints; they are used to load information into the LLM's context) 87 | - Provide functionality through **Tools** (sort of like POST endpoints; they are used to execute code or otherwise produce a side effect) 88 | - Define interaction patterns through **Prompts** (reusable templates for LLM interactions) 89 | - And more! 90 | 91 | ## Core Concepts 92 | 93 | ### Server 94 | 95 | The McpServer is your core interface to the MCP protocol. It handles connection management, protocol compliance, and message routing: 96 | 97 | ```typescript 98 | const server = new McpServer({ 99 | name: "My App", 100 | version: "1.0.0" 101 | }); 102 | ``` 103 | 104 | ### Resources 105 | 106 | Resources are how you expose data to LLMs. They're similar to GET endpoints in a REST API - they provide data but shouldn't perform significant computation or have side effects: 107 | 108 | ```typescript 109 | // Static resource 110 | server.resource( 111 | "config", 112 | "config://app", 113 | async (uri) => ({ 114 | contents: [{ 115 | uri: uri.href, 116 | text: "App configuration here" 117 | }] 118 | }) 119 | ); 120 | 121 | // Dynamic resource with parameters 122 | server.resource( 123 | "user-profile", 124 | new ResourceTemplate("users://{userId}/profile", { list: undefined }), 125 | async (uri, { userId }) => ({ 126 | contents: [{ 127 | uri: uri.href, 128 | text: `Profile data for user ${userId}` 129 | }] 130 | }) 131 | ); 132 | ``` 133 | 134 | ### Tools 135 | 136 | Tools let LLMs take actions through your server. Unlike resources, tools are expected to perform computation and have side effects: 137 | 138 | ```typescript 139 | // Simple tool with parameters 140 | server.tool( 141 | "calculate-bmi", 142 | { 143 | weightKg: z.number(), 144 | heightM: z.number() 145 | }, 146 | async ({ weightKg, heightM }) => ({ 147 | content: [{ 148 | type: "text", 149 | text: String(weightKg / (heightM * heightM)) 150 | }] 151 | }) 152 | ); 153 | 154 | // Async tool with external API call 155 | server.tool( 156 | "fetch-weather", 157 | { city: z.string() }, 158 | async ({ city }) => { 159 | const response = await fetch(`https://api.weather.com/${city}`); 160 | const data = await response.text(); 161 | return { 162 | content: [{ type: "text", text: data }] 163 | }; 164 | } 165 | ); 166 | ``` 167 | 168 | ### Prompts 169 | 170 | Prompts are reusable templates that help LLMs interact with your server effectively: 171 | 172 | ```typescript 173 | server.prompt( 174 | "review-code", 175 | { code: z.string() }, 176 | ({ code }) => ({ 177 | messages: [{ 178 | role: "user", 179 | content: { 180 | type: "text", 181 | text: `Please review this code:\n\n${code}` 182 | } 183 | }] 184 | }) 185 | ); 186 | ``` 187 | 188 | ## Running Your Server 189 | 190 | MCP servers in TypeScript need to be connected to a transport to communicate with clients. How you start the server depends on the choice of transport: 191 | 192 | ### stdio 193 | 194 | For command-line tools and direct integrations: 195 | 196 | ```typescript 197 | import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; 198 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; 199 | 200 | const server = new McpServer({ 201 | name: "example-server", 202 | version: "1.0.0" 203 | }); 204 | 205 | // ... set up server resources, tools, and prompts ... 
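// For illustration only (not part of the original snippet): a tool could be
// registered here, e.g. a trivial echo tool. This assumes `z` is imported
// from "zod" as in the Quick Start example above.
server.tool(
  "echo",
  { message: z.string() },
  async ({ message }) => ({
    content: [{ type: "text", text: `Echo: ${message}` }]
  })
);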
206 | 207 | const transport = new StdioServerTransport(); 208 | await server.connect(transport); 209 | ``` 210 | 211 | ### Streamable HTTP 212 | 213 | For remote servers, set up a Streamable HTTP transport that handles both client requests and server-to-client notifications. 214 | 215 | #### With Session Management 216 | 217 | In some cases, servers need to be stateful. This is achieved by [session management](https://modelcontextprotocol.io/specification/2025-03-26/basic/transports#session-management). 218 | 219 | ```typescript 220 | import express from "express"; 221 | import { randomUUID } from "node:crypto"; 222 | import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; 223 | import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js"; 224 | import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js" 225 | 226 | 227 | 228 | const app = express(); 229 | app.use(express.json()); 230 | 231 | // Map to store transports by session ID 232 | const transports: { [sessionId: string]: StreamableHTTPServerTransport } = {}; 233 | 234 | // Handle POST requests for client-to-server communication 235 | app.post('/mcp', async (req, res) => { 236 | // Check for existing session ID 237 | const sessionId = req.headers['mcp-session-id'] as string | undefined; 238 | let transport: StreamableHTTPServerTransport; 239 | 240 | if (sessionId && transports[sessionId]) { 241 | // Reuse existing transport 242 | transport = transports[sessionId]; 243 | } else if (!sessionId && isInitializeRequest(req.body)) { 244 | // New initialization request 245 | transport = new StreamableHTTPServerTransport({ 246 | sessionIdGenerator: () => randomUUID(), 247 | onsessioninitialized: (sessionId) => { 248 | // Store the transport by session ID 249 | transports[sessionId] = transport; 250 | } 251 | }); 252 | 253 | // Clean up transport when closed 254 | transport.onclose = () => { 255 | if (transport.sessionId) { 256 | delete transports[transport.sessionId]; 257 | } 258 | }; 259 | const server = new McpServer({ 260 | name: "example-server", 261 | version: "1.0.0" 262 | }); 263 | 264 | // ... set up server resources, tools, and prompts ... 
265 |
266 | // Connect to the MCP server
267 | await server.connect(transport);
268 | } else {
269 | // Invalid request
270 | res.status(400).json({
271 | jsonrpc: '2.0',
272 | error: {
273 | code: -32000,
274 | message: 'Bad Request: No valid session ID provided',
275 | },
276 | id: null,
277 | });
278 | return;
279 | }
280 |
281 | // Handle the request
282 | await transport.handleRequest(req, res, req.body);
283 | });
284 |
285 | // Reusable handler for GET and DELETE requests
286 | const handleSessionRequest = async (req: express.Request, res: express.Response) => {
287 | const sessionId = req.headers['mcp-session-id'] as string | undefined;
288 | if (!sessionId || !transports[sessionId]) {
289 | res.status(400).send('Invalid or missing session ID');
290 | return;
291 | }
292 |
293 | const transport = transports[sessionId];
294 | await transport.handleRequest(req, res);
295 | };
296 |
297 | // Handle GET requests for server-to-client notifications via SSE
298 | app.get('/mcp', handleSessionRequest);
299 |
300 | // Handle DELETE requests for session termination
301 | app.delete('/mcp', handleSessionRequest);
302 |
303 | app.listen(3000);
304 | ```
305 |
306 | #### Without Session Management (Stateless)
307 |
308 | For simpler use cases where session management isn't needed:
309 |
310 | ```typescript
311 | import express, { Request, Response } from "express"; const app = express();
312 | app.use(express.json());
313 |
314 | app.post('/mcp', async (req: Request, res: Response) => {
315 | // In stateless mode, create a new instance of transport and server for each request
316 | // to ensure complete isolation. A single instance would cause request ID collisions
317 | // when multiple clients connect concurrently.
318 |
319 | try {
320 | const server = getServer(); // getServer() is your own factory that returns a fresh, fully configured McpServer
321 | const transport: StreamableHTTPServerTransport = new StreamableHTTPServerTransport({
322 | sessionIdGenerator: undefined,
323 | });
324 | res.on('close', () => {
325 | console.log('Request closed');
326 | transport.close();
327 | server.close();
328 | });
329 | await server.connect(transport);
330 | await transport.handleRequest(req, res, req.body);
331 | } catch (error) {
332 | console.error('Error handling MCP request:', error);
333 | if (!res.headersSent) {
334 | res.status(500).json({
335 | jsonrpc: '2.0',
336 | error: {
337 | code: -32603,
338 | message: 'Internal server error',
339 | },
340 | id: null,
341 | });
342 | }
343 | }
344 | });
345 |
346 | app.get('/mcp', async (req: Request, res: Response) => {
347 | console.log('Received GET MCP request');
348 | res.writeHead(405).end(JSON.stringify({
349 | jsonrpc: "2.0",
350 | error: {
351 | code: -32000,
352 | message: "Method not allowed."
365 | },
366 | id: null
367 | }));
368 | });
369 |
370 |
371 | // Start the server
372 | const PORT = 3000;
373 | app.listen(PORT, () => {
374 | console.log(`MCP Stateless Streamable HTTP Server listening on port ${PORT}`);
375 | });
376 |
377 | ```
378 |
379 | This stateless approach is useful for:
380 | - Simple API wrappers
381 | - RESTful scenarios where each request is independent
382 | - Horizontally scaled deployments without shared session state
383 |
384 | ### Testing and Debugging
385 |
386 | To test your server, you can use the [MCP Inspector](https://github.com/modelcontextprotocol/inspector). See its README for more information.
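For example, with a stdio server whose built entry point is `build/index.js` (that path is illustrative and depends on your build setup), you can launch the Inspector against it directly:

```bash
npx @modelcontextprotocol/inspector node build/index.js
```

This starts a local web UI where you can list the server's tools, resources, and prompts and invoke them interactively.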
387 |
388 | ## Examples
389 |
390 | ### Echo Server
391 |
392 | A simple server demonstrating resources, tools, and prompts:
393 |
394 | ```typescript
395 | import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
396 | import { z } from "zod";
397 |
398 | const server = new McpServer({
399 | name: "Echo",
400 | version: "1.0.0"
401 | });
402 |
403 | server.resource(
404 | "echo",
405 | new ResourceTemplate("echo://{message}", { list: undefined }),
406 | async (uri, { message }) => ({
407 | contents: [{
408 | uri: uri.href,
409 | text: `Resource echo: ${message}`
410 | }]
411 | })
412 | );
413 |
414 | server.tool(
415 | "echo",
416 | { message: z.string() },
417 | async ({ message }) => ({
418 | content: [{ type: "text", text: `Tool echo: ${message}` }]
419 | })
420 | );
421 |
422 | server.prompt(
423 | "echo",
424 | { message: z.string() },
425 | ({ message }) => ({
426 | messages: [{
427 | role: "user",
428 | content: {
429 | type: "text",
430 | text: `Please process this message: ${message}`
431 | }
432 | }]
433 | })
434 | );
435 | ```
436 |
437 | ### SQLite Explorer
438 |
439 | A more complex example showing database integration:
440 |
441 | ```typescript
442 | import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
443 | import sqlite3 from "sqlite3";
444 | import { promisify } from "util";
445 | import { z } from "zod";
446 |
447 | const server = new McpServer({
448 | name: "SQLite Explorer",
449 | version: "1.0.0"
450 | });
451 |
452 | // Helper to create DB connection
453 | const getDb = () => {
454 | const db = new sqlite3.Database("database.db");
455 | return {
456 | all: promisify<string, any[]>(db.all.bind(db)),
457 | close: promisify(db.close.bind(db))
458 | };
459 | };
460 |
461 | server.resource(
462 | "schema",
463 | "schema://main",
464 | async (uri) => {
465 | const db = getDb();
466 | try {
467 | const tables = await db.all(
468 | "SELECT sql FROM sqlite_master WHERE type='table'"
469 | );
470 | return {
471 | contents: [{
472 | uri: uri.href,
473 | text: tables.map((t: {sql: string}) => t.sql).join("\n")
474 | }]
475 | };
476 | } finally {
477 | await db.close();
478 | }
479 | }
480 | );
481 |
482 | server.tool(
483 | "query",
484 | { sql: z.string() },
485 | async ({ sql }) => {
486 | const db = getDb();
487 | try {
488 | const results = await db.all(sql);
489 | return {
490 | content: [{
491 | type: "text",
492 | text: JSON.stringify(results, null, 2)
493 | }]
494 | };
495 | } catch (err: unknown) {
496 | const error = err as Error;
497 | return {
498 | content: [{
499 | type: "text",
500 | text: `Error: ${error.message}`
501 | }],
502 | isError: true
503 | };
504 | } finally {
505 | await db.close();
506 | }
507 | }
508 | );
509 | ```
510 |
511 | ## Advanced Usage
512 |
513 | ### Dynamic Servers
514 |
515 | If you want to offer an initial set of tools/prompts/resources, but later add additional ones based on user action or external state change, you can add/update/remove them _after_ the Server is connected. This will automatically emit the corresponding `listChanged` notifications:
516 |
517 | ```ts
518 | import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
519 | import { z } from "zod";
520 |
521 | const server = new McpServer({
522 | name: "Dynamic Example",
523 | version: "1.0.0"
524 | });
525 |
526 | const listMessageTool = server.tool(
527 | "listMessages",
528 | { channel: z.string() },
529 | async ({ channel }) => ({
530 | content: [{ type: "text", text: await listMessages(channel) }]
531 | })
532 | );
533 |
534 | const putMessageTool = server.tool(
535 | "putMessage",
536 | { channel: z.string(), message: z.string() },
537 | async ({ channel, message }) => ({
538 | content: [{ type: "text", text: await putMessage(channel, message) }]
539 | })
540 | );
541 | // Until we upgrade auth, `putMessage` is disabled (won't show up in listTools)
542 | putMessageTool.disable()
543 |
544 | const upgradeAuthTool = server.tool(
545 | "upgradeAuth",
546 | { permission: z.enum(["write", "admin"])},
547 | // Any mutations here will automatically emit `listChanged` notifications
548 | async ({ permission }) => {
549 | const { ok, err, previous } = await upgradeAuthAndStoreToken(permission)
550 | if (!ok) return {content: [{ type: "text", text: `Error: ${err}` }]}
551 |
552 | // If we previously had read-only access, 'putMessage' is now available
553 | if (previous === "read") {
554 | putMessageTool.enable()
555 | }
556 |
557 | if (permission === 'write') {
558 | // If we've just upgraded to 'write' permissions, we can still call 'upgradeAuth'
559 | // but can only upgrade to 'admin'.
560 | upgradeAuthTool.update({
561 | paramSchema: { permission: z.enum(["admin"]) }, // change validation rules
562 | })
563 | } else {
564 | // If we're now an admin, we no longer have anywhere to upgrade to, so fully remove that tool
565 | upgradeAuthTool.remove()
566 | }
567 | return { content: [{ type: "text", text: `Auth upgraded to ${permission}` }] }
568 | }
569 | )
570 |
571 | // Connect as normal
572 | const transport = new StdioServerTransport();
573 | await server.connect(transport);
574 | ```
575 |
576 | ### Low-Level Server
577 |
578 | For more control, you can use the low-level Server class directly:
579 |
580 | ```typescript
581 | import { Server } from "@modelcontextprotocol/sdk/server/index.js";
582 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
583 | import {
584 | ListPromptsRequestSchema,
585 | GetPromptRequestSchema
586 | } from "@modelcontextprotocol/sdk/types.js";
587 |
588 | const server = new Server(
589 | {
590 | name: "example-server",
591 | version: "1.0.0"
592 | },
593 | {
594 | capabilities: {
595 | prompts: {}
596 | }
597 | }
598 | );
599 |
600 | server.setRequestHandler(ListPromptsRequestSchema, async () => {
601 | return {
602 | prompts: [{
603 | name: "example-prompt",
604 | description: "An example prompt template",
605 | arguments: [{
606 | name: "arg1",
607 | description: "Example argument",
608 | required: true
609 | }]
610 | }]
611 | };
612 | });
613 |
614 | server.setRequestHandler(GetPromptRequestSchema, async (request) => {
615 | if (request.params.name !== "example-prompt") {
616 | throw new Error("Unknown prompt");
617 | }
618 | return {
619 | description: "Example prompt",
620 | messages: [{
621 | role: "user",
622 | content: {
623 | type: "text",
624 | text: "Example prompt text"
625 | }
626 | }]
627 | };
628 | });
629 |
630 | const transport = new StdioServerTransport();
631 | await server.connect(transport);
632 | ```
633 |
634 | ### Writing MCP Clients
635 |
636 | The SDK provides a high-level client interface:
637 |
638 | ```typescript
639 | import { Client } from "@modelcontextprotocol/sdk/client/index.js";
640 | import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
641 |
642 | const transport = new StdioClientTransport({
643 | command: "node",
644 | args: ["server.js"]
645 | });
646 |
647 | const client = new Client(
648 | {
649 | name: "example-client",
650 | version: "1.0.0"
651 | }
652 | );
653 |
654 | await client.connect(transport);
655 |
656 | // List prompts
657 | const prompts = await client.listPrompts();
658 |
659 | // Get a prompt
660 | const prompt = await client.getPrompt({
661 | name: "example-prompt",
662 | arguments: {
663 | arg1: "value"
664 | }
665 | });
666 |
667 | // List resources
668 | const resources = await client.listResources();
669 |
670 | // Read a resource
671 | const resource = await client.readResource({
672 | uri: "file:///example.txt"
673 | });
674 |
675 | // Call a tool
676 | const result = await client.callTool({
677 | name: "example-tool",
678 | arguments: {
679 | arg1: "value"
680 | }
681 | });
682 | ```
683 |
684 | ### Proxy Authorization Requests Upstream
685 |
686 | You can proxy OAuth requests to an external authorization provider:
687 |
688 | ```typescript
689 | import express from 'express';
690 | import { ProxyOAuthServerProvider, mcpAuthRouter } from '@modelcontextprotocol/sdk';
691 |
692 | const app = express();
693 |
694 | const proxyProvider = new ProxyOAuthServerProvider({
695 | endpoints: {
696 | authorizationUrl: "https://auth.external.com/oauth2/v1/authorize",
697 | tokenUrl: "https://auth.external.com/oauth2/v1/token",
698 | revocationUrl: "https://auth.external.com/oauth2/v1/revoke",
699 | },
700 | verifyAccessToken: async (token) => {
701 | return {
702 | token,
703 | clientId: "123",
704 | scopes: ["openid", "email", "profile"],
705 | }
706 | },
707 | getClient: async (client_id) => {
708 | return {
709 | client_id,
710 | redirect_uris: ["http://localhost:3000/callback"],
711 | }
712 | }
713 | })
714 |
715 | app.use(mcpAuthRouter({
716 | provider: proxyProvider,
717 | issuerUrl: new URL("http://auth.external.com"),
718 | baseUrl: new URL("http://mcp.example.com"),
719 | serviceDocumentationUrl: new URL("https://docs.example.com/"),
720 | }))
721 | ```
722 |
723 | This setup allows you to:
724 | - Forward OAuth requests to an external provider
725 | - Add custom token validation logic
726 | - Manage client registrations
727 | - Provide custom documentation URLs
728 | - Maintain control over the OAuth flow while delegating to an external provider
729 |
730 | ### Backwards Compatibility
731 |
732 | Clients and servers with Streamable HTTP transport can maintain [backwards compatibility](https://modelcontextprotocol.io/specification/2025-03-26/basic/transports#backwards-compatibility) with the deprecated HTTP+SSE transport (from protocol version 2024-11-05) as follows:
733 |
734 | #### Client-Side Compatibility
735 |
736 | For clients that need to work with both Streamable HTTP and older SSE servers:
737 |
738 | ```typescript
739 | import { Client } from "@modelcontextprotocol/sdk/client/index.js";
740 | import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js";
741 | import { SSEClientTransport } from "@modelcontextprotocol/sdk/client/sse.js";
742 | let client: Client|undefined = undefined
743 | const baseUrl = new URL(url);
744 | try {
745 |
client = new Client({ 745 | name: 'streamable-http-client', 746 | version: '1.0.0' 747 | }); 748 | const transport = new StreamableHTTPClientTransport( 749 | new URL(baseUrl) 750 | ); 751 | await client.connect(transport); 752 | console.log("Connected using Streamable HTTP transport"); 753 | } catch (error) { 754 | // If that fails with a 4xx error, try the older SSE transport 755 | console.log("Streamable HTTP connection failed, falling back to SSE transport"); 756 | client = new Client({ 757 | name: 'sse-client', 758 | version: '1.0.0' 759 | }); 760 | const sseTransport = new SSEClientTransport(baseUrl); 761 | await client.connect(sseTransport); 762 | console.log("Connected using SSE transport"); 763 | } 764 | ``` 765 | 766 | #### Server-Side Compatibility 767 | 768 | For servers that need to support both Streamable HTTP and older clients: 769 | 770 | ```typescript 771 | import express from "express"; 772 | import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; 773 | import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js"; 774 | import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js"; 775 | 776 | const server = new McpServer({ 777 | name: "backwards-compatible-server", 778 | version: "1.0.0" 779 | }); 780 | 781 | // ... set up server resources, tools, and prompts ... 782 | 783 | const app = express(); 784 | app.use(express.json()); 785 | 786 | // Store transports for each session type 787 | const transports = { 788 | streamable: {} as Record<string, StreamableHTTPServerTransport>, 789 | sse: {} as Record<string, SSEServerTransport> 790 | }; 791 | 792 | // Modern Streamable HTTP endpoint 793 | app.all('/mcp', async (req, res) => { 794 | // Handle Streamable HTTP transport for modern clients 795 | // Implementation as shown in the "With Session Management" example 796 | // ... 797 | }); 798 | 799 | // Legacy SSE endpoint for older clients 800 | app.get('/sse', async (req, res) => { 801 | // Create SSE transport for legacy clients 802 | const transport = new SSEServerTransport('/messages', res); 803 | transports.sse[transport.sessionId] = transport; 804 | 805 | res.on("close", () => { 806 | delete transports.sse[transport.sessionId]; 807 | }); 808 | 809 | await server.connect(transport); 810 | }); 811 | 812 | // Legacy message endpoint for older clients 813 | app.post('/messages', async (req, res) => { 814 | const sessionId = req.query.sessionId as string; 815 | const transport = transports.sse[sessionId]; 816 | if (transport) { 817 | await transport.handlePostMessage(req, res, req.body); 818 | } else { 819 | res.status(400).send('No transport found for sessionId'); 820 | } 821 | }); 822 | 823 | app.listen(3000); 824 | ``` 825 | 826 | **Note**: The SSE transport is now deprecated in favor of Streamable HTTP. New implementations should use Streamable HTTP, and existing SSE implementations should plan to migrate. 827 | 828 | ## Documentation 829 | 830 | - [Model Context Protocol documentation](https://modelcontextprotocol.io) 831 | - [MCP Specification](https://spec.modelcontextprotocol.io) 832 | - [Example Servers](https://github.com/modelcontextprotocol/servers) 833 | 834 | ## Contributing 835 | 836 | Issues and pull requests are welcome on GitHub at https://github.com/modelcontextprotocol/typescript-sdk. 837 | 838 | ## License 839 | 840 | This project is licensed under the MIT License—see the [LICENSE](LICENSE) file for details. 
841 | ``` -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- ```typescript 1 | #!/usr/bin/env node 2 | import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; 3 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; 4 | import { z } from "zod"; 5 | import OpenAI from "openai"; 6 | import type { ImageGenerateParams, ImageEditParams } from "openai/resources"; 7 | import { Readable } from "stream"; 8 | import { toFile } from "openai/uploads"; 9 | import fs from 'fs'; 10 | import path from 'path'; 11 | import os from 'os'; 12 | import fetch from 'node-fetch'; 13 | import FormData from 'form-data'; 14 | import { execSync } from 'child_process'; 15 | 16 | // Get the API key from the environment variable 17 | const OPENAI_API_KEY = process.env.OPENAI_API_KEY; 18 | if (!OPENAI_API_KEY) { 19 | console.error("OPENAI_API_KEY environment variable is required."); 20 | process.exit(1); 21 | } 22 | 23 | // Configure OpenAI client with strict defaults for gpt-image-1 24 | const openai = new OpenAI({ 25 | apiKey: OPENAI_API_KEY, 26 | defaultQuery: {}, // Ensure no default query parameters 27 | defaultHeaders: {} // Ensure no default headers that might affect the request 28 | }); 29 | 30 | // Determine the output directory for saving images 31 | // Priority: 32 | // 1. Environment variable GPT_IMAGE_OUTPUT_DIR if set 33 | // 2. User's Pictures folder with a gpt-image-1 subfolder 34 | // 3. Fallback to a 'generated-images' folder in the current directory if Pictures folder can't be determined 35 | const OUTPUT_DIR_ENV = process.env.GPT_IMAGE_OUTPUT_DIR; 36 | let outputDir: string; 37 | 38 | if (OUTPUT_DIR_ENV) { 39 | // Use the directory specified in the environment variable 40 | outputDir = OUTPUT_DIR_ENV; 41 | console.error(`Using output directory from environment variable: ${outputDir}`); 42 | } else { 43 | // Try to use the user's Pictures folder 44 | try { 45 | // Determine the user's home directory 46 | const homeDir = os.homedir(); 47 | 48 | // Determine the Pictures folder based on the OS 49 | let picturesDir: string; 50 | 51 | if (process.platform === 'win32') { 52 | // Windows: Use the standard Pictures folder 53 | picturesDir = path.join(homeDir, 'Pictures'); 54 | } else if (process.platform === 'darwin') { 55 | // macOS: Use the standard Pictures folder 56 | picturesDir = path.join(homeDir, 'Pictures'); 57 | } else { 58 | // Linux and other Unix-like systems: Use the XDG standard if possible 59 | const xdgPicturesDir = process.env.XDG_PICTURES_DIR; 60 | if (xdgPicturesDir) { 61 | picturesDir = xdgPicturesDir; 62 | } else { 63 | // Fallback to a standard location 64 | picturesDir = path.join(homeDir, 'Pictures'); 65 | } 66 | } 67 | 68 | // Create a gpt-image-1 subfolder in the Pictures directory 69 | outputDir = path.join(picturesDir, 'gpt-image-1'); 70 | console.error(`Using user's Pictures folder for output: ${outputDir}`); 71 | } catch (error) { 72 | // If there's any error determining the Pictures folder, fall back to the current directory 73 | outputDir = path.join(process.cwd(), 'generated-images'); 74 | console.error(`Could not determine Pictures folder, using fallback directory: ${outputDir}`); 75 | } 76 | } 77 | 78 | // Create the output directory if it doesn't exist 79 | if (!fs.existsSync(outputDir)) { 80 | fs.mkdirSync(outputDir, { recursive: true }); 81 | console.error(`Created output directory: ${outputDir}`); 82 | } 
else {
83 | console.error(`Using existing output directory: ${outputDir}`);
84 | }
85 |
86 | // Function to save base64 image to disk and return the file path
87 | function saveImageToDisk(base64Data: string, format: string = 'png'): string {
88 | // Always save into a dedicated 'gpt-images' subfolder of the output directory
89 | // so generated files stay organized in one predictable place
90 | const imagesFolder = path.join(outputDir, 'gpt-images');
91 |
92 | // Create the images folder if it doesn't exist
93 | if (!fs.existsSync(imagesFolder)) {
94 | fs.mkdirSync(imagesFolder, { recursive: true });
95 | console.error(`Created images folder: ${imagesFolder}`);
96 | }
97 |
98 | const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
99 | const filename = `image-${timestamp}.${format}`;
100 | const outputPath = path.join(imagesFolder, filename);
101 |
102 | // Remove the data URL prefix if present
103 | const base64Image = base64Data.replace(/^data:image\/\w+;base64,/, '');
104 |
105 | // Write the image to disk
106 | fs.writeFileSync(outputPath, Buffer.from(base64Image, 'base64'));
107 | console.error(`Image saved to: ${outputPath}`);
108 |
109 | return outputPath;
110 | }
111 |
112 | // Function to read an image file and convert it to base64
113 | function readImageAsBase64(imagePath: string): string {
114 | try {
115 | // Check if the file exists
116 | if (!fs.existsSync(imagePath)) {
117 | throw new Error(`Image file not found: ${imagePath}`);
118 | }
119 |
120 | // Read the file as a buffer
121 | const imageBuffer = fs.readFileSync(imagePath);
122 |
123 | // Determine the MIME type based on file extension
124 | const fileExtension = path.extname(imagePath).toLowerCase();
125 | let mimeType = 'image/png'; // Default to PNG
126 |
127 | if (fileExtension === '.jpg' || fileExtension === '.jpeg') {
128 | mimeType = 'image/jpeg';
129 | } else if (fileExtension === '.webp') {
130 | mimeType = 'image/webp';
131 | } else if (fileExtension === '.gif') {
132 | mimeType = 'image/gif';
133 | }
134 |
135 | // Convert the buffer to a base64 string with data URL prefix
136 | const base64Data = imageBuffer.toString('base64');
137 | const dataUrl = `data:${mimeType};base64,${base64Data}`;
138 |
139 | console.error(`Read image from: ${imagePath} (${mimeType})`);
140 |
141 | return dataUrl;
142 | } catch (error: any) {
143 | console.error(`Error reading image: ${error.message}`);
144 | throw error;
145 | }
146 | }
147 |
148 | const server = new McpServer({
149 | name: "@cloudwerxlab/gpt-image-1-mcp",
150 | version: "1.1.7",
151 | description: "An MCP server for generating and editing images using the OpenAI gpt-image-1 model.",
152 | });
153 |
154 | // Define the create_image tool
155 | const createImageSchema = z.object({
156 | prompt: z.string().max(32000, "Prompt exceeds maximum length for gpt-image-1."),
157 | background: z.enum(["transparent", "opaque", "auto"]).optional(),
158 | n: z.number().int().min(1).max(10).optional(),
159 | output_compression: z.number().int().min(0).max(100).optional(),
160 | output_format: z.enum(["png", "jpeg", "webp"]).optional(),
161 | quality: z.enum(["high", "medium", "low", "auto"]).optional(),
162 | size: z.enum(["1024x1024", "1536x1024", "1024x1536", "auto"]).optional(),
163 | user: z.string().optional(),
164 | moderation: z.enum(["low", "auto"]).optional()
165 | });
166 | type CreateImageArgs = z.infer<typeof createImageSchema>;
167 |
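// For illustration only (not used at runtime): a set of arguments that
// satisfies createImageSchema above. The prompt text and parameter values
// here are made-up examples; only `prompt` is required.
//
// const exampleArgs: CreateImageArgs = {
//   prompt: "A watercolor lighthouse at dawn",
//   size: "1024x1536",
//   quality: "high",
//   n: 2,
//   output_format: "png",
//   background: "transparent"
// };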
"Generate new images using OpenAI's gpt-image-1 model" 173 | }, 174 | async (args: CreateImageArgs, extra: any) => { 175 | try { 176 | // Use the OpenAI SDK's createImage method with detailed error handling 177 | let apiResponse; 178 | try { 179 | apiResponse = await openai.images.generate({ 180 | model: "gpt-image-1", 181 | prompt: args.prompt, 182 | size: args.size || "1024x1024", 183 | quality: args.quality || "high", 184 | n: args.n || 1 185 | }); 186 | 187 | // Check if the response contains an error field (shouldn't happen with SDK but just in case) 188 | if (apiResponse && 'error' in apiResponse) { 189 | const error = (apiResponse as any).error; 190 | throw { 191 | message: error.message || 'Unknown API error', 192 | type: error.type || 'api_error', 193 | code: error.code || 'unknown', 194 | response: { data: { error } } 195 | }; 196 | } 197 | } catch (apiError: any) { 198 | // Enhance the error with more details if possible 199 | console.error("OpenAI API Error:", apiError); 200 | 201 | // Rethrow with enhanced information 202 | throw apiError; 203 | } 204 | 205 | // Create a Response-like object with a json() method for compatibility with the built-in tool 206 | const response = { 207 | json: () => Promise.resolve(apiResponse) 208 | }; 209 | 210 | const responseData = apiResponse; 211 | const format = args.output_format || "png"; 212 | 213 | // Save images to disk and create response with file paths 214 | const savedImages = []; 215 | const imageContents = []; 216 | 217 | if (responseData.data && responseData.data.length > 0) { 218 | for (const item of responseData.data) { 219 | if (item.b64_json) { 220 | // Save the image to disk 221 | const imagePath = saveImageToDisk(item.b64_json, format); 222 | 223 | // Add the saved image info to our response 224 | savedImages.push({ 225 | path: imagePath, 226 | format: format 227 | }); 228 | 229 | // Also include the image content for compatibility 230 | imageContents.push({ 231 | type: "image" as const, 232 | data: item.b64_json, 233 | mimeType: `image/${format}` 234 | }); 235 | } else if (item.url) { 236 | console.error(`Image URL: ${item.url}`); 237 | console.error("The gpt-image-1 model returned a URL instead of base64 data."); 238 | console.error("To view the image, open the URL in your browser."); 239 | 240 | // Add the URL info to our response 241 | savedImages.push({ 242 | url: item.url, 243 | format: format 244 | }); 245 | 246 | // Include a text message about the URL in the content 247 | imageContents.push({ 248 | type: "text" as const, 249 | text: `Image available at URL: ${item.url}` 250 | }); 251 | } 252 | } 253 | } 254 | 255 | // Create a beautifully formatted response with emojis and details 256 | const formatSize = (size: string | undefined) => size || "1024x1024"; 257 | const formatQuality = (quality: string | undefined) => quality || "high"; 258 | 259 | // Create a beautiful formatted message 260 | const formattedMessage = ` 261 | 🎨 **Image Generation Complete!** 🎨 262 | 263 | ✨ **Prompt**: "${args.prompt}" 264 | 265 | 📊 **Generation Parameters**: 266 | • Size: ${formatSize(args.size)} 267 | • Quality: ${formatQuality(args.quality)} 268 | • Number of Images: ${args.n || 1} 269 | ${args.background ? `• Background: ${args.background}` : ''} 270 | ${args.output_format ? `• Format: ${args.output_format}` : ''} 271 | ${args.output_compression ? `• Compression: ${args.output_compression}%` : ''} 272 | ${args.moderation ? 
`• Moderation: ${args.moderation}` : ''} 273 | 274 | 📁 **Generated ${savedImages.length} Image${savedImages.length > 1 ? 's' : ''}**: 275 | ${savedImages.map((img, index) => ` ${index + 1}. ${img.path || img.url}`).join('\n')} 276 | 277 | ${responseData.usage ? `⚡ **Token Usage**: 278 | • Total Tokens: ${responseData.usage.total_tokens} 279 | • Input Tokens: ${responseData.usage.input_tokens} 280 | • Output Tokens: ${responseData.usage.output_tokens}` : ''} 281 | 282 | 🔍 You can find your image${savedImages.length > 1 ? 's' : ''} at the path${savedImages.length > 1 ? 's' : ''} above! 283 | `; 284 | 285 | // Return both the image content and the saved file paths with the beautiful message 286 | return { 287 | content: [ 288 | { 289 | type: "text" as const, 290 | text: formattedMessage 291 | }, 292 | ...imageContents 293 | ], 294 | ...(responseData.usage && { 295 | _meta: { 296 | usage: responseData.usage, 297 | savedImages: savedImages 298 | } 299 | }) 300 | }; 301 | } catch (error: any) { 302 | // Log the full error for debugging 303 | console.error("Error generating image:", error); 304 | 305 | // Extract detailed error information 306 | const errorCode = error.status || error.code || 'Unknown'; 307 | const errorType = error.type || 'Error'; 308 | const errorMessage = error.message || 'An unknown error occurred'; 309 | 310 | // Check for specific OpenAI API errors 311 | let detailedError = ''; 312 | 313 | if (error.response) { 314 | // If we have a response object from OpenAI, extract more details 315 | try { 316 | const responseData = error.response.data || {}; 317 | if (responseData.error) { 318 | detailedError = `\n📋 **Details**: ${responseData.error.message || 'No additional details available'}`; 319 | 320 | // Add parameter errors if available 321 | if (responseData.error.param) { 322 | detailedError += `\n🔍 **Parameter**: ${responseData.error.param}`; 323 | } 324 | 325 | // Add code if available 326 | if (responseData.error.code) { 327 | detailedError += `\n🔢 **Error Code**: ${responseData.error.code}`; 328 | } 329 | 330 | // Add type if available 331 | if (responseData.error.type) { 332 | detailedError += `\n📝 **Error Type**: ${responseData.error.type}`; 333 | } 334 | } 335 | } catch (parseError) { 336 | // If we can't parse the response, just use what we have 337 | detailedError = '\n📋 **Details**: Could not parse error details from API response'; 338 | } 339 | } 340 | 341 | // Construct a comprehensive error message 342 | const fullErrorMessage = `❌ **Image Generation Failed**\n\n⚠️ **Error ${errorCode}**: ${errorType} - ${errorMessage}${detailedError}\n\n🔄 Please try again with a different prompt or parameters.`; 343 | 344 | // Return the detailed error to the client 345 | return { 346 | content: [{ 347 | type: "text", 348 | text: fullErrorMessage 349 | }], 350 | isError: true, 351 | _meta: { 352 | error: { 353 | code: errorCode, 354 | type: errorType, 355 | message: errorMessage, 356 | raw: JSON.stringify(error, Object.getOwnPropertyNames(error)) 357 | } 358 | } 359 | }; 360 | } 361 | } 362 | ); 363 | 364 | // Define the create_image_edit tool 365 | const createImageEditSchema = z.object({ 366 | image: z.union([ 367 | z.string(), // Can be base64 encoded image string 368 | z.array(z.string()), // Can be array of base64 encoded image strings 369 | z.object({ // Can be an object with a file path 370 | filePath: z.string(), 371 | isBase64: z.boolean().optional().default(false) 372 | }), 373 | z.array(z.object({ // Can be an array of objects with file paths 374 | filePath: 
z.string(), 375 | isBase64: z.boolean().optional().default(false) 376 | })) 377 | ]), 378 | prompt: z.string().max(32000, "Prompt exceeds maximum length for gpt-image-1."), 379 | background: z.enum(["transparent", "opaque", "auto"]).optional(), 380 | mask: z.union([ 381 | z.string(), // Can be base64 encoded mask string 382 | z.object({ // Can be an object with a file path 383 | filePath: z.string(), 384 | isBase64: z.boolean().optional().default(false) 385 | }) 386 | ]).optional(), 387 | n: z.number().int().min(1).max(10).optional(), 388 | quality: z.enum(["high", "medium", "low", "auto"]).optional(), 389 | size: z.enum(["1024x1024", "1536x1024", "1024x1536", "auto"]).optional(), 390 | user: z.string().optional() 391 | }); 392 | type CreateImageEditArgs = z.infer<typeof createImageEditSchema>; 393 | 394 | server.tool( 395 | "create_image_edit", 396 | createImageEditSchema.shape, 397 | { 398 | title: "Edit existing images using OpenAI's gpt-image-1 model" 399 | }, 400 | async (args: CreateImageEditArgs, extra: any) => { 401 | try { 402 | // The OpenAI SDK expects 'image' and 'mask' to be Node.js ReadStream or Blob. 403 | // Since we are receiving base64 strings from the client, we need to convert them. 404 | // This is a simplified approach. A robust solution might involve handling file uploads 405 | // or different data formats depending on the client's capabilities. 406 | // For this implementation, we'll assume base64 and convert to Buffer, which the SDK might accept 407 | // or require further processing depending on its exact requirements for file-like objects. 408 | // NOTE: The OpenAI SDK's `images.edit` method specifically expects `File` or `Blob` in browser 409 | // environments and `ReadableStream` or `Buffer` in Node.js. Converting base64 to Buffer is 410 | // the most straightforward approach for a Node.js server receiving base64. 
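// Purely as a sketch of that SDK-based alternative (the implementation below
// uses curl instead): with the `toFile` helper imported above from
// "openai/uploads", and assuming an SDK version whose `images.edit` accepts
// gpt-image-1 and an array of images, the upload could look roughly like the
// following. `localPaths` is a hypothetical array of image file paths.
//
// const uploads = await Promise.all(localPaths.map((p) =>
//   toFile(fs.createReadStream(p), path.basename(p))
// ));
// const edited = await openai.images.edit({
//   model: "gpt-image-1",
//   image: uploads,
//   prompt: args.prompt
// });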
411 |
412 | // Process image input which can be file paths or base64 strings
413 | const imageFiles = [];
414 |
415 | // Handle different image input formats
416 | if (Array.isArray(args.image)) {
417 | // Handle array of strings or objects
418 | for (const img of args.image) {
419 | if (typeof img === 'string') {
420 | // Base64 string - create a temporary file
421 | const tempFile = path.join(os.tmpdir(), `image-${Date.now()}-${Math.random().toString(36).substring(2, 15)}.png`);
422 | const base64Data = img.replace(/^data:image\/\w+;base64,/, '');
423 | fs.writeFileSync(tempFile, Buffer.from(base64Data, 'base64'));
424 | imageFiles.push(tempFile);
425 | } else {
426 | // Object with filePath - use the file directly
427 | imageFiles.push(img.filePath);
428 | }
429 | }
430 | } else if (typeof args.image === 'string') {
431 | // Single base64 string - create a temporary file
432 | const tempFile = path.join(os.tmpdir(), `image-${Date.now()}-${Math.random().toString(36).substring(2, 15)}.png`);
433 | const base64Data = args.image.replace(/^data:image\/\w+;base64,/, '');
434 | fs.writeFileSync(tempFile, Buffer.from(base64Data, 'base64'));
435 | imageFiles.push(tempFile);
436 | } else {
437 | // Single object with filePath - use the file directly
438 | imageFiles.push(args.image.filePath);
439 | }
440 |
441 | // Process mask input which can be a file path or base64 string
442 | let maskFile = undefined;
443 |
444 | if (args.mask) {
445 | if (typeof args.mask === 'string') {
446 | // Mask is a base64 string - create a temporary file
447 | const tempFile = path.join(os.tmpdir(), `mask-${Date.now()}-${Math.random().toString(36).substring(2, 15)}.png`);
448 | const base64Data = args.mask.replace(/^data:image\/\w+;base64,/, '');
449 | fs.writeFileSync(tempFile, Buffer.from(base64Data, 'base64'));
450 | maskFile = tempFile;
451 | } else {
452 | // Mask is an object with filePath - use the file directly
453 | maskFile = args.mask.filePath;
454 | }
455 | }
456 |
457 | // Use a direct curl command to call the OpenAI API
458 | // This is more reliable than using the SDK for file uploads
459 |
460 | // Create a temporary file to store the response
461 | const tempResponseFile = path.join(os.tmpdir(), `response-${Date.now()}.json`);
462 |
463 | // Build the curl command
464 | let curlCommand = `curl -s -X POST "https://api.openai.com/v1/images/edits" -H "Authorization: Bearer ${process.env.OPENAI_API_KEY}"`;
465 |
466 | // Add the model
467 | curlCommand += ` -F "model=gpt-image-1"`;
468 |
469 | // Add the prompt (escape embedded double quotes so the form field survives the shell; full escaping is shell and platform dependent)
470 | curlCommand += ` -F "prompt=${args.prompt.replace(/"/g, '\\"')}"`;
471 |
472 | // Add the images
473 | for (const imageFile of imageFiles) {
474 | curlCommand += ` -F "image[]=@${imageFile}"`;
475 | }
476 |
477 | // Add the mask if it exists
478 | if (maskFile) {
479 | curlCommand += ` -F "mask=@${maskFile}"`;
480 | }
481 |
482 | // Add other parameters
483 | if (args.n) curlCommand += ` -F "n=${args.n}"`;
484 | if (args.size) curlCommand += ` -F "size=${args.size}"`;
485 | if (args.quality) curlCommand += ` -F "quality=${args.quality}"`;
486 | if (args.background) curlCommand += ` -F "background=${args.background}"`;
487 | if (args.user) curlCommand += ` -F "user=${args.user}"`;
488 |
489 | // Add output redirection
490 | curlCommand += ` > "${tempResponseFile}"`;
491 |
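// For reference, the assembled command ends up shaped roughly like the
// following (the paths and prompt are illustrative placeholders; the real
// command interpolates the API key and the temp-file paths built above):
//
// curl -s -X POST "https://api.openai.com/v1/images/edits" \
//   -H "Authorization: Bearer $OPENAI_API_KEY" \
//   -F "model=gpt-image-1" \
//   -F "prompt=make the sky pink" \
//   -F "image[]=@/tmp/image-123.png" \
//   -F "mask=@/tmp/mask-123.png" \
//   -F "n=1" > /tmp/response-123.json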
492 | // Execute the curl command with execSync
493 |
494 |
495 | try {
496 | console.error(`Executing curl command to edit image...`);
497 | execSync(curlCommand, { stdio: ['pipe', 'pipe', 'inherit'] });
498 | console.error(`Curl command executed successfully.`);
499 | } catch (error: any) {
500 | console.error(`Error executing curl command: ${error.message}`);
501 | throw new Error(`Failed to edit image: ${error.message}`);
502 | }
503 |
504 | // Read the response from the temporary file
505 | let responseJson;
506 | try {
507 | responseJson = fs.readFileSync(tempResponseFile, 'utf8');
508 | console.error(`Response file read successfully.`);
509 | } catch (error: any) {
510 | console.error(`Error reading response file: ${error.message}`);
511 | throw new Error(`Failed to read response file: ${error.message}`);
512 | }
513 |
514 | // Parse the response
515 | let responseData;
516 | try {
517 | responseData = JSON.parse(responseJson);
518 | console.error(`Response parsed successfully.`);
519 |
520 | // Check if the response contains an error
521 | if (responseData.error) {
522 | console.error(`OpenAI API returned an error:`, responseData.error);
523 | const errorMessage = responseData.error.message || 'Unknown API error';
524 | const errorType = responseData.error.type || 'api_error';
525 | const errorCode = responseData.error.code || responseData.error.status || 'unknown';
526 |
527 | throw {
528 | message: errorMessage,
529 | type: errorType,
530 | code: errorCode,
531 | response: { data: responseData }
532 | };
533 | }
534 | } catch (error: any) {
535 | // If the error is from our API error check, rethrow it
536 | if (error.response && error.response.data) {
537 | throw error;
538 | }
539 |
540 | console.error(`Error parsing response: ${error.message}`);
541 | throw new Error(`Failed to parse response: ${error.message}`);
542 | }
543 |
544 | // Delete the temporary response file
545 | try {
546 | fs.unlinkSync(tempResponseFile);
547 | console.error(`Temporary response file deleted.`);
548 | } catch (error: any) {
549 | console.error(`Error deleting temporary file: ${error.message}`);
550 | // Don't throw an error here, just log it
551 | }
552 |
553 | // Clean up temporary files
554 | try {
555 | // Delete temporary image files
556 | for (const imageFile of imageFiles) {
557 | // Only delete files we created (temporary files in the os.tmpdir directory)
558 | if (imageFile.startsWith(os.tmpdir())) {
559 | try { fs.unlinkSync(imageFile); } catch (e) { /* ignore errors */ }
560 | }
561 | }
562 |
563 | // Delete temporary mask file
564 | if (maskFile && maskFile.startsWith(os.tmpdir())) {
565 | try { fs.unlinkSync(maskFile); } catch (e) { /* ignore errors */ }
566 | }
567 | } catch (cleanupError) {
568 | console.error("Error cleaning up temporary files:", cleanupError);
569 | }
570 |
571 | // The curl response has already been parsed into responseData above, so it can be used directly
572 |
573 | // Save images to disk and create response with file paths
574 | const savedImages = [];
575 | const imageContents = [];
576 | const format = "png"; // Assuming png for edits based on common practice
577 |
578 | if (responseData.data && responseData.data.length > 0) {
579 | for (const item of responseData.data) {
580 | if (item.b64_json) {
581 | // Save the image to disk
582 | const imagePath = saveImageToDisk(item.b64_json, format);
583 |
584 | // Add the saved image info to our response
585 | savedImages.push({
586 | path: imagePath,
587 | format: format
588 | });
589 |
590 | // Also include the image content for compatibility
591 | imageContents.push({
592 | type: "image" as const,
593 | data: item.b64_json,
594 | mimeType: `image/${format}`
595 | });
596 | } else if (item.url) {
597 | console.error(`Image URL:
${item.url}`); 598 | console.error("The gpt-image-1 model returned a URL instead of base64 data."); 599 | console.error("To view the image, open the URL in your browser."); 600 | 601 | // Add the URL info to our response 602 | savedImages.push({ 603 | url: item.url, 604 | format: format 605 | }); 606 | 607 | // Include a text message about the URL in the content 608 | imageContents.push({ 609 | type: "text" as const, 610 | text: `Image available at URL: ${item.url}` 611 | }); 612 | } 613 | } 614 | } 615 | 616 | // Create a beautifully formatted response with emojis and details 617 | const formatSize = (size: string | undefined) => size || "1024x1024"; 618 | const formatQuality = (quality: string | undefined) => quality || "high"; 619 | 620 | // Get source image information 621 | let sourceImageInfo = ""; 622 | if (Array.isArray(args.image)) { 623 | // Handle array of strings or objects 624 | sourceImageInfo = args.image.map((img, index) => { 625 | if (typeof img === 'string') { 626 | return ` ${index + 1}. Base64 encoded image`; 627 | } else { 628 | return ` ${index + 1}. ${img.filePath}`; 629 | } 630 | }).join('\n'); 631 | } else if (typeof args.image === 'string') { 632 | sourceImageInfo = " Base64 encoded image"; 633 | } else { 634 | sourceImageInfo = ` ${args.image.filePath}`; 635 | } 636 | 637 | // Get mask information 638 | let maskInfo = ""; 639 | if (args.mask) { 640 | if (typeof args.mask === 'string') { 641 | maskInfo = "🎭 **Mask**: Base64 encoded mask applied"; 642 | } else { 643 | maskInfo = `🎭 **Mask**: Mask from ${args.mask.filePath} applied`; 644 | } 645 | } 646 | 647 | // Create a beautiful formatted message 648 | const formattedMessage = ` 649 | ✏️ **Image Edit Complete!** 🖌️ 650 | 651 | ✨ **Edit Prompt**: "${args.prompt}" 652 | 653 | 🖼️ **Source Image${imageFiles.length > 1 ? 's' : ''}**: 654 | ${sourceImageInfo} 655 | ${maskInfo} 656 | 657 | 📊 **Edit Parameters**: 658 | • Size: ${formatSize(args.size)} 659 | • Quality: ${formatQuality(args.quality)} 660 | • Number of Results: ${args.n || 1} 661 | ${args.background ? `• Background: ${args.background}` : ''} 662 | 663 | 📁 **Edited ${savedImages.length} Image${savedImages.length > 1 ? 's' : ''}**: 664 | ${savedImages.map((img, index) => ` ${index + 1}. ${img.path || img.url}`).join('\n')} 665 | 666 | ${responseData.usage ? `⚡ **Token Usage**: 667 | • Total Tokens: ${responseData.usage.total_tokens} 668 | • Input Tokens: ${responseData.usage.input_tokens} 669 | • Output Tokens: ${responseData.usage.output_tokens}` : ''} 670 | 671 | 🔍 You can find your edited image${savedImages.length > 1 ? 's' : ''} at the path${savedImages.length > 1 ? 's' : ''} above! 
672 | `; 673 | 674 | // Return both the image content and the saved file paths with the beautiful message 675 | return { 676 | content: [ 677 | { 678 | type: "text" as const, 679 | text: formattedMessage 680 | }, 681 | ...imageContents 682 | ], 683 | ...(responseData.usage && { 684 | _meta: { 685 | usage: { 686 | totalTokens: responseData.usage.total_tokens, 687 | inputTokens: responseData.usage.input_tokens, 688 | outputTokens: responseData.usage.output_tokens, 689 | }, 690 | savedImages: savedImages 691 | } 692 | }) 693 | }; 694 | } catch (error: any) { 695 | // Log the full error for debugging 696 | console.error("Error creating image edit:", error); 697 | 698 | // Extract detailed error information 699 | const errorCode = error.status || error.code || 'Unknown'; 700 | const errorType = error.type || 'Error'; 701 | const errorMessage = error.message || 'An unknown error occurred'; 702 | 703 | // Check for specific error types and provide more helpful messages 704 | let detailedError = ''; 705 | let suggestedFix = ''; 706 | 707 | // Handle file-related errors 708 | if (errorMessage.includes('ENOENT') || errorMessage.includes('no such file')) { 709 | detailedError = '\n📋 **Details**: The specified image or mask file could not be found'; 710 | suggestedFix = '\n💡 **Suggestion**: Verify that the file path is correct and the file exists'; 711 | } 712 | // Handle permission errors 713 | else if (errorMessage.includes('EACCES') || errorMessage.includes('permission denied')) { 714 | detailedError = '\n📋 **Details**: Permission denied when trying to access the file'; 715 | suggestedFix = '\n💡 **Suggestion**: Check file permissions or try running with elevated privileges'; 716 | } 717 | // Handle curl errors 718 | else if (errorMessage.includes('curl')) { 719 | detailedError = '\n📋 **Details**: Error occurred while sending the request to OpenAI API'; 720 | suggestedFix = '\n💡 **Suggestion**: Check your internet connection and API key'; 721 | } 722 | // Handle OpenAI API errors 723 | else if (error.response) { 724 | try { 725 | const responseData = error.response.data || {}; 726 | if (responseData.error) { 727 | detailedError = `\n📋 **Details**: ${responseData.error.message || 'No additional details available'}`; 728 | 729 | // Add parameter errors if available 730 | if (responseData.error.param) { 731 | detailedError += `\n🔍 **Parameter**: ${responseData.error.param}`; 732 | } 733 | 734 | // Add code if available 735 | if (responseData.error.code) { 736 | detailedError += `\n🔢 **Error Code**: ${responseData.error.code}`; 737 | } 738 | 739 | // Add type if available 740 | if (responseData.error.type) { 741 | detailedError += `\n📝 **Error Type**: ${responseData.error.type}`; 742 | } 743 | 744 | // Provide suggestions based on error type 745 | if (responseData.error.type === 'invalid_request_error') { 746 | suggestedFix = '\n💡 **Suggestion**: Check that your image format is supported (PNG, JPEG) and the prompt is valid'; 747 | } else if (responseData.error.type === 'authentication_error') { 748 | suggestedFix = '\n💡 **Suggestion**: Verify your OpenAI API key is correct and has access to the gpt-image-1 model'; 749 | } 750 | } 751 | } catch (parseError) { 752 | detailedError = '\n📋 **Details**: Could not parse error details from API response'; 753 | } 754 | } 755 | 756 | // If we have a JSON response with an error, try to extract it 757 | if (errorMessage.includes('{') && errorMessage.includes('}')) { 758 | try { 759 | const jsonStartIndex = errorMessage.indexOf('{'); 760 | const jsonEndIndex = 
errorMessage.lastIndexOf('}') + 1; 761 | const jsonStr = errorMessage.substring(jsonStartIndex, jsonEndIndex); 762 | const jsonError = JSON.parse(jsonStr); 763 | 764 | if (jsonError.error) { 765 | detailedError = `\n📋 **Details**: ${jsonError.error.message || 'No additional details available'}`; 766 | 767 | if (jsonError.error.code) { 768 | detailedError += `\n🔢 **Error Code**: ${jsonError.error.code}`; 769 | } 770 | 771 | if (jsonError.error.type) { 772 | detailedError += `\n📝 **Error Type**: ${jsonError.error.type}`; 773 | } 774 | } 775 | } catch (e) { 776 | // If we can't parse JSON from the error message, just continue 777 | } 778 | } 779 | 780 | // Construct a comprehensive error message 781 | const fullErrorMessage = `❌ **Image Edit Failed**\n\n⚠️ **Error ${errorCode}**: ${errorType} - ${errorMessage}${detailedError}${suggestedFix}\n\n🔄 Please try again with a different prompt, image, or parameters.`; 782 | 783 | // Return the detailed error to the client 784 | return { 785 | content: [{ 786 | type: "text", 787 | text: fullErrorMessage 788 | }], 789 | isError: true, 790 | _meta: { 791 | error: { 792 | code: errorCode, 793 | type: errorType, 794 | message: errorMessage, 795 | details: detailedError.replace(/\n📋 \*\*Details\*\*: /, ''), 796 | suggestion: suggestedFix.replace(/\n💡 \*\*Suggestion\*\*: /, ''), 797 | raw: JSON.stringify(error, Object.getOwnPropertyNames(error)) 798 | } 799 | } 800 | }; 801 | } 802 | } 803 | ); 804 | 805 | 806 | // Start the server 807 | const transport = new StdioServerTransport(); 808 | server.connect(transport).then(() => { 809 | console.error("✅ GPT-Image-1 MCP server running on stdio"); 810 | console.error("🎨 Ready to generate and edit images!"); 811 | }).catch(console.error); 812 | 813 | // Handle graceful shutdown 814 | process.on('SIGINT', async () => { 815 | console.error("🛑 Shutting down GPT-Image-1 MCP server..."); 816 | await server.close(); 817 | console.error("👋 Server shutdown complete. Goodbye!"); 818 | process.exit(0); 819 | }); 820 | 821 | process.on('SIGTERM', async () => { 822 | console.error("🛑 Shutting down GPT-Image-1 MCP server..."); 823 | await server.close(); 824 | console.error("👋 Server shutdown complete. Goodbye!"); 825 | process.exit(0); 826 | }); 827 | ```