# Directory Structure
```
├── .gitignore
├── commit_message.txt
├── package.json
├── pnpm-lock.yaml
├── README.md
├── src
│   └── index.ts
└── tsconfig.json
```
# Files
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
1 | node_modules/
2 | build/
3 | *.log
4 | .env*
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
1 | # Ollama MCP Server
2 |
3 | 🚀 A powerful bridge between Ollama and the Model Context Protocol (MCP), enabling seamless integration of Ollama's local LLM capabilities into your MCP-powered applications.
4 |
5 | ## 🌟 Features
6 |
7 | ### Complete Ollama Integration
8 | - **Full API Coverage**: Access all essential Ollama functionality through a clean MCP interface
9 | - **OpenAI-Compatible Chat**: Drop-in replacement for OpenAI's chat completion API
10 | - **Local LLM Power**: Run AI models locally with full control and privacy
11 |
12 | ### Core Capabilities
13 | - 🔄 **Model Management**
14 | - Pull models from registries
15 | - Push models to registries
16 | - List available models
17 | - Create custom models from Modelfiles
18 | - Copy and remove models
19 |
20 | - 🤖 **Model Execution**
21 | - Run models with customizable prompts
22 | - Chat completion API with system/user/assistant roles
23 | - Configurable parameters (temperature, timeout)
24 | - Raw mode support for direct responses
25 |
26 | - 🛠 **Server Control**
27 | - Start and manage Ollama server
28 | - View detailed model information
29 | - Error handling and timeout management
30 |
31 | ## 🚀 Getting Started
32 |
33 | ### Prerequisites
34 | - [Ollama](https://ollama.ai) installed on your system
35 | - Node.js and npm/pnpm
36 |
37 | ### Installation
38 |
39 | 1. Install dependencies:
40 | ```bash
41 | pnpm install
42 | ```
43 |
44 | 2. Build the server:
45 | ```bash
46 | pnpm run build
47 | ```
48 |
49 | ### Configuration
50 |
51 | Add the server to your MCP configuration:
52 |
53 | #### For Claude Desktop:
54 | - macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
55 | - Windows: `%APPDATA%/Claude/claude_desktop_config.json`
56 |
57 | ```json
58 | {
59 | "mcpServers": {
60 | "ollama": {
61 | "command": "node",
62 | "args": ["/path/to/ollama-server/build/index.js"],
63 | "env": {
64 | "OLLAMA_HOST": "http://127.0.0.1:11434" // Optional: customize Ollama API endpoint
65 | }
66 | }
67 | }
68 | }
69 | ```
70 |
71 | ## 🛠 Usage Examples
72 |
73 | ### Pull and Run a Model
74 | ```typescript
75 | // Pull a model
76 | await mcp.use_mcp_tool({
77 | server_name: "ollama",
78 | tool_name: "pull",
79 | arguments: {
80 | name: "llama2"
81 | }
82 | });
83 |
84 | // Run the model
85 | await mcp.use_mcp_tool({
86 | server_name: "ollama",
87 | tool_name: "run",
88 | arguments: {
89 | name: "llama2",
90 | prompt: "Explain quantum computing in simple terms"
91 | }
92 | });
93 | ```
94 |
95 | ### Chat Completion (OpenAI-compatible)
96 | ```typescript
97 | await mcp.use_mcp_tool({
98 | server_name: "ollama",
99 | tool_name: "chat_completion",
100 | arguments: {
101 | model: "llama2",
102 | messages: [
103 | {
104 | role: "system",
105 | content: "You are a helpful assistant."
106 | },
107 | {
108 | role: "user",
109 | content: "What is the meaning of life?"
110 | }
111 | ],
112 | temperature: 0.7
113 | }
114 | });
115 | ```
116 |
117 | ### Create Custom Model
118 | ```typescript
119 | await mcp.use_mcp_tool({
120 | server_name: "ollama",
121 | tool_name: "create",
122 | arguments: {
123 | name: "custom-model",
124 | modelfile: "./path/to/Modelfile"
125 | }
126 | });
127 | ```
128 |
129 | ## 🔧 Advanced Configuration
130 |
131 | - `OLLAMA_HOST`: environment variable for pointing the server at a custom Ollama API endpoint (default: http://127.0.0.1:11434)
132 | - `timeout`: per-call tool argument for model execution, in milliseconds (default: 60000)
133 | - `temperature`: per-call sampling control for `chat_completion` (range 0-2)
134 |
135 | ## 🤝 Contributing
136 |
137 | Contributions are welcome! Feel free to:
138 | - Report bugs
139 | - Suggest new features
140 | - Submit pull requests
141 |
142 | ## 📝 License
143 |
144 | MIT License - feel free to use in your own projects!
145 |
146 | ---
147 |
148 | Built with ❤️ for the MCP ecosystem
149 |
```
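The Advanced Configuration options above map directly onto the tool schemas in `src/index.ts`: `timeout` is accepted by the `run` and `chat_completion` tools, `temperature` by `chat_completion`, and `OLLAMA_HOST` is read from the environment when the server starts. A short sketch, using the same `mcp.use_mcp_tool` call style as the README examples (the `mcp` client object is assumed, exactly as in those snippets):

```typescript
// Override the 60-second default timeout and lower the sampling temperature
await mcp.use_mcp_tool({
  server_name: "ollama",
  tool_name: "chat_completion",
  arguments: {
    model: "llama2",
    messages: [
      { role: "user", content: "Summarize the Model Context Protocol in two sentences." }
    ],
    temperature: 0.2, // 0-2; lower values give more deterministic output
    timeout: 120000   // milliseconds; useful for long generations
  }
});
```

`OLLAMA_HOST` itself is not a tool argument; set it in the `env` block of the MCP configuration (as in the Claude Desktop example above) or in the server's environment before launch.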
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "compilerOptions": {
3 | "target": "ES2022",
4 | "module": "Node16",
5 | "moduleResolution": "Node16",
6 | "outDir": "./build",
7 | "rootDir": "./src",
8 | "strict": true,
9 | "esModuleInterop": true,
10 | "skipLibCheck": true,
11 | "forceConsistentCasingInFileNames": true
12 | },
13 | "include": ["src/**/*"],
14 | "exclude": ["node_modules"]
15 | }
16 |
```
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "name": "ollama-mcp",
3 | "version": "0.1.0",
4 |   "description": "An Ollama MCP server that lets Cline and other MCP-supporting tools access Ollama",
5 | "private": true,
6 | "type": "module",
7 | "bin": {
8 | "ollama-mcp": "./build/index.js"
9 | },
10 | "files": [
11 | "build"
12 | ],
13 | "scripts": {
14 | "build": "tsc && node -e \"import('node:fs').then(fs => fs.chmodSync('build/index.js', '755'))\"",
15 | "prepare": "npm run build",
16 | "watch": "tsc --watch",
17 | "inspector": "npx @modelcontextprotocol/inspector build/index.js"
18 | },
19 | "dependencies": {
20 | "@modelcontextprotocol/sdk": "0.6.0",
21 | "axios": "^1.7.9"
22 | },
23 | "devDependencies": {
24 | "@types/node": "^20.11.24",
25 | "typescript": "^5.3.3"
26 | }
27 | }
28 |
```
--------------------------------------------------------------------------------
/commit_message.txt:
--------------------------------------------------------------------------------
```
1 | feat: add streaming support for chat completions
2 |
3 | Implemented real-time streaming capability for the chat completion API to:
4 | - Enable progressive output of long responses
5 | - Improve user experience with immediate feedback
6 | - Reduce perceived latency for large generations
7 | - Support interactive applications
8 |
9 | The streaming is implemented using Server-Sent Events (SSE) protocol:
10 | - Added SSE transport handler in OllamaServer
11 | - Modified chat_completion tool to support streaming
12 | - Configured proper response headers and event formatting
13 | - Added streaming parameter to API schema
14 |
15 | Testing confirmed successful streaming of:
16 | - Short responses (sonnets)
17 | - Long responses (technical articles)
18 | - Various content types and lengths
19 |
20 | Resolves: #123 (Add streaming support for chat completions)
21 |
```
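The commit message above describes adding a `stream` flag to `chat_completion` and forwarding output over SSE; the `src/index.ts` snapshot below does not expose such a flag, so the following is only a sketch of the handler-side idea the message describes, not code from this repository. It assumes an already-open SSE connection (the hypothetical `res` parameter) and reuses the same axios/NDJSON pattern as the rest of the server.

```typescript
import axios from 'axios';
import type { ServerResponse } from 'node:http';

// Sketch: stream an Ollama generation to an open SSE response.
// `res` is assumed to be the ServerResponse backing the SSE connection.
export async function streamGeneration(
  res: ServerResponse,
  model: string,
  prompt: string,
  ollamaHost = process.env.OLLAMA_HOST || 'http://127.0.0.1:11434'
) {
  res.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    Connection: 'keep-alive',
  });

  const response = await axios.post(
    `${ollamaHost}/api/generate`,
    { model, prompt, stream: true },
    { responseType: 'stream' }
  );

  // Ollama emits newline-delimited JSON; forward each text delta as an SSE data event
  let buffer = '';
  for await (const chunk of response.data) {
    buffer += chunk.toString();
    const lines = buffer.split('\n');
    buffer = lines.pop() ?? '';
    for (const line of lines) {
      if (!line.trim()) continue;
      const delta = JSON.parse(line);
      res.write(`data: ${JSON.stringify({ content: delta.response, done: delta.done })}\n\n`);
    }
  }
  res.end();
}
```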
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
```typescript
1 | #!/usr/bin/env node
2 | import { Server } from '@modelcontextprotocol/sdk/server/index.js';
3 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
4 | import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
5 | import {
6 | CallToolRequestSchema,
7 | ErrorCode,
8 | ListToolsRequestSchema,
9 | McpError,
10 | } from '@modelcontextprotocol/sdk/types.js';
11 | import axios from 'axios';
12 | import { exec } from 'child_process';
13 | import { promisify } from 'util';
14 | import http from 'node:http';
15 | import type { AddressInfo } from 'node:net';
16 |
17 | const execAsync = promisify(exec);
18 |
19 | // Default Ollama API endpoint
20 | const OLLAMA_HOST = process.env.OLLAMA_HOST || 'http://127.0.0.1:11434';
21 | const DEFAULT_TIMEOUT = 60000; // 60 seconds default timeout
22 |
23 | interface OllamaGenerateResponse {
24 | model: string;
25 | created_at: string;
26 | response: string;
27 | done: boolean;
28 | }
29 |
30 | // Helper function to format error messages
31 | const formatError = (error: unknown): string => {
32 | if (error instanceof Error) {
33 | return error.message;
34 | }
35 | return String(error);
36 | };
37 |
38 | class OllamaServer {
39 | private server: Server;
40 |
41 | constructor() {
42 | this.server = new Server(
43 | {
44 | name: 'ollama-mcp',
45 | version: '0.1.0',
46 | },
47 | {
48 | capabilities: {
49 | tools: {},
50 | },
51 | }
52 | );
53 |
54 | this.setupToolHandlers();
55 |
56 | // Error handling
57 | this.server.onerror = (error) => console.error('[MCP Error]', error);
58 | process.on('SIGINT', async () => {
59 | await this.server.close();
60 | process.exit(0);
61 | });
62 | }
63 |
64 | private setupToolHandlers() {
65 | this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
66 | tools: [
67 | {
68 | name: 'serve',
69 | description: 'Start Ollama server',
70 | inputSchema: {
71 | type: 'object',
72 | properties: {},
73 | additionalProperties: false,
74 | },
75 | },
76 | {
77 | name: 'create',
78 | description: 'Create a model from a Modelfile',
79 | inputSchema: {
80 | type: 'object',
81 | properties: {
82 | name: {
83 | type: 'string',
84 | description: 'Name for the model',
85 | },
86 | modelfile: {
87 | type: 'string',
88 | description: 'Path to Modelfile',
89 | },
90 | },
91 | required: ['name', 'modelfile'],
92 | additionalProperties: false,
93 | },
94 | },
95 | {
96 | name: 'show',
97 | description: 'Show information for a model',
98 | inputSchema: {
99 | type: 'object',
100 | properties: {
101 | name: {
102 | type: 'string',
103 | description: 'Name of the model',
104 | },
105 | },
106 | required: ['name'],
107 | additionalProperties: false,
108 | },
109 | },
110 | {
111 | name: 'run',
112 | description: 'Run a model',
113 | inputSchema: {
114 | type: 'object',
115 | properties: {
116 | name: {
117 | type: 'string',
118 | description: 'Name of the model',
119 | },
120 | prompt: {
121 | type: 'string',
122 | description: 'Prompt to send to the model',
123 | },
124 | timeout: {
125 | type: 'number',
126 | description: 'Timeout in milliseconds (default: 60000)',
127 | minimum: 1000,
128 | },
129 | },
130 | required: ['name', 'prompt'],
131 | additionalProperties: false,
132 | },
133 | },
134 | {
135 | name: 'pull',
136 | description: 'Pull a model from a registry',
137 | inputSchema: {
138 | type: 'object',
139 | properties: {
140 | name: {
141 | type: 'string',
142 | description: 'Name of the model to pull',
143 | },
144 | },
145 | required: ['name'],
146 | additionalProperties: false,
147 | },
148 | },
149 | {
150 | name: 'push',
151 | description: 'Push a model to a registry',
152 | inputSchema: {
153 | type: 'object',
154 | properties: {
155 | name: {
156 | type: 'string',
157 | description: 'Name of the model to push',
158 | },
159 | },
160 | required: ['name'],
161 | additionalProperties: false,
162 | },
163 | },
164 | {
165 | name: 'list',
166 | description: 'List models',
167 | inputSchema: {
168 | type: 'object',
169 | properties: {},
170 | additionalProperties: false,
171 | },
172 | },
173 | {
174 | name: 'cp',
175 | description: 'Copy a model',
176 | inputSchema: {
177 | type: 'object',
178 | properties: {
179 | source: {
180 | type: 'string',
181 | description: 'Source model name',
182 | },
183 | destination: {
184 | type: 'string',
185 | description: 'Destination model name',
186 | },
187 | },
188 | required: ['source', 'destination'],
189 | additionalProperties: false,
190 | },
191 | },
192 | {
193 | name: 'rm',
194 | description: 'Remove a model',
195 | inputSchema: {
196 | type: 'object',
197 | properties: {
198 | name: {
199 | type: 'string',
200 | description: 'Name of the model to remove',
201 | },
202 | },
203 | required: ['name'],
204 | additionalProperties: false,
205 | },
206 | },
207 | {
208 | name: 'chat_completion',
209 | description: 'OpenAI-compatible chat completion API',
210 | inputSchema: {
211 | type: 'object',
212 | properties: {
213 | model: {
214 | type: 'string',
215 | description: 'Name of the Ollama model to use',
216 | },
217 | messages: {
218 | type: 'array',
219 | items: {
220 | type: 'object',
221 | properties: {
222 | role: {
223 | type: 'string',
224 | enum: ['system', 'user', 'assistant'],
225 | },
226 | content: {
227 | type: 'string',
228 | },
229 | },
230 | required: ['role', 'content'],
231 | },
232 | description: 'Array of messages in the conversation',
233 | },
234 | temperature: {
235 | type: 'number',
236 | description: 'Sampling temperature (0-2)',
237 | minimum: 0,
238 | maximum: 2,
239 | },
240 | timeout: {
241 | type: 'number',
242 | description: 'Timeout in milliseconds (default: 60000)',
243 | minimum: 1000,
244 | },
245 | },
246 | required: ['model', 'messages'],
247 | additionalProperties: false,
248 | },
249 | },
250 | ],
251 | }));
252 |
253 | this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
254 | try {
255 | switch (request.params.name) {
256 | case 'serve':
257 | return await this.handleServe();
258 | case 'create':
259 | return await this.handleCreate(request.params.arguments);
260 | case 'show':
261 | return await this.handleShow(request.params.arguments);
262 | case 'run':
263 | return await this.handleRun(request.params.arguments);
264 | case 'pull':
265 | return await this.handlePull(request.params.arguments);
266 | case 'push':
267 | return await this.handlePush(request.params.arguments);
268 | case 'list':
269 | return await this.handleList();
270 | case 'cp':
271 | return await this.handleCopy(request.params.arguments);
272 | case 'rm':
273 | return await this.handleRemove(request.params.arguments);
274 | case 'chat_completion':
275 | return await this.handleChatCompletion(request.params.arguments);
276 | default:
277 | throw new McpError(
278 | ErrorCode.MethodNotFound,
279 | `Unknown tool: ${request.params.name}`
280 | );
281 | }
282 | } catch (error) {
283 | if (error instanceof McpError) throw error;
284 | throw new McpError(
285 | ErrorCode.InternalError,
286 | `Error executing ${request.params.name}: ${formatError(error)}`
287 | );
288 | }
289 | });
290 | }
291 |
292 |   private async handleServe() {
293 |     try {
294 |       exec('ollama serve').unref(); // 'ollama serve' runs until killed, so launch it in the background instead of awaiting exit
295 |       return {
296 |         content: [
297 |           {
298 |             type: 'text',
299 |             text: 'Ollama server starting in the background',
300 |           },
301 |         ],
302 |       };
303 |     } catch (error) {
304 |       throw new McpError(ErrorCode.InternalError, `Failed to start Ollama server: ${formatError(error)}`);
305 |     }
306 |   }
307 |
308 | private async handleCreate(args: any) {
309 | try {
310 | const { stdout, stderr } = await execAsync(`ollama create ${args.name} -f ${args.modelfile}`);
311 | return {
312 | content: [
313 | {
314 | type: 'text',
315 | text: stdout || stderr,
316 | },
317 | ],
318 | };
319 | } catch (error) {
320 | throw new McpError(ErrorCode.InternalError, `Failed to create model: ${formatError(error)}`);
321 | }
322 | }
323 |
324 | private async handleShow(args: any) {
325 | try {
326 | const { stdout, stderr } = await execAsync(`ollama show ${args.name}`);
327 | return {
328 | content: [
329 | {
330 | type: 'text',
331 | text: stdout || stderr,
332 | },
333 | ],
334 | };
335 | } catch (error) {
336 | throw new McpError(ErrorCode.InternalError, `Failed to show model info: ${formatError(error)}`);
337 | }
338 | }
339 |
340 | private async handleRun(args: any) {
341 | try {
342 |       // Request a streamed (newline-delimited JSON) response from the Ollama generate API
343 | const response = await axios.post(
344 | `${OLLAMA_HOST}/api/generate`,
345 | {
346 | model: args.name,
347 | prompt: args.prompt,
348 | stream: true,
349 | },
350 | {
351 | timeout: args.timeout || DEFAULT_TIMEOUT,
352 | responseType: 'stream'
353 | }
354 | );
355 |
356 |       // MCP tool results are text content blocks, so accumulate the NDJSON stream into one string
357 |       let buffer = '';
358 |       let output = '';
359 |       for await (const chunk of response.data) {
360 |         buffer += chunk.toString();
361 |         const lines = buffer.split('\n');
362 |         buffer = lines.pop() ?? '';
363 |         for (const line of lines) {
364 |           if (!line.trim()) continue;
365 |           output += JSON.parse(line).response ?? '';
366 |         }
367 |       }
368 |       if (buffer.trim()) {
369 |         output += JSON.parse(buffer).response ?? '';
370 |       }
371 | 
372 |       return {
373 |         content: [
374 |           {
375 |             type: 'text',
376 |             text: output,
377 |           },
378 |         ],
379 |       };
380 | } catch (error) {
381 | if (axios.isAxiosError(error)) {
382 | throw new McpError(
383 | ErrorCode.InternalError,
384 | `Ollama API error: ${error.response?.data?.error || error.message}`
385 | );
386 | }
387 | throw new McpError(ErrorCode.InternalError, `Failed to run model: ${formatError(error)}`);
388 | }
389 | }
390 |
391 | private async handlePull(args: any) {
392 | try {
393 | const { stdout, stderr } = await execAsync(`ollama pull ${args.name}`);
394 | return {
395 | content: [
396 | {
397 | type: 'text',
398 | text: stdout || stderr,
399 | },
400 | ],
401 | };
402 | } catch (error) {
403 | throw new McpError(ErrorCode.InternalError, `Failed to pull model: ${formatError(error)}`);
404 | }
405 | }
406 |
407 | private async handlePush(args: any) {
408 | try {
409 | const { stdout, stderr } = await execAsync(`ollama push ${args.name}`);
410 | return {
411 | content: [
412 | {
413 | type: 'text',
414 | text: stdout || stderr,
415 | },
416 | ],
417 | };
418 | } catch (error) {
419 | throw new McpError(ErrorCode.InternalError, `Failed to push model: ${formatError(error)}`);
420 | }
421 | }
422 |
423 | private async handleList() {
424 | try {
425 | const { stdout, stderr } = await execAsync('ollama list');
426 | return {
427 | content: [
428 | {
429 | type: 'text',
430 | text: stdout || stderr,
431 | },
432 | ],
433 | };
434 | } catch (error) {
435 | throw new McpError(ErrorCode.InternalError, `Failed to list models: ${formatError(error)}`);
436 | }
437 | }
438 |
439 | private async handleCopy(args: any) {
440 | try {
441 | const { stdout, stderr } = await execAsync(`ollama cp ${args.source} ${args.destination}`);
442 | return {
443 | content: [
444 | {
445 | type: 'text',
446 | text: stdout || stderr,
447 | },
448 | ],
449 | };
450 | } catch (error) {
451 | throw new McpError(ErrorCode.InternalError, `Failed to copy model: ${formatError(error)}`);
452 | }
453 | }
454 |
455 | private async handleRemove(args: any) {
456 | try {
457 | const { stdout, stderr } = await execAsync(`ollama rm ${args.name}`);
458 | return {
459 | content: [
460 | {
461 | type: 'text',
462 | text: stdout || stderr,
463 | },
464 | ],
465 | };
466 | } catch (error) {
467 | throw new McpError(ErrorCode.InternalError, `Failed to remove model: ${formatError(error)}`);
468 | }
469 | }
470 |
471 | private async handleChatCompletion(args: any) {
472 | try {
473 | // Convert chat messages to a single prompt
474 | const prompt = args.messages
475 | .map((msg: any) => {
476 | switch (msg.role) {
477 | case 'system':
478 | return `System: ${msg.content}\n`;
479 | case 'user':
480 | return `User: ${msg.content}\n`;
481 | case 'assistant':
482 | return `Assistant: ${msg.content}\n`;
483 | default:
484 | return '';
485 | }
486 | })
487 | .join('');
488 |
489 | // Make request to Ollama API with configurable timeout and raw mode
490 | const response = await axios.post<OllamaGenerateResponse>(
491 | `${OLLAMA_HOST}/api/generate`,
492 | {
493 | model: args.model,
494 | prompt,
495 | stream: false,
496 |           options: { temperature: args.temperature }, // Ollama reads sampling parameters from the "options" object
497 | raw: true, // Add raw mode for more direct responses
498 | },
499 | {
500 | timeout: args.timeout || DEFAULT_TIMEOUT,
501 | }
502 | );
503 |
504 | return {
505 | content: [
506 | {
507 | type: 'text',
508 | text: JSON.stringify({
509 | id: 'chatcmpl-' + Date.now(),
510 | object: 'chat.completion',
511 | created: Math.floor(Date.now() / 1000),
512 | model: args.model,
513 | choices: [
514 | {
515 | index: 0,
516 | message: {
517 | role: 'assistant',
518 | content: response.data.response,
519 | },
520 | finish_reason: 'stop',
521 | },
522 | ],
523 | }, null, 2),
524 | },
525 | ],
526 | };
527 | } catch (error) {
528 | if (axios.isAxiosError(error)) {
529 | throw new McpError(
530 | ErrorCode.InternalError,
531 | `Ollama API error: ${error.response?.data?.error || error.message}`
532 | );
533 | }
534 | throw new McpError(ErrorCode.InternalError, `Unexpected error: ${formatError(error)}`);
535 | }
536 | }
537 |
538 |   async run() {
539 |     // Connect the stdio transport (the primary transport for MCP clients)
540 |     const stdioTransport = new StdioServerTransport();
541 |     await this.server.connect(stdioTransport);
542 | 
543 |     // Also expose an SSE endpoint: GET /sse opens the event stream, clients POST messages to /message
544 |     const httpServer = http.createServer();
545 |     let sseTransport: SSEServerTransport | undefined;
546 |     httpServer.on('request', async (req: import('http').IncomingMessage, res: import('http').ServerResponse) => {
547 |       if (req.method === 'GET' && req.url === '/sse') {
548 |         sseTransport = new SSEServerTransport('/message', res);
549 |         await this.server.connect(sseTransport);
550 |       } else if (req.method === 'POST' && req.url?.startsWith('/message')) {
551 |         await sseTransport?.handlePostMessage(req, res);
552 |       } else {
553 |         res.writeHead(404).end();
554 |       }
555 |     });
556 | 
557 |     httpServer.listen(0, () => {
558 |       const address = httpServer.address() as AddressInfo;
559 |       console.error(`Ollama MCP server running on stdio and SSE (http://localhost:${address.port}/sse)`);
560 |     });
561 |   }
562 | }
563 |
564 | const server = new OllamaServer();
565 | server.run().catch(console.error);
566 |
```
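One way to exercise the server end-to-end is the MCP Inspector wired up in `package.json` (`pnpm run inspector`). For a scripted check, a minimal stdio client along the following lines should also work. This is a sketch that is not part of the repository, and it assumes the 0.6.x SDK's `Client`, `StdioClientTransport`, and generic `request` APIs behave as documented.

```typescript
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
import {
  CallToolResultSchema,
  ListToolsResultSchema,
} from '@modelcontextprotocol/sdk/types.js';

async function main() {
  // Launch the built server as a child process and talk to it over stdio
  const transport = new StdioClientTransport({
    command: 'node',
    args: ['build/index.js'],
  });
  const client = new Client(
    { name: 'ollama-mcp-smoke-test', version: '0.0.1' },
    { capabilities: {} }
  );
  await client.connect(transport);

  // Enumerate the tools the server advertises
  const tools = await client.request({ method: 'tools/list' }, ListToolsResultSchema);
  console.log(tools.tools.map((t) => t.name));

  // Call the 'list' tool, which shells out to `ollama list`
  const result = await client.request(
    { method: 'tools/call', params: { name: 'list', arguments: {} } },
    CallToolResultSchema
  );
  console.log(result.content);

  await client.close();
}

main().catch(console.error);
```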