# Directory Structure

```
├── .gitignore
├── .python-version
├── main.py
├── pyproject.toml
├── README.md
├── tts-mcp.py
└── uv.lock
```

# Files

--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------

```
3.10
```

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
# Python-generated files
__pycache__/
*.py[oc]
build/
dist/
wheels/
*.egg-info

# Virtual environments
.venv
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

````markdown
# Kokoro TTS MCP Server

A Model Context Protocol (MCP) server that provides text-to-speech capabilities using the Kokoro TTS engine. This server exposes TTS functionality through MCP tools, making it easy to integrate speech synthesis into your applications.

## Prerequisites

- Python 3.10 or higher
- `uv` package manager

## Installation

1. Install the `uv` package manager:

```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
```

2. Clone this repository and install dependencies:

```bash
uv venv
source .venv/bin/activate  # On Windows, use: .venv\Scripts\activate
uv pip install .
```

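3. Optionally, start the server directly with `uv run tts-mcp.py` to verify the installation; MCP clients will normally launch it for you using the configuration shown below.
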
## Features

- Text-to-speech synthesis with customizable voices
- Adjustable speech speed
- Support for saving audio to files or direct playback
- Cross-platform audio playback support (Windows, macOS, Linux)

## Usage

The server provides a single MCP tool `generate_speech` with the following parameters:

- `text` (required): The text to convert to speech
- `voice` (optional): Voice to use for synthesis (default: "af_heart")
- `speed` (optional): Speech speed multiplier (default: 1.0)
- `save_path` (optional): Directory to save audio files
- `play_audio` (optional): Whether to play the audio immediately (default: False)

### Example Usage

```python
from mcp.client import Client

async with Client() as client:
    await client.connect("kokoro-tts")

    # Generate and play speech
    result = await client.call_tool(
        "generate_speech",
        {
            "text": "Hello, world!",
            "voice": "af_heart",
            "speed": 1.0,
            "play_audio": True
        }
    )
```

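The snippet above assumes a high-level `Client` helper; depending on the version of the `mcp` package you have installed, you may need the SDK's stdio client instead. The following is an illustrative sketch that launches the server with `uv run tts-mcp.py`, the same command used in the MCP configuration below:

```python
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client


async def main():
    # Launch the server as a stdio subprocess, the same way an MCP host would
    server = StdioServerParameters(command="uv", args=["run", "tts-mcp.py"])
    async with stdio_client(server) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.call_tool(
                "generate_speech",
                {"text": "Hello, world!", "voice": "af_heart", "play_audio": True},
            )
            print(result)


asyncio.run(main())
```
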
## Dependencies

- kokoro >= 0.8.4
- mcp[cli] >= 1.3.0
- soundfile >= 0.13.1

## Platform Support

Audio playback is supported on:
- Windows (using `start`)
- macOS (using `afplay`)
- Linux (using `aplay`)

## MCP Configuration

Add the following configuration to your MCP settings file, adjusting the `command` path and the `--directory` argument to point at your local `uv` binary and this repository:

```json
{
  "mcpServers": {
    "kokoro-tts": {
      "command": "/Users/giannisan/pinokio/bin/miniconda/bin/uv",
      "args": [
        "--directory",
        "/Users/giannisan/Documents/Cline/MCP/kokoro-tts-mcp",
        "run",
        "tts-mcp.py"
      ]
    }
  }
}
```

## License

[Add your license information here]
````

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------

```python
def main():
    print("Hello from kokoro-tts-mcp!")


if __name__ == "__main__":
    main()
```

--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------

```toml
[project]
name = "kokoro-tts-mcp"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
    "kokoro>=0.8.4",
    "mcp[cli]>=1.3.0",
    "soundfile>=0.13.1",
]
```

--------------------------------------------------------------------------------
/tts-mcp.py:
--------------------------------------------------------------------------------

```python
import sys
import os
import logging
import subprocess
import tempfile
from typing import List, Optional
import torch
import soundfile as sf
from kokoro import KPipeline
from mcp.server.fastmcp import FastMCP
from pathlib import Path

# Disable ALL logging so nothing pollutes the stdio transport
logging.disable(logging.CRITICAL)
logging.getLogger().setLevel(logging.CRITICAL)
logging.captureWarnings(True)

# Initialize the MCP server and the Kokoro pipeline
mcp = FastMCP("kokoro-tts")
pipeline = KPipeline(lang_code='a')

def _play_audio(path: Path):
    """Best-effort, silent audio playback via the platform's default player."""
    try:
        if sys.platform == "win32":
            subprocess.call(["start", str(path)], shell=True)
        elif sys.platform == "darwin":
            subprocess.call(["afplay", str(path)])
        else:
            subprocess.call(["aplay", str(path)])
    except Exception:
        pass

@mcp.tool()
async def generate_speech(
    text: str,
    voice: str = "af_heart",
    speed: float = 1.0,
    save_path: Optional[str] = None,
    play_audio: bool = False
) -> List[dict]:
    """Synthesize `text` with Kokoro; optionally save segments and/or play them."""
    results = []

    # If `voice` is a path to a saved tensor, load it; otherwise treat it as a voice name
    voice_tensor = None
    if isinstance(voice, str) and Path(voice).exists():
        try:
            voice_tensor = torch.load(voice, weights_only=True)
        except Exception as e:
            raise ValueError("Invalid voice tensor") from e

    if save_path:
        save_path = Path(save_path)
        save_path.mkdir(parents=True, exist_ok=True)

    try:
        generator = pipeline(
            text,
            voice=voice_tensor if voice_tensor is not None else voice,
            speed=speed,
            split_pattern=r'\n+'
        )
    except Exception as e:
        raise RuntimeError("TTS failed") from e

    with tempfile.TemporaryDirectory() as tmp_dir:
        for i, (graphemes, _, audio) in enumerate(generator):
            audio_numpy = audio.cpu().numpy()

            # Persist each segment as a 24 kHz WAV file when a save directory is given
            if save_path:
                sf.write(save_path / f'segment_{i}.wav', audio_numpy, 24000)

            # Write to a temporary file and play it immediately if requested
            if play_audio:
                temp_path = Path(tmp_dir) / f'segment_{i}.wav'
                sf.write(temp_path, audio_numpy, 24000)
                _play_audio(temp_path)

            results.append({'text': graphemes})

    return results

if __name__ == "__main__":
    try:
        mcp.run(transport=os.getenv("MCP_TRANSPORT", "stdio"))
    except Exception:
        sys.exit(1)
```
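
For a quick local check of the synthesis path without going through MCP, the Kokoro pipeline can be exercised directly. This is a minimal sketch based on the same calls `tts-mcp.py` makes above; the output directory name is arbitrary:

```python
from pathlib import Path

import soundfile as sf
from kokoro import KPipeline

# Same pipeline configuration as tts-mcp.py
pipeline = KPipeline(lang_code='a')

out_dir = Path("smoke-test-output")
out_dir.mkdir(exist_ok=True)

# Kokoro yields (graphemes, phonemes, audio) per segment; audio is a 24 kHz tensor
for i, (graphemes, _, audio) in enumerate(
    pipeline("Hello from Kokoro.", voice="af_heart", speed=1.0, split_pattern=r'\n+')
):
    sf.write(out_dir / f"segment_{i}.wav", audio.cpu().numpy(), 24000)
    print(f"segment {i}: {graphemes!r}")
```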