# Directory Structure
```
├── .gitignore
├── .python-version
├── main.py
├── pyproject.toml
├── README.md
├── tts-mcp.py
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
```
1 | 3.10
2 |
```
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
1 | # Python-generated files
2 | __pycache__/
3 | *.py[oc]
4 | build/
5 | dist/
6 | wheels/
7 | *.egg-info
8 |
9 | # Virtual environments
10 | .venv
11 |
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
1 | # Kokoro TTS MCP Server
2 |
3 | A Model Context Protocol (MCP) server that provides text-to-speech capabilities using the Kokoro TTS engine. This server exposes TTS functionality through MCP tools, making it easy to integrate speech synthesis into your applications.
4 |
5 | ## Prerequisites
6 |
7 | - Python 3.10 or higher
8 | - `uv` package manager
9 |
10 | ## Installation
11 |
12 | 1. First, install the `uv` package manager:
13 |
14 | ```bash
15 | curl -LsSf https://astral.sh/uv/install.sh | sh
16 | ```
17 |
18 | 2. Clone this repository, then from its root directory create a virtual environment and install the dependencies:
19 |
20 | ```bash
21 | uv venv
22 | source .venv/bin/activate # On Windows, use: .venv\Scripts\activate
23 | uv pip install .
24 | ```
25 |
26 | ## Features
27 |
28 | - Text-to-speech synthesis with customizable voices
29 | - Adjustable speech speed
30 | - Support for saving audio to files or direct playback
31 | - Cross-platform audio playback support (Windows, macOS, Linux)
32 |
33 | ## Usage
34 |
35 | The server provides a single MCP tool `generate_speech` with the following parameters:
36 |
37 | - `text` (required): The text to convert to speech
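38 | - `voice` (optional): Voice to use for synthesis (default: "af_heart"). If the value is a path to an existing file, that file is loaded as a voice tensor instead of being treated as a voice name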
39 | - `speed` (optional): Speech speed multiplier (default: 1.0)
40 | - `save_path` (optional): Directory to save audio files in; it is created if it does not exist, and each segment is written as `segment_<index>.wav`
41 | - `play_audio` (optional): Whether to play the audio immediately (default: False)
42 |
43 | ### Example Usage
44 |
45 | ```python
46 | from mcp.client import Client
47 |
48 | async with Client() as client:
49 | await client.connect("kokoro-tts")
50 |
51 | # Generate and play speech
52 | result = await client.call_tool(
53 | "generate_speech",
54 | {
55 | "text": "Hello, world!",
56 | "voice": "af_heart",
57 | "speed": 1.0,
58 | "play_audio": True
59 | }
60 | )
61 | ```
62 |
63 | ## Dependencies
64 |
65 | - kokoro >= 0.8.4
66 | - mcp[cli] >= 1.3.0
67 | - soundfile >= 0.13.1
68 |
69 | ## Platform Support
70 |
71 | Audio playback is supported on:
72 | - Windows (using `start`)
73 | - macOS (using `afplay`)
74 | - Linux (using `aplay`)
75 |
76 | ## MCP Configuration
77 |
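78 | Add the following configuration to your MCP settings file, replacing the `command` path and the `--directory` path with the locations of `uv` and of this repository on your machine: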
79 |
80 | ```json
81 | {
82 | "mcpServers": {
83 | "kokoro-tts": {
84 | "command": "/Users/giannisan/pinokio/bin/miniconda/bin/uv",
85 | "args": [
86 | "--directory",
87 | "/Users/giannisan/Documents/Cline/MCP/kokoro-tts-mcp",
88 | "run",
89 | "tts-mcp.py"
90 | ]
91 | }
92 | }
93 | }
94 | ```
95 |
96 | ## License
97 |
98 | [Add your license information here]
99 |
```
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
```python
def main():
    """Print the project's greeting line to standard output."""
    greeting = "Hello from kokoro-tts-mcp!"
    print(greeting)


# Only greet when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
7 |
```
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
```toml
1 | [project]
2 | name = "kokoro-tts-mcp"
3 | version = "0.1.0"
4 | description = "Add your description here"
5 | readme = "README.md"
6 | requires-python = ">=3.10"
7 | dependencies = [
8 | "kokoro>=0.8.4",
9 | "mcp[cli]>=1.3.0",
10 | "soundfile>=0.13.1",
11 | ]
12 |
```
--------------------------------------------------------------------------------
/tts-mcp.py:
--------------------------------------------------------------------------------
```python
1 | import sys
2 | import os
3 | import logging
4 | import subprocess
5 | import tempfile
6 | from typing import List
7 | import torch
8 | import soundfile as sf
9 | from kokoro import KPipeline
10 | from mcp.server.fastmcp import FastMCP
11 | from pathlib import Path
12 |
# Disable ALL logging
# Suppress every log record at level CRITICAL and below, process-wide.
logging.disable(logging.CRITICAL)
# Additionally raise the root logger's own threshold to CRITICAL.
logging.getLogger().setLevel(logging.CRITICAL)
# Redirect messages issued through the `warnings` module into logging,
# where the settings above silence them as well.
logging.captureWarnings(True)

# Initialize components
# MCP server instance; tools are registered on it with the @mcp.tool() decorator.
mcp = FastMCP("kokoro-tts")
# Single Kokoro pipeline, built once at import time and shared by every call.
# NOTE(review): lang_code='a' presumably selects American English - confirm
# against the Kokoro documentation.
pipeline = KPipeline(lang_code='a')
21 |
22 | def _play_audio(path: Path):
23 | """Silent audio playback"""
24 | try:
25 | if sys.platform == "win32":
26 | subprocess.call(["start", str(path)], shell=True)
27 | elif sys.platform == "darwin":
28 | subprocess.call(["afplay", str(path)])
29 | else:
30 | subprocess.call(["aplay", str(path)])
31 | except:
32 | pass
33 |
@mcp.tool()
async def generate_speech(
    text: str,
    voice: str = "af_heart",
    speed: float = 1.0,
    save_path: str | None = None,
    play_audio: bool = False
) -> List[dict]:
    """Synthesize speech for ``text`` and optionally save and/or play it.

    The text is handed to the module-level Kokoro pipeline, split on runs of
    newlines; every yielded segment is processed in order.

    Args:
        text: The text to convert to speech.
        voice: A voice name passed straight to the pipeline, or a path to an
            existing file, in which case the file is loaded with
            ``torch.load(..., weights_only=True)`` and the loaded object is
            passed to the pipeline instead.
        speed: Speech speed multiplier forwarded to the pipeline.
        save_path: Directory in which each segment is written as
            ``segment_<index>.wav``. Created (with parents) if missing.
            Nothing is saved when empty or ``None``.
        play_audio: When true, each segment is written to a temporary
            directory and played through ``_play_audio`` before the next
            segment is processed. The temporary directory is removed when
            the function returns.

    Returns:
        One ``{'text': <segment text>}`` dict per generated segment.

    Raises:
        ValueError: ``voice`` names an existing file that cannot be loaded.
        RuntimeError: The pipeline call itself fails.
    """
    # Sample rate handed to soundfile for every segment that is written.
    sample_rate = 24000

    voice_source = voice
    if isinstance(voice, str) and Path(voice).exists():
        try:
            voice_source = torch.load(voice, weights_only=True)
        except Exception as exc:
            raise ValueError("Invalid voice tensor") from exc
    # NOTE: the loaded object is selected explicitly above rather than with
    # `loaded or voice`; taking the truth value of a multi-element tensor
    # raises instead of choosing it.

    save_dir = None
    if save_path:
        save_dir = Path(save_path)
        save_dir.mkdir(parents=True, exist_ok=True)

    try:
        generator = pipeline(text, voice=voice_source, speed=speed, split_pattern=r'\n+')
    except Exception as exc:
        raise RuntimeError("TTS failed") from exc
    # NOTE(review): if the pipeline call returns a lazy generator, synthesis
    # errors surface inside the loop below rather than in the try above -
    # confirm and widen the handler if a uniform RuntimeError is wanted.

    results = []
    with tempfile.TemporaryDirectory() as tmp_dir:
        for i, (graphemes, _, audio) in enumerate(generator):
            audio_numpy = audio.cpu().numpy()

            if save_dir is not None:
                sf.write(save_dir / f'segment_{i}.wav', audio_numpy, sample_rate)

            if play_audio:
                # Playback needs a file on disk even when nothing is saved.
                temp_path = Path(tmp_dir) / f'segment_{i}.wav'
                sf.write(temp_path, audio_numpy, sample_rate)
                _play_audio(temp_path)

            results.append({'text': graphemes})

    return results
75 |
# Script entry point: run the MCP server on the transport named by the
# MCP_TRANSPORT environment variable, defaulting to "stdio".
if __name__ == "__main__":
    try:
        mcp.run(transport=os.getenv("MCP_TRANSPORT", "stdio"))
    except KeyboardInterrupt:
        # Interrupted by the user: exit quietly with the same failure status
        # as before, without a traceback.
        sys.exit(1)
    except Exception as exc:
        # Logging is disabled in this module, so report the reason on stderr
        # (stdout may be carrying the protocol) before exiting with failure.
        print(f"kokoro-tts server failed: {exc}", file=sys.stderr)
        sys.exit(1)
    # SystemExit is intentionally not caught: a bare `except` would turn
    # every exit request, including a clean one, into status 1.
81 |
```