phialsbasement/zonos-tts-mcp # codebase.md

# Directory Structure

```
├── .gitignore
├── Dockerfile
├── package.json
├── README.md
├── smithery.yaml
├── src
│   └── server.ts
└── tsconfig.json
```

# Files

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
1 | node_modules/
2 | dist/
3 | .env
4 | *.log
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
 1 | [![MseeP.ai Security Assessment Badge](https://mseep.net/pr/phialsbasement-zonos-tts-mcp-badge.png)](https://mseep.ai/app/phialsbasement-zonos-tts-mcp)
 2 | 
 3 | # Zonos MCP Integration
 4 | [![smithery badge](https://smithery.ai/badge/@PhialsBasement/zonos-tts-mcp)](https://smithery.ai/server/@PhialsBasement/zonos-tts-mcp)
 5 | 
 6 | A Model Context Protocol integration for Zonos TTS, allowing Claude to generate speech directly.
 7 | 
 8 | ## Setup
 9 | 
10 | ### Installing via Smithery
11 | 
12 | To install Zonos TTS Integration for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@PhialsBasement/zonos-tts-mcp):
13 | 
14 | ```bash
15 | npx -y @smithery/cli install @PhialsBasement/zonos-tts-mcp --client claude
16 | ```
17 | 
18 | ### Manual installation
19 | 
20 | 1. Make sure you have Zonos running with our API implementation ([PhialsBasement/zonos-api](https://github.com/PhialsBasement/Zonos-API))
21 | 
22 | 2. Install dependencies:
23 | ```bash
 24 | npm install
25 | ```
26 | 
27 | 3. Configure PulseAudio access:
28 | ```bash
29 | # Your pulse audio should be properly configured for audio playback
30 | # The MCP server will automatically try to connect to your pulse server
31 | ```
32 | 
33 | 4. Build the MCP server:
34 | ```bash
35 | npm run build
36 | # This will create the dist folder with the compiled server
37 | ```
38 | 
39 | 5. Add to Claude's config file:
40 | Edit your Claude config file (usually in `~/.config/claude/config.json`) and add this to the `mcpServers` section:
41 | 
42 | ```json
43 | "zonos-tts": {
44 |   "command": "node",
45 |   "args": [
46 |     "/path/to/your/zonos-mcp/dist/server.js"
47 |   ]
48 | }
49 | ```
50 | 
51 | Replace `/path/to/your/zonos-mcp` with the actual path where you installed the MCP server.
52 | 
53 | ## Using with Claude
54 | 
55 | Once configured, Claude automatically knows how to use the `speak_response` tool:
56 | 
57 | ```python
58 | speak_response(
59 |     text="Your text here",
60 |     language="en-us",  # optional, defaults to en-us
 61 |     emotion="happy"    # optional, defaults to "neutral"; one of "neutral", "happy", "sad", "angry"
62 | )
63 | ```
64 | 
65 | ## Features
66 | 
67 | - Text-to-speech through Claude
68 | - Multiple emotions support
69 | - Multi-language support
70 | - Proper audio playback through PulseAudio
71 | 
72 | ## Requirements
73 | 
74 | - Node.js
75 | - PulseAudio setup
76 | - Running instance of Zonos API (PhialsBasement/zonos-api)
77 | - Working audio output device
78 | 
79 | ## Notes
80 | 
 81 | - Make sure the Zonos API server is running and reachable at `http://localhost:8000`; this MCP server itself is started by Claude from the config entry above
82 | - Audio playback requires proper PulseAudio configuration
83 | 
```

--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2022",
 4 |     "module": "ESNext",
 5 |     "moduleResolution": "node",
 6 |     "esModuleInterop": true,
 7 |     "strict": true,
 8 |     "skipLibCheck": true,
 9 |     "forceConsistentCasingInFileNames": true,
10 |     "outDir": "./dist",
11 |     "rootDir": "./src"
12 |   },
13 |   "include": ["src/**/*"],
14 |   "exclude": ["node_modules", "dist"]
15 | }
```

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "name": "mcp-tts",
 3 |   "version": "1.0.0",
 4 |   "type": "module",
 5 |   "scripts": {
 6 |     "build": "tsc",
 7 |     "start": "node dist/server.js",
 8 |     "dev": "tsc --watch"
 9 |   },
10 |   "dependencies": {
11 |     "@gradio/client": "^0.12.1",
12 |     "@modelcontextprotocol/sdk": "^1.0.0",
13 |     "axios": "^1.7.9",
14 |     "zod": "^3.22.4"
15 |   },
16 |   "devDependencies": {
17 |     "@types/node": "^20.0.0",
18 |     "typescript": "^5.0.0"
19 |   }
20 | }
21 | 
```

--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------

```yaml
 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml
 2 | 
 3 | startCommand:
 4 |   type: stdio
 5 |   configSchema:
 6 |     # JSON Schema defining the configuration options for the MCP.
 7 |     type: object
 8 |     required: []
 9 |     properties: {}
10 |   commandFunction:
11 |     # A function that produces the CLI command to start the MCP on stdio.
12 |     |-
13 |     (config) => ({ command: 'node', args: ['dist/server.js'] })
14 | 
```

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------

```dockerfile
 1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile
 2 | # Use the official Node.js image with npm
 3 | FROM node:22.12-alpine AS builder
 4 | 
 5 | # Set working directory
 6 | WORKDIR /app
 7 | 
  8 | # Copy package.json and tsconfig.json to the working directory
 9 | COPY package.json tsconfig.json ./
10 | 
11 | # Install dependencies
12 | RUN npm install --ignore-scripts
13 | 
14 | # Copy the TypeScript source files
15 | COPY src/ ./src
16 | 
17 | # Compile TypeScript to JavaScript
18 | RUN npx tsc
19 | 
20 | # Use a smaller Node.js image for the final build
21 | FROM node:22-alpine AS release
22 | 
23 | # Set the working directory
24 | WORKDIR /app
25 | 
26 | # Copy the compiled files and node_modules from the builder stage
27 | COPY --from=builder /app/dist /app/dist
28 | COPY --from=builder /app/node_modules /app/node_modules
29 | COPY --from=builder /app/package.json /app/package.json
30 | 
31 | # Set environment variables required for the server
32 | ENV NODE_ENV=production
33 | 
 34 | # The server speaks MCP over stdio and opens no listening port; the EXPOSE below is informational only
35 | EXPOSE 3000
36 | 
37 | # Run the server
38 | ENTRYPOINT ["node", "dist/server.js"]
39 | 
```

--------------------------------------------------------------------------------
/src/server.ts:
--------------------------------------------------------------------------------

```typescript
  1 | // Minimal `window.location` stub for libraries that expect a browser global under Node.js.
  2 | // NOTE: static imports are hoisted in ES modules, so the imports below are evaluated BEFORE
  3 | // this block runs; only code that reads `window` lazily (at call time) will see the stub.
  4 | // NOTE(review): presumably a leftover from @gradio/client usage - confirm before removing.
  5 | if (typeof global.window === "undefined") {
  6 |     (global as any).window = {
  7 |         location: {
  8 |             protocol: "http:", hostname: "localhost", port: "8000", href: "http://localhost:8000/",
  9 |         },
 10 |     };
 11 | }
 12 | 
 13 | import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 14 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 15 | import { z } from "zod";
 16 | import { exec } from "child_process";
 17 | import { promisify } from "util";
 18 | import axios from 'axios';
 19 | 
 20 | const execAsync = promisify(exec); // Promise-returning wrapper around child_process.exec
 21 | const API_BASE_URL = (process.env.ZONOS_API_URL || 'http://localhost:8000').replace(/\/+$/, ''); // Zonos API root; ZONOS_API_URL overrides the default
 22 | 
 23 | /** Emotion presets accepted by the `speak_response` tool. */
 24 | type Emotion = "neutral" | "happy" | "sad" | "angry";
 25 | 
 26 | /** Weights sent as the `emotion` field of the speech request. */
 27 | interface EmotionParameters {
 28 |     happiness: number;
 29 |     sadness: number;
 30 |     disgust: number;
 31 |     fear: number;
 32 |     surprise: number;
 33 |     anger: number;
 34 |     other: number;
 35 |     neutral: number;
 36 | }
 37 | 
 38 | /** Arguments received by the `speak_response` tool handler. */
 39 | interface ZonosRequestParams {
 40 |     text: string;
 41 |     language: string;
 42 |     emotion: Emotion;
 43 | }
 44 | type EmotionMap = Record<Emotion, EmotionParameters>; // exactly one weight set per preset; a missing or misspelled key fails to compile
 45 | 
 46 | /**
 47 |  * MCP server exposing one tool, `speak_response`: it posts text to the Zonos API, writes the
 48 |  * returned WAV to a temporary file and plays it locally. All diagnostics go to stderr because
 49 |  * stdout carries the JSON-RPC stream of the stdio transport.
 50 |  */
 51 | class TTSServer {
 52 |     private mcp: McpServer;
 53 |     private readonly emotionMap: EmotionMap;
 54 | 
 55 |     constructor() {
 56 |         this.mcp = new McpServer({
 57 |             name: "TTS MCP Server",
 58 |             version: "1.0.0",
 59 |         });
 60 | 
 61 |         // Weight presets per emotion; the selected preset is sent as the request's `emotion` field.
 62 |         this.emotionMap = {
 63 |             neutral: {
 64 |                 happiness: 0.2, sadness: 0.2, anger: 0.2, disgust: 0.05,
 65 |                 fear: 0.05, surprise: 0.1, other: 0.1, neutral: 0.8,
 66 |             },
 67 |             happy: {
 68 |                 happiness: 1, sadness: 0.05, anger: 0.05, disgust: 0.05,
 69 |                 fear: 0.05, surprise: 0.2, other: 0.1, neutral: 0.2,
 70 |             },
 71 |             sad: {
 72 |                 happiness: 0.05, sadness: 1, anger: 0.05, disgust: 0.2,
 73 |                 fear: 0.2, surprise: 0.05, other: 0.1, neutral: 0.2,
 74 |             },
 75 |             angry: {
 76 |                 happiness: 0.05, sadness: 0.2, anger: 1, disgust: 0.4,
 77 |                 fear: 0.2, surprise: 0.2, other: 0.1, neutral: 0.1,
 78 |             },
 79 |         };
 80 | 
 81 |         this.setupTools();
 82 |     }
 83 | 
 84 |     /** Registers the `speak_response` tool (text, language, emotion) and its handler. */
 85 |     private setupTools(): void {
 86 |         this.mcp.tool(
 87 |             "speak_response",
 88 |             {
 89 |                 text: z.string(),
 90 |                 language: z.string().default("en-us"),
 91 |                 emotion: z.enum(["neutral", "happy", "sad", "angry"]).default("neutral"),
 92 |             },
 93 |             async ({ text, language, emotion }: ZonosRequestParams) => {
 94 |                 try {
 95 |                     // Log to stderr: anything printed on stdout would corrupt the MCP stdio stream.
 96 |                     console.error(`Converting to speech: "${text}" with ${emotion} emotion`);
 97 | 
 98 |                     // OpenAI-style speech endpoint; the WAV bytes come back as a binary body.
 99 |                     const response = await axios.post(`${API_BASE_URL}/v1/audio/speech`, {
100 |                         model: "Zyphra/Zonos-v0.1-transformer",
101 |                         input: text,
102 |                         language: language,
103 |                         emotion: this.emotionMap[emotion],
104 |                         speed: 1.0,
105 |                         response_format: "wav",
106 |                     }, { responseType: 'arraybuffer' });
107 | 
108 |                     const fs = await import('fs/promises');
109 |                     const { tmpdir } = await import('os');
110 |                     const { join } = await import('path');
111 | 
112 |                     // A private per-call directory under the OS temp dir: works on Windows too
113 |                     // (no hard-coded /tmp) and cannot collide with a concurrent call.
114 |                     const tempDir = await fs.mkdtemp(join(tmpdir(), 'tts_output_'));
115 |                     try {
116 |                         const tempAudioPath = join(tempDir, 'speech.wav');
117 |                         await fs.writeFile(tempAudioPath, response.data);
118 |                         await this.playAudio(tempAudioPath);
119 |                     } finally {
120 |                         // Best-effort cleanup that also runs when writing or playback fails.
121 |                         await fs.rm(tempDir, { recursive: true, force: true })
122 |                             .catch((e: unknown) => console.error("Temp cleanup failed:", e));
123 |                     }
124 | 
125 |                     return {
126 |                         content: [{ type: "text", text: `Successfully spoke: "${text}" with ${emotion} emotion` }],
127 |                     };
128 |                 } catch (error) {
129 |                     const errorMessage = error instanceof Error ? error.message : "Unknown error";
130 |                     console.error("TTS Error:", errorMessage);
131 |                     if (axios.isAxiosError(error) && error.response) {
132 |                         // The body arrives as bytes (responseType 'arraybuffer'); decode it for the log.
133 |                         const body = error.response.data;
134 |                         console.error("API Response:", Buffer.isBuffer(body) ? body.toString("utf8") : body);
135 |                     }
136 |                     throw new Error(`TTS failed: ${errorMessage}`);
137 |                 }
138 |             }
139 |         );
140 |     }
141 | 
142 |     /**
143 |      * Plays an audio file with the platform's command-line player and waits for it to finish.
144 |      * @param audioPath Path of the audio file to play.
145 |      * @throws Error when the platform is unsupported or the player process fails.
146 |      */
147 |     private async playAudio(audioPath: string): Promise<void> {
148 |         try {
149 |             console.error("Playing audio from:", audioPath);
150 | 
151 |             // execFile runs the player without a shell, so a path containing spaces or shell
152 |             // metacharacters is passed through verbatim instead of being re-parsed.
153 |             const { execFile } = await import('child_process');
154 |             const run = promisify(execFile);
155 | 
156 |             switch (process.platform) {
157 |                 case "darwin":
158 |                     await run("afplay", [audioPath]);
159 |                     break;
160 |                 case "linux": {
161 |                     // Default to the current user's PulseAudio socket and cookie, but keep any
162 |                     // PULSE_SERVER / PULSE_COOKIE the user has already exported.
163 |                     const runtimeDir = process.env.XDG_RUNTIME_DIR || `/run/user/${process.getuid?.() ?? 1000}`;
164 |                     const env: NodeJS.ProcessEnv = {
165 |                         ...process.env,
166 |                         PULSE_SERVER: process.env.PULSE_SERVER || `unix:${runtimeDir}/pulse/native`,
167 |                     };
168 |                     if (!env.PULSE_COOKIE && process.env.HOME) {
169 |                         env.PULSE_COOKIE = `${process.env.HOME}/.config/pulse/cookie`;
170 |                     }
171 |                     await run("paplay", [audioPath], { env });
172 |                     break;
173 |                 }
174 |                 case "win32": {
175 |                     // Double embedded single quotes so the path stays inside the PowerShell literal.
176 |                     const quoted = audioPath.replace(/'/g, "''");
177 |                     await run("powershell", ["-c", `(New-Object Media.SoundPlayer '${quoted}').PlaySync()`]);
178 |                     break;
179 |                 }
180 |                 default:
181 |                     throw new Error(`Unsupported platform: ${process.platform}`);
182 |             }
183 |         } catch (error) {
184 |             const errorMessage = error instanceof Error ? error.message : "Unknown error";
185 |             console.error("Audio playback error:", errorMessage);
186 |             throw new Error(`Audio playback failed: ${errorMessage}`);
187 |         }
188 |     }
189 | 
190 |     /** Connects the MCP server to a stdio transport. */
191 |     public async start(): Promise<void> {
192 |         const transport = new StdioServerTransport();
193 |         await this.mcp.connect(transport);
194 |     }
195 | }
196 | 
197 | // Entry point: build the server and attach it to stdio (top-level await in an ES module).
198 | await new TTSServer().start();
199 | 
```