#
tokens: 2697/50000 7/7 files
lines: off (toggle) GitHub
raw markdown copy
# Directory Structure

```
├── .gitignore
├── Dockerfile
├── package.json
├── README.md
├── smithery.yaml
├── src
│   └── server.ts
└── tsconfig.json
```

# Files

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
node_modules/
dist/
.env
*.log
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
[![MseeP.ai Security Assessment Badge](https://mseep.net/pr/phialsbasement-zonos-tts-mcp-badge.png)](https://mseep.ai/app/phialsbasement-zonos-tts-mcp)

# Zonos MCP Integration
[![smithery badge](https://smithery.ai/badge/@PhialsBasement/zonos-tts-mcp)](https://smithery.ai/server/@PhialsBasement/zonos-tts-mcp)

A Model Context Protocol integration for Zonos TTS, allowing Claude to generate speech directly.

## Setup

### Installing via Smithery

To install Zonos TTS Integration for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@PhialsBasement/zonos-tts-mcp):

```bash
npx -y @smithery/cli install @PhialsBasement/zonos-tts-mcp --client claude
```

### Manual installation

1. Make sure you have Zonos running with our API implementation ([PhialsBasement/zonos-api](https://github.com/PhialsBasement/Zonos-API))

2. Install dependencies:
```bash
npm install @modelcontextprotocol/sdk axios
```

3. Configure PulseAudio access:
```bash
# Your pulse audio should be properly configured for audio playback
# The MCP server will automatically try to connect to your pulse server
```

4. Build the MCP server:
```bash
npm run build
# This will create the dist folder with the compiled server
```

5. Add to Claude's config file:
Edit your Claude config file (usually in `~/.config/claude/config.json`) and add this to the `mcpServers` section:

```json
"zonos-tts": {
  "command": "node",
  "args": [
    "/path/to/your/zonos-mcp/dist/server.js"
  ]
}
```

Replace `/path/to/your/zonos-mcp` with the actual path where you installed the MCP server.

## Using with Claude

Once configured, Claude automatically knows how to use the `speak_response` tool:

```python
speak_response(
    text="Your text here",
    language="en-us",  # optional, defaults to en-us
    emotion="happy"    # optional: "neutral", "happy", "sad", "angry"
)
```

## Features

- Text-to-speech through Claude
- Multiple emotions support
- Multi-language support
- Proper audio playback through PulseAudio

## Requirements

- Node.js
- PulseAudio setup
- Running instance of Zonos API (PhialsBasement/zonos-api)
- Working audio output device

## Notes

- Make sure both the Zonos API server and this MCP server are running
- Audio playback requires proper PulseAudio configuration

```

--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------

```json
{
  "compilerOptions": {
    "target": "ES2022",
    "module": "ESNext",
    "moduleResolution": "node",
    "esModuleInterop": true,
    "strict": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true,
    "outDir": "./dist",
    "rootDir": "./src"
  },
  "include": ["src/**/*"],
  "exclude": ["node_modules", "dist"]
}
```

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------

```json
{
  "name": "mcp-tts",
  "version": "1.0.0",
  "type": "module",
  "scripts": {
    "build": "tsc",
    "start": "node dist/server.js",
    "dev": "tsc --watch"
  },
  "dependencies": {
    "@gradio/client": "^0.12.1",
    "@modelcontextprotocol/sdk": "^1.0.0",
    "axios": "^1.7.9",
    "zod": "^3.22.4"
  },
  "devDependencies": {
    "@types/node": "^20.0.0",
    "typescript": "^5.0.0"
  }
}

```

--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------

```yaml
# Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml

startCommand:
  type: stdio
  configSchema:
    # JSON Schema defining the configuration options for the MCP.
    type: object
    required: []
    properties: {}
  # A function that produces the CLI command to start the MCP on stdio.
  commandFunction: |-
    (config) => ({ command: 'node', args: ['dist/server.js'] })

```

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------

```dockerfile
# Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile
# Use the official Node.js image with npm
FROM node:22.12-alpine AS builder

# Set working directory
WORKDIR /app

# Copy package.json and tsconfig.json to the working directory
COPY package.json tsconfig.json ./

# Install dependencies
RUN npm install --ignore-scripts

# Copy the TypeScript source files
COPY src/ ./src

# Compile TypeScript to JavaScript
RUN npx tsc

# Use a smaller Node.js image for the final build
FROM node:22-alpine AS release

# Set the working directory
WORKDIR /app

# Copy the compiled files and node_modules from the builder stage
COPY --from=builder /app/dist /app/dist
COPY --from=builder /app/node_modules /app/node_modules
COPY --from=builder /app/package.json /app/package.json

# Set environment variables required for the server
ENV NODE_ENV=production

# Expose any required ports (replace 3000 with the actual port if different)
EXPOSE 3000

# Run the server
ENTRYPOINT ["node", "dist/server.js"]

```

--------------------------------------------------------------------------------
/src/server.ts:
--------------------------------------------------------------------------------

```typescript
// Install a stub 'window' global for Node.js before any other imports run,
// since some transitive dependencies expect a browser-like environment.
if (typeof global.window === "undefined") {
    const stubLocation = {
        protocol: "http:",
        hostname: "localhost",
        port: "8000",
        href: "http://localhost:8000/"
    };
    (global as any).window = { location: stubLocation };
}

import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
import { exec } from "child_process";
import { promisify } from "util";
import axios from 'axios';

// Promise-returning wrapper around child_process.exec for async/await use.
const execAsync = promisify(exec);
// Base URL of the locally running Zonos TTS API server.
const API_BASE_URL = 'http://localhost:8000';

// High-level emotion presets a caller can select by name.
type Emotion = "neutral" | "happy" | "sad" | "angry";

/**
 * Per-emotion weight vector in the shape the Zonos API's `emotion`
 * field expects. Values are relative weights, not required to sum to 1.
 */
interface EmotionParameters {
    happiness: number;
    sadness: number;
    disgust: number;
    fear: number;
    surprise: number;
    anger: number;
    other: number;
    neutral: number;
}

/** Arguments accepted by the `speak_response` tool handler. */
interface ZonosRequestParams {
    text: string;
    language: string;
    emotion: Emotion;
}

/** Lookup table from emotion name to its weight vector. */
interface EmotionMap {
    [key: string]: EmotionParameters;
}

/**
 * MCP server exposing a `speak_response` tool that converts text to speech
 * via a locally running Zonos TTS API and plays the resulting audio through
 * the host's native audio system.
 */
class TTSServer {
    private mcp: McpServer;
    // Maps a high-level emotion name to the weight vector the Zonos API expects.
    private readonly emotionMap: EmotionMap;

    constructor() {
        this.mcp = new McpServer({
            name: "TTS MCP Server",
            version: "1.0.0",
        });

        this.emotionMap = {
            neutral: {
                happiness: 0.2,
                sadness: 0.2,
                anger: 0.2,
                disgust: 0.05,
                fear: 0.05,
                surprise: 0.1,
                other: 0.1,
                neutral: 0.8,
            },
            happy: {
                happiness: 1,
                sadness: 0.05,
                anger: 0.05,
                disgust: 0.05,
                fear: 0.05,
                surprise: 0.2,
                other: 0.1,
                neutral: 0.2,
            },
            sad: {
                happiness: 0.05,
                sadness: 1,
                anger: 0.05,
                disgust: 0.2,
                fear: 0.2,
                surprise: 0.05,
                other: 0.1,
                neutral: 0.2,
            },
            angry: {
                happiness: 0.05,
                sadness: 0.2,
                anger: 1,
                disgust: 0.4,
                fear: 0.2,
                surprise: 0.2,
                other: 0.1,
                neutral: 0.1,
            },
        };

        this.setupTools();
    }

    /** Registers the `speak_response` tool on the MCP server. */
    private setupTools(): void {
        this.mcp.tool(
            "speak_response",
            {
                text: z.string(),
                language: z.string().default("en-us"),
                emotion: z.enum(["neutral", "happy", "sad", "angry"]).default("neutral"),
            },
            async ({ text, language, emotion }: ZonosRequestParams) => {
                try {
                    const emotionParams = this.emotionMap[emotion];
                    // stdout carries the MCP stdio JSON-RPC stream, so all
                    // diagnostics must go to stderr (console.error), never stdout.
                    console.error(`Converting to speech: "${text}" with ${emotion} emotion`);

                    // Use new OpenAI-style endpoint
                    const response = await axios.post(`${API_BASE_URL}/v1/audio/speech`, {
                        model: "Zyphra/Zonos-v0.1-transformer",
                        input: text,
                        language: language,
                        emotion: emotionParams,
                        speed: 1.0,
                        response_format: "wav"  // Using WAV for better compatibility
                    }, {
                        responseType: 'arraybuffer'
                    });

                    // Save the audio response to a temporary file
                    const tempAudioPath = `/tmp/tts_output_${Date.now()}.wav`;
                    const fs = await import('fs/promises');
                    await fs.writeFile(tempAudioPath, response.data);

                    try {
                        // Play the audio
                        await this.playAudio(tempAudioPath);
                    } finally {
                        // Always remove the temp file, even when playback fails;
                        // a failed unlink must not mask the original error.
                        await fs.unlink(tempAudioPath).catch(() => {});
                    }

                    return {
                        content: [
                            {
                                type: "text",
                                text: `Successfully spoke: "${text}" with ${emotion} emotion`,
                            },
                        ],
                    };
                } catch (error) {
                    const errorMessage = error instanceof Error ? error.message : "Unknown error";
                    console.error("TTS Error:", errorMessage);
                    if (axios.isAxiosError(error) && error.response) {
                        console.error("API Response:", error.response.data);
                    }
                    throw new Error(`TTS failed: ${errorMessage}`);
                }
            }
        );
    }

    /**
     * Plays a WAV file with the platform's native player: afplay on macOS,
     * paplay (PulseAudio) on Linux, PowerShell's SoundPlayer on Windows.
     * @param audioPath absolute path of the WAV file to play.
     * @throws Error when the platform is unsupported or playback fails.
     */
    private async playAudio(audioPath: string): Promise<void> {
        try {
            // stderr only — stdout belongs to the stdio transport.
            console.error("Playing audio from:", audioPath);

            switch (process.platform) {
                case "darwin":
                    // Quote the path so whitespace cannot split the shell command.
                    await execAsync(`afplay "${audioPath}"`);
                    break;
                case "linux": {
                    // paplay needs the PulseAudio socket; fall back to the
                    // conventional per-user runtime dir when XDG_RUNTIME_DIR is unset.
                    const XDG_RUNTIME_DIR = process.env.XDG_RUNTIME_DIR || '/run/user/1000';
                    const env = {
                        ...process.env,
                        PULSE_SERVER: `unix:${XDG_RUNTIME_DIR}/pulse/native`,
                        PULSE_COOKIE: `${process.env.HOME}/.config/pulse/cookie`
                    };
                    await execAsync(`paplay "${audioPath}"`, { env });
                    break;
                }
                case "win32":
                    await execAsync(
                        `powershell -c (New-Object Media.SoundPlayer '${audioPath}').PlaySync()`
                    );
                    break;
                default:
                    throw new Error(`Unsupported platform: ${process.platform}`);
            }
        } catch (error) {
            const errorMessage = error instanceof Error ? error.message : "Unknown error";
            console.error("Audio playback error:", errorMessage);
            throw new Error(`Audio playback failed: ${errorMessage}`);
        }
    }

    /** Connects the server to stdio so an MCP client can drive it. */
    public async start(): Promise<void> {
        const transport = new StdioServerTransport();
        await this.mcp.connect(transport);
    }
}

const server = new TTSServer();
await server.start();

```