# MS-Lucidia-Voice-Gateway-MCP — packed repository snapshot
# Directory Structure

```
├── .gitignore
├── package-lock.json
├── package.json
├── README.md
├── server.js
├── src
│   ├── index.ts
│   └── types
│       ├── node-record-lpcm16.d.ts
│       └── say.d.ts
├── test
│   └── index.html
└── tsconfig.json
```

# Files

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
 1 | # Dependencies
 2 | node_modules/
 3 | npm-debug.log*
 4 | yarn-debug.log*
 5 | yarn-error.log*
 6 | 
 7 | # Build output
 8 | build/
 9 | dist/
10 | *.js.map
11 | 
12 | # Environment variables
13 | .env
14 | .env.local
15 | .env.*.local
16 | 
17 | # IDE files
18 | .idea/
19 | .vscode/
20 | *.swp
21 | *.swo
22 | 
23 | # Operating System
24 | .DS_Store
25 | Thumbs.db
26 | 
27 | # Temporary files
28 | *.log
29 | *.tmp
30 | recording.wav
31 | 
32 | # Test coverage
33 | coverage/
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
  1 | # MS-Lucidia-Voice-Gateway-MCP
  2 | 
  3 | A Model Context Protocol (MCP) server that provides text-to-speech and speech-to-text capabilities using Windows' built-in speech services. This server leverages the native Windows Speech API (SAPI) through PowerShell commands, eliminating the need for external APIs or services.
  4 | 
  5 | ## Features
  6 | 
  7 | - Text-to-Speech (TTS) using Windows SAPI voices
  8 | - Speech-to-Text (STT) using Windows Speech Recognition
  9 | - Simple web interface for testing
 10 | - No external API dependencies
 11 | - Uses native Windows capabilities
 12 | 
 13 | ## Prerequisites
 14 | 
 15 | - Windows 10/11 with Speech Recognition enabled
 16 | - Node.js 16+
 17 | - PowerShell
 18 | 
 19 | ## Installation
 20 | 
 21 | 1. Clone the repository:
 22 | ```bash
 23 | git clone https://github.com/ExpressionsBot/MS-Lucidia-Voice-Gateway-MCP.git
 24 | cd MS-Lucidia-Voice-Gateway-MCP
 25 | ```
 26 | 
 27 | 2. Install dependencies:
 28 | ```bash
 29 | npm install
 30 | ```
 31 | 
 32 | 3. Build the project:
 33 | ```bash
 34 | npm run build
 35 | ```
 36 | 
 37 | ## Usage
 38 | 
 39 | ### Testing Interface
 40 | 
 41 | 1. Start the test server:
 42 | ```bash
 43 | npm run test
 44 | ```
 45 | 
 46 | 2. Open `http://localhost:3000` in your browser
 47 | 3. Use the web interface to test TTS and STT capabilities
 48 | 
 49 | ### Available Tools
 50 | 
 51 | #### text_to_speech
 52 | Converts text to speech using Windows SAPI.
 53 | 
 54 | Parameters:
 55 | - `text` (required): The text to convert to speech
 56 | - `voice` (optional): The voice to use (e.g., "Microsoft David Desktop")
 57 | - `speed` (optional): Speech rate from 0.5 to 2.0 (default: 1.0)
 58 | 
 59 | Example:
 60 | ```javascript
 61 | fetch('http://localhost:3000/tts', {
 62 |   method: 'POST',
 63 |   headers: { 'Content-Type': 'application/json' },
 64 |   body: JSON.stringify({
 65 |     text: "Hello, this is a test",
 66 |     voice: "Microsoft David Desktop",
 67 |     speed: 1.0
 68 |   })
 69 | });
 70 | ```
 71 | 
 72 | #### speech_to_text
 73 | Records audio and converts it to text using Windows Speech Recognition.
 74 | 
 75 | Parameters:
 76 | - `duration` (optional): Recording duration in seconds (default: 5, max: 60)
 77 | 
 78 | Example:
 79 | ```javascript
 80 | fetch('http://localhost:3000/stt', {
 81 |   method: 'POST',
 82 |   headers: { 'Content-Type': 'application/json' },
 83 |   body: JSON.stringify({
 84 |     duration: 5
 85 |   })
 86 | }).then(response => response.json())
 87 |   .then(data => console.log(data.text));
 88 | ```
 89 | 
 90 | ## Troubleshooting
 91 | 
 92 | 1. Make sure Windows Speech Recognition is enabled:
 93 |    - Open Windows Settings
 94 |    - Go to Time & Language > Speech
 95 |    - Enable Speech Recognition
 96 | 
 97 | 2. Check available voices:
 98 |    - Open PowerShell and run:
 99 |    ```powershell
100 |    Add-Type -AssemblyName System.Speech
101 |    (New-Object System.Speech.Synthesis.SpeechSynthesizer).GetInstalledVoices().VoiceInfo.Name
102 |    ```
103 | 
104 | 3. Test speech recognition:
105 |    - Open Speech Recognition in Windows Settings
106 |    - Run through the setup wizard if not already done
107 |    - Test that Windows can recognize your voice
108 | 
109 | ## Contributing
110 | 
111 | 1. Fork the repository
112 | 2. Create your feature branch
113 | 3. Commit your changes
114 | 4. Push to the branch
115 | 5. Create a new Pull Request
116 | 
117 | ## License
118 | 
119 | MIT
120 | 
```

--------------------------------------------------------------------------------
/src/types/say.d.ts:
--------------------------------------------------------------------------------

```typescript
 1 | declare module 'say' { // ambient typings — NOTE(review): 'say' is not in package.json dependencies; confirm it is still needed
 2 |   export function speak(
 3 |     text: string,
 4 |     voice?: string,
 5 |     speed?: number,
 6 |     callback?: (err: Error | string | null) => void
 7 |   ): void; // completion/errors are reported via the callback
 8 | 
 9 |   export function stop(): void; // cancel any in-progress speech
10 | }
```

--------------------------------------------------------------------------------
/src/types/node-record-lpcm16.d.ts:
--------------------------------------------------------------------------------

```typescript
 1 | declare module 'node-record-lpcm16' { // ambient typings — NOTE(review): package not in package.json dependencies; confirm still needed
 2 |   interface RecordOptions {
 3 |     sampleRate?: number; // presumably Hz (e.g. 16000) — confirm against the library docs
 4 |     channels?: number;
 5 |     audioType?: string;
 6 |   }
 7 | 
 8 |   interface Recording {
 9 |     stream(): NodeJS.ReadableStream; // live audio data stream
10 |     stop(): void; // end the capture
11 |   }
12 | 
13 |   export function record(options?: RecordOptions): Recording;
14 | }
```

--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2020",
 4 |     "module": "commonjs",
 5 |     "lib": ["ES2020"],
 6 |     "outDir": "./build",
 7 |     "rootDir": "./src",
 8 |     "strict": true,
 9 |     "esModuleInterop": true,
10 |     "skipLibCheck": true,
11 |     "forceConsistentCasingInFileNames": true,
12 |     "resolveJsonModule": true,
13 |     "declaration": true,
14 |     "moduleResolution": "node"
15 |   },
16 |   "include": ["src/**/*"],
17 |   "exclude": ["node_modules", "build", "test"]
18 | }
19 | 
```

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "name": "ms-lucidia-voice-gateway-mcp",
 3 |   "version": "1.0.0",
 4 |   "description": "Windows Speech server for Lucidia using native Windows speech capabilities",
 5 |   "main": "build/index.js",
 6 |   "scripts": {
 7 |     "start": "node build/index.js",
 8 |     "test": "node server.js",
 9 |     "dev": "nodemon server.js",
10 |     "build": "tsc",
11 |     "watch": "tsc -w"
12 |   },
13 |   "dependencies": {
14 |     "cors": "^2.8.5",
15 |     "dotenv": "^16.3.1",
16 |     "express": "^4.18.2",
17 |     "openai": "^4.24.1"
18 |   },
19 |   "devDependencies": {
20 |     "@types/cors": "^2.8.17",
21 |     "@types/express": "^4.17.21",
22 |     "@types/node": "^20.11.5",
23 |     "nodemon": "^3.0.3",
24 |     "typescript": "^5.3.3"
25 |   },
26 |   "repository": {
27 |     "type": "git",
28 |     "url": "git+https://github.com/ExpressionsBot/MS-Lucidia-Voice-Gateway-MCP.git"
29 |   },
30 |   "keywords": [
31 |     "windows",
32 |     "speech",
33 |     "tts",
34 |     "stt",
35 |     "gpt4",
36 |     "lucidia"
37 |   ],
38 |   "author": "ExpressionsBot",
39 |   "license": "MIT"
40 | }
41 | 
```

--------------------------------------------------------------------------------
/server.js:
--------------------------------------------------------------------------------

```javascript
  1 | const express = require('express');
  2 | const cors = require('cors');
  3 | const { exec } = require('child_process');
  4 | const { promisify } = require('util');
  5 | const fs = require('fs').promises;
  6 | const path = require('path');
  7 | const { createServer } = require('net');
  8 | 
  9 | const execAsync = promisify(exec);
 10 | const app = express();
 11 | 
 12 | // Scan upward from startPort and return the first TCP port we can bind.
 13 | async function findAvailablePort(startPort) {
 14 |   const isPortAvailable = (port) => {
 15 |     return new Promise((resolve) => {
 16 |       const server = createServer() // throwaway net server used only as a bind probe
 17 |         .listen(port, () => {
 18 |           server.once('close', () => resolve(true)); // resolve only after the probe socket is released
 19 |           server.close();
 20 |         })
 21 |         .on('error', () => resolve(false)); // EADDRINUSE etc. -> port not available
 22 |     });
 23 |   };
 24 | 
 25 |   let port = startPort;
 26 |   while (!(await isPortAvailable(port))) { // NOTE(review): unbounded scan; assumes some port above startPort is free
 27 |     port++;
 28 |   }
 29 |   return port;
 30 | }
 31 | 
 32 | app.use(cors());
 33 | app.use(express.json());
 34 | app.use(express.static('test'));
 35 | 
 36 | // Helper function to execute PowerShell commands
 37 | async function runPowerShell(script) {
 38 |     try {
 39 |         const { stdout } = await execAsync(`powershell -Command "${script}"`);
 40 |         return stdout.trim();
 41 |     } catch (error) {
 42 |         throw new Error(`PowerShell execution failed: ${error.message}`);
 43 |     }
 44 | }
 45 | 
 46 | // GET /voices — list installed SAPI voice names as a JSON string array
 47 | app.get('/voices', async (req, res) => {
 48 |     try {
 49 |         const script = `
 50 |             Add-Type -AssemblyName System.Speech;
 51 |             (New-Object System.Speech.Synthesis.SpeechSynthesizer).GetInstalledVoices().VoiceInfo.Name
 52 |         `;
 53 |         const output = await runPowerShell(script);
 54 |         const voices = output.split('\n').map(v => v.trim()).filter(Boolean); // one name per stdout line; trim drops stray \r
 55 |         res.json(voices);
 56 |     } catch (error) {
 57 |         res.status(500).json({ error: error.message });
 58 |     }
 59 | });
 60 | 
 61 | // Text to Speech
 62 | app.post('/tts', async (req, res) => {
 63 |     try {
 64 |         const { text, voice = 'Microsoft David Desktop', speed = 1.0 } = req.body;
 65 |         
 66 |         if (!text) {
 67 |             return res.status(400).json({ error: 'Text is required' });
 68 |         }
 69 | 
 70 |         const script = `
 71 |             Add-Type -AssemblyName System.Speech;
 72 |             $synthesizer = New-Object System.Speech.Synthesis.SpeechSynthesizer;
 73 |             $synthesizer.SelectVoice('${voice}');
 74 |             $synthesizer.Rate = ${Math.round((speed - 1) * 10)};
 75 |             $synthesizer.Speak('${text.replace(/'/g, "''")}');
 76 |         `;
 77 | 
 78 |         await runPowerShell(script);
 79 |         res.json({ success: true });
 80 |     } catch (error) {
 81 |         res.status(500).json({ error: error.message });
 82 |     }
 83 | });
 84 | 
 85 | // Speech to Text
 86 | app.post('/stt', async (req, res) => {
 87 |     try {
 88 |         const { duration = 5 } = req.body;
 89 |         const audioFile = path.join(__dirname, 'recording.wav');
 90 | 
 91 |         // Record audio using PowerShell
 92 |         const recordScript = `
 93 |             Add-Type -AssemblyName System.Windows.Forms;
 94 |             $audio = New-Object System.IO.MemoryStream;
 95 |             $waveSource = New-Object NAudio.Wave.WaveInEvent;
 96 |             $waveSource.WaveFormat = New-Object NAudio.Wave.WaveFormat(16000, 1);
 97 |             $waveFile = New-Object NAudio.Wave.WaveFileWriter('${audioFile}', $waveSource.WaveFormat);
 98 |             $waveSource.DataAvailable = {
 99 |                 param($sender, $e)
100 |                 $waveFile.Write($e.Buffer, 0, $e.BytesRecorded)
101 |             };
102 |             $waveSource.StartRecording();
103 |             Start-Sleep -Seconds ${duration};
104 |             $waveSource.StopRecording();
105 |             $waveFile.Dispose();
106 |         `;
107 | 
108 |         await runPowerShell(recordScript);
109 | 
110 |         // Transcribe the recorded audio
111 |         const transcribeScript = `
112 |             Add-Type -AssemblyName System.Speech;
113 |             $recognizer = New-Object System.Speech.Recognition.SpeechRecognizer;
114 |             $grammar = New-Object System.Speech.Recognition.DictationGrammar;
115 |             $recognizer.LoadGrammar($grammar);
116 |             $audio = [System.IO.File]::ReadAllBytes('${audioFile}');
117 |             $stream = New-Object System.IO.MemoryStream(@(,$audio));
118 |             $result = $recognizer.RecognizeSync([System.Speech.AudioFormat.AudioStream]::new($stream));
119 |             $result.Text;
120 |         `;
121 | 
122 |         const transcription = await runPowerShell(transcribeScript);
123 | 
124 |         // Clean up the audio file
125 |         await fs.unlink(audioFile);
126 | 
127 |         res.json({ text: transcription || 'No speech detected' });
128 |     } catch (error) {
129 |         res.status(500).json({ error: error.message });
130 |     }
131 | });
132 | 
133 | // Start the HTTP server on the first free port at or above 3000
134 | async function startServer() {
135 |     try {
136 |         const port = await findAvailablePort(3000); // may differ from 3000 if it is already taken
137 |         app.listen(port, () => {
138 |             console.log(`Windows Speech Server running at http://localhost:${port}`);
139 |         });
140 |     } catch (error) {
141 |         console.error('Failed to start server:', error);
142 |         process.exit(1); // nothing useful to do without a listening socket
143 |     }
144 | }
145 | 
146 | startServer();
```

--------------------------------------------------------------------------------
/test/index.html:
--------------------------------------------------------------------------------

```html
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 | <head>
  4 |     <meta charset="UTF-8">
  5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
  6 |     <title>Windows Speech MCP Test</title>
  7 |     <style>
  8 |         body {
  9 |             font-family: Arial, sans-serif;
 10 |             max-width: 800px;
 11 |             margin: 0 auto;
 12 |             padding: 20px;
 13 |             background-color: #f5f5f5;
 14 |         }
 15 |         .container {
 16 |             background-color: white;
 17 |             padding: 20px;
 18 |             border-radius: 8px;
 19 |             box-shadow: 0 2px 4px rgba(0,0,0,0.1);
 20 |         }
 21 |         .section {
 22 |             margin-bottom: 20px;
 23 |             padding: 20px;
 24 |             border: 1px solid #ddd;
 25 |             border-radius: 4px;
 26 |         }
 27 |         h1 {
 28 |             color: #333;
 29 |             text-align: center;
 30 |         }
 31 |         textarea {
 32 |             width: 100%;
 33 |             height: 100px;
 34 |             margin: 10px 0;
 35 |             padding: 8px;
 36 |             border: 1px solid #ddd;
 37 |             border-radius: 4px;
 38 |             resize: vertical;
 39 |         }
 40 |         button {
 41 |             background-color: #007bff;
 42 |             color: white;
 43 |             border: none;
 44 |             padding: 10px 20px;
 45 |             border-radius: 4px;
 46 |             cursor: pointer;
 47 |             margin: 5px;
 48 |         }
 49 |         button:hover {
 50 |             background-color: #0056b3;
 51 |         }
 52 |         select {
 53 |             padding: 8px;
 54 |             margin: 5px;
 55 |             border-radius: 4px;
 56 |             border: 1px solid #ddd;
 57 |         }
 58 |         .status {
 59 |             margin-top: 10px;
 60 |             padding: 10px;
 61 |             border-radius: 4px;
 62 |         }
 63 |         .success {
 64 |             background-color: #d4edda;
 65 |             color: #155724;
 66 |         }
 67 |         .error {
 68 |             background-color: #f8d7da;
 69 |             color: #721c24;
 70 |         }
 71 |     </style>
 72 | </head>
 73 | <body>
 74 |     <div class="container">
 75 |         <h1>Windows Speech MCP Test</h1>
 76 |         
 77 |         <div class="section">
 78 |             <h2>Text to Speech</h2>
 79 |             <textarea id="ttsText" placeholder="Enter text to speak...">Hello, this is a test of Windows speech synthesis.</textarea>
 80 |             <div>
 81 |                 <select id="ttsVoice">
 82 |                     <option value="Microsoft David Desktop">David</option>
 83 |                     <option value="Microsoft Zira Desktop">Zira</option>
 84 |                 </select>
 85 |                 <select id="ttsSpeed">
 86 |                     <option value="0.5">0.5x Speed</option>
 87 |                     <option value="1.0" selected>1.0x Speed</option>
 88 |                     <option value="1.5">1.5x Speed</option>
 89 |                     <option value="2.0">2.0x Speed</option>
 90 |                 </select>
 91 |                 <button onclick="speak()">Speak</button>
 92 |             </div>
 93 |             <div id="ttsStatus" class="status"></div>
 94 |         </div>
 95 | 
 96 |         <div class="section">
 97 |             <h2>Speech to Text</h2>
 98 |             <div>
 99 |                 <select id="sttDuration">
100 |                     <option value="5">5 seconds</option>
101 |                     <option value="10">10 seconds</option>
102 |                     <option value="15">15 seconds</option>
103 |                     <option value="30">30 seconds</option>
104 |                 </select>
105 |                 <button onclick="startRecording()">Start Recording</button>
106 |             </div>
107 |             <textarea id="sttText" placeholder="Transcribed text will appear here..." readonly></textarea>
108 |             <div id="sttStatus" class="status"></div>
109 |         </div>
110 |     </div>
111 | 
112 |     <script>
113 |         async function speak() {
114 |             const text = document.getElementById('ttsText').value;
115 |             const voice = document.getElementById('ttsVoice').value;
116 |             const speed = parseFloat(document.getElementById('ttsSpeed').value);
117 |             const statusDiv = document.getElementById('ttsStatus');
118 | 
119 |             try {
120 |                 const response = await fetch('/tts', { // relative URL: the server picks the first free port, which may not be 3000
121 |                     method: 'POST',
122 |                     headers: {
123 |                         'Content-Type': 'application/json'
124 |                     },
125 |                     body: JSON.stringify({ text, voice, speed })
126 |                 });
127 | 
128 |                 if (!response.ok) throw new Error('Failed to synthesize speech');
129 |                 
130 |                 statusDiv.textContent = 'Speech synthesis successful!';
131 |                 statusDiv.className = 'status success';
132 |             } catch (error) {
133 |                 statusDiv.textContent = `Error: ${error.message}`;
134 |                 statusDiv.className = 'status error';
135 |             }
136 |         }
137 | 
138 |         async function startRecording() {
139 |             const duration = parseInt(document.getElementById('sttDuration').value);
140 |             const statusDiv = document.getElementById('sttStatus');
141 |             const textArea = document.getElementById('sttText');
142 | 
143 |             try {
144 |                 statusDiv.textContent = `Recording for ${duration} seconds...`;
145 |                 statusDiv.className = 'status';
146 | 
147 |                 const response = await fetch('/stt', { // relative URL: same origin as the page
148 |                     method: 'POST',
149 |                     headers: {
150 |                         'Content-Type': 'application/json'
151 |                     },
152 |                     body: JSON.stringify({ duration })
153 |                 });
154 | 
155 |                 if (!response.ok) throw new Error('Failed to transcribe speech');
156 |                 
157 |                 const result = await response.json();
158 |                 textArea.value = result.text;
159 |                 statusDiv.textContent = 'Transcription successful!';
160 |                 statusDiv.className = 'status success';
161 |             } catch (error) {
162 |                 statusDiv.textContent = `Error: ${error.message}`;
163 |                 statusDiv.className = 'status error';
164 |                 textArea.value = '';
165 |             }
166 |         }
167 | 
168 |         // Populate the voice dropdown from the server's installed-voice list
169 |         async function loadVoices() {
170 |             try {
171 |                 const response = await fetch('/voices'); // relative URL: works on whatever port the server actually bound
172 |                 if (!response.ok) throw new Error('Failed to fetch voices');
173 |                 
174 |                 const voices = await response.json();
175 |                 const voiceSelect = document.getElementById('ttsVoice');
176 |                 voiceSelect.innerHTML = '';
177 |                 
178 |                 voices.forEach(voice => {
179 |                     const option = document.createElement('option');
180 |                     option.value = voice;
181 |                     option.textContent = voice.replace('Microsoft ', '').replace(' Desktop', '');
182 |                     voiceSelect.appendChild(option);
183 |                 });
184 |             } catch (error) {
185 |                 console.error('Failed to load voices:', error);
186 |             }
187 |         }
188 | 
189 |         window.onload = loadVoices;
190 |     </script>
191 | </body>
192 | </html>
```

--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------

```typescript
  1 | #!/usr/bin/env node
  2 | 
  3 | import express, { Request, Response } from 'express';
  4 | import cors from 'cors';
  5 | import { exec } from 'child_process';
  6 | import { promisify } from 'util';
  7 | import * as path from 'path';
  8 | import * as fs from 'fs';
  9 | import * as net from 'net';
 10 | import OpenAI from 'openai';
 11 | import dotenv from 'dotenv';
 12 | 
 13 | dotenv.config(); // load .env before any process.env reads below
 14 | 
 15 | const execAsync = promisify(exec);
 16 | 
 17 | // Configuration
 18 | const DEFAULT_VOICE = 'Microsoft Jenny(Natural) - English (United States)'; // NOTE(review): a neural voice name — confirm SAPI's SpeechSynthesizer can select it
 19 | const DEFAULT_TIMEOUT = parseInt(process.env.TIMEOUT || '30000', 10); // ms; overridable via TIMEOUT env var
 20 | const DEFAULT_PORT = 3000;
 21 | 
 22 | // Initialize OpenAI
 23 | const openai = new OpenAI({
 24 |   apiKey: process.env.OPENAI_API_KEY // NOTE(review): the SDK throws at construction if this is undefined — confirm .env always sets it
 25 | });
 26 | 
 27 | // Type definitions for request arguments
 28 | interface TextToSpeechArgs {
 29 |   text: string; // required: the text to speak
 30 |   voice?: string; // installed voice name; defaults to DEFAULT_VOICE
 31 |   speed?: number; // 0.5..2.0 per README; defaults to 1.0
 32 | }
 33 | 
 34 | interface SpeechToTextArgs {
 35 |   duration?: number; // recording length in seconds; defaults to 5
 36 | }
 37 | 
 38 | interface ChatArgs {
 39 |   message: string; // required: user message forwarded to GPT-4
 40 |   voice?: string;
 41 |   speed?: number;
 42 | }
 43 | 
 44 | // Find the first free TCP port in [startPort, startPort + 100); throws if none is free
 45 | async function findAvailablePort(startPort: number): Promise<number> {
 46 |   const isPortAvailable = (port: number): Promise<boolean> => {
 47 |     return new Promise((resolve) => {
 48 |       const server = net.createServer() // throwaway server used only as a bind probe
 49 |         .once('error', () => resolve(false))
 50 |         .once('listening', () => {
 51 |           server.close();
 52 |           resolve(true);
 53 |         })
 54 |         .listen(port);
 55 |     });
 56 |   };
 57 | 
 58 |   for (let port = startPort; port < startPort + 100; port++) { // bounded scan of 100 candidate ports
 59 |     if (await isPortAvailable(port)) {
 60 |       return port;
 61 |     }
 62 |   }
 63 |   throw new Error('No available ports found');
 64 | }
 65 | 
 66 | // List installed SAPI voice names; degrades to [DEFAULT_VOICE] if PowerShell fails
 67 | async function getWindowsVoices(): Promise<string[]> {
 68 |   try {
 69 |     const { stdout } = await execAsync('powershell -Command "Add-Type -AssemblyName System.Speech; (New-Object System.Speech.Synthesis.SpeechSynthesizer).GetInstalledVoices().VoiceInfo.Name"', {
 70 |       timeout: DEFAULT_TIMEOUT
 71 |     });
 72 |     return stdout.split('\n').map(v => v.trim()).filter(Boolean); // one voice name per line; trim strips stray \r
 73 |   } catch (error) {
 74 |     console.error('Error getting voices:', error);
 75 |     return [DEFAULT_VOICE]; // fall back rather than failing the request
 76 |   }
 77 | }
 78 | 
 79 | // Speak text synchronously via Windows SAPI; voice is quote-escaped and Rate clamped to SAPI's valid [-10, 10]
 80 | async function speakText(text: string, voice: string = DEFAULT_VOICE, speed: number = 1.0): Promise<void> {
 81 |   const script = `
 82 |     Add-Type -AssemblyName System.Speech;
 83 |     $synthesizer = New-Object System.Speech.Synthesis.SpeechSynthesizer;
 84 |     $synthesizer.SelectVoice('${voice.replace(/'/g, "''")}');
 85 |     $synthesizer.Rate = ${Math.max(-10, Math.min(10, Math.round((speed - 1) * 10)))};
 86 |     $synthesizer.Speak('${text.replace(/'/g, "''")}');
 87 |   `;
 88 | 
 89 |   await execAsync(`powershell -Command "${script}"`, { timeout: DEFAULT_TIMEOUT });
 90 | }
 91 | 
 92 | // Ask GPT-4 for a short, speakable reply; rethrows API errors after logging
 93 | async function getChatResponse(message: string): Promise<string> {
 94 |   try {
 95 |     const completion = await openai.chat.completions.create({
 96 |       model: "gpt-4",
 97 |       messages: [
 98 |         { 
 99 |           role: "system", 
100 |           content: "You are a helpful assistant. Keep your responses concise and natural, as they will be spoken aloud."
101 |         },
102 |         { 
103 |           role: "user", 
104 |           content: message 
105 |         }
106 |       ],
107 |       temperature: 0.7,
108 |       max_tokens: 150 // keep replies short enough to be spoken
109 |     });
110 | 
111 |     return completion.choices[0]?.message?.content || "I'm sorry, I couldn't generate a response."; // fallback when the API returns no content
112 |   } catch (error) {
113 |     console.error('Error getting GPT-4 response:', error);
114 |     throw error; // let the route handler map this to an HTTP status
115 |   }
116 | }
117 | 
118 | // Initialize Express app
119 | const app = express();
120 | 
121 | app.use(cors());
122 | app.use(express.json());
123 | app.use(express.static('test')); // serves test/index.html at the site root
124 | 
125 | // Add timeout middleware
126 | app.use((req: Request, res: Response, next) => {
127 |   res.setTimeout(DEFAULT_TIMEOUT, () => {
128 |     res.status(408).json({ error: 'Request timeout' }); // NOTE(review): may race with a handler that responds later — confirm intended
129 |   });
130 |   next();
131 | });
132 | 
133 | // GET /voices — JSON array of installed SAPI voice names
134 | app.get('/voices', async (_req: Request, res: Response) => {
135 |   try {
136 |     const voices = await getWindowsVoices();
137 |     res.json(voices);
138 |   } catch (error) {
139 |     res.status(500).json({ error: error instanceof Error ? error.message : String(error) });
140 |   }
141 | });
142 | 
143 | // POST /tts — convert `text` to speech via speakText; 408 on timeout, 500 otherwise
144 | app.post('/tts', async (req: Request<{}, {}, TextToSpeechArgs>, res: Response) => {
145 |   try {
146 |     const { text, voice = DEFAULT_VOICE, speed = 1.0 } = req.body;
147 |     
148 |     if (!text) {
149 |       return res.status(400).json({ error: 'Text is required' });
150 |     }
151 | 
152 |     await speakText(text, voice, speed); // blocks until speech finishes (or times out)
153 |     res.json({ success: true });
154 |   } catch (error) {
155 |     if (error instanceof Error && error.message.includes('timeout')) {
156 |       res.status(408).json({ error: 'Operation timed out' });
157 |     } else {
158 |       res.status(500).json({ error: error instanceof Error ? error.message : String(error) });
159 |     }
160 |   }
161 | });
162 | 
163 | // POST /stt — record the default mic, then transcribe with System.Speech
164 | app.post('/stt', async (req: Request<{}, {}, SpeechToTextArgs>, res: Response) => {
165 |   try {
166 |     const duration = Math.min(Math.max(Number(req.body.duration) || 5, 1), 60); // clamp to the documented 1..60s range
167 |     const audioFile = path.join(__dirname, 'recording.wav');
168 | 
169 |     // Record audio using PowerShell — NOTE(review): references NAudio types that Add-Type System.Windows.Forms does not load; confirm NAudio is available
170 |     const recordScript = `
171 |       Add-Type -AssemblyName System.Windows.Forms;
172 |       $audio = New-Object System.IO.MemoryStream;
173 |       $waveSource = New-Object NAudio.Wave.WaveInEvent;
174 |       $waveSource.WaveFormat = New-Object NAudio.Wave.WaveFormat(16000, 1);
175 |       $waveFile = New-Object NAudio.Wave.WaveFileWriter('${audioFile}', $waveSource.WaveFormat);
176 |       $waveSource.DataAvailable = {
177 |         param($sender, $e)
178 |         $waveFile.Write($e.Buffer, 0, $e.BytesRecorded)
179 |       };
180 |       $waveSource.StartRecording();
181 |       Start-Sleep -Seconds ${duration};
182 |       $waveSource.StopRecording();
183 |       $waveFile.Dispose();
184 |     `;
185 | 
186 |     await execAsync(`powershell -Command "${recordScript}"`, { timeout: DEFAULT_TIMEOUT + (duration * 1000) }); // fix: the script must run under powershell, not as a raw shell command
187 | 
188 |     // Transcribe the recorded audio
189 |     const transcribeScript = `
190 |       Add-Type -AssemblyName System.Speech;
191 |       $recognizer = New-Object System.Speech.Recognition.SpeechRecognizer;
192 |       $grammar = New-Object System.Speech.Recognition.DictationGrammar;
193 |       $recognizer.LoadGrammar($grammar);
194 |       $audio = [System.IO.File]::ReadAllBytes('${audioFile}');
195 |       $stream = New-Object System.IO.MemoryStream(@(,$audio));
196 |       $result = $recognizer.RecognizeSync([System.Speech.AudioFormat.AudioStream]::new($stream));
197 |       $result.Text;
198 |     `;
199 | 
200 |     const { stdout } = await execAsync(`powershell -Command "${transcribeScript}"`, { timeout: DEFAULT_TIMEOUT });
201 | 
202 |     // Clean up the audio file
203 |     await fs.promises.unlink(audioFile);
204 | 
205 |     res.json({ text: stdout.trim() || 'No speech detected' });
206 |   } catch (error) {
207 |     // Clean up the audio file if it exists
208 |     const audioFile = path.join(__dirname, 'recording.wav');
209 |     if (fs.existsSync(audioFile)) {
210 |       await fs.promises.unlink(audioFile);
211 |     }
212 |     
213 |     if (error instanceof Error && error.message.includes('timeout')) {
214 |       res.status(408).json({ error: 'Operation timed out' });
215 |     } else {
216 |       res.status(500).json({ error: error instanceof Error ? error.message : String(error) });
217 |     }
218 |   }
219 | });
220 | 
221 | // POST /chat — get a GPT-4 reply to `message` and speak it aloud before responding
222 | app.post('/chat', async (req: Request<{}, {}, ChatArgs>, res: Response) => {
223 |   try {
224 |     const { message, voice = DEFAULT_VOICE, speed = 1.0 } = req.body;
225 |     
226 |     if (!message) {
227 |       return res.status(400).json({ error: 'Message is required' });
228 |     }
229 | 
230 |     // Get GPT-4 response
231 |     const response = await getChatResponse(message);
232 |     
233 |     // Speak the response (synchronous: the HTTP reply is sent only after speech finishes)
234 |     await speakText(response, voice, speed);
235 | 
236 |     res.json({ 
237 |       success: true,
238 |       response,
239 |       spoken: true
240 |     });
241 |   } catch (error) {
242 |     if (error instanceof Error && error.message.includes('timeout')) {
243 |       res.status(408).json({ error: 'Operation timed out' });
244 |     } else {
245 |       res.status(500).json({ error: error instanceof Error ? error.message : String(error) });
246 |     }
247 |   }
248 | });
249 | 
250 | // Start the server on the first free port at or above DEFAULT_PORT
251 | async function startServer() {
252 |   try {
253 |     const port = await findAvailablePort(DEFAULT_PORT); // may differ from 3000 if it is taken
254 |     app.listen(port, () => {
255 |       console.log(`Windows Speech Server running at http://localhost:${port}`);
256 |       console.log(`Using default voice: ${DEFAULT_VOICE}`);
257 |       console.log(`Timeout set to: ${DEFAULT_TIMEOUT}ms`);
258 |       console.log('GPT-4 integration enabled');
259 |     });
260 |   } catch (error) {
261 |     console.error('Failed to start server:', error);
262 |     process.exit(1); // no point continuing without a listening socket
263 |   }
264 | }
265 | 
266 | startServer();
267 | 
```