# Directory Structure

```
├── .gitignore
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── src
│   └── index.ts
└── tsconfig.json
```

# Files

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
1 | node_modules/
2 | build/
3 | *.log
4 | .env*
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
1 | # OpenAI Speech-to-Text transcriptions MCP Server
2 | 
3 | An MCP server that provides audio transcription capabilities using OpenAI's API.
4 | 
5 | <a href="https://glama.ai/mcp/servers/@Ichigo3766/audio-transcriber-mcp">
6 |   <img width="380" height="200" src="https://glama.ai/mcp/servers/@Ichigo3766/audio-transcriber-mcp/badge" alt="Audio Transcriber Server MCP server" />
7 | </a>
8 | 
9 | ## Installation
10 | 
11 | ### Setup
12 | 
13 | 1. Clone the repository:
14 | ```bash
15 | git clone https://github.com/Ichigo3766/audio-transcriber-mcp.git
16 | cd audio-transcriber-mcp
17 | ```
18 | 
19 | 2. Install dependencies:
20 | ```bash
21 | npm install
22 | ```
23 | 
24 | 3. Build the server:
25 | ```bash
26 | npm run build
27 | ```
28 | 
29 | 4. Set up your OpenAI API key in your environment variables.
30 | 
31 | 5. Add the server configuration to your MCP client settings:
32 | 
33 | ```json
34 | {
35 |   "mcpServers": {
36 |     "audio-transcriber": {
37 |       "command": "node",
38 |       "args": [
39 |         "/path/to/audio-transcriber-mcp/build/index.js"
40 |       ],
41 |       "env": {
42 |         "OPENAI_API_KEY": "",
43 |         "OPENAI_BASE_URL": "", // Optional
44 |         "OPENAI_MODEL": "" // Optional
45 |       }
46 |     }
47 |   }
48 | }
49 | ```
50 | 
51 | Replace `/path/to/audio-transcriber-mcp` with the actual path where you cloned the repository.
52 | 
53 | ## Features
54 | 
55 | ### Tools
56 | - `transcribe_audio` - Transcribe audio files using OpenAI's API
57 |   - Takes `filepath` as a required parameter
58 |   - Optional parameters:
59 |     - `save_to_file`: Boolean to save the transcription to a text file next to the audio file
60 |     - `language`: ISO-639-1 language code (e.g., "en", "es"); defaults to "en"
61 | 
62 | ## License
63 | 
64 | This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software, subject to the terms and conditions of the MIT License. For more details, please see the LICENSE file in the project repository.
65 | 
```
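
The `transcribe_audio` tool described in the README can also be invoked programmatically. The sketch below is a minimal, illustrative client, assuming the `Client`, `StdioClientTransport`, and `callTool` helpers exported by `@modelcontextprotocol/sdk`; the server path, API key, and audio file path are placeholders.

```typescript
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';

// Spawn the built server over stdio (placeholder path and API key).
const transport = new StdioClientTransport({
  command: 'node',
  args: ['/path/to/audio-transcriber-mcp/build/index.js'],
  env: { OPENAI_API_KEY: 'sk-...' },
});

const client = new Client(
  { name: 'example-client', version: '0.1.0' },
  { capabilities: {} }
);

await client.connect(transport);

// Call the transcribe_audio tool on a hypothetical audio file.
const result = await client.callTool({
  name: 'transcribe_audio',
  arguments: {
    filepath: '/path/to/recording.mp3',
    language: 'en',
    save_to_file: true,
  },
});

console.log(result.content); // [{ type: 'text', text: '...transcription...' }]

await client.close();
```

The same call can be made interactively with the `inspector` script defined in package.json below.
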
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------

```json
1 | {
2 |   "compilerOptions": {
3 |     "target": "ES2022",
4 |     "module": "Node16",
5 |     "moduleResolution": "Node16",
6 |     "outDir": "./build",
7 |     "rootDir": "./src",
8 |     "strict": true,
9 |     "esModuleInterop": true,
10 |     "skipLibCheck": true,
11 |     "forceConsistentCasingInFileNames": true
12 |   },
13 |   "include": ["src/**/*"],
14 |   "exclude": ["node_modules"]
15 | }
16 | 
```

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------

```json
1 | {
2 |   "name": "audio-transcriber",
3 |   "version": "0.1.0",
4 |   "description": "An MCP server to transcribe audio files using the OpenAI API",
5 |   "private": true,
6 |   "type": "module",
7 |   "bin": {
8 |     "audio-transcriber": "./build/index.js"
9 |   },
10 |   "files": [
11 |     "build"
12 |   ],
13 |   "scripts": {
14 |     "build": "tsc && node -e \"require('fs').chmodSync('build/index.js', '755')\"",
15 |     "prepare": "npm run build",
16 |     "watch": "tsc --watch",
17 |     "inspector": "npx @modelcontextprotocol/inspector build/index.js"
18 |   },
19 |   "dependencies": {
20 |     "@modelcontextprotocol/sdk": "0.6.0",
21 |     "openai": "^4.89.0"
22 |   },
23 |   "devDependencies": {
24 |     "@types/node": "^20.11.24",
25 |     "typescript": "^5.3.3"
26 |   }
27 | }
28 | 
```

--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------

```typescript
1 | #!/usr/bin/env node
2 | import { Server } from '@modelcontextprotocol/sdk/server/index.js';
3 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
4 | import {
5 |   CallToolRequestSchema,
6 |   ErrorCode,
7 |   ListToolsRequestSchema,
8 |   McpError,
9 | } from '@modelcontextprotocol/sdk/types.js';
10 | import OpenAI from 'openai';
11 | import fs from 'fs';
12 | import path from 'path';
13 | import { promisify } from 'util';
14 | 
15 | // Initialize OpenAI client with configuration
16 | const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
17 | const OPENAI_BASE_URL = process.env.OPENAI_BASE_URL;
18 | const OPENAI_MODEL = process.env.OPENAI_MODEL || "whisper-1";
19 | 
20 | if (!OPENAI_API_KEY) {
21 |   throw new Error('OPENAI_API_KEY environment variable is required');
22 | }
23 | 
24 | const config: { apiKey: string; baseURL?: string } = {
25 |   apiKey: OPENAI_API_KEY
26 | };
27 | 
28 | if (OPENAI_BASE_URL) {
29 |   config.baseURL = OPENAI_BASE_URL;
30 | }
31 | 
32 | const openai = new OpenAI(config);
33 | 
34 | interface TranscribeArgs {
35 |   filepath: string;
36 |   save_to_file?: boolean | string;
37 |   language?: string;
38 | }
39 | 
40 | const isValidTranscribeArgs = (args: any): args is TranscribeArgs =>
41 |   typeof args === 'object' &&
42 |   args !== null &&
43 |   typeof args.filepath === 'string' &&
44 |   (args.save_to_file === undefined ||
45 |     typeof args.save_to_file === 'boolean' ||
46 |     typeof args.save_to_file === 'string') &&
47 |   (args.language === undefined || typeof args.language === 'string');
48 | 
49 | class AudioTranscriberServer {
50 |   private server: Server;
51 | 
52 |   constructor() {
53 |     this.server = new Server(
54 |       {
55 |         name: 'audio-transcriber',
56 |         version: '0.1.0',
57 |       },
58 |       {
59 |         capabilities: {
60 |           tools: {},
61 |         },
62 |       }
63 |     );
64 | 
65 |     this.setupToolHandlers();
66 |     this.server.onerror = (error) => console.error('[MCP Error]', error);
67 |     process.on('SIGINT', async () => {
68 |       await this.server.close();
69 |       process.exit(0);
70 |     });
71 |   }
72 | 
73 |   private setupToolHandlers() {
74 |     this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
75 |       tools: [
76 |         {
77 |           name: 'transcribe_audio',
78 |           description: 'Transcribe an audio file using OpenAI Whisper API',
79 |           inputSchema: {
80 |             type: 'object',
81 |             properties: {
82 |               filepath: {
83 |                 type: 'string',
84 |                 description: 'Absolute path to the audio file',
85 |               },
86 |               save_to_file: {
87 |                 type: 'boolean',
88 |                 description: 'Whether to save the transcription to a file next to the audio file',
89 |               },
90 |               language: {
91 |                 type: 'string',
92 |                 description: 'Language of the audio in ISO-639-1 format (e.g. "en", "es"). Default is "en".',
93 |               },
94 |             },
95 |             required: ['filepath'],
96 |           },
97 |         },
98 |       ],
99 |     }));
100 | 
101 |     this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
102 |       if (request.params.name !== 'transcribe_audio') {
103 |         throw new McpError(
104 |           ErrorCode.MethodNotFound,
105 |           `Unknown tool: ${request.params.name}`
106 |         );
107 |       }
108 | 
109 |       if (!isValidTranscribeArgs(request.params.arguments)) {
110 |         throw new McpError(
111 |           ErrorCode.InvalidParams,
112 |           'Invalid transcribe arguments'
113 |         );
114 |       }
115 | 
116 |       let fileStream = null;
117 | 
118 |       try {
119 |         const { filepath, save_to_file, language = "en" } = request.params.arguments;
120 | 
121 |         // Normalize and decode path properly
122 |         const decodedPath = decodeURIComponent(filepath.replace(/\\/g, '').trim());
123 | 
124 |         console.error(`[DEBUG] Requested file path: ${decodedPath}`);
125 | 
126 |         // Verify file exists
127 |         if (!fs.existsSync(decodedPath)) {
128 |           throw new Error(`Audio file not found: ${decodedPath}`);
129 |         }
130 | 
131 |         // Check if file is readable
132 |         try {
133 |           await promisify(fs.access)(decodedPath, fs.constants.R_OK);
134 |         } catch (err) {
135 |           throw new Error(`Audio file not readable: ${decodedPath}`);
136 |         }
137 | 
138 |         console.error(`[DEBUG] File exists and is readable: ${decodedPath}`);
139 | 
140 |         // Create transcription
141 |         console.error(`[DEBUG] Sending transcription request to OpenAI API`);
142 |         fileStream = fs.createReadStream(decodedPath);
143 | 
144 |         const response = await openai.audio.transcriptions.create({
145 |           file: fileStream,
146 |           model: OPENAI_MODEL,
147 |           language: language
148 |         });
149 | 
150 |         // Close the file stream immediately after use
151 |         fileStream.destroy();
152 |         fileStream = null;
153 | 
154 |         const transcription = response.text;
155 |         console.error(`[DEBUG] Transcription completed successfully`);
156 | 
157 |         // Handle save_to_file parameter
158 |         const shouldSaveToFile = typeof save_to_file === 'string'
159 |           ? save_to_file.toLowerCase() === 'true'
160 |           : Boolean(save_to_file);
161 | 
162 |         if (shouldSaveToFile) {
163 |           const audioDir = path.dirname(decodedPath);
164 |           const audioName = path.basename(decodedPath, path.extname(decodedPath));
165 |           const transcriptionPath = path.join(audioDir, `${audioName}.txt`);
166 | 
167 |           console.error(`[DEBUG] Saving transcription to: ${transcriptionPath}`);
168 |           await promisify(fs.writeFile)(transcriptionPath, transcription);
169 |           console.error(`[DEBUG] File saved successfully`);
170 |         }
171 | 
172 |         return {
173 |           content: [
174 |             {
175 |               type: 'text',
176 |               text: transcription,
177 |             },
178 |           ],
179 |         };
180 |       } catch (error: any) {
181 |         console.error('[ERROR] Transcription failed:', error);
182 |         return {
183 |           content: [
184 |             {
185 |               type: 'text',
186 |               text: `Error transcribing audio: ${error?.message || String(error)}`,
187 |             },
188 |           ],
189 |           isError: true,
190 |         };
191 |       } finally {
192 |         // Ensure file stream is closed even if there's an error
193 |         if (fileStream) {
194 |           try {
195 |             fileStream.destroy();
196 |             console.error("[DEBUG] File stream closed");
197 |           } catch (err) {
198 |             console.error("[ERROR] Failed to close file stream:", err);
199 |           }
200 |         }
201 |       }
202 |     });
203 |   }
204 | 
205 |   async run() {
206 |     try {
207 |       const transport = new StdioServerTransport();
208 |       await this.server.connect(transport);
209 |       console.error('[INFO] Audio Transcriber MCP server running on stdio');
210 |     } catch (err) {
211 |       console.error('[FATAL] Failed to start server:', err);
212 |       process.exit(1);
213 |     }
214 |   }
215 | }
216 | 
217 | // Handle global unhandled promise rejections
218 | process.on('unhandledRejection', (reason, promise) => {
219 |   console.error('[ERROR] Unhandled Rejection at:', promise, 'reason:', reason);
220 | });
221 | 
222 | // Handle global uncaught exceptions
223 | process.on('uncaughtException', (err) => {
224 |   console.error('[FATAL] Uncaught Exception:', err);
225 |   // Give the error logs time to flush before exiting
226 |   setTimeout(() => process.exit(1), 500);
227 | });
228 | 
229 | const server = new AudioTranscriberServer();
230 | server.run().catch(err => {
231 |   console.error('[FATAL] Server initialization failed:', err);
232 |   process.exit(1);
233 | });
```
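
The `CallToolRequestSchema` handler above services `tools/call` requests arriving over stdio. For reference, the JSON-RPC exchange looks roughly like the following sketch; the request id, file path, and transcription text are illustrative placeholders.

```typescript
// Request sent by the MCP client (JSON-RPC over stdio):
const exampleRequest = {
  jsonrpc: '2.0',
  id: 1,
  method: 'tools/call',
  params: {
    name: 'transcribe_audio',
    arguments: {
      filepath: '/path/to/recording.mp3',
      language: 'en',
      save_to_file: true,
    },
  },
};

// Successful response produced by the handler (error cases instead return
// the error message text and set isError: true):
const exampleResponse = {
  jsonrpc: '2.0',
  id: 1,
  result: {
    content: [{ type: 'text', text: 'Transcribed text of the recording...' }],
  },
};
```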