# Directory Structure

```
├── .gitignore
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── src
│   └── index.ts
└── tsconfig.json
```

# Files

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
1 | node_modules/
2 | build/
3 | *.log
4 | .env*
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
1 | # OpenAI Speech-to-Text transcriptions MCP Server
2 | 
3 | An MCP server that provides audio transcription capabilities using OpenAI's API.
4 | 
5 | <a href="https://glama.ai/mcp/servers/@Ichigo3766/audio-transcriber-mcp">
6 |   <img width="380" height="200" src="https://glama.ai/mcp/servers/@Ichigo3766/audio-transcriber-mcp/badge" alt="Audio Transcriber Server MCP server" />
7 | </a>
8 | 
9 | ## Installation
10 | 
11 | ### Setup
12 | 
13 | 1. Clone the repository:
14 | ```bash
15 | git clone https://github.com/Ichigo3766/audio-transcriber-mcp.git
16 | cd audio-transcriber-mcp
17 | ```
18 | 
19 | 2. Install dependencies:
20 | ```bash
21 | npm install
22 | ```
23 | 
24 | 3. Build the server:
25 | ```bash
26 | npm run build
27 | ```
28 | 
29 | 4. Set up your OpenAI API key in your environment variables.
30 | 
31 | 5. Add the server configuration to your MCP client settings:
32 | 
33 | ```json
34 | {
35 |   "mcpServers": {
36 |     "audio-transcriber": {
37 |       "command": "node",
38 |       "args": [
39 |         "/path/to/audio-transcriber-mcp/build/index.js"
40 |       ],
41 |       "env": {
42 |         "OPENAI_API_KEY": "",
43 |         "OPENAI_BASE_URL": "", // Optional
44 |         "OPENAI_MODEL": "" // Optional
45 |       }
46 |     }
47 |   }
48 | }
49 | ```
50 | 
51 | Replace `/path/to/audio-transcriber-mcp` with the actual path where you cloned the repository.
52 | 
53 | ## Features
54 | 
55 | ### Tools
56 | - `transcribe_audio` - Transcribe audio files using OpenAI's API
57 |   - Takes `filepath` as a required parameter
58 |   - Optional parameters:
59 |     - `save_to_file`: Boolean to save the transcription to a text file next to the audio file
60 |     - `language`: ISO-639-1 language code (e.g., "en", "es"); defaults to "en"
61 | 
62 | ## License
63 | 
64 | This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software, subject to the terms and conditions of the MIT License. For more details, please see the LICENSE file in the project repository.
65 | 
```
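
The `transcribe_audio` tool described in the README can also be invoked programmatically. The sketch below is a minimal, illustrative client, assuming the `Client`, `StdioClientTransport`, and `callTool` helpers exported by `@modelcontextprotocol/sdk`; the server path, API key, and audio file path are placeholders.

```typescript
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';

// Spawn the built server over stdio (placeholder path and API key).
const transport = new StdioClientTransport({
  command: 'node',
  args: ['/path/to/audio-transcriber-mcp/build/index.js'],
  env: { OPENAI_API_KEY: 'sk-...' },
});

const client = new Client(
  { name: 'example-client', version: '0.1.0' },
  { capabilities: {} }
);

await client.connect(transport);

// Call the transcribe_audio tool on a hypothetical audio file.
const result = await client.callTool({
  name: 'transcribe_audio',
  arguments: {
    filepath: '/path/to/recording.mp3',
    language: 'en',
    save_to_file: true,
  },
});

console.log(result.content); // [{ type: 'text', text: '...transcription...' }]

await client.close();
```

The same call can be made interactively with the `inspector` script defined in package.json below.
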
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------

```json
1 | {
2 |   "compilerOptions": {
3 |     "target": "ES2022",
4 |     "module": "Node16",
5 |     "moduleResolution": "Node16",
6 |     "outDir": "./build",
7 |     "rootDir": "./src",
8 |     "strict": true,
9 |     "esModuleInterop": true,
10 |     "skipLibCheck": true,
11 |     "forceConsistentCasingInFileNames": true
12 |   },
13 |   "include": ["src/**/*"],
14 |   "exclude": ["node_modules"]
15 | }
16 | 
```

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------

```json
1 | {
2 |   "name": "audio-transcriber",
3 |   "version": "0.1.0",
4 |   "description": "An MCP server to transcribe audio files using the OpenAI API",
5 |   "private": true,
6 |   "type": "module",
7 |   "bin": {
8 |     "audio-transcriber": "./build/index.js"
9 |   },
10 |   "files": [
11 |     "build"
12 |   ],
13 |   "scripts": {
14 |     "build": "tsc && node -e \"require('fs').chmodSync('build/index.js', '755')\"",
15 |     "prepare": "npm run build",
16 |     "watch": "tsc --watch",
17 |     "inspector": "npx @modelcontextprotocol/inspector build/index.js"
18 |   },
19 |   "dependencies": {
20 |     "@modelcontextprotocol/sdk": "0.6.0",
21 |     "openai": "^4.89.0"
22 |   },
23 |   "devDependencies": {
24 |     "@types/node": "^20.11.24",
25 |     "typescript": "^5.3.3"
26 |   }
27 | }
28 | 
```

--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------

```typescript
1 | #!/usr/bin/env node
2 | import { Server } from '@modelcontextprotocol/sdk/server/index.js';
3 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
4 | import {
5 |   CallToolRequestSchema,
6 |   ErrorCode,
7 |   ListToolsRequestSchema,
8 |   McpError,
9 | } from '@modelcontextprotocol/sdk/types.js';
10 | import OpenAI from 'openai';
11 | import fs from 'fs';
12 | import path from 'path';
13 | import { promisify } from 'util';
14 | 
15 | // Initialize OpenAI client with configuration
16 | const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
17 | const OPENAI_BASE_URL = process.env.OPENAI_BASE_URL;
18 | const OPENAI_MODEL = process.env.OPENAI_MODEL || "whisper-1";
19 | 
20 | if (!OPENAI_API_KEY) {
21 |   throw new Error('OPENAI_API_KEY environment variable is required');
22 | }
23 | 
24 | const config: { apiKey: string; baseURL?: string } = {
25 |   apiKey: OPENAI_API_KEY
26 | };
27 | 
28 | if (OPENAI_BASE_URL) {
29 |   config.baseURL = OPENAI_BASE_URL;
30 | }
31 | 
32 | const openai = new OpenAI(config);
33 | 
34 | interface TranscribeArgs {
35 |   filepath: string;
36 |   save_to_file?: boolean | string;
37 |   language?: string;
38 | }
39 | 
40 | const isValidTranscribeArgs = (args: any): args is TranscribeArgs =>
41 |   typeof args === 'object' &&
42 |   args !== null &&
43 |   typeof args.filepath === 'string' &&
44 |   (args.save_to_file === undefined ||
45 |     typeof args.save_to_file === 'boolean' ||
46 |     typeof args.save_to_file === 'string') &&
47 |   (args.language === undefined || typeof args.language === 'string');
48 | 
49 | class AudioTranscriberServer {
50 |   private server: Server;
51 | 
52 |   constructor() {
53 |     this.server = new Server(
54 |       {
55 |         name: 'audio-transcriber',
56 |         version: '0.1.0',
57 |       },
58 |       {
59 |         capabilities: {
60 |           tools: {},
61 |         },
62 |       }
63 |     );
64 | 
65 |     this.setupToolHandlers();
66 |     this.server.onerror = (error) => console.error('[MCP Error]', error);
67 |     process.on('SIGINT', async () => {
68 |       await this.server.close();
69 |       process.exit(0);
70 |     });
71 |   }
72 | 
73 |   private setupToolHandlers() {
74 |     this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
75 |       tools: [
76 |         {
77 |           name: 'transcribe_audio',
78 |           description: 'Transcribe an audio file using OpenAI Whisper API',
79 |           inputSchema: {
80 |             type: 'object',
81 |             properties: {
82 |               filepath: {
83 |                 type: 'string',
84 |                 description: 'Absolute path to the audio file',
85 |               },
86 |               save_to_file: {
87 |                 type: 'boolean',
88 |                 description: 'Whether to save the transcription to a file next to the audio file',
89 |               },
90 |               language: {
91 |                 type: 'string',
92 |                 description: 'Language of the audio in ISO-639-1 format (e.g. "en", "es"). Default is "en".',
93 |               },
94 |             },
95 |             required: ['filepath'],
96 |           },
97 |         },
98 |       ],
99 |     }));
100 | 
101 |     this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
102 |       if (request.params.name !== 'transcribe_audio') {
103 |         throw new McpError(
104 |           ErrorCode.MethodNotFound,
105 |           `Unknown tool: ${request.params.name}`
106 |         );
107 |       }
108 | 
109 |       if (!isValidTranscribeArgs(request.params.arguments)) {
110 |         throw new McpError(
111 |           ErrorCode.InvalidParams,
112 |           'Invalid transcribe arguments'
113 |         );
114 |       }
115 | 
116 |       let fileStream = null;
117 | 
118 |       try {
119 |         const { filepath, save_to_file, language = "en" } = request.params.arguments;
120 | 
121 |         // Normalize and decode path properly
122 |         const decodedPath = decodeURIComponent(filepath.replace(/\\/g, '').trim());
123 | 
124 |         console.error(`[DEBUG] Requested file path: ${decodedPath}`);
125 | 
126 |         // Verify file exists
127 |         if (!fs.existsSync(decodedPath)) {
128 |           throw new Error(`Audio file not found: ${decodedPath}`);
129 |         }
130 | 
131 |         // Check if file is readable
132 |         try {
133 |           await promisify(fs.access)(decodedPath, fs.constants.R_OK);
134 |         } catch (err) {
135 |           throw new Error(`Audio file not readable: ${decodedPath}`);
136 |         }
137 | 
138 |         console.error(`[DEBUG] File exists and is readable: ${decodedPath}`);
139 | 
140 |         // Create transcription
141 |         console.error(`[DEBUG] Sending transcription request to OpenAI API`);
142 |         fileStream = fs.createReadStream(decodedPath);
143 | 
144 |         const response = await openai.audio.transcriptions.create({
145 |           file: fileStream,
146 |           model: OPENAI_MODEL,
147 |           language: language
148 |         });
149 | 
150 |         // Close the file stream immediately after use
151 |         fileStream.destroy();
152 |         fileStream = null;
153 | 
154 |         const transcription = response.text;
155 |         console.error(`[DEBUG] Transcription completed successfully`);
156 | 
157 |         // Handle save_to_file parameter
158 |         const shouldSaveToFile = typeof save_to_file === 'string'
159 |           ? save_to_file.toLowerCase() === 'true'
160 |           : Boolean(save_to_file);
161 | 
162 |         if (shouldSaveToFile) {
163 |           const audioDir = path.dirname(decodedPath);
164 |           const audioName = path.basename(decodedPath, path.extname(decodedPath));
165 |           const transcriptionPath = path.join(audioDir, `${audioName}.txt`);
166 | 
167 |           console.error(`[DEBUG] Saving transcription to: ${transcriptionPath}`);
168 |           await promisify(fs.writeFile)(transcriptionPath, transcription);
169 |           console.error(`[DEBUG] File saved successfully`);
170 |         }
171 | 
172 |         return {
173 |           content: [
174 |             {
175 |               type: 'text',
176 |               text: transcription,
177 |             },
178 |           ],
179 |         };
180 |       } catch (error: any) {
181 |         console.error('[ERROR] Transcription failed:', error);
182 |         return {
183 |           content: [
184 |             {
185 |               type: 'text',
186 |               text: `Error transcribing audio: ${error?.message || String(error)}`,
187 |             },
188 |           ],
189 |           isError: true,
190 |         };
191 |       } finally {
192 |         // Ensure file stream is closed even if there's an error
193 |         if (fileStream) {
194 |           try {
195 |             fileStream.destroy();
196 |             console.error("[DEBUG] File stream closed");
197 |           } catch (err) {
198 |             console.error("[ERROR] Failed to close file stream:", err);
199 |           }
200 |         }
201 |       }
202 |     });
203 |   }
204 | 
205 |   async run() {
206 |     try {
207 |       const transport = new StdioServerTransport();
208 |       await this.server.connect(transport);
209 |       console.error('[INFO] Audio Transcriber MCP server running on stdio');
210 |     } catch (err) {
211 |       console.error('[FATAL] Failed to start server:', err);
212 |       process.exit(1);
213 |     }
214 |   }
215 | }
216 | 
217 | // Handle global unhandled promise rejections
218 | process.on('unhandledRejection', (reason, promise) => {
219 |   console.error('[ERROR] Unhandled Rejection at:', promise, 'reason:', reason);
220 | });
221 | 
222 | // Handle global uncaught exceptions
223 | process.on('uncaughtException', (err) => {
224 |   console.error('[FATAL] Uncaught Exception:', err);
225 |   // Give the error logs time to flush before exiting
226 |   setTimeout(() => process.exit(1), 500);
227 | });
228 | 
229 | const server = new AudioTranscriberServer();
230 | server.run().catch(err => {
231 |   console.error('[FATAL] Server initialization failed:', err);
232 |   process.exit(1);
233 | });
```
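
The `CallToolRequestSchema` handler above services `tools/call` requests arriving over stdio. For reference, the JSON-RPC exchange looks roughly like the following sketch; the request id, file path, and transcription text are illustrative placeholders.

```typescript
// Request sent by the MCP client (JSON-RPC over stdio):
const exampleRequest = {
  jsonrpc: '2.0',
  id: 1,
  method: 'tools/call',
  params: {
    name: 'transcribe_audio',
    arguments: {
      filepath: '/path/to/recording.mp3',
      language: 'en',
      save_to_file: true,
    },
  },
};

// Successful response produced by the handler (error cases instead return
// the error message text and set isError: true):
const exampleResponse = {
  jsonrpc: '2.0',
  id: 1,
  result: {
    content: [{ type: 'text', text: 'Transcribed text of the recording...' }],
  },
};
```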