# Directory Structure

```
├── .gitignore
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── src
│   └── index.ts
└── tsconfig.json
```

# Files

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
node_modules/
build/
*.log
.env*
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
# OpenAI Speech-to-Text Transcription MCP Server

An MCP server that provides audio transcription capabilities using OpenAI's API.

<a href="https://glama.ai/mcp/servers/@Ichigo3766/audio-transcriber-mcp">
  <img width="380" height="200" src="https://glama.ai/mcp/servers/@Ichigo3766/audio-transcriber-mcp/badge" alt="Audio Transcriber Server MCP server" />
</a>

## Installation

### Setup

1. Clone the repository:
```bash
git clone https://github.com/Ichigo3766/audio-transcriber-mcp.git
cd audio-transcriber-mcp
```

2. Install dependencies:
```bash
npm install
```

3. Build the server:
```bash
npm run build
```

4. Set up your OpenAI API key in your environment variables.

5. Add the server configuration to your environment:

```json
{
  "mcpServers": {
    "audio-transcriber": {
      "command": "node",
      "args": [
        "/path/to/audio-transcriber-mcp/build/index.js"
      ],
      "env": {
        "OPENAI_API_KEY": "",
        "OPENAI_BASE_URL": "",
        "OPENAI_MODEL": ""
      }
    }
  }
}
```

`OPENAI_API_KEY` is required; `OPENAI_BASE_URL` and `OPENAI_MODEL` are optional and default to the standard OpenAI endpoint and `whisper-1`.

Replace `/path/to/audio-transcriber-mcp` with the actual path where you cloned the repository.

## Features

### Tools

- `transcribe_audio` - Transcribe audio files using OpenAI's API
  - Takes `filepath` as a required parameter
  - Optional parameters:
    - `save_to_file`: Boolean to save the transcription to a file next to the audio file
    - `language`: ISO-639-1 language code (e.g., "en", "es"); defaults to "en"
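### Example

The exact request format depends on your MCP client, but a `tools/call` message for `transcribe_audio` has roughly the following shape (the file path below is a placeholder):

```json
{
  "jsonrpc": "2.0",
  "id": 1,
  "method": "tools/call",
  "params": {
    "name": "transcribe_audio",
    "arguments": {
      "filepath": "/path/to/recording.mp3",
      "save_to_file": true,
      "language": "en"
    }
  }
}
```

On success the server returns the transcription as text content; with `save_to_file` set to `true` it also writes `recording.txt` next to the audio file.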
## License

This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software, subject to the terms and conditions of the MIT License. For more details, please see the LICENSE file in the project repository.
```

--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------

```json
{
  "compilerOptions": {
    "target": "ES2022",
    "module": "Node16",
    "moduleResolution": "Node16",
    "outDir": "./build",
    "rootDir": "./src",
    "strict": true,
    "esModuleInterop": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true
  },
  "include": ["src/**/*"],
  "exclude": ["node_modules"]
}
```

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------

```json
{
  "name": "audio-transcriber",
  "version": "0.1.0",
  "description": "An MCP server to transcribe audio files using the OpenAI API",
  "private": true,
  "type": "module",
  "bin": {
    "audio-transcriber": "./build/index.js"
  },
  "files": [
    "build"
  ],
  "scripts": {
    "build": "tsc && node -e \"require('fs').chmodSync('build/index.js', '755')\"",
    "prepare": "npm run build",
    "watch": "tsc --watch",
    "inspector": "npx @modelcontextprotocol/inspector build/index.js"
  },
  "dependencies": {
    "@modelcontextprotocol/sdk": "0.6.0",
    "openai": "^4.89.0"
  },
  "devDependencies": {
    "@types/node": "^20.11.24",
    "typescript": "^5.3.3"
  }
}
```

--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------

```typescript
#!/usr/bin/env node
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
  CallToolRequestSchema,
  ErrorCode,
  ListToolsRequestSchema,
  McpError,
} from '@modelcontextprotocol/sdk/types.js';
import OpenAI from 'openai';
import fs from 'fs';
import path from 'path';
import { promisify } from 'util';

// Initialize OpenAI client with configuration from the environment
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const OPENAI_BASE_URL = process.env.OPENAI_BASE_URL;
const OPENAI_MODEL = process.env.OPENAI_MODEL || "whisper-1";

if (!OPENAI_API_KEY) {
  throw new Error('OPENAI_API_KEY environment variable is required');
}

const config: { apiKey: string; baseURL?: string } = {
  apiKey: OPENAI_API_KEY
};

if (OPENAI_BASE_URL) {
  config.baseURL = OPENAI_BASE_URL;
}

const openai = new OpenAI(config);

interface TranscribeArgs {
  filepath: string;
  save_to_file?: boolean | string;
  language?: string;
}

const isValidTranscribeArgs = (args: any): args is TranscribeArgs =>
  typeof args === 'object' &&
  args !== null &&
  typeof args.filepath === 'string' &&
  (args.save_to_file === undefined ||
    typeof args.save_to_file === 'boolean' ||
    typeof args.save_to_file === 'string') &&
  (args.language === undefined || typeof args.language === 'string');

class AudioTranscriberServer {
  private server: Server;

  constructor() {
    this.server = new Server(
      {
        name: 'audio-transcriber',
        version: '0.1.0',
      },
      {
        capabilities: {
          tools: {},
        },
      }
    );

    this.setupToolHandlers();

    this.server.onerror = (error) => console.error('[MCP Error]', error);
    process.on('SIGINT', async () => {
      await this.server.close();
      process.exit(0);
    });
  }

  private setupToolHandlers() {
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: [
        {
          name: 'transcribe_audio',
          description: 'Transcribe an audio file using OpenAI Whisper API',
          inputSchema: {
            type: 'object',
            properties: {
              filepath: {
                type: 'string',
                description: 'Absolute path to the audio file',
              },
              save_to_file: {
                type: 'boolean',
                description:
                  'Whether to save the transcription to a file next to the audio file',
              },
              language: {
                type: 'string',
                description:
                  'Language of the audio in ISO-639-1 format (e.g. "en", "es"). Default is "en".',
              },
            },
            required: ['filepath'],
          },
        },
      ],
    }));

    this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
      if (request.params.name !== 'transcribe_audio') {
        throw new McpError(
          ErrorCode.MethodNotFound,
          `Unknown tool: ${request.params.name}`
        );
      }

      if (!isValidTranscribeArgs(request.params.arguments)) {
        throw new McpError(
          ErrorCode.InvalidParams,
          'Invalid transcribe arguments'
        );
      }

      let fileStream = null;

      try {
        const { filepath, save_to_file, language = "en" } = request.params.arguments;

        // Remove backslash escapes (e.g. shell-escaped spaces), trim whitespace,
        // and decode any URI-encoded characters in the path
        const decodedPath = decodeURIComponent(filepath.replace(/\\/g, '').trim());
        console.error(`[DEBUG] Requested file path: ${decodedPath}`);

        // Verify file exists
        if (!fs.existsSync(decodedPath)) {
          throw new Error(`Audio file not found: ${decodedPath}`);
        }

        // Check if file is readable
        try {
          await promisify(fs.access)(decodedPath, fs.constants.R_OK);
        } catch (err) {
          throw new Error(`Audio file not readable: ${decodedPath}`);
        }

        console.error(`[DEBUG] File exists and is readable: ${decodedPath}`);

        // Create transcription
        console.error(`[DEBUG] Sending transcription request to OpenAI API`);
        fileStream = fs.createReadStream(decodedPath);

        const response = await openai.audio.transcriptions.create({
          file: fileStream,
          model: OPENAI_MODEL,
          language: language
        });

        // Close the file stream immediately after use
        fileStream.destroy();
        fileStream = null;

        const transcription = response.text;
        console.error(`[DEBUG] Transcription completed successfully`);

        // Handle save_to_file parameter
        const shouldSaveToFile = typeof save_to_file === 'string'
          ? save_to_file.toLowerCase() === 'true'
          : Boolean(save_to_file);

        if (shouldSaveToFile) {
          const audioDir = path.dirname(decodedPath);
          const audioName = path.basename(decodedPath, path.extname(decodedPath));
          const transcriptionPath = path.join(audioDir, `${audioName}.txt`);

          console.error(`[DEBUG] Saving transcription to: ${transcriptionPath}`);
          await promisify(fs.writeFile)(transcriptionPath, transcription);
          console.error(`[DEBUG] File saved successfully`);
        }

        return {
          content: [
            {
              type: 'text',
              text: transcription,
            },
          ],
        };
      } catch (error: any) {
        console.error('[ERROR] Transcription failed:', error);
        return {
          content: [
            {
              type: 'text',
              text: `Error transcribing audio: ${error?.message || String(error)}`,
            },
          ],
          isError: true,
        };
      } finally {
        // Ensure the file stream is closed even if there's an error
        if (fileStream) {
          try {
            fileStream.destroy();
            console.error("[DEBUG] File stream closed");
          } catch (err) {
            console.error("[ERROR] Failed to close file stream:", err);
          }
        }
      }
    });
  }

  async run() {
    try {
      const transport = new StdioServerTransport();
      await this.server.connect(transport);
      console.error('[INFO] Audio Transcriber MCP server running on stdio');
    } catch (err) {
      console.error('[FATAL] Failed to start server:', err);
      process.exit(1);
    }
  }
}

// Handle global unhandled promise rejections
process.on('unhandledRejection', (reason, promise) => {
  console.error('[ERROR] Unhandled Rejection at:', promise, 'reason:', reason);
});

// Handle global uncaught exceptions
process.on('uncaughtException', (err) => {
  console.error('[FATAL] Uncaught Exception:', err);
  // Give the error logs time to flush before exiting
  setTimeout(() => process.exit(1), 500);
});

const server = new AudioTranscriberServer();
server.run().catch(err => {
  console.error('[FATAL] Server initialization failed:', err);
  process.exit(1);
});
```