# Directory Structure ``` ├── .gitignore ├── Dockerfile ├── LICENSE ├── package-lock.json ├── package.json ├── README.md ├── smithery.yaml ├── src │ └── index.ts └── tsconfig.json ``` # Files -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` # Logs logs *.log npm-debug.log* yarn-debug.log* yarn-error.log* lerna-debug.log* .pnpm-debug.log* # Diagnostic reports (https://nodejs.org/api/report.html) report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json # Runtime data pids *.pid *.seed *.pid.lock # Directory for instrumented libs generated by jscoverage/JSCover lib-cov # Coverage directory used by tools like istanbul coverage *.lcov # nyc test coverage .nyc_output # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) .grunt # Bower dependency directory (https://bower.io/) bower_components # node-waf configuration .lock-wscript # Compiled binary addons (https://nodejs.org/api/addons.html) build/Release # Dependency directories node_modules/ jspm_packages/ # Snowpack dependency directory (https://snowpack.dev/) web_modules/ # TypeScript cache *.tsbuildinfo # Optional npm cache directory .npm # Optional eslint cache .eslintcache # Optional stylelint cache .stylelintcache # Microbundle cache .rpt2_cache/ .rts2_cache_cjs/ .rts2_cache_es/ .rts2_cache_umd/ # Optional REPL history .node_repl_history # Output of 'npm pack' *.tgz # Yarn Integrity file .yarn-integrity # dotenv environment variable files .env .env.development.local .env.test.local .env.production.local .env.local # parcel-bundler cache (https://parceljs.org/) .cache .parcel-cache # Next.js build output .next out # Nuxt.js build / generate output .nuxt dist # Gatsby files .cache/ # Comment in the public line in if your project uses Gatsby and not Next.js # https://nextjs.org/blog/next-9-1#public-directory-support # public # vuepress build output .vuepress/dist # vuepress 
v2.x temp and cache directory .temp .cache # Docusaurus cache and generated files .docusaurus # Serverless directories .serverless/ # FuseBox cache .fusebox/ # DynamoDB Local files .dynamodb/ # TernJS port file .tern-port # Stores VSCode versions used for testing VSCode extensions .vscode-test # yarn v2 .yarn/cache .yarn/unplugged .yarn/build-state.yml .yarn/install-state.gz .pnp.* build/ gcp-oauth.keys.json .*-server-credentials.json # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. 
#Pipfile.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control #poetry.lock # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. #pdm.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. # https://pdm.fming.dev/latest/usage/project/#working-with-version-control .pdm.toml .pdm-python .pdm-build/ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ .DS_Store # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown # YouTube Transcript Server [](https://smithery.ai/server/@kimtaeyoon83/mcp-server-youtube-transcript) A Model Context Protocol server that enables retrieval of transcripts from YouTube videos. 
This server provides direct access to video captions and subtitles through a simple interface. <a href="https://glama.ai/mcp/servers/z429kk3te7"><img width="380" height="200" src="https://glama.ai/mcp/servers/z429kk3te7/badge" alt="mcp-server-youtube-transcript MCP server" /></a> ### Installing via Smithery To install YouTube Transcript Server for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@kimtaeyoon83/mcp-server-youtube-transcript): ```bash npx -y @smithery/cli install @kimtaeyoon83/mcp-server-youtube-transcript --client claude ``` ## Components ### Tools - **get_transcript** - Extract transcripts from YouTube videos - Inputs: - `url` (string, required): YouTube video URL or video ID - `lang` (string, optional, default: "en"): Language code for transcript (e.g., 'ko', 'en') ## Key Features - Support for multiple video URL formats - Language-specific transcript retrieval - Detailed metadata in responses ## Configuration To use with Claude Desktop, add this server configuration: ```json { "mcpServers": { "youtube-transcript": { "command": "npx", "args": ["-y", "@kimtaeyoon83/mcp-server-youtube-transcript"] } } } ``` ## Install via tool [mcp-get](https://github.com/michaellatman/mcp-get) A command-line tool for installing and managing Model Context Protocol (MCP) servers. ```shell npx @michaellatman/mcp-get@latest install @kimtaeyoon83/mcp-server-youtube-transcript ``` ## Awesome-mcp-servers [awesome-mcp-servers](https://github.com/punkpeye/awesome-mcp-servers) A curated list of awesome Model Context Protocol (MCP) servers. ## Development ### Prerequisites - Node.js 18 or higher - npm or yarn ### Setup Install dependencies: ```bash npm install ``` Build the server: ```bash npm run build ``` For development with auto-rebuild: ```bash npm run watch ``` ### Testing ```bash npm test ``` ### Debugging Since MCP servers communicate over stdio, debugging can be challenging. 
We recommend using the MCP Inspector for development (package.json defines no `inspector` script, so invoke it through npx after building): ```bash npx @modelcontextprotocol/inspector node dist/index.js ``` ## Error Handling The server implements robust error handling for common scenarios: - Invalid video URLs or IDs - Unavailable transcripts - Language availability issues - Network errors ## Usage Examples 1. Get transcript by video URL: ```typescript await server.callTool("get_transcript", { url: "https://www.youtube.com/watch?v=VIDEO_ID", lang: "en" }); ``` 2. Get transcript by video ID: ```typescript await server.callTool("get_transcript", { url: "VIDEO_ID", lang: "ko" }); ``` 3. How to Extract YouTube Subtitles in Claude Desktop App ``` chat: https://youtu.be/ODaHJzOyVCQ?si=aXkJgso96Deri0aB Extract subtitles ``` ## Security Considerations The server: - Validates all input parameters - Handles YouTube API errors gracefully - Does not currently apply its own timeout to transcript retrieval - Provides detailed error messages for troubleshooting ## License This MCP server is licensed under the MIT License. See the LICENSE file for details. ``` -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- ```json { "compilerOptions": { "target": "ES2022", "module": "Node16", "moduleResolution": "Node16", "outDir": "./dist", "rootDir": "./src", "strict": true, "esModuleInterop": true, "skipLibCheck": true, "forceConsistentCasingInFileNames": true, "resolveJsonModule": true }, "include": ["src/**/*"], "exclude": ["node_modules"] } ``` -------------------------------------------------------------------------------- /smithery.yaml: -------------------------------------------------------------------------------- ```yaml # Smithery configuration file: https://smithery.ai/docs/deployments startCommand: type: stdio configSchema: # JSON Schema defining the configuration options for the MCP.
type: object required: [] properties: {} commandFunction: # A function that produces the CLI command to start the MCP on stdio. |- config => ({ command: 'node', args: ['dist/index.js'] })
```

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
```dockerfile
# Two-stage image: stage 1 compiles the TypeScript sources, stage 2 ships only
# the compiled output plus production dependencies.

# Stage 1: Build the application
FROM node:18-alpine AS builder

# Set working directory
WORKDIR /app

# Copy package.json and package-lock.json
COPY package.json package-lock.json ./

# Install dependencies.
# --ignore-scripts: package.json defines "prepare": "npm run build", which npm
# runs as part of a plain install. src/ and tsconfig.json have not been copied
# yet at this point, so the build is deferred to the explicit step below.
RUN npm install --ignore-scripts

# Copy the rest of the application
COPY . .

# Build the application
RUN npm run build

# Stage 2: Create the production image
FROM node:18-alpine AS production

# Set working directory
WORKDIR /app

# Copy the built files from the builder stage
COPY --from=builder /app/dist /app/dist
COPY --from=builder /app/package.json /app/package-lock.json /app/

# Install production dependencies only.
# --ignore-scripts: the "prepare" script would invoke tsc, a devDependency that
# --omit=dev leaves out; dist/ was already produced by the builder stage.
RUN npm ci --omit=dev --ignore-scripts

# Specify the default command
ENTRYPOINT ["node", "dist/index.js"]
```

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
```json
{ "name": "@kimtaeyoon83/mcp-server-youtube-transcript", "version": "0.1.1", "description": "This is an MCP server that allows you to directly download transcripts of YouTube videos.", "license": "MIT", "author": "Freddie", "homepage": "https://github.com/kimtaeyoon83/mcp-server-youtube-transcript", "bugs": "https://github.com/kimtaeyoon83/mcp-server-youtube-transcript/issues", "type": "module", "access": "public", "main": "dist/index.js", "module": "dist/index.js", "bin": { "mcp-server-youtube-transcript": "dist/index.js" }, "files": [ "dist" ], "scripts": { "build": "tsc && shx chmod +x dist/*.js", "prepare": "npm run build", "watch": "tsc --watch" }, "dependencies": { "@modelcontextprotocol/sdk": "0.6.0",
"youtube-captions-scraper": "^2.0.3" }, "devDependencies": { "@types/node": "^20.11.24", "shx": "^0.3.4", "typescript": "^5.6.2" } }
```

--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
```typescript
#!/usr/bin/env node

import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import {
  CallToolRequestSchema,
  ListToolsRequestSchema,
  ErrorCode,
  McpError,
  Tool,
  CallToolResult,
} from "@modelcontextprotocol/sdk/types.js";
// NOTE(review): the suppression below is presumably needed because the package
// ships no type declarations — confirm before replacing it with a .d.ts shim.
// @ts-ignore
import { getSubtitles } from 'youtube-captions-scraper';

/** Language code used when the caller does not supply one. */
const DEFAULT_LANG = "en";

/** Shape accepted for a bare video ID: exactly 11 URL-safe characters. */
const VIDEO_ID_PATTERN = /^[a-zA-Z0-9_-]{11}$/;

/** First path segments on a YouTube host whose following segment is the video ID. */
const ID_PATH_PREFIXES: readonly string[] = ["embed", "shorts", "live", "v"];

/** Tools advertised in response to a tools/list request. */
const TOOLS: Tool[] = [
  {
    name: "get_transcript",
    description: "Extract transcript from a YouTube video URL or ID",
    inputSchema: {
      type: "object",
      properties: {
        url: {
          type: "string",
          description: "YouTube video URL or ID"
        },
        lang: {
          type: "string",
          description: "Language code for transcript (e.g., 'ko', 'en')",
          default: DEFAULT_LANG
        }
      },
      // `lang` carries a default and is defaulted again in the handler, so only
      // `url` is mandatory.
      required: ["url"]
    }
  }
];

/** One caption entry as consumed by {@link YouTubeTranscriptExtractor}. */
interface TranscriptLine {
  text: string;
  start: number;
  dur: number;
}

/** Produces a printable message from whatever value a `catch` clause received. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Resolves video IDs from user input and fetches caption text for them. */
class YouTubeTranscriptExtractor {
  /**
   * Extracts a YouTube video ID from a bare ID or from a URL.
   *
   * Accepted URL forms: `youtu.be/<id>`, and on `youtube.com` (any subdomain)
   * or `youtube-nocookie.com` either `?v=<id>` or a path of the form
   * `/embed/<id>`, `/shorts/<id>`, `/live/<id>`, `/v/<id>`.
   *
   * @param input - Video URL or 11-character video ID.
   * @returns The video ID.
   * @throws McpError (InvalidParams) when the input is empty, is neither a URL
   *   nor an ID, points at a non-YouTube host, or carries no well-formed ID.
   */
  extractYoutubeId(input: string): string {
    const candidate = typeof input === "string" ? input.trim() : "";
    if (!candidate) {
      throw new McpError(
        ErrorCode.InvalidParams,
        'YouTube URL or ID is required'
      );
    }

    // A bare ID contains no ':' and therefore can never parse as an absolute
    // URL, so testing it first does not shadow any URL input.
    if (VIDEO_ID_PATTERN.test(candidate)) {
      return candidate;
    }

    let url: URL;
    try {
      url = new URL(candidate);
    } catch {
      // Only the URL parse is guarded here, so the more specific errors raised
      // below are never caught and relabelled as an ID problem.
      throw new McpError(
        ErrorCode.InvalidParams,
        `Invalid YouTube video ID: ${input}`
      );
    }

    const videoId = this.videoIdFromUrl(url);
    if (videoId === undefined) {
      throw new McpError(
        ErrorCode.InvalidParams,
        `Could not extract video ID from: ${input}`
      );
    }
    if (!VIDEO_ID_PATTERN.test(videoId)) {
      throw new McpError(
        ErrorCode.InvalidParams,
        `Invalid YouTube URL: ${input}`
      );
    }
    return videoId;
  }

  /**
   * Pulls the raw (unvalidated) ID out of a parsed URL.
   *
   * @returns `undefined` when the host is not a recognised YouTube host; an
   *   empty string when the host is recognised but the URL carries no ID;
   *   otherwise the candidate ID.
   */
  private videoIdFromUrl(url: URL): string | undefined {
    const host = url.hostname.toLowerCase().replace(/^www\./, "");
    const [first, second] = url.pathname
      .split("/")
      .filter((segment) => segment.length > 0);

    if (host === "youtu.be") {
      return first ?? "";
    }

    // Exact / suffix match: a substring test would also accept hosts such as
    // "youtube.com.evil.example".
    const isYoutubeHost =
      host === "youtube.com" ||
      host.endsWith(".youtube.com") ||
      host === "youtube-nocookie.com";
    if (!isYoutubeHost) {
      return undefined;
    }

    const fromQuery = url.searchParams.get("v");
    if (fromQuery !== null) {
      return fromQuery;
    }
    if (first !== undefined && second !== undefined && ID_PATH_PREFIXES.includes(first)) {
      return second;
    }
    return "";
  }

  /**
   * Retrieves the transcript for a video as a single string.
   *
   * @param videoId - Video ID as returned by {@link extractYoutubeId}.
   * @param lang - Caption language code passed through to the scraper.
   * @returns Caption text joined with single spaces.
   * @throws McpError (InternalError) wrapping any failure of the scraper.
   */
  async getTranscript(videoId: string, lang: string): Promise<string> {
    try {
      const transcript: TranscriptLine[] = await getSubtitles({
        videoID: videoId,
        lang,
      });
      return this.formatTranscript(transcript);
    } catch (error) {
      console.error('Failed to fetch transcript:', error);
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to retrieve transcript: ${describeError(error)}`
      );
    }
  }

  /**
   * Joins caption entries into readable text, dropping entries that are empty
   * after trimming.
   */
  private formatTranscript(transcript: TranscriptLine[]): string {
    return transcript
      .map((line) => line.text.trim())
      .filter((text) => text.length > 0)
      .join(' ');
  }
}

/** MCP server exposing the `get_transcript` tool. */
class TranscriptServer {
  private readonly extractor: YouTubeTranscriptExtractor;
  private readonly server: Server;

  constructor() {
    this.extractor = new YouTubeTranscriptExtractor();
    this.server = new Server(
      {
        name: "mcp-servers-youtube-transcript",
        // Matches the "version" field in package.json.
        version: "0.1.1",
      },
      {
        capabilities: {
          tools: {},
        },
      }
    );

    this.setupHandlers();
    this.setupErrorHandling();
  }

  /** Installs the server error logger and the process shutdown hooks. */
  private setupErrorHandling(): void {
    this.server.onerror = (error) => {
      console.error("[MCP Error]", error);
    };

    const shutdown = async (): Promise<void> => {
      await this.stop();
      process.exit(0);
    };

    // SIGTERM is what `docker stop` delivers; SIGINT covers Ctrl-C.
    for (const signal of ["SIGINT", "SIGTERM"] as const) {
      process.on(signal, () => {
        void shutdown();
      });
    }
  }

  /** Registers the tools/list and tools/call request handlers. */
  private setupHandlers(): void {
    // List available tools
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: TOOLS
    }));

    // Handle tool calls
    this.server.setRequestHandler(CallToolRequestSchema, async (request) =>
      this.handleToolCall(request.params.name, request.params.arguments ?? {})
    );
  }

  /**
   * Dispatches a tool call.
   *
   * @param name - Tool name from the request.
   * @param args - Raw, unvalidated tool arguments.
   * @returns The call result with the transcript as a single text content item.
   * @throws McpError (InvalidParams) for bad arguments, (MethodNotFound) for an
   *   unknown tool, (InternalError) when retrieval fails.
   */
  private async handleToolCall(name: string, args: Record<string, unknown>): Promise<CallToolResult> {
    switch (name) {
      case "get_transcript": {
        const input = args.url;
        // `??` rather than a destructuring default so that an explicit null
        // also falls back to the default language.
        const lang = args.lang ?? DEFAULT_LANG;

        if (typeof input !== 'string' || input.trim() === '') {
          throw new McpError(
            ErrorCode.InvalidParams,
            'URL parameter is required and must be a string'
          );
        }

        if (typeof lang !== 'string' || lang.trim() === '') {
          throw new McpError(
            ErrorCode.InvalidParams,
            'Language code must be a non-empty string'
          );
        }

        const language = lang.trim();

        try {
          const videoId = this.extractor.extractYoutubeId(input);
          // Diagnostics are written with console.error throughout this file,
          // keeping them off stdout, which start() hands to the stdio transport.
          console.error(`Processing transcript for video: ${videoId}`);

          const transcript = await this.extractor.getTranscript(videoId, language);
          console.error(`Successfully extracted transcript (${transcript.length} chars)`);

          // Returned as a CallToolResult itself (content + isError) rather than
          // nested under a `toolResult` key.
          return {
            content: [{
              type: "text",
              text: transcript,
              metadata: {
                videoId,
                language,
                timestamp: new Date().toISOString(),
                charCount: transcript.length
              }
            }],
            isError: false
          };
        } catch (error) {
          console.error('Transcript extraction failed:', error);

          if (error instanceof McpError) {
            throw error;
          }

          throw new McpError(
            ErrorCode.InternalError,
            `Failed to process transcript: ${describeError(error)}`
          );
        }
      }

      default:
        throw new McpError(
          ErrorCode.MethodNotFound,
          `Unknown tool: ${name}`
        );
    }
  }

  /**
   * Starts the server on a stdio transport.
   */
  async start(): Promise<void> {
    const transport = new StdioServerTransport();
    await this.server.connect(transport);
  }

  /**
   * Stops the server. Close failures are logged, not rethrown, so shutdown
   * always proceeds.
   */
  async stop(): Promise<void> {
    try {
      await this.server.close();
    } catch (error) {
      console.error('Error while stopping server:', error);
    }
  }
}

/** Entry point: builds the server and exits non-zero if it cannot start. */
async function main(): Promise<void> {
  const server = new TranscriptServer();

  try {
    await server.start();
  } catch (error) {
    console.error("Server failed to start:", error);
    process.exit(1);
  }
}

main().catch((error) => {
  console.error("Fatal server error:", error);
  process.exit(1);
});
```