# Directory Structure ``` ├── .gitignore ├── Dockerfile ├── LICENSE ├── package-lock.json ├── package.json ├── README.md ├── smithery.yaml ├── src │ └── index.ts └── tsconfig.json ``` # Files -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | .pnpm-debug.log* 9 | 10 | # Diagnostic reports (https://nodejs.org/api/report.html) 11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 12 | 13 | # Runtime data 14 | pids 15 | *.pid 16 | *.seed 17 | *.pid.lock 18 | 19 | # Directory for instrumented libs generated by jscoverage/JSCover 20 | lib-cov 21 | 22 | # Coverage directory used by tools like istanbul 23 | coverage 24 | *.lcov 25 | 26 | # nyc test coverage 27 | .nyc_output 28 | 29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 30 | .grunt 31 | 32 | # Bower dependency directory (https://bower.io/) 33 | bower_components 34 | 35 | # node-waf configuration 36 | .lock-wscript 37 | 38 | # Compiled binary addons (https://nodejs.org/api/addons.html) 39 | build/Release 40 | 41 | # Dependency directories 42 | node_modules/ 43 | jspm_packages/ 44 | 45 | # Snowpack dependency directory (https://snowpack.dev/) 46 | web_modules/ 47 | 48 | # TypeScript cache 49 | *.tsbuildinfo 50 | 51 | # Optional npm cache directory 52 | .npm 53 | 54 | # Optional eslint cache 55 | .eslintcache 56 | 57 | # Optional stylelint cache 58 | .stylelintcache 59 | 60 | # Microbundle cache 61 | .rpt2_cache/ 62 | .rts2_cache_cjs/ 63 | .rts2_cache_es/ 64 | .rts2_cache_umd/ 65 | 66 | # Optional REPL history 67 | .node_repl_history 68 | 69 | # Output of 'npm pack' 70 | *.tgz 71 | 72 | # Yarn Integrity file 73 | .yarn-integrity 74 | 75 | # dotenv environment variable files 76 | .env 77 | .env.development.local 78 | .env.test.local 79 | 
.env.production.local 80 | .env.local 81 | 82 | # parcel-bundler cache (https://parceljs.org/) 83 | .cache 84 | .parcel-cache 85 | 86 | # Next.js build output 87 | .next 88 | out 89 | 90 | # Nuxt.js build / generate output 91 | .nuxt 92 | dist 93 | 94 | # Gatsby files 95 | .cache/ 96 | # Comment in the public line in if your project uses Gatsby and not Next.js 97 | # https://nextjs.org/blog/next-9-1#public-directory-support 98 | # public 99 | 100 | # vuepress build output 101 | .vuepress/dist 102 | 103 | # vuepress v2.x temp and cache directory 104 | .temp 105 | .cache 106 | 107 | # Docusaurus cache and generated files 108 | .docusaurus 109 | 110 | # Serverless directories 111 | .serverless/ 112 | 113 | # FuseBox cache 114 | .fusebox/ 115 | 116 | # DynamoDB Local files 117 | .dynamodb/ 118 | 119 | # TernJS port file 120 | .tern-port 121 | 122 | # Stores VSCode versions used for testing VSCode extensions 123 | .vscode-test 124 | 125 | # yarn v2 126 | .yarn/cache 127 | .yarn/unplugged 128 | .yarn/build-state.yml 129 | .yarn/install-state.gz 130 | .pnp.* 131 | 132 | build/ 133 | 134 | gcp-oauth.keys.json 135 | .*-server-credentials.json 136 | 137 | # Byte-compiled / optimized / DLL files 138 | __pycache__/ 139 | *.py[cod] 140 | *$py.class 141 | 142 | # C extensions 143 | *.so 144 | 145 | # Distribution / packaging 146 | .Python 147 | build/ 148 | develop-eggs/ 149 | dist/ 150 | downloads/ 151 | eggs/ 152 | .eggs/ 153 | lib/ 154 | lib64/ 155 | parts/ 156 | sdist/ 157 | var/ 158 | wheels/ 159 | share/python-wheels/ 160 | *.egg-info/ 161 | .installed.cfg 162 | *.egg 163 | MANIFEST 164 | 165 | # PyInstaller 166 | # Usually these files are written by a python script from a template 167 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
168 | *.manifest 169 | *.spec 170 | 171 | # Installer logs 172 | pip-log.txt 173 | pip-delete-this-directory.txt 174 | 175 | # Unit test / coverage reports 176 | htmlcov/ 177 | .tox/ 178 | .nox/ 179 | .coverage 180 | .coverage.* 181 | .cache 182 | nosetests.xml 183 | coverage.xml 184 | *.cover 185 | *.py,cover 186 | .hypothesis/ 187 | .pytest_cache/ 188 | cover/ 189 | 190 | # Translations 191 | *.mo 192 | *.pot 193 | 194 | # Django stuff: 195 | *.log 196 | local_settings.py 197 | db.sqlite3 198 | db.sqlite3-journal 199 | 200 | # Flask stuff: 201 | instance/ 202 | .webassets-cache 203 | 204 | # Scrapy stuff: 205 | .scrapy 206 | 207 | # Sphinx documentation 208 | docs/_build/ 209 | 210 | # PyBuilder 211 | .pybuilder/ 212 | target/ 213 | 214 | # Jupyter Notebook 215 | .ipynb_checkpoints 216 | 217 | # IPython 218 | profile_default/ 219 | ipython_config.py 220 | 221 | # pyenv 222 | # For a library or package, you might want to ignore these files since the code is 223 | # intended to run in multiple environments; otherwise, check them in: 224 | # .python-version 225 | 226 | # pipenv 227 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 228 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 229 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 230 | # install all needed dependencies. 231 | #Pipfile.lock 232 | 233 | # poetry 234 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 235 | # This is especially recommended for binary packages to ensure reproducibility, and is more 236 | # commonly ignored for libraries. 237 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 238 | #poetry.lock 239 | 240 | # pdm 241 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
242 | #pdm.lock 243 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 244 | # in version control. 245 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 246 | .pdm.toml 247 | .pdm-python 248 | .pdm-build/ 249 | 250 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 251 | __pypackages__/ 252 | 253 | # Celery stuff 254 | celerybeat-schedule 255 | celerybeat.pid 256 | 257 | # SageMath parsed files 258 | *.sage.py 259 | 260 | # Environments 261 | .env 262 | .venv 263 | env/ 264 | venv/ 265 | ENV/ 266 | env.bak/ 267 | venv.bak/ 268 | 269 | # Spyder project settings 270 | .spyderproject 271 | .spyproject 272 | 273 | # Rope project settings 274 | .ropeproject 275 | 276 | # mkdocs documentation 277 | /site 278 | 279 | # mypy 280 | .mypy_cache/ 281 | .dmypy.json 282 | dmypy.json 283 | 284 | # Pyre type checker 285 | .pyre/ 286 | 287 | # pytype static type analyzer 288 | .pytype/ 289 | 290 | # Cython debug symbols 291 | cython_debug/ 292 | 293 | .DS_Store 294 | 295 | # PyCharm 296 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 297 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 298 | # and can be added to the global gitignore or merged into this file. For a more nuclear 299 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 300 | #.idea/ 301 | ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown 1 | # YouTube Transcript Server 2 | 3 | [](https://smithery.ai/server/@kimtaeyoon83/mcp-server-youtube-transcript) 4 | 5 | A Model Context Protocol server that enables retrieval of transcripts from YouTube videos. This server provides direct access to video captions and subtitles through a simple interface. 
6 | 7 | <a href="https://glama.ai/mcp/servers/z429kk3te7"><img width="380" height="200" src="https://glama.ai/mcp/servers/z429kk3te7/badge" alt="mcp-server-youtube-transcript MCP server" /></a> 8 | 9 | ### Installing via Smithery 10 | 11 | To install YouTube Transcript Server for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@kimtaeyoon83/mcp-server-youtube-transcript): 12 | 13 | ```bash 14 | npx -y @smithery/cli install @kimtaeyoon83/mcp-server-youtube-transcript --client claude 15 | ``` 16 | 17 | ## Components 18 | 19 | ### Tools 20 | 21 | - **get_transcript** 22 | - Extract transcripts from YouTube videos 23 | - Inputs: 24 | - `url` (string, required): YouTube video URL or video ID 25 | - `lang` (string, optional, default: "en"): Language code for transcript (e.g., 'ko', 'en') 26 | 27 | ## Key Features 28 | 29 | - Support for multiple video URL formats 30 | - Language-specific transcript retrieval 31 | - Detailed metadata in responses 32 | 33 | ## Configuration 34 | 35 | To use with Claude Desktop, add this server configuration: 36 | 37 | ```json 38 | { 39 | "mcpServers": { 40 | "youtube-transcript": { 41 | "command": "npx", 42 | "args": ["-y", "@kimtaeyoon83/mcp-server-youtube-transcript"] 43 | } 44 | } 45 | } 46 | ``` 47 | 48 | ## Install via tool 49 | 50 | [mcp-get](https://github.com/michaellatman/mcp-get) A command-line tool for installing and managing Model Context Protocol (MCP) servers. 51 | 52 | ```shell 53 | npx @michaellatman/mcp-get@latest install @kimtaeyoon83/mcp-server-youtube-transcript 54 | ``` 55 | 56 | ## Awesome-mcp-servers 57 | [awesome-mcp-servers](https://github.com/punkpeye/awesome-mcp-servers) A curated list of awesome Model Context Protocol (MCP) servers. 
58 | 59 | ## Development 60 | 61 | ### Prerequisites 62 | 63 | - Node.js 18 or higher 64 | - npm or yarn 65 | 66 | ### Setup 67 | 68 | Install dependencies: 69 | ```bash 70 | npm install 71 | ``` 72 | 73 | Build the server: 74 | ```bash 75 | npm run build 76 | ``` 77 | 78 | For development with auto-rebuild: 79 | ```bash 80 | npm run watch 81 | ``` 82 | 83 | ### Testing 84 | 85 | ```bash 86 | npm test 87 | ``` 88 | 89 | ### Debugging 90 | 91 | Since MCP servers communicate over stdio, debugging can be challenging. We recommend using the MCP Inspector for development: 92 | 93 | ```bash 94 | npm run inspector 95 | ``` 96 | 97 | ## Error Handling 98 | 99 | The server implements robust error handling for common scenarios: 100 | - Invalid video URLs or IDs 101 | - Unavailable transcripts 102 | - Language availability issues 103 | - Network errors 104 | 105 | ## Usage Examples 106 | 107 | 1. Get transcript by video URL: 108 | ```typescript 109 | await server.callTool("get_transcript", { 110 | url: "https://www.youtube.com/watch?v=VIDEO_ID", 111 | lang: "en" 112 | }); 113 | ``` 114 | 115 | 2. Get transcript by video ID: 116 | ```typescript 117 | await server.callTool("get_transcript", { 118 | url: "VIDEO_ID", 119 | lang: "ko" 120 | }); 121 | ``` 122 | 123 | 3. How to Extract YouTube Subtitles in Claude Desktop App 124 | ``` 125 | chat: https://youtu.be/ODaHJzOyVCQ?si=aXkJgso96Deri0aB Extract subtitles 126 | ``` 127 | 128 | ## Security Considerations 129 | 130 | The server: 131 | - Validates all input parameters 132 | - Handles YouTube API errors gracefully 133 | - Implements timeouts for transcript retrieval 134 | - Provides detailed error messages for troubleshooting 135 | 136 | ## License 137 | 138 | This MCP server is licensed under the MIT License. See the LICENSE file for details. 
139 | ``` -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "Node16", 5 | "moduleResolution": "Node16", 6 | "outDir": "./dist", 7 | "rootDir": "./src", 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true, 12 | "resolveJsonModule": true 13 | }, 14 | "include": ["src/**/*"], 15 | "exclude": ["node_modules"] 16 | } 17 | ``` -------------------------------------------------------------------------------- /smithery.yaml: -------------------------------------------------------------------------------- ```yaml 1 | # Smithery configuration file: https://smithery.ai/docs/deployments 2 | 3 | startCommand: 4 | type: stdio 5 | configSchema: 6 | # JSON Schema defining the configuration options for the MCP. 7 | type: object 8 | required: [] 9 | properties: {} 10 | commandFunction: 11 | # A function that produces the CLI command to start the MCP on stdio. 12 | |- 13 | config => ({ command: 'node', args: ['dist/index.js'] }) ``` -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- ```dockerfile 1 | # Stage 1: Build the application 2 | FROM node:18-alpine AS builder 3 | 4 | # Set working directory 5 | WORKDIR /app 6 | 7 | # Copy package.json and package-lock.json 8 | COPY package.json package-lock.json ./ 9 | 10 | # Install dependencies 11 | RUN npm install 12 | 13 | # Copy the rest of the application 14 | COPY . . 
15 | 16 | # Build the application 17 | RUN npm run build 18 | 19 | # Stage 2: Create the production image 20 | FROM node:18-alpine AS production 21 | 22 | # Set working directory 23 | WORKDIR /app 24 | 25 | # Copy the built files from the builder stage 26 | COPY --from=builder /app/dist /app/dist 27 | COPY --from=builder /app/package.json /app/package-lock.json /app/ 28 | 29 | # Install production dependencies only 30 | RUN npm ci --omit=dev 31 | 32 | # Specify the default command 33 | ENTRYPOINT ["node", "dist/index.js"] ``` -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "name": "@kimtaeyoon83/mcp-server-youtube-transcript", 3 | "version": "0.1.1", 4 | "description": "This is an MCP server that allows you to directly download transcripts of YouTube videos.", 5 | "license": "MIT", 6 | "author": "Freddie", 7 | "homepage": "https://github.com/kimtaeyoon83/mcp-server-youtube-transcript", 8 | "bugs": "https://github.com/kimtaeyoon83/mcp-server-youtube-transcript/issues", 9 | "type": "module", 10 | "access": "public", 11 | "main": "dist/index.js", 12 | "module": "dist/index.js", 13 | "bin": { 14 | "mcp-server-youtube-transcript": "dist/index.js" 15 | }, 16 | "files": [ 17 | "dist" 18 | ], 19 | "scripts": { 20 | "build": "tsc && shx chmod +x dist/*.js", 21 | "prepare": "npm run build", 22 | "watch": "tsc --watch" 23 | }, 24 | "dependencies": { 25 | "@modelcontextprotocol/sdk": "0.6.0", 26 | "youtube-captions-scraper": "^2.0.3" 27 | }, 28 | "devDependencies": { 29 | "@types/node": "^20.11.24", 30 | "shx": "^0.3.4", 31 | "typescript": "^5.6.2" 32 | } 33 | } 34 | ``` -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- ```typescript 1 | #!/usr/bin/env node 2 | 3 | import { Server } from 
"@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import {
  CallToolRequestSchema,
  ListToolsRequestSchema,
  ErrorCode,
  McpError,
  Tool,
  CallToolResult,
} from "@modelcontextprotocol/sdk/types.js";
// The suppression below is presumably needed because the scraper package
// provides no TypeScript typings (TODO confirm).
// @ts-ignore
import { getSubtitles } from 'youtube-captions-scraper';

/** Language used when the caller does not supply `lang`. */
const DEFAULT_LANG = "en";

/** Shape of a bare YouTube video ID: 11 URL-safe base64 characters. */
const VIDEO_ID_PATTERN = /^[a-zA-Z0-9_-]{11}$/;

/** First path segments of youtube.com URLs that are followed by a video ID. */
const PATH_ID_PREFIXES = ["embed", "shorts", "live", "v"];

// Define tool configurations
const TOOLS: Tool[] = [
  {
    name: "get_transcript",
    description: "Extract transcript from a YouTube video URL or ID",
    inputSchema: {
      type: "object",
      properties: {
        url: {
          type: "string",
          description: "YouTube video URL or ID"
        },
        lang: {
          type: "string",
          description: "Language code for transcript (e.g., 'ko', 'en')",
          default: DEFAULT_LANG
        }
      },
      // `lang` has a default, so only `url` is mandatory.
      required: ["url"]
    }
  }
];

interface TranscriptLine {
  text: string;
  start: number;
  dur: number;
}

/**
 * Turns any thrown value into a printable message.
 * Thrown values are not guaranteed to be `Error` instances, so reading
 * `.message` unconditionally can yield `undefined`.
 */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

class YouTubeTranscriptExtractor {
  /**
   * Extracts a YouTube video ID from a bare ID or from a URL.
   *
   * Accepted forms:
   *  - `VIDEO_ID` (11 characters)
   *  - `https://youtu.be/VIDEO_ID`
   *  - `https://[sub.]youtube.com/watch?v=VIDEO_ID`
   *  - `https://[sub.]youtube.com/{embed,shorts,live,v}/VIDEO_ID`
   *
   * @param input - URL or video ID supplied by the caller
   * @returns the 11-character video ID
   * @throws McpError(InvalidParams) when the input is empty, is neither a
   *         URL nor an ID, is not a YouTube URL, or carries no valid ID
   */
  extractYoutubeId(input: string): string {
    if (!input) {
      throw new McpError(
        ErrorCode.InvalidParams,
        'YouTube URL or ID is required'
      );
    }

    const candidate = input.trim();

    // A bare ID contains no ':' and therefore can never parse as a URL,
    // so it is safe to recognise it first.
    if (VIDEO_ID_PATTERN.test(candidate)) {
      return candidate;
    }

    // Keep the try block limited to URL parsing so that validation errors
    // raised further down are not mistaken for "not a URL".
    let url: URL;
    try {
      url = new URL(candidate);
    } catch {
      throw new McpError(
        ErrorCode.InvalidParams,
        `Invalid YouTube video ID: ${input}`
      );
    }

    // Compare whole host names; a substring match would also accept
    // look-alike hosts such as "notyoutube.com".
    const host = url.hostname.replace(/^www\./, '');
    const isShortLink = host === 'youtu.be';
    const isYoutubeSite = host === 'youtube.com' || host.endsWith('.youtube.com');

    if (!isShortLink && !isYoutubeSite) {
      throw new McpError(
        ErrorCode.InvalidParams,
        `Could not extract video ID from: ${input}`
      );
    }

    const videoId = isShortLink
      ? url.pathname.split('/')[1]
      : this.idFromYoutubeUrl(url);

    if (!videoId || !VIDEO_ID_PATTERN.test(videoId)) {
      throw new McpError(
        ErrorCode.InvalidParams,
        `Invalid YouTube URL: ${input}`
      );
    }

    return videoId;
  }

  /**
   * Reads the video ID of a youtube.com URL from the `v` query parameter or,
   * failing that, from an `/embed/`, `/shorts/`, `/live/` or `/v/` path.
   *
   * @returns the raw (unvalidated) ID, or null when none is present
   */
  private idFromYoutubeUrl(url: URL): string | null {
    const fromQuery = url.searchParams.get('v');
    if (fromQuery) {
      return fromQuery;
    }

    // "/shorts/ID" splits into ["", "shorts", "ID"].
    const [, prefix, id] = url.pathname.split('/');
    return PATH_ID_PREFIXES.includes(prefix) && id ? id : null;
  }

  /**
   * Retrieves the transcript for a given video ID and language.
   *
   * @param videoId - YouTube video ID
   * @param lang - caption language code passed to the scraper
   * @returns the caption text joined into a single string
   * @throws McpError(InternalError) when the scraper call fails
   */
  async getTranscript(videoId: string, lang: string): Promise<string> {
    try {
      const transcript = await getSubtitles({
        videoID: videoId,
        lang: lang,
      });

      return this.formatTranscript(transcript);
    } catch (error) {
      console.error('Failed to fetch transcript:', error);
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to retrieve transcript: ${describeError(error)}`
      );
    }
  }

  /**
   * Joins caption lines into one space-separated string, dropping lines
   * that are empty after trimming.
   */
  private formatTranscript(transcript: TranscriptLine[]): string {
    return transcript
      .map(line => line.text.trim())
      .filter(text => text.length > 0)
      .join(' ');
  }
}

class TranscriptServer {
  private extractor: YouTubeTranscriptExtractor;
  private server: Server;

  constructor() {
    this.extractor = new YouTubeTranscriptExtractor();
    this.server = new Server(
      {
        name: "mcp-servers-youtube-transcript",
        // Keep in sync with the "version" field of package.json.
        version: "0.1.1",
      },
      {
        capabilities: {
          tools: {},
        },
      }
    );

    this.setupHandlers();
    this.setupErrorHandling();
  }

  /**
   * Installs the server-level error logger and the shutdown signal handlers.
   */
  private setupErrorHandling(): void {
    // Diagnostics go to stderr; the server is connected over stdio (see
    // start()), so stdout is left to the transport.
    this.server.onerror = (error) => {
      console.error("[MCP Error]", error);
    };

    const shutdown = async (): Promise<void> => {
      await this.stop();
      process.exit(0);
    };

    // SIGINT covers Ctrl+C; SIGTERM is what `docker stop` and most process
    // managers send.
    process.on('SIGINT', () => { void shutdown(); });
    process.on('SIGTERM', () => { void shutdown(); });
  }

  /**
   * Registers the handlers for listing tools and for calling a tool.
   */
  private setupHandlers(): void {
    // List available tools
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: TOOLS
    }));

    // Handle tool calls
    this.server.setRequestHandler(CallToolRequestSchema, async (request) =>
      this.handleToolCall(request.params.name, request.params.arguments ?? {})
    );
  }

  /**
   * Handles tool call requests.
   *
   * @param name - tool name from the request
   * @param args - tool arguments; `url` is required, `lang` is optional
   * @returns a result in the `{ content, isError }` shape defined for
   *          `tools/call`; it is returned directly rather than wrapped in
   *          the legacy `toolResult` envelope
   * @throws McpError(InvalidParams) for bad arguments,
   *         McpError(MethodNotFound) for an unknown tool,
   *         McpError(InternalError) for any other failure
   */
  private async handleToolCall(name: string, args: any): Promise<CallToolResult> {
    switch (name) {
      case "get_transcript": {
        const input = args?.url;
        const requestedLang = args?.lang;

        if (!input || typeof input !== 'string') {
          throw new McpError(
            ErrorCode.InvalidParams,
            'URL parameter is required and must be a string'
          );
        }

        // `!= null` lets both null and undefined fall through to the default
        // while still rejecting non-string values such as 0 or false.
        if (requestedLang != null && typeof requestedLang !== 'string') {
          throw new McpError(
            ErrorCode.InvalidParams,
            'Language code must be a string'
          );
        }

        // `||` is intentional: an empty or blank string counts as "not given".
        const lang: string = requestedLang?.trim() || DEFAULT_LANG;

        try {
          const videoId = this.extractor.extractYoutubeId(input);
          console.error(`Processing transcript for video: ${videoId}`);

          const transcript = await this.extractor.getTranscript(videoId, lang);
          console.error(`Successfully extracted transcript (${transcript.length} chars)`);

          return {
            content: [{
              type: "text",
              text: transcript,
              metadata: {
                videoId,
                language: lang,
                timestamp: new Date().toISOString(),
                charCount: transcript.length
              }
            }],
            isError: false
          };
        } catch (error) {
          console.error('Transcript extraction failed:', error);

          // Errors that already carry an MCP error code pass through as-is.
          if (error instanceof McpError) {
            throw error;
          }

          throw new McpError(
            ErrorCode.InternalError,
            `Failed to process transcript: ${describeError(error)}`
          );
        }
      }

      default:
        throw new McpError(
          ErrorCode.MethodNotFound,
          `Unknown tool: ${name}`
        );
    }
  }

  /**
   * Starts the server by connecting it to a stdio transport.
   */
  async start(): Promise<void> {
    const transport = new StdioServerTransport();
    await this.server.connect(transport);
  }

  /**
   * Stops the server. Failures while closing are logged, not rethrown, so
   * that shutdown can always proceed.
   */
  async stop(): Promise<void> {
    try {
      await this.server.close();
    } catch (error) {
      console.error('Error while stopping server:', error);
    }
  }
}

/**
 * Entry point: builds the server and starts it, exiting with status 1 when
 * startup fails.
 */
async function main() {
  const server = new TranscriptServer();

  try {
    await server.start();
  } catch (error) {
    console.error("Server failed to start:", error);
    process.exit(1);
  }
}

// Catches anything main() does not handle itself, e.g. a constructor failure.
main().catch((error) => {
  console.error("Fatal server error:", error);
  process.exit(1);
});