tatn/mcp-server-fetch-typescript # codebase.md

# Directory Structure

```
├── .gitignore
├── .npmignore
├── jest.config.js
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── src
│   └── index.ts
└── tsconfig.json
```

# Files

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
1 | node_modules/
2 | build/
3 | *.log
4 | .env*
```

--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------

```
1 | node_modules
2 | src
3 | test
4 | tsconfig.json
5 | jest.config.js
6 | .gitignore
7 | package-lock.json
8 | 
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
  1 | # mcp-server-fetch-typescript MCP Server
  2 | 
  3 | A Model Context Protocol server that provides web content fetching and conversion capabilities. This server implements a comprehensive web content retrieval system with support for various formats and rendering methods, making it ideal for tasks ranging from simple data extraction to sophisticated web scraping.
  4 | 
  5 | <a href="https://glama.ai/mcp/servers/iyfpvfkgyx"><img width="380" height="200" src="https://glama.ai/mcp/servers/iyfpvfkgyx/badge" alt="Server Fetch TypeScript MCP server" /></a>
  6 | 
  7 | ## Features
  8 | 
  9 | ### Tools
 10 | 
 11 | - `get_raw_text` - Retrieve raw text content directly from URLs
 12 |   - Takes `url` as a required parameter pointing to text-based resources
 13 |   - Returns unprocessed text content without browser rendering
 14 |   - Ideal for JSON, XML, CSV, TSV, or plain text files
 15 |   - Best used when fast, direct access to source content is needed
 16 | 
 17 | - `get_rendered_html` - Fetch fully rendered HTML content
 18 |   - Takes `url` as a required parameter
 19 |   - Returns complete HTML content after JavaScript execution
 20 |   - Uses Playwright for headless browser rendering
 21 |   - Essential for modern web applications and SPAs
 22 | 
 23 | - `get_markdown` - Convert web content to Markdown format
 24 |   - Takes `url` as a required parameter
 25 |   - Returns well-formatted Markdown preserving structural elements
 26 |   - Supports tables and definition lists
 27 |   - Recommended for content archiving and documentation
 28 | 
 29 | - `get_markdown_summary` - Extract and convert main content
 30 |   - Takes `url` as a required parameter
 31 |   - Returns clean Markdown focusing on main content
 32 |   - Automatically removes navigation, headers, footers
 33 |   - Perfect for article and blog post extraction
 34 | 
 35 | ## Installation
 36 | 
 37 | ### As a Global Package
 38 | 
 39 | ```bash
 40 | npm install -g mcp-server-fetch-typescript
 41 | ```
 42 | 
 43 | ### As a Project Dependency
 44 | 
 45 | ```bash
 46 | npm install mcp-server-fetch-typescript
 47 | ```
 48 | 
 49 | ## Usage
 50 | 
 51 | ### Using with Claude Desktop
 52 | 
 53 | To use with Claude Desktop, add the server config:
 54 | 
 55 | On macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`  
 56 | On Windows: `%APPDATA%/Claude/claude_desktop_config.json`
 57 | 
 58 | ```json
 59 | "mcpServers": {
 60 |   "mcp-server-fetch-typescript": {
 61 |     "command": "npx",
 62 |     "args": [
 63 |       "-y",
 64 |       "mcp-server-fetch-typescript"
 65 |     ]
 66 |   }
 67 | }
 68 | ```
 69 | 
 70 | Alternatively, clone and build the repository locally, then add the following configuration:
 71 | 
 72 | ```bash
 73 | git clone https://github.com/tatn/mcp-server-fetch-typescript.git
 74 | cd mcp-server-fetch-typescript
 75 | npm install
 76 | npm run build
 77 | ```
 78 | 
 79 | ```json
 80 | "mcpServers": {
 81 |   "mcp-server-fetch-typescript": {
 82 |     "command": "node",
 83 |     "args": [
 84 |       "/path/to/mcp-server-fetch-typescript/build/index.js"
 85 |     ]
 86 |   }
 87 | }
 88 | ```
 89 | 
 90 | ### Debugging
 91 | 
 92 | To debug the MCP server:
 93 | 
 94 | ```bash
 95 | npx @modelcontextprotocol/inspector npx -y mcp-server-fetch-typescript
 96 | ```
 97 | 
 98 | ```bash
 99 | npx @modelcontextprotocol/inspector node /path/to/mcp-server-fetch-typescript/build/index.js
100 | ```
101 | 
102 | 
103 | 
```

--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2022",
 4 |     "module": "Node16",
 5 |     "moduleResolution": "Node16",
 6 |     "outDir": "./build",
 7 |     "rootDir": "./src",
 8 |     "strict": true,
 9 |     "esModuleInterop": true,
10 |     "skipLibCheck": true,
11 |     "forceConsistentCasingInFileNames": true,
12 |     "declaration": true,
13 |     "sourceMap": true
14 |   },
15 |   "include": ["src/**/*"],
16 |   "exclude": ["node_modules","build"]
17 | }
18 | 
```

--------------------------------------------------------------------------------
/jest.config.js:
--------------------------------------------------------------------------------

```javascript
 1 | /** @type {import('ts-jest').JestConfigWithTsJest} */
 2 | export default {
 3 |   preset: 'ts-jest',
 4 |   testEnvironment: 'node',
 5 |   extensionsToTreatAsEsm: ['.ts'],
 6 |   moduleNameMapper: {
 7 |     '^(\\.{1,2}/.*)\\.js$': '$1',
 8 |   },
 9 |   transform: {
10 |     '^.+\\.tsx?$': ['ts-jest', {
11 |       useESM: true,
12 |     }],
13 |   },
14 |   // transformIgnorePatterns: [
15 |   //   'node_modules/(?!(@modelcontextprotocol)/)'
16 |   // ],
17 |   // moduleDirectories: ['node_modules'],
18 | };
```

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------

```json
 1 | {
 2 |   "name": "mcp-server-fetch-typescript",
 3 |   "version": "0.1.1",
 4 |   "description": "A Model Context Protocol server that provides web content fetching and conversion capabilities",
 5 |   "main": "./build/index.js",
 6 |   "type": "module",
 7 |   "bin": {
 8 |     "mcp-server-fetch-typescript": "build/index.js"
 9 |   },
10 |   "files": [
11 |     "build"
12 |   ],
13 |   "scripts": {
14 |     "build": "tsc && node --eval \"import('fs').then(fs => fs.chmodSync('build/index.js', '755'))\"",
15 |     "prepare": "npm run build",
16 |     "watch": "tsc --watch",
17 |     "test": "jest",
18 |     "inspector": "npx @modelcontextprotocol/inspector build/index.js"
19 |   },
20 |   "repository": {
21 |     "type": "git",
22 |     "url": "git+https://github.com/tatn/mcp-server-fetch-typescript.git"
23 |   },
24 |   "keywords": [
25 |     "mcp",
26 |     "server",
27 |     "fetch",
28 |     "playwright",
29 |     "markdown",
30 |     "html",
31 |     "Typescript"
32 |   ],
33 |   "homepage": "https://github.com/tatn/mcp-server-fetch-typescript",
34 |   "license": "MIT",
35 |   "author": "tatn",
36 |   "dependencies": {
37 |     "@modelcontextprotocol/sdk": "0.6.0",
38 |     "axios": "^1.7.9",
39 |     "node-html-markdown": "^1.3.0",
40 |     "playwright": "^1.49.1",
41 |     "turndown": "^7.2.0"
42 |   },
43 |   "devDependencies": {
44 |     "@types/jest": "^29.5.14",
45 |     "@types/node": "^20.11.24",
46 |     "@types/turndown": "^5.0.5",
47 |     "jest": "^29.7.0",
48 |     "ts-jest": "^29.2.5",
49 |     "typescript": "^5.3.3"
50 |   },
51 |   "types": "build/index.d.ts"
52 | }
53 | 
```

--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------

```typescript
  1 | #!/usr/bin/env node
  2 | 
  3 | /**
  4 |  * This MCP server implements web content fetching and conversion functionality.
  5 |  * It provides tools for:
  6 |  * - Fetching raw text content from URLs
  7 |  * - Getting rendered HTML content with JavaScript execution
  8 |  * - Converting web content to Markdown format
  9 |  * - Extracting main content from web pages
 10 |  */
 11 | 
 12 | import { Server } from "@modelcontextprotocol/sdk/server/index.js";
 13 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 14 | import {
 15 |   CallToolRequestSchema,
 16 |   ListToolsRequestSchema,
 17 | } from "@modelcontextprotocol/sdk/types.js";
 18 | 
 19 | import axios from 'axios';
 20 | import { Browser, chromium, Page } from 'playwright';
 21 | import Turndown from 'turndown';
 22 | import { NodeHtmlMarkdown } from 'node-html-markdown';
 23 | import type { TranslatorConfigObject } from 'node-html-markdown';
 24 | 
 25 | /**
 26 |  * Create an MCP server with capabilities for web content fetching tools.
 27 |  * The server provides various tools for fetching and converting web content
 28 |  * in different formats including raw text, rendered HTML, and Markdown.
 29 |  */
 30 | const server = new Server(
 31 |   {
 32 |     name: "mcp-server-fetch-typescript",
 33 |     version: "0.1.0",
 34 |   },
 35 |   {
 36 |     capabilities: {
 37 |       resources: {},
 38 |       tools: {},
 39 |       prompts: {},
 40 |     },
 41 |   }
 42 | );
 43 | 
 44 | const TIMEOUT = 20000;
 45 | 
 46 | /**
 47 |  * Handler that lists available web content fetching tools.
 48 |  * Exposes multiple tools for fetching and converting web content
 49 |  * in various formats including raw text, rendered HTML, and Markdown.
 50 |  */
 51 | server.setRequestHandler(ListToolsRequestSchema, async () => {
 52 |   return {
 53 |     tools: [
 54 |       {
 55 |         name: "get_raw_text",
 56 |         description: "Retrieves raw text content directly from a URL without browser rendering. Ideal for structured data formats like JSON, XML, CSV, TSV, or plain text files. Best used when fast, direct access to the source content is needed without processing dynamic elements.",
 57 |         inputSchema: {
 58 |           type: "object",
 59 |           properties: {
 60 |             url: {
 61 |               type: "string",
 62 |               description: "URL of the target resource containing raw text content (JSON, XML, CSV, TSV, plain text, etc.)."
 63 |             }
 64 |           },
 65 |           required: ["url"]
 66 |         }
 67 |       },
 68 |       {
 69 |         name: "get_rendered_html",
 70 |         description: "Fetches fully rendered HTML content using a headless browser, including JavaScript-generated content. Essential for modern web applications, single-page applications (SPAs), or any content that requires client-side rendering to be complete.",
 71 |         inputSchema: {
 72 |           type: "object",
 73 |           properties: {
 74 |             url: {
 75 |               type: "string",
 76 |               description: "URL of the target web page that requires JavaScript execution or dynamic content rendering."
 77 |             }
 78 |           },
 79 |           required: ["url"]
 80 |         }
 81 |       },
 82 |       {
 83 |         name: "get_markdown",
 84 |         description: "Converts web page content to well-formatted Markdown, preserving structural elements like tables and definition lists. Recommended as the default tool for web content extraction when a clean, readable text format is needed while maintaining document structure.",
 85 |         inputSchema: {
 86 |           type: "object",
 87 |           properties: {
 88 |             url: {
 89 |               type: "string",
 90 |               description: "URL of the web page to convert to Markdown format, supporting various HTML elements and structures."
 91 |             }
 92 |           },
 93 |           required: ["url"]
 94 |         }
 95 |       },
 96 |       {
 97 |         name: "get_markdown_summary",
 98 |         description: "Extracts and converts the main content area of a web page to Markdown format, automatically removing navigation menus, headers, footers, and other peripheral content. Perfect for capturing the core content of articles, blog posts, or documentation pages.",
 99 |         inputSchema: {
100 |           type: "object",
101 |           properties: {
102 |             url: {
103 |               type: "string",
104 |               description: "URL of the web page whose main content should be extracted and converted to Markdown."
105 |             }
106 |           },
107 |           required: ["url"]
108 |         }
109 |       },
110 |     ]
111 |   };
112 | });
113 | 
/**
 * Handler for tool invocations.
 *
 * Validates the `url` argument, dispatches on the tool name, and returns the
 * fetched or converted content as a single text item.
 *
 * @throws Error("url is required !") when `url` is missing, not a string, or blank.
 * @throws Error("Unknown tool") when the tool name is not one of the four tools.
 */
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  // The argument must be type-checked rather than coerced: String(undefined)
  // is the truthy string "undefined", which would slip past a falsiness check
  // and be fetched as if it were a URL.
  const rawUrl = request.params.arguments?.url;
  if (typeof rawUrl !== "string" || rawUrl.trim() === "") {
    throw new Error("url is required !");
  }
  const url = rawUrl.trim();

  // Wraps a string in the single-item text result shape returned by every tool.
  const asText = (text: string) => ({
    content: [{ type: "text" as const, text }],
  });

  // NOTE(review): the URL scheme is not restricted here, so values such as
  // file:// URLs are handed to the HTTP client / headless browser unchanged.
  // Consider limiting to http(s) if that is not intended.
  switch (request.params.name) {
    case "get_raw_text":
      return asText(await getRawTextString(url));

    case "get_rendered_html":
      return asText(await getHtmlString(url));

    case "get_markdown":
      return asText(await getMarkdownStringFromHtmlByNHM(url));

    case "get_markdown_summary":
      // The summary variant strips header/footer/nav elements (mainOnly = true).
      return asText(await getMarkdownStringFromHtmlByTD(url, true));

    default:
      throw new Error("Unknown tool");
  }
});
164 | 
165 | 
/**
 * Fetches a URL with a plain HTTP GET (no browser rendering) and returns the
 * response body as text.
 *
 * The body is requested as text so that JSON responses are returned verbatim
 * instead of being parsed into objects by axios.
 *
 * @param request_url - URL of the resource to download.
 * @returns The response body as a string.
 * @throws The axios error if the request fails or exceeds `TIMEOUT` milliseconds.
 */
export async function getRawTextString(request_url: string): Promise<string> {
  const response = await axios.get<unknown>(request_url, {
    responseType: 'text',
    // Same limit as the browser-based tools, so a stalled server cannot hang the call.
    timeout: TIMEOUT,
  });

  const body = response.data;
  if (typeof body === 'string') {
    return body;
  }
  // Defensive: if the body was still decoded into a non-string value,
  // serialize it so callers always receive a string.
  return body == null ? '' : JSON.stringify(body);
}
172 | 
/**
 * Loads a page in headless Chromium and returns its HTML once the
 * `domcontentloaded` event has fired.
 *
 * Best-effort: any failure while launching, navigating, or reading the page is
 * logged with `console.error` and an empty string is returned. The page and the
 * browser are always closed afterwards; errors while closing are logged only.
 *
 * @param request_url - URL of the page to render.
 * @returns The page HTML, or `""` if anything went wrong.
 */
async function getHtmlString(request_url: string): Promise<string> {
  // Closes a page/browser if it was opened, logging instead of throwing.
  const closeQuietly = async (
    target: { close(): Promise<void> } | null,
    failureLabel: string,
  ): Promise<void> => {
    if (!target) {
      return;
    }
    try {
      await target.close();
    } catch (closeError) {
      console.error(failureLabel, closeError);
    }
  };

  let browser: Browser | null = null;
  let page: Page | null = null;

  try {
    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext();
    page = await context.newPage();

    await page.goto(request_url, {
      waitUntil: 'domcontentloaded',
      timeout: TIMEOUT,
    });

    return await page.content();
  } catch (error) {
    console.error(`Failed to fetch HTML for ${request_url}:`, error);
    return "";
  } finally {
    // Page first, then the browser that owns it.
    await closeQuietly(page, "Error closing page:");
    await closeQuietly(browser, 'Error closing browser:');
  }
}
211 | 
/**
 * Normalizes text for use inside a single Markdown table cell: collapses all
 * whitespace runs (including newlines) to one space and escapes `|`, either of
 * which would otherwise break the row layout.
 */
function toMarkdownTableCell(text: string | null | undefined): string {
  return (text ?? '').replace(/\s+/g, ' ').trim().replace(/\|/g, '\\|');
}

/** Returns the normalized text of every `<th>`/`<td>` found in a table row. */
function markdownTableCells(row: Element): string[] {
  return Array.from(row.querySelectorAll('th, td'), (cell) =>
    toMarkdownTableCell(cell.textContent),
  );
}

/**
 * Renders a page with the headless browser and converts the HTML to Markdown
 * using Turndown, with custom rules for tables and definition lists.
 *
 * `<script>` and `<style>` elements are always dropped; when `mainOnly` is set,
 * `<header>`, `<footer>` and `<nav>` elements are dropped as well.
 *
 * @param request_url - URL of the page to convert.
 * @param mainOnly - When true, also remove header, footer and nav elements.
 * @returns The Markdown text produced from the rendered HTML.
 */
export async function getMarkdownStringFromHtmlByTD(
  request_url: string,
  mainOnly: boolean = false,
): Promise<string> {
  const htmlString = await getHtmlString(request_url);

  const turndownService = new Turndown({ headingStyle: 'atx' });
  turndownService.remove('script');
  turndownService.remove('style');

  if (mainOnly) {
    turndownService.remove('header');
    turndownService.remove('footer');
    turndownService.remove('nav');
  }

  // Tables: the first row is used as the header, every later row as a body row.
  turndownService.addRule('table', {
    filter: 'table',
    replacement: (_content, node) => {
      const rows = Array.from(node.querySelectorAll('tr'));
      if (rows.length === 0) {
        return '';
      }

      const [headerRow, ...bodyRows] = rows;
      const headerCells = markdownTableCells(headerRow);
      const separator = headerCells.map(() => '---').join('|');

      const lines = [
        `| ${headerCells.join(' | ')} |`,
        `|${separator}|`,
        ...bodyRows.map((row) => `| ${markdownTableCells(row).join(' | ')} |`),
      ];
      return `\n${lines.join('\n')}\n`;
    },
  });

  // Definition lists: each term becomes "**term:**" followed by its definitions.
  turndownService.addRule('dl', {
    filter: 'dl',
    replacement: (_content, node) => {
      let markdown = '\n\n';
      // True while a "**term:**" has been written but its line is not yet ended.
      let termOpen = false;

      for (const item of Array.from(node.children)) {
        const text = item.textContent?.trim() || '';
        if (!text) {
          continue;
        }
        if (item.tagName === 'DT') {
          // A term without a definition must not run into the next term.
          if (termOpen) {
            markdown += '\n';
          }
          markdown += `**${text}:**`;
          termOpen = true;
        } else if (item.tagName === 'DD') {
          markdown += ` ${text}\n`;
          termOpen = false;
        }
      }

      if (termOpen) {
        markdown += '\n';
      }
      return markdown + '\n';
    },
  });

  return turndownService.turndown(htmlString);
}
286 | 
/**
 * Renders a page with the headless browser and converts the HTML to Markdown
 * using NodeHtmlMarkdown with custom translators:
 * - `dl` / `dt` / `dd`: terms are wrapped as `**term:** ` and each definition
 *   ends with a newline;
 * - `Head`: not ignored; replaced by the text of its `<title>` element (or an
 *   empty string when there is none);
 * - `Header` / `Footer` / `Nav`: ignored, but only when `mainOnly` is true.
 */
export async function getMarkdownStringFromHtmlByNHM(
  request_url: string,
  mainOnly: boolean = false,
) {
  const renderedHtml = await getHtmlString(request_url);

  // Translator factory shared by every element dropped in main-only mode.
  const ignoreElement = () => ({ ignore: true });

  const translators: TranslatorConfigObject = {
    dl: () => ({
      preserveWhitespace: false,
      surroundingNewlines: true,
    }),
    dt: () => ({
      prefix: '**',
      postfix: ':** ',
      surroundingNewlines: false,
    }),
    dd: () => ({
      postfix: '\n',
      surroundingNewlines: false,
    }),
    Head: () => ({
      postfix: '\n',
      ignore: false,
      // Keep only the document title from the head section.
      postprocess: (ctx) => ctx.node.querySelector('title')?.textContent || '',
      surroundingNewlines: true,
    }),
    ...(mainOnly
      ? { Header: ignoreElement, Footer: ignoreElement, Nav: ignoreElement }
      : {}),
  };

  return NodeHtmlMarkdown.translate(renderedHtml, {}, translators);
}
342 | 
/**
 * Entry point: connects the server to a stdio transport so that it
 * communicates over standard input/output.
 */
async function main() {
  await server.connect(new StdioServerTransport());
}

// A failure to start is fatal: log it and exit with a non-zero status.
main().catch((reason) => {
  console.error("Server error:", reason);
  process.exit(1);
});
356 | 
357 | 
358 | 
```