# Directory Structure
```
├── .gitignore
├── .npmignore
├── jest.config.js
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── src
│ └── index.ts
└── tsconfig.json
```
# Files
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
1 | node_modules/
2 | build/
3 | *.log
4 | .env*
```
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
```
1 | node_modules
2 | src
3 | test
4 | tsconfig.json
5 | jest.config.js
6 | .gitignore
7 | package-lock.json
8 |
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
1 | # mcp-server-fetch-typescript MCP Server
2 |
3 | A Model Context Protocol server that provides web content fetching and conversion capabilities. This server implements a comprehensive web content retrieval system with support for various formats and rendering methods, making it ideal for tasks ranging from simple data extraction to sophisticated web scraping.
4 |
5 | <a href="https://glama.ai/mcp/servers/iyfpvfkgyx"><img width="380" height="200" src="https://glama.ai/mcp/servers/iyfpvfkgyx/badge" alt="Server Fetch TypeScript MCP server" /></a>
6 |
7 | ## Features
8 |
9 | ### Tools
10 |
11 | - `get_raw_text` - Retrieve raw text content directly from URLs
12 | - Takes `url` as a required parameter pointing to text-based resources
13 | - Returns unprocessed text content without browser rendering
14 | - Ideal for JSON, XML, CSV, TSV, or plain text files
15 | - Best used when fast, direct access to source content is needed
16 |
17 | - `get_rendered_html` - Fetch fully rendered HTML content
18 | - Takes `url` as a required parameter
19 | - Returns complete HTML content after JavaScript execution
20 | - Uses Playwright for headless browser rendering
21 | - Essential for modern web applications and SPAs
22 |
23 | - `get_markdown` - Convert web content to Markdown format
24 | - Takes `url` as a required parameter
25 | - Returns well-formatted Markdown preserving structural elements
26 | - Supports tables and definition lists
27 | - Recommended for content archiving and documentation
28 |
29 | - `get_markdown_summary` - Extract and convert main content
30 | - Takes `url` as a required parameter
31 | - Returns clean Markdown focusing on main content
32 | - Automatically removes navigation, headers, footers
33 | - Perfect for article and blog post extraction
34 |
35 | ## Installation
36 |
37 | ### As a Global Package
38 |
39 | ```bash
40 | npm install -g mcp-server-fetch-typescript
41 | ```
42 |
43 | ### As a Project Dependency
44 |
45 | ```bash
46 | npm install mcp-server-fetch-typescript
47 | ```
48 |
49 | ## Usage
50 |
51 | ### Using with Claude Desktop
52 |
53 | To use with Claude Desktop, add the server config:
54 |
55 | On macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
56 | On Windows: `%APPDATA%/Claude/claude_desktop_config.json`
57 |
58 | ```json
59 | "mcpServers": {
60 | "mcp-server-fetch-typescript": {
61 | "command": "npx",
62 | "args": [
63 | "-y",
64 | "mcp-server-fetch-typescript"
65 | ]
66 | }
67 | }
68 | ```
69 |
70 | Alternatively, build the server from source and add the following configuration, pointing it at the built entry point:
71 |
72 | ```bash
73 | git clone https://github.com/tatn/mcp-server-fetch-typescript.git
74 | cd mcp-server-fetch-typescript
75 | npm install
76 | npm run build
77 | ```
78 |
79 | ```json
80 | "mcpServers": {
81 | "mcp-server-fetch-typescript": {
82 | "command": "node",
83 | "args": [
84 | "/path/to/mcp-server-fetch-typescript/build/index.js"
85 | ]
86 | }
87 | }
88 | ```
89 |
90 | ### Debugging
91 |
92 | To debug the MCP server:
93 |
94 | ```bash
95 | npx @modelcontextprotocol/inspector npx -y mcp-server-fetch-typescript
96 | ```
97 |
98 | ```bash
99 | npx @modelcontextprotocol/inspector node /path/to/mcp-server-fetch-typescript/build/index.js
100 | ```
101 |
102 |
103 |
```
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "compilerOptions": {
3 | "target": "ES2022",
4 | "module": "Node16",
5 | "moduleResolution": "Node16",
6 | "outDir": "./build",
7 | "rootDir": "./src",
8 | "strict": true,
9 | "esModuleInterop": true,
10 | "skipLibCheck": true,
11 | "forceConsistentCasingInFileNames": true,
12 | "declaration": true,
13 | "sourceMap": true
14 | },
15 | "include": ["src/**/*"],
16 | "exclude": ["node_modules","build"]
17 | }
18 |
```
--------------------------------------------------------------------------------
/jest.config.js:
--------------------------------------------------------------------------------
```javascript
/**
 * Jest configuration for this package.
 *
 * Tests are compiled by ts-jest with `useESM` enabled, and `.ts` files are
 * treated as ES modules.
 *
 * @type {import('ts-jest').JestConfigWithTsJest}
 */
const jestConfig = {
  preset: 'ts-jest',
  testEnvironment: 'node',
  extensionsToTreatAsEsm: ['.ts'],

  // Map relative specifiers that end in ".js" back to the extension-less path.
  moduleNameMapper: {
    '^(\\.{1,2}/.*)\\.js$': '$1',
  },

  transform: {
    '^.+\\.tsx?$': ['ts-jest', { useESM: true }],
  },

  // Currently disabled options, kept for reference:
  // transformIgnorePatterns: [
  //   'node_modules/(?!(@modelcontextprotocol)/)'
  // ],
  // moduleDirectories: ['node_modules'],
};

export default jestConfig;
```
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "name": "mcp-server-fetch-typescript",
3 | "version": "0.1.1",
4 | "description": "A Model Context Protocol server that provides web content fetching and conversion capabilities",
5 | "main": "./build/index.js",
6 | "type": "module",
7 | "bin": {
8 | "mcp-server-fetch-typescript": "build/index.js"
9 | },
10 | "files": [
11 | "build"
12 | ],
13 | "scripts": {
14 | "build": "tsc && node --eval \"import('fs').then(fs => fs.chmodSync('build/index.js', '755'))\"",
15 | "prepare": "npm run build",
16 | "watch": "tsc --watch",
17 | "test": "jest",
18 | "inspector": "npx @modelcontextprotocol/inspector build/index.js"
19 | },
20 | "repository": {
21 | "type": "git",
22 | "url": "git+https://github.com/tatn/mcp-server-fetch-typescript.git"
23 | },
24 | "keywords": [
25 | "mcp",
26 | "server",
27 | "fetch",
28 | "playwright",
29 | "markdown",
30 | "html",
31 | "Typescript"
32 | ],
33 | "homepage": "https://github.com/tatn/mcp-server-fetch-typescript",
34 | "license": "MIT",
35 | "author": "tatn",
36 | "dependencies": {
37 | "@modelcontextprotocol/sdk": "0.6.0",
38 | "axios": "^1.7.9",
39 | "node-html-markdown": "^1.3.0",
40 | "playwright": "^1.49.1",
41 | "turndown": "^7.2.0"
42 | },
43 | "devDependencies": {
44 | "@types/jest": "^29.5.14",
45 | "@types/node": "^20.11.24",
46 | "@types/turndown": "^5.0.5",
47 | "jest": "^29.7.0",
48 | "ts-jest": "^29.2.5",
49 | "typescript": "^5.3.3"
50 | },
51 | "types": "build/index.d.ts"
52 | }
53 |
```
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
```typescript
1 | #!/usr/bin/env node
2 |
3 | /**
4 | * This MCP server implements web content fetching and conversion functionality.
5 | * It provides tools for:
6 | * - Fetching raw text content from URLs
7 | * - Getting rendered HTML content with JavaScript execution
8 | * - Converting web content to Markdown format
9 | * - Extracting main content from web pages
10 | */
11 |
12 | import { Server } from "@modelcontextprotocol/sdk/server/index.js";
13 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
14 | import {
15 | CallToolRequestSchema,
16 | ListToolsRequestSchema,
17 | } from "@modelcontextprotocol/sdk/types.js";
18 |
19 | import axios from 'axios';
20 | import { Browser, chromium, Page } from 'playwright';
21 | import Turndown from 'turndown';
22 | import { NodeHtmlMarkdown } from 'node-html-markdown';
23 | import type { TranslatorConfigObject } from 'node-html-markdown';
24 |
/**
 * MCP server instance for the web content fetching tools.
 *
 * Only the `tools` capability is advertised: this file registers handlers
 * for listing and calling tools, and none for resources or prompts.
 */
const server = new Server(
  {
    name: "mcp-server-fetch-typescript",
    // Keep in sync with the "version" field in package.json.
    version: "0.1.1",
  },
  {
    capabilities: {
      tools: {},
    },
  }
);

/** Network timeout, in milliseconds, used when loading a URL. */
const TIMEOUT = 20000;
45 |
/**
 * Tool-listing handler.
 *
 * Returns the descriptors of the four tools this server exposes. Every tool
 * takes the same input: an object with one required string property, `url`.
 */
server.setRequestHandler(ListToolsRequestSchema, async () => {
  // All descriptors share one shape, so only the three texts vary per tool.
  const describeUrlTool = (
    name: string,
    description: string,
    urlDescription: string,
  ) => ({
    name,
    description,
    inputSchema: {
      type: "object" as const,
      properties: {
        url: {
          type: "string",
          description: urlDescription,
        },
      },
      required: ["url"],
    },
  });

  return {
    tools: [
      describeUrlTool(
        "get_raw_text",
        "Retrieves raw text content directly from a URL without browser rendering. Ideal for structured data formats like JSON, XML, CSV, TSV, or plain text files. Best used when fast, direct access to the source content is needed without processing dynamic elements.",
        "URL of the target resource containing raw text content (JSON, XML, CSV, TSV, plain text, etc.).",
      ),
      describeUrlTool(
        "get_rendered_html",
        "Fetches fully rendered HTML content using a headless browser, including JavaScript-generated content. Essential for modern web applications, single-page applications (SPAs), or any content that requires client-side rendering to be complete.",
        "URL of the target web page that requires JavaScript execution or dynamic content rendering.",
      ),
      describeUrlTool(
        "get_markdown",
        "Converts web page content to well-formatted Markdown, preserving structural elements like tables and definition lists. Recommended as the default tool for web content extraction when a clean, readable text format is needed while maintaining document structure.",
        "URL of the web page to convert to Markdown format, supporting various HTML elements and structures.",
      ),
      describeUrlTool(
        "get_markdown_summary",
        "Extracts and converts the main content area of a web page to Markdown format, automatically removing navigation menus, headers, footers, and other peripheral content. Perfect for capturing the core content of articles, blog posts, or documentation pages.",
        "URL of the web page whose main content should be extracted and converted to Markdown.",
      ),
    ],
  };
});
113 |
/**
 * Tool-invocation handler.
 *
 * Validates the `url` argument, dispatches on the requested tool name, and
 * returns the fetched content as a single text content item.
 *
 * Throws `Error("url is required !")` when `url` is missing, is not a string,
 * or is blank, and `Error("Unknown tool")` when the tool name is not one of
 * the four cases below.
 */
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  // The raw argument must be type-checked before use: String(undefined) is the
  // truthy string "undefined", so coercing first would let a missing url
  // slip past a truthiness check.
  const rawUrl: unknown = request.params.arguments?.url;
  if (typeof rawUrl !== "string" || rawUrl.trim() === "") {
    throw new Error("url is required !");
  }
  const url = rawUrl.trim();

  // Every tool answers with exactly one text content item.
  const asTextResult = (text: string) => ({
    content: [{ type: "text" as const, text }],
  });

  switch (request.params.name) {
    case "get_raw_text":
      return asTextResult(await getRawTextString(url));

    case "get_rendered_html":
      return asTextResult(await getHtmlString(url));

    case "get_markdown":
      return asTextResult(await getMarkdownStringFromHtmlByNHM(url));

    case "get_markdown_summary":
      // `true` selects main-content-only conversion.
      return asTextResult(await getMarkdownStringFromHtmlByTD(url, true));

    default:
      throw new Error("Unknown tool");
  }
});
164 |
165 |
/**
 * Fetch a URL with a plain HTTP GET (no browser rendering) and return the
 * response body as text.
 *
 * @param request_url - URL of the resource to download.
 * @returns The response body as a string.
 * @throws Propagates the axios rejection (for example on network failure or
 *   when the timeout elapses).
 */
export async function getRawTextString(request_url: string): Promise<string> {
  const response = await axios.get<string>(request_url, {
    // Ask for text explicitly. With the default settings axios parses JSON
    // bodies into objects, which callers cannot use as a text payload.
    responseType: 'text',
    // Bound the request so an unresponsive host cannot stall the caller.
    timeout: TIMEOUT,
  });
  return response.data;
}
172 |
/**
 * Load a URL in headless Chromium and return the page's HTML.
 *
 * Navigation waits for `domcontentloaded` and is bounded by `TIMEOUT`.
 * Any failure while launching, navigating or reading the page is logged to
 * stderr and reported to the caller as an empty string, not an exception.
 *
 * @param request_url - URL of the page to load.
 * @returns The page HTML, or `""` if anything went wrong.
 */
async function getHtmlString(request_url: string): Promise<string> {
  let chromiumInstance: Browser | null = null;
  let tab: Page | null = null;

  try {
    chromiumInstance = await chromium.launch({ headless: true });
    const browserContext = await chromiumInstance.newContext();
    tab = await browserContext.newPage();

    await tab.goto(request_url, {
      waitUntil: 'domcontentloaded',
      timeout: TIMEOUT,
    });

    return await tab.content();
  } catch (error) {
    console.error(`Failed to fetch HTML for ${request_url}:`, error);
    return "";
  } finally {
    // Tear down the page first, then the browser. Each close is guarded
    // separately so a failure in one does not skip the other.
    if (tab !== null) {
      try {
        await tab.close();
      } catch (e) {
        console.error("Error closing page:", e);
      }
    }
    if (chromiumInstance !== null) {
      try {
        await chromiumInstance.close();
      } catch (error) {
        console.error('Error closing browser:', error);
      }
    }
  }
}
211 |
/**
 * Render a URL with the headless browser and convert the HTML to Markdown
 * using Turndown, with custom rules for tables and definition lists.
 *
 * `<script>` and `<style>` elements are always removed. When `mainOnly` is
 * true, `<header>`, `<footer>` and `<nav>` elements are removed as well.
 *
 * @param request_url - URL of the page to convert.
 * @param mainOnly - Also strip header, footer and nav elements.
 * @returns The Markdown produced by Turndown for the fetched HTML.
 */
export async function getMarkdownStringFromHtmlByTD(
  request_url: string,
  mainOnly: boolean = false,
): Promise<string> {
  const htmlString = await getHtmlString(request_url);

  const turndownService = new Turndown({ headingStyle: 'atx' });
  turndownService.remove('script');
  turndownService.remove('style');

  if (mainOnly) {
    turndownService.remove('header');
    turndownService.remove('footer');
    turndownService.remove('nav');
  }

  // A table cell must stay on one line and must not contain a bare pipe,
  // otherwise the generated row is split into the wrong columns. Collapse
  // all whitespace (including line breaks) and escape literal pipes.
  const cellText = (cell: { textContent: string | null }): string =>
    (cell.textContent ?? '')
      .replace(/\s+/g, ' ')
      .trim()
      .replace(/\|/g, '\\|');

  turndownService.addRule('table', {
    filter: 'table',
    replacement: function (_content, node) {
      const rows = Array.from(node.querySelectorAll('tr'));
      if (rows.length === 0) {
        return '';
      }

      // The first row is always emitted as the header row.
      const [headerRow, ...bodyRows] = rows;
      const headerCells = Array.from(
        headerRow.querySelectorAll('th, td'),
      ).map(cellText);
      const separator = headerCells.map(() => '---').join('|');

      let markdown = `\n| ${headerCells.join(' | ')} |\n|${separator}|`;
      for (const row of bodyRows) {
        const rowCells = Array.from(row.querySelectorAll('th, td')).map(
          cellText,
        );
        markdown += `\n| ${rowCells.join(' | ')} |`;
      }
      return markdown + '\n';
    },
  });

  turndownService.addRule('dl', {
    filter: 'dl',
    replacement: function (_content, node) {
      // Each non-empty <dt> becomes "**term:**" and each non-empty <dd> is
      // appended after it, followed by a line break.
      let markdown = '\n\n';
      for (const item of Array.from(node.children)) {
        const text = item.textContent?.trim() || '';
        if (!text) {
          continue;
        }
        if (item.tagName === 'DT') {
          markdown += `**${text}:**`;
        } else if (item.tagName === 'DD') {
          markdown += ` ${text}\n`;
        }
      }
      return markdown + '\n';
    },
  });

  return turndownService.turndown(htmlString);
}
286 |
/**
 * Render a URL with the headless browser and translate the HTML to Markdown
 * using node-html-markdown with custom translators.
 *
 * Custom translators cover `dl`, `dt`, `dd` and `Head`; the `Head` translator
 * outputs the text of the `<title>` element, or nothing if there is none.
 * When `mainOnly` is true, `Header`, `Footer` and `Nav` are additionally
 * configured with `ignore: true`.
 *
 * @param request_url - URL of the page to convert.
 * @param mainOnly - Also ignore header, footer and nav elements.
 * @returns The Markdown produced for the fetched HTML.
 */
export async function getMarkdownStringFromHtmlByNHM(
  request_url: string,
  mainOnly: boolean = false,
) {
  const renderedHtml = await getHtmlString(request_url);

  // One factory shared by every element that should be left out.
  const skipElement = () => ({ ignore: true });

  const translators: TranslatorConfigObject = {
    dl: () => ({ preserveWhitespace: false, surroundingNewlines: true }),
    dt: () => ({ prefix: '**', postfix: ':** ', surroundingNewlines: false }),
    dd: () => ({ postfix: '\n', surroundingNewlines: false }),
    Head: () => ({
      postfix: '\n',
      ignore: false,
      surroundingNewlines: true,
      // Reduce the whole <head> to its title text.
      postprocess: (ctx) =>
        ctx.node.querySelector('title')?.textContent || '',
    }),
    ...(mainOnly
      ? { Header: skipElement, Footer: skipElement, Nav: skipElement }
      : {}),
  };

  return NodeHtmlMarkdown.translate(renderedHtml, {}, translators);
}
342 |
/**
 * Entry point: connect the server to a stdio transport so it exchanges
 * messages over standard input and output.
 */
async function main(): Promise<void> {
  await server.connect(new StdioServerTransport());
}

// A startup failure is logged to stderr and ends the process with exit code 1.
main().catch((reason) => {
  console.error("Server error:", reason);
  process.exit(1);
});
356 |
357 |
358 |
```