# Directory Structure ``` ├── .github │ └── workflows │ └── publish.yml ├── .gitignore ├── biome.json ├── Dockerfile ├── index.ts ├── LICENSE ├── package-lock.json ├── package.json ├── README.md ├── smithery.yaml ├── tsconfig.json └── types.d.ts ``` # Files -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` dist node_modules ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown # MCP Fetch [MCP Fetch on Smithery](https://smithery.ai/server/@kazuph/mcp-fetch) Model Context Protocol server for fetching web content and processing images. This allows Claude Desktop (or any MCP client) to fetch web content and handle images appropriately. ## Quick Start (For Users) To use this tool with Claude Desktop, simply add the following to your Claude Desktop configuration (`~/Library/Application Support/Claude/claude_desktop_config.json`): ```json { "tools": { "fetch": { "command": "npx", "args": ["-y", "@kazuph/mcp-fetch"] } } } ``` This will automatically download and run the latest version of the tool when needed. ### Required Setup 1. Enable Accessibility for Claude: - Open System Settings - Go to Privacy & Security > Accessibility - Click the "+" button - Add Claude from your Applications folder - Turn ON the toggle for Claude This accessibility setting is required for automated clipboard operations (Cmd+V) to work properly. ## For Developers The following sections are for those who want to develop or modify the tool.
## Prerequisites - Node.js 18+ - macOS (for clipboard operations) - Claude Desktop (install from https://claude.ai/desktop) - tsx (install via `npm install -g tsx`) ## Installation ### Installing via Smithery To install MCP Fetch for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@kazuph/mcp-fetch): ```bash npx -y @smithery/cli install @kazuph/mcp-fetch --client claude ``` ### Manual Installation ```bash git clone https://github.com/kazuph/mcp-fetch.git cd mcp-fetch npm install npm run build ``` ## Image Processing Specifications When processing images from web content, the following limits are applied: - Maximum 6 images per group - Maximum height of 8000 pixels per group - Maximum size of 30MB per group If content exceeds these limits, images will be automatically split into multiple groups, and you'll need to paste (Cmd+V) multiple times. ## Configuration 1. Make sure Claude Desktop is installed and running. 2. Install tsx globally if you haven't: ```bash npm install -g tsx # or pnpm add -g tsx ``` 3. Modify your Claude Desktop config located at: `~/Library/Application Support/Claude/claude_desktop_config.json` You can easily find this through the Claude Desktop menu: 1. Open Claude Desktop 2. Click Claude on the Mac menu bar 3. Click "Settings" 4. Click "Developer" Add the following to your MCP client's configuration: ```json { "tools": { "fetch": { "command": "npx", "args": ["tsx", "/path/to/mcp-fetch/index.ts"] } } } ``` ## Available Tools - `fetch`: Retrieves URLs from the Internet and extracts their content as markdown. Images are automatically processed and prepared for clipboard operations. ## Notes - This tool is designed for macOS only due to its dependency on macOS-specific clipboard operations. - Images are processed using Sharp for optimal performance and quality. - When multiple images are found, they are merged vertically with consideration for size limits. - Animated GIFs are automatically handled by extracting their first frame.
``` -------------------------------------------------------------------------------- /biome.json: -------------------------------------------------------------------------------- ```json { "formatter": { "enabled": true, "indentStyle": "space", "indentWidth": 2, "lineWidth": 80 }, "linter": { "enabled": true, "rules": { "recommended": true } }, "javascript": { "formatter": { "quoteStyle": "double", "trailingComma": "es5" } } } ``` -------------------------------------------------------------------------------- /smithery.yaml: -------------------------------------------------------------------------------- ```yaml startCommand: type: stdio configSchema: # JSON Schema defining the configuration options for the MCP. {} commandFunction: # A function that produces the CLI command to start the MCP on stdio. |- (config) => ({ "command": "node", "args": [ "dist/index.js" ] }) ``` -------------------------------------------------------------------------------- /types.d.ts: -------------------------------------------------------------------------------- ```typescript declare module "applescript" { export function execString( script: string, callback: (err: Error | null, result: unknown) => void, ): void; } declare module "robots-parser" { interface RobotsParser { isAllowed(url: string, userAgent: string): boolean; } export default function (robotsUrl: string, robotsTxt: string): RobotsParser; } ``` -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- ```json { "compilerOptions": { "target": "ES2022", "strict": true, "esModuleInterop": true, "skipLibCheck": true, "forceConsistentCasingInFileNames": true, "resolveJsonModule": true, "outDir": "./dist", "rootDir": ".", "moduleResolution": "NodeNext", "module": "NodeNext" }, "exclude": ["node_modules"], "include": ["./**/*.ts"] } ``` 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------

```dockerfile
# Build stage: install every dependency and compile the TypeScript sources.
FROM node:22.12-alpine AS builder

# Must be entire project because `prepare` script is run during `npm install` and requires all files.
COPY . /app
WORKDIR /app

RUN --mount=type=cache,target=/root/.npm npm install

# Release stage: compiled output plus production dependencies only.
FROM node:22-alpine AS release

WORKDIR /app

COPY --from=builder /app/dist /app/dist
COPY --from=builder /app/package.json /app/package.json
COPY --from=builder /app/package-lock.json /app/package-lock.json

ENV NODE_ENV=production

# --ignore-scripts skips the `prepare` build (dist is copied from the builder);
# --omit=dev leaves devDependencies out of the image.
RUN npm ci --ignore-scripts --omit=dev

ENTRYPOINT ["node", "dist/index.js"]
```

--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------

```yaml
name: Publish to npm

on:
  push:
    branches:
      - main
  workflow_dispatch: # Allows manual triggering

# The job pushes the version-bump commit and tag back to the repository.
permissions:
  contents: write

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  publish:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'
          registry-url: 'https://registry.npmjs.org'

      - name: Install dependencies
        run: npm ci

      - name: Build project
        run: npm run build

      - name: Configure Git
        run: |
          git config --local user.email "action@github.com"
          git config --local user.name "GitHub Action"

      # "[skip ci]" in the commit message keeps the bump commit from re-triggering this workflow.
      - name: Bump version
        run: |
          npm version patch -m "chore: bump version to %s [skip ci]"
          git push
          git push --tags

      - name: Publish to npm
        run: npm publish --access public -ws --include-workspace-root
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
```

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------

```json { "name": "@smithery/mcp-fetch", "version": "0.8.11", "type": "module", "description": "A Model Context Protocol server that provides web content fetching capabilities", "main": "dist/index.js", "scripts": { "prepare": "npm run build", "build": "tsc", "start": "node dist/index.js", "dev": "tsc && node dist/index.js", "check": "biome check .", "format": "biome format . --write", "lint": "biome lint .", "typecheck": "tsc --noEmit", "test": "npm run typecheck && npm run check" }, "dependencies": { "@modelcontextprotocol/sdk": "^1.0.0", "@mozilla/readability": "^0.5.0", "@types/sharp": "^0.31.1", "jsdom": "^24.0.0", "node-fetch": "^3.3.2", "robots-parser": "^3.0.1", "sharp": "^0.33.5", "turndown": "^7.1.2", "zod": "^3.22.4", "zod-to-json-schema": "^3.22.4" }, "devDependencies": { "@types/jsdom": "^21.1.6", "@types/node": "^20.10.5", "@types/turndown": "^5.0.4", "typescript": "^5.3.3" }, "author": "kazuph", "license": "MIT", "publishConfig": { "access": "public" }, "files": [ "dist", "dist/**/*.map", "README.md" ], "repository": { "type": "git", "url": "git+https://github.com/kazuph/mcp-fetch.git" }, "keywords": [ "mcp", "fetch", "web", "content" ], "bugs": { "url": "https://github.com/kazuph/mcp-fetch/issues" }, "homepage": "https://github.com/kazuph/mcp-fetch#readme", "bin": { "mcp-fetch": "./dist/index.js" } } ``` -------------------------------------------------------------------------------- /index.ts: -------------------------------------------------------------------------------- ```typescript #!/usr/bin/env node import { Server } from "@modelcontextprotocol/sdk/server/index.js" import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { z } from "zod" import { zodToJsonSchema } from "zod-to-json-schema" import fetch from "node-fetch" import { JSDOM } from "jsdom" import { Readability } from "@mozilla/readability" import TurndownService from "turndown" import { exec } from "node:child_process" import { promisify } from 
"node:util"
import sharp from "sharp"

const execAsync = promisify(exec)

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => setTimeout(resolve, ms))
}

/** An `<img>` found in the extracted article, optionally with its bytes. */
interface Image {
  src: string
  alt: string
  data?: Buffer
}

/** Result of simplifying an HTML page: markdown text plus referenced images. */
interface ExtractedContent {
  markdown: string
  images: Image[]
}

/** An image buffer together with the dimensions read from it. */
interface MeasuredImage {
  data: Buffer
  width: number
  height: number
  size: number
}

const DEFAULT_USER_AGENT_AUTONOMOUS =
  "ModelContextProtocol/1.0 (Autonomous; +https://github.com/modelcontextprotocol/servers)"
const DEFAULT_USER_AGENT_MANUAL =
  "ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)"

/** Arguments accepted by the `fetch` tool. */
const FetchArgsSchema = z.object({
  url: z.string().url(),
  maxLength: z.number().positive().max(1000000).default(20000),
  startIndex: z.number().min(0).default(0),
  raw: z.boolean().default(false),
})

const ListToolsSchema = z.object({
  method: z.literal("tools/list"),
})

const CallToolSchema = z.object({
  method: z.literal("tools/call"),
  params: z.object({
    name: z.string(),
    arguments: z.record(z.unknown()).optional(),
  }),
})

/**
 * Runs Readability over `html` and converts the article body to markdown.
 *
 * @param html - Raw HTML of the page.
 * @param url - URL the HTML was fetched from; used as the document URL.
 * @returns The markdown and the article's images, or an `<e>…</e>` error
 *   string when Readability cannot extract an article.
 */
function extractContentFromHtml(
  html: string,
  url: string,
): ExtractedContent | string {
  const dom = new JSDOM(html, { url })
  const article = new Readability(dom.window.document).parse()

  if (!article || !article.content) {
    return "<e>Page failed to be simplified from HTML</e>"
  }

  // Extract images from the article content only. The page URL is passed so
  // that any relative `src` still resolves against the original page.
  const articleDom = new JSDOM(article.content, { url })
  const images: Image[] = Array.from(
    articleDom.window.document.querySelectorAll("img"),
  )
    .map((img) => ({ src: img.src, alt: img.alt || "" }))
    // An <img> without a src has nothing to download.
    .filter((img) => img.src !== "")

  const turndownService = new TurndownService({
    headingStyle: "atx",
    codeBlockStyle: "fenced",
  })
  const markdown = turndownService.turndown(article.content)

  return { markdown, images }
}

/** True when the URL path ends in ".gif", ignoring query string and fragment. */
function hasGifExtension(src: string): boolean {
  let path = src
  try {
    path = new URL(src).pathname
  } catch {
    // Not an absolute URL; fall back to checking the raw string.
  }
  return path.toLowerCase().endsWith(".gif")
}

/**
 * Downloads one image. Animated GIFs are replaced by a PNG of their first
 * frame; if that conversion fails the original bytes are returned.
 *
 * @throws Error when the request fails or the response status is not OK.
 */
async function downloadImage(src: string): Promise<Buffer> {
  const response = await fetch(src)
  if (!response.ok) {
    throw new Error(`Failed to fetch image ${src}: status ${response.status}`)
  }

  const original = Buffer.from(await response.arrayBuffer())
  if (!hasGifExtension(src)) return original

  try {
    const metadata = await sharp(original).metadata()
    if (metadata.pages && metadata.pages > 1) {
      // Extract first frame of animated GIF
      return await sharp(original, { page: 0 }).png().toBuffer()
    }
  } catch (error) {
    console.warn(`Warning: Failed to process GIF image ${src}:`, error)
  }
  return original
}

/**
 * Downloads every image in order. Images that cannot be downloaded are
 * skipped with a warning so one broken link does not fail the whole page.
 *
 * @returns The successfully downloaded images, in their original order.
 */
async function fetchImages(
  images: Image[],
): Promise<(Image & { data: Buffer })[]> {
  const fetchedImages: (Image & { data: Buffer })[] = []

  for (const img of images) {
    try {
      fetchedImages.push({ ...img, data: await downloadImage(img.src) })
    } catch (error) {
      console.warn(`Warning: Skipping image ${img.src}:`, error)
    }
  }

  return fetchedImages
}

/** Reports whether `which` can locate `cmd` on the PATH. */
async function commandExists(cmd: string): Promise<boolean> {
  try {
    await execAsync(`which ${cmd}`)
    return true
  } catch {
    return false
  }
}

/**
 * Reads pixel dimensions via sharp; missing values are reported as 0.
 * `size` is the buffer length in bytes.
 */
async function getImageDimensions(
  buffer: Buffer,
): Promise<{ width: number; height: number; size: number }> {
  const metadata = await sharp(buffer).metadata()
  return {
    width: metadata.width || 0,
    height: metadata.height || 0,
    size: buffer.length,
  }
}

/** Returns the stderr lines that do not contain "WARNING:". */
function nonWarningLines(stderr: string | undefined): string[] {
  const trimmed = stderr?.trim()
  if (!trimmed) return []
  return trimmed.split("\n").filter((line) => !line.includes("WARNING:"))
}

/**
 * Stacks the images top-to-bottom on a white canvas as wide as the widest
 * image and writes the result to `outputPath` as PNG.
 */
async function mergeImagesVertically(
  group: MeasuredImage[],
  outputPath: string,
): Promise<void> {
  const width = Math.max(...group.map((img) => img.width))
  const height = group.reduce((sum, img) => sum + img.height, 0)

  // Each layer starts where the previous one ended.
  let top = 0
  const layers = group.map((img) => {
    const layer = { input: img.data, top, left: 0 }
    top += img.height
    return layer
  })

  await sharp({
    create: {
      width,
      height,
      channels: 4,
      background: { r: 255, g: 255, b: 255, alpha: 1 },
    },
  })
    .composite(layers)
    .png()
    .toFile(outputPath)
}

/**
 * Merges one group of images, puts the merged PNG on the clipboard through
 * osascript and then sends Cmd+V through System Events.
 *
 * @throws Error when osascript reports a non-warning message while setting
 *   the clipboard. A failing paste keystroke only logs a warning.
 */
async function pasteImageGroup(
  group: MeasuredImage[],
  tempDir: string,
): Promise<void> {
  if (group.length === 0) return

  // Merge the images vertically
  const mergedImagePath = `${tempDir}/merged_${Date.now()}.png`
  await mergeImagesVertically(group, mergedImagePath)

  const { stderr } = await execAsync(
    `osascript -e 'set the clipboard to (read (POSIX file "${mergedImagePath}") as «class PNGf»)'`,
  )
  if (nonWarningLines(stderr).length > 0) {
    throw new Error("Failed to copy merged image to clipboard.")
  }

  await sleep(500)
  const pasteScript = `osascript -e 'tell application "System Events" to keystroke "v" using command down'`
  const { stderr: pasteStderr } = await execAsync(pasteScript)
  if (nonWarningLines(pasteStderr).length > 0) {
    console.warn("Failed to paste merged image.")
  }
  await sleep(500)
}

/**
 * Splits the images into groups that respect the per-group limits (image
 * count, total height, total bytes) and copies/pastes each group in turn.
 * The temporary directory is removed even when a group fails.
 *
 * @throws Error when `pbcopy` or `osascript` is unavailable, or when
 *   measuring, merging or copying an image fails.
 */
async function addImagesToClipboard(
  images: (Image & { data: Buffer })[],
): Promise<void> {
  if (images.length === 0) return

  const hasPbcopy = await commandExists("pbcopy")
  const hasOsascript = await commandExists("osascript")
  if (!hasPbcopy) {
    throw new Error(
      "'pbcopy' command not found. This tool works on macOS only by default.",
    )
  }
  if (!hasOsascript) {
    throw new Error(
      "'osascript' command not found. Required to set clipboard with images.",
    )
  }

  const MAX_HEIGHT = 8000
  const MAX_SIZE_BYTES = 30 * 1024 * 1024 // 30MB
  const MAX_IMAGES_PER_GROUP = 6 // maximum number of images per group

  const tempDir = "/tmp/mcp-fetch-images"
  await execAsync(`mkdir -p ${tempDir} && rm -f ${tempDir}/*.png`)

  try {
    // Group the images and process one group at a time
    let currentGroup: MeasuredImage[] = []
    let currentHeight = 0
    let currentSize = 0

    for (const img of images) {
      // Measure once here; the merge step reuses these numbers.
      const measured: MeasuredImage = {
        data: img.data,
        ...(await getImageDimensions(img.data)),
      }

      const groupIsFull =
        currentGroup.length >= MAX_IMAGES_PER_GROUP ||
        currentHeight + measured.height > MAX_HEIGHT ||
        currentSize + measured.size > MAX_SIZE_BYTES

      if (groupIsFull) {
        // Flush the current group and start a new one with this image
        await pasteImageGroup(currentGroup, tempDir)
        currentGroup = []
        currentHeight = 0
        currentSize = 0
      }

      currentGroup.push(measured)
      currentHeight += measured.height
      currentSize += measured.size
    }

    // Process whatever is left
    await pasteImageGroup(currentGroup, tempDir)
  } finally {
    // Cleanup must not mask an error raised while processing a group.
    try {
      await execAsync(`rm -rf ${tempDir}`)
    } catch (error) {
      console.warn(`Warning: Failed to remove ${tempDir}:`, error)
    }
  }
}

/** What `fetchUrl` hands back to the tool handler. */
interface FetchResult {
  content: string
  prefix: string
  imageUrls?: string[]
}

/**
 * Fetches `url` and, for HTML (unless `forceRaw`), converts it to markdown
 * and copies the article's images to the clipboard.
 *
 * @param url - Address to fetch.
 * @param userAgent - Value sent in the `User-Agent` header.
 * @param forceRaw - When true, return the body untouched even if it is HTML.
 * @throws Error when the response status is not OK.
 */
async function fetchUrl(
  url: string,
  userAgent: string,
  forceRaw = false,
): Promise<FetchResult> {
  const response = await fetch(url, {
    headers: { "User-Agent": userAgent },
  })

  if (!response.ok) {
    throw new Error(`Failed to fetch ${url} - status code ${response.status}`)
  }

  const contentType = response.headers.get("content-type") || ""
  const text = await response.text()
  const isHtml =
    text.toLowerCase().includes("<html") || contentType.includes("text/html")

  if (!isHtml || forceRaw) {
    return {
      content: text,
      prefix: `Content type ${contentType} cannot be simplified to markdown, but here is the raw content:\n`,
    }
  }

  const result = extractContentFromHtml(text, url)
  if (typeof result === "string") {
    // Extraction failed; the string is the error marker itself.
    return { content: result, prefix: "" }
  }

  const { markdown, images } = result
  const fetchedImages = await fetchImages(images)
  const imageUrls = fetchedImages.map((img) => img.src)

  if (fetchedImages.length === 0) {
    return { content: markdown, prefix: "", imageUrls }
  }

  try {
    await addImagesToClipboard(fetchedImages)
    return {
      content: markdown,
      prefix: `Found and processed ${fetchedImages.length} images. Images have been merged vertically (max 6 images per group) and copied to your clipboard. Please paste (Cmd+V) to combine with the retrieved content.\n`,
      imageUrls,
    }
  } catch (err) {
    // Clipboard trouble should not hide the text content.
    return {
      content: markdown,
      prefix: `Found ${fetchedImages.length} images but failed to copy them to the clipboard.\nError: ${err instanceof Error ? err.message : String(err)}\n`,
      imageUrls,
    }
  }
}

/**
 * Returns the window `[startIndex, startIndex + maxLength)` of `content`.
 * A continuation notice is appended only when text remains after the window.
 */
function paginateContent(
  content: string,
  startIndex: number,
  maxLength: number,
): string {
  if (startIndex > 0 && startIndex >= content.length) {
    return "<e>No more content available.</e>"
  }

  const endIndex = startIndex + maxLength
  const page = content.slice(startIndex, endIndex)
  if (endIndex >= content.length) return page

  return `${page}\n\n<e>Content truncated. Call the fetch tool with a startIndex of ${endIndex} to get more content.</e>`
}

// Server setup
const server = new Server(
  {
    name: "mcp-fetch",
    version: "1.0.0",
  },
  {
    capabilities: {
      tools: {},
    },
  },
)

interface RequestHandlerExtra {
  signal: AbortSignal
}

// tools/list: advertise the single `fetch` tool.
server.setRequestHandler(
  ListToolsSchema,
  async (request: { method: "tools/list" }, extra: RequestHandlerExtra) => {
    const tools = [
      {
        name: "fetch",
        description:
          "Retrieves URLs from the Internet and extracts their content as markdown. If images are found, they are merged vertically (max 6 images per group, max height 8000px, max size 30MB per group) and copied to the clipboard of the user's host machine. You will need to paste (Cmd+V) to insert the images.",
        inputSchema: zodToJsonSchema(FetchArgsSchema),
      },
    ]
    return { tools }
  },
)

// tools/call: validate arguments, fetch, paginate, and report errors as text.
server.setRequestHandler(
  CallToolSchema,
  async (
    request: {
      method: "tools/call"
      params: { name: string; arguments?: Record<string, unknown> }
    },
    extra: RequestHandlerExtra,
  ) => {
    try {
      const { name, arguments: args } = request.params

      if (name !== "fetch") {
        throw new Error(`Unknown tool: ${name}`)
      }

      const parsed = FetchArgsSchema.safeParse(args)
      if (!parsed.success) {
        throw new Error(`Invalid arguments: ${parsed.error}`)
      }

      const { content, prefix, imageUrls } = await fetchUrl(
        parsed.data.url,
        DEFAULT_USER_AGENT_AUTONOMOUS,
        parsed.data.raw,
      )

      const finalContent = paginateContent(
        content,
        parsed.data.startIndex,
        parsed.data.maxLength,
      )

      const imagesSection =
        imageUrls && imageUrls.length > 0
          ? `\n\nImages found in article:\n${imageUrls.map((url) => `- ${url}`).join("\n")}`
          : ""

      return {
        content: [
          {
            type: "text",
            text: `${prefix}Contents of ${parsed.data.url}:\n${finalContent}${imagesSection}`,
          },
        ],
      }
    } catch (error) {
      return {
        content: [
          {
            type: "text",
            text: `Error: ${error instanceof Error ? error.message : String(error)}`,
          },
        ],
        isError: true,
      }
    }
  },
)

// Start server
async function runServer() {
  const transport = new StdioServerTransport()
  await server.connect(transport)
}

runServer().catch((error) => {
  process.stderr.write(`Fatal error running server: ${error}\n`)
  process.exit(1)
})
```