jeremynixon/mcp-fetch # codebase.md

# Directory Structure

```
├── .github
│   └── workflows
│       └── publish.yml
├── .gitignore
├── biome.json
├── Dockerfile
├── index.ts
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── smithery.yaml
├── tsconfig.json
└── types.d.ts
```

# Files

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
dist
node_modules
```

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

```markdown
# MCP Fetch

[![smithery badge](https://smithery.ai/badge/@kazuph/mcp-fetch)](https://smithery.ai/server/@kazuph/mcp-fetch)

Model Context Protocol server for fetching web content and processing images. This allows Claude Desktop (or any MCP client) to fetch web content and handle images appropriately.

## Quick Start (For Users)

To use this tool with Claude Desktop, simply add the following to your Claude Desktop configuration (`~/Library/Application Support/Claude/claude_desktop_config.json`):

```json
{
  "mcpServers": {
    "fetch": {
      "command": "npx",
      "args": ["-y", "@kazuph/mcp-fetch"]
    }
  }
}
```

This will automatically download and run the latest version of the tool when needed.

### Required Setup

1. Enable Accessibility for Claude:
   - Open System Settings
   - Go to Privacy & Security > Accessibility
   - Click the "+" button
   - Add Claude from your Applications folder
   - Turn ON the toggle for Claude

This accessibility setting is required for automated clipboard operations (Cmd+V) to work properly.

## For Developers

The following sections are for those who want to develop or modify the tool.

## Prerequisites

- Node.js 18+
- macOS (for clipboard operations)
- Claude Desktop (install from https://claude.ai/desktop)
- tsx (install via `npm install -g tsx`)

## Installation

### Installing via Smithery

To install MCP Fetch for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@kazuph/mcp-fetch):

```bash
npx -y @smithery/cli install @kazuph/mcp-fetch --client claude
```

### Manual Installation
```bash
git clone https://github.com/kazuph/mcp-fetch.git
cd mcp-fetch
npm install
npm run build
```

## Image Processing Specifications

When processing images from web content, the following limits are applied:

- Maximum 6 images per group
- Maximum height of 8000 pixels per group
- Maximum size of 30MB per group

If content exceeds these limits, images will be automatically split into multiple groups, and you'll need to paste (Cmd+V) multiple times.

## Configuration

1. Make sure Claude Desktop is installed and running.

2. Install tsx globally if you haven't:
```bash
npm install -g tsx
# or
pnpm add -g tsx
```

3. Modify your Claude Desktop config located at:
`~/Library/Application Support/Claude/claude_desktop_config.json`

You can easily find this through the Claude Desktop menu:
1. Open Claude Desktop
2. Click Claude on the Mac menu bar
3. Click "Settings"
4. Click "Developer"

Add the following to your MCP client's configuration:

```json
{
  "mcpServers": {
    "fetch": {
      "command": "npx",
      "args": ["tsx", "/path/to/mcp-fetch/index.ts"]
    }
  }
}
```

## Available Tools

- `fetch`: Retrieves URLs from the Internet and extracts their content as markdown. Images are automatically processed and prepared for clipboard operations.

## Notes

- This tool is designed for macOS only due to its dependency on macOS-specific clipboard operations.
- Images are processed using Sharp for optimal performance and quality.
- When multiple images are found, they are merged vertically with consideration for size limits.
- Animated GIFs are automatically handled by extracting their first frame.

```

--------------------------------------------------------------------------------
/biome.json:
--------------------------------------------------------------------------------

```json
{
  "formatter": {
    "enabled": true,
    "indentStyle": "space",
    "indentWidth": 2,
    "lineWidth": 80
  },
  "linter": {
    "enabled": true,
    "rules": {
      "recommended": true
    }
  },
  "javascript": {
    "formatter": {
      "quoteStyle": "double",
      "trailingComma": "es5"
    }
  }
}

```

--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------

```yaml
# Smithery launch configuration: the server is started as a stdio process by
# running `node dist/index.js`, and it declares no configuration options.
startCommand:
  type: stdio
  configSchema:
    # JSON Schema defining the configuration options for the MCP.
    {}
  commandFunction:
    # A function that produces the CLI command to start the MCP on stdio.
    |-
    (config) => ({
      "command": "node",
      "args": [
        "dist/index.js"
      ]
    })

```

--------------------------------------------------------------------------------
/types.d.ts:
--------------------------------------------------------------------------------

```typescript
// Minimal ambient typings for the `applescript` package.
// NOTE(review): index.ts does not import this module and package.json does not
// list it as a dependency — confirm whether this declaration is still needed.
declare module "applescript" {
	// Callback-style API: `callback` receives an error (or null) and an untyped result.
	export function execString(
		script: string,
		callback: (err: Error | null, result: unknown) => void,
	): void;
}

// Minimal ambient typings for the `robots-parser` dependency; only `isAllowed`
// is declared here.
// NOTE(review): index.ts does not currently import this module — confirm usage.
declare module "robots-parser" {
	interface RobotsParser {
		// Reports whether `userAgent` may fetch `url` according to the parsed rules.
		isAllowed(url: string, userAgent: string): boolean;
	}
	// Builds a parser from a robots.txt location and its text content.
	export default function (robotsUrl: string, robotsTxt: string): RobotsParser;
}

```

--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------

```json
{
	"compilerOptions": {
		"target": "ES2022",
		"strict": true,
		"esModuleInterop": true,
		"skipLibCheck": true,
		"forceConsistentCasingInFileNames": true,
		"resolveJsonModule": true,
		"outDir": "./dist",
		"rootDir": ".",
		"moduleResolution": "NodeNext",
		"module": "NodeNext"
	},
	"exclude": ["node_modules"],
	"include": ["./**/*.ts"]
}

```

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------

```dockerfile
# Build stage: installs all dependencies; the `prepare` script compiles TypeScript into /app/dist.
FROM node:22.12-alpine AS builder

# Must be entire project because `prepare` script is run during `npm install` and requires all files.
COPY . /app
WORKDIR /app

RUN --mount=type=cache,target=/root/.npm npm install

# Release stage: only the compiled output and production dependencies.
FROM node:22-alpine AS release

WORKDIR /app
COPY --from=builder /app/dist /app/dist
COPY --from=builder /app/package.json /app/package.json
COPY --from=builder /app/package-lock.json /app/package-lock.json

ENV NODE_ENV=production

# `--ignore-scripts` keeps `prepare` (tsc) from running here, where the sources and
# the TypeScript compiler are absent; `--omit=dev` is the npm spelling for skipping
# devDependencies (the previous `--omit-dev` is not a recognised option).
RUN npm ci --ignore-scripts --omit=dev

ENTRYPOINT ["node", "dist/index.js"]

```

--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------

```yaml
# Publishes the package to npm on every push to main (or on manual dispatch):
# builds, bumps the patch version, pushes the version commit and tag, then publishes.
name: Publish to npm

on:
  push:
    branches:
      - main
  workflow_dispatch:  # Allows manual triggering

permissions:
  contents: write  # needed to push the version-bump commit and tag

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  publish:
    runs-on: ubuntu-latest

    steps:
      - name: Check out repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'
          registry-url: 'https://registry.npmjs.org'

      - name: Install dependencies
        run: npm ci

      - name: Build project
        run: npm run build

      - name: Configure Git
        run: |
          git config --local user.email "action@github.com"
          git config --local user.name "GitHub Action"

      - name: Bump version
        # "[skip ci]" in the commit message keeps the pushed bump commit from
        # re-triggering this workflow.
        run: |
          npm version patch -m "chore: bump version to %s [skip ci]"
          git push
          git push --tags

      - name: Publish to npm
        run: npm publish --access public -ws --include-workspace-root
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}

```

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------

```json
{
	"name": "@smithery/mcp-fetch",
	"version": "0.8.11",
	"type": "module",
	"description": "A Model Context Protocol server that provides web content fetching capabilities",
	"main": "dist/index.js",
	"scripts": {
		"prepare": "npm run build",
		"build": "tsc",
		"start": "node dist/index.js",
		"dev": "tsc && node dist/index.js",
		"check": "biome check .",
		"format": "biome format . --write",
		"lint": "biome lint .",
		"typecheck": "tsc --noEmit",
		"test": "npm run typecheck && npm run check"
	},
	"dependencies": {
		"@modelcontextprotocol/sdk": "^1.0.0",
		"@mozilla/readability": "^0.5.0",
		"@types/sharp": "^0.31.1",
		"jsdom": "^24.0.0",
		"node-fetch": "^3.3.2",
		"robots-parser": "^3.0.1",
		"sharp": "^0.33.5",
		"turndown": "^7.1.2",
		"zod": "^3.22.4",
		"zod-to-json-schema": "^3.22.4"
	},
	"devDependencies": {
		"@types/jsdom": "^21.1.6",
		"@types/node": "^20.10.5",
		"@types/turndown": "^5.0.4",
		"typescript": "^5.3.3"
	},
	"author": "kazuph",
	"license": "MIT",
	"publishConfig": {
		"access": "public"
	},
	"files": [
		"dist",
		"dist/**/*.map",
		"README.md"
	],
	"repository": {
		"type": "git",
		"url": "git+https://github.com/kazuph/mcp-fetch.git"
	},
	"keywords": [
		"mcp",
		"fetch",
		"web",
		"content"
	],
	"bugs": {
		"url": "https://github.com/kazuph/mcp-fetch/issues"
	},
	"homepage": "https://github.com/kazuph/mcp-fetch#readme",
	"bin": {
		"mcp-fetch": "./dist/index.js"
	}
}

```

--------------------------------------------------------------------------------
/index.ts:
--------------------------------------------------------------------------------

```typescript
#!/usr/bin/env node

import { Server } from "@modelcontextprotocol/sdk/server/index.js"
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
import { z } from "zod"
import { zodToJsonSchema } from "zod-to-json-schema"
import fetch from "node-fetch"
import { JSDOM } from "jsdom"
import { Readability } from "@mozilla/readability"
import TurndownService from "turndown"
import { exec } from "node:child_process"
import { promisify } from "node:util"
import sharp from "sharp"

// Promise-returning form of child_process.exec, so shell commands can be awaited.
const execAsync = promisify(exec)

/**
 * Returns a promise that settles once a timer of `ms` milliseconds has fired.
 *
 * @param ms Delay handed to `setTimeout`.
 */
function sleep(ms: number) {
	return new Promise((done) => {
		setTimeout(done, ms)
	})
}

/** An `<img>` element found in the extracted article content. */
interface Image {
	// Value of the element's `src` property; later passed to `fetch`.
	src: string
	// Alt text, or an empty string when the element has none.
	alt: string
	// Downloaded image bytes; absent until the image has been fetched.
	data?: Buffer
}

/** Result of simplifying an HTML page: its markdown plus the images it references. */
interface ExtractedContent {
	// Article content converted to markdown.
	markdown: string
	// Images found inside the article content (not yet downloaded).
	images: Image[]
}

// User-Agent header sent when the page is fetched on behalf of a tool call.
const DEFAULT_USER_AGENT_AUTONOMOUS =
	"ModelContextProtocol/1.0 (Autonomous; +https://github.com/modelcontextprotocol/servers)"
// User-Agent variant for user-specified fetches.
// NOTE(review): not referenced anywhere else in this file — confirm whether it is still needed.
const DEFAULT_USER_AGENT_MANUAL =
	"ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)"

/**
 * Arguments accepted by the `fetch` tool.
 *
 * `maxLength` and `startIndex` are character offsets into the fetched content,
 * so they are restricted to integers. The descriptions are carried into the
 * JSON schema advertised to clients, which tells callers how to page through
 * long documents.
 */
const FetchArgsSchema = z.object({
	url: z.string().url().describe("URL to fetch"),
	maxLength: z
		.number()
		.int()
		.positive()
		.max(1000000)
		.default(20000)
		.describe("Maximum number of characters to return"),
	startIndex: z
		.number()
		.int()
		.min(0)
		.default(0)
		.describe(
			"Character offset at which the returned content starts; use it to continue after a truncated response",
		),
	raw: z
		.boolean()
		.default(false)
		.describe("Return the raw response body instead of simplified markdown"),
})

// Request shape matched for tool discovery: any message whose method is "tools/list".
const ListToolsSchema = z.object({
	method: z.literal("tools/list"),
})

// Request shape matched for tool invocation: method "tools/call" plus the tool
// name and an optional bag of arguments. The arguments are left unvalidated
// here; the handler checks them against the tool's own schema.
const CallToolSchema = z.object({
	method: z.literal("tools/call"),
	params: z.object({
		name: z.string(),
		arguments: z.record(z.unknown()).optional(),
	}),
})

/**
 * Simplifies an HTML page to its main article and converts it to markdown.
 *
 * @param html Raw HTML of the page.
 * @param url  Address the page was loaded from; used as the document base URL.
 * @returns The markdown and the images referenced inside the article, or an
 *          `<e>…</e>` message string when Readability cannot extract an article.
 */
function extractContentFromHtml(
	html: string,
	url: string,
): ExtractedContent | string {
	const dom = new JSDOM(html, { url })
	const reader = new Readability(dom.window.document)
	const article = reader.parse()

	if (!article || !article.content) {
		return "<e>Page failed to be simplified from HTML</e>"
	}

	// Extract images from the article content only. The page URL is supplied as
	// base so that any relative `src` still resolves to an absolute address.
	const articleDom = new JSDOM(article.content, { url })
	const images: Image[] = Array.from(
		articleDom.window.document.querySelectorAll("img"),
	)
		// Skip elements with no usable source (e.g. lazy-loaded placeholders):
		// they cannot be downloaded and would make the later image fetch fail.
		.filter((img) => (img.getAttribute("src") ?? "").trim() !== "")
		.map((img) => ({ src: img.src, alt: img.alt || "" }))

	const turndownService = new TurndownService({
		headingStyle: "atx",
		codeBlockStyle: "fenced",
	})
	const markdown = turndownService.turndown(article.content)

	return { markdown, images }
}

/**
 * Downloads the given images one after another.
 *
 * An image that cannot be downloaded (network error or non-2xx status) is
 * skipped with a warning, so a single broken image does not fail the whole
 * page. Animated GIFs are reduced to their first frame, re-encoded as PNG.
 *
 * @param images Images to download; `src` is the address that is fetched.
 * @returns The successfully downloaded images, in input order, with `data` set.
 */
async function fetchImages(
	images: Image[],
): Promise<(Image & { data: Buffer })[]> {
	const fetchedImages: (Image & { data: Buffer })[] = []
	for (const img of images) {
		let imageBuffer: Buffer
		try {
			const response = await fetch(img.src)
			if (!response.ok) {
				console.warn(
					`Warning: Skipping image ${img.src}: status ${response.status}`,
				)
				continue
			}
			imageBuffer = Buffer.from(await response.arrayBuffer())
		} catch (error) {
			console.warn(`Warning: Skipping image ${img.src}:`, error)
			continue
		}

		// Detect GIFs by file signature as well as by URL suffix, so that URLs
		// carrying a query string (e.g. "a.gif?v=2") are recognised too.
		const isGif =
			img.src.toLowerCase().endsWith(".gif") ||
			imageBuffer.subarray(0, 4).toString("latin1") === "GIF8"
		if (isGif) {
			try {
				const metadata = await sharp(imageBuffer).metadata()
				if (metadata.pages && metadata.pages > 1) {
					// Extract first frame of animated GIF
					const firstFrame = await sharp(imageBuffer, { page: 0 })
						.png()
						.toBuffer()
					fetchedImages.push({
						...img,
						data: firstFrame,
					})
					continue
				}
			} catch (error) {
				// Best effort: fall through and keep the original bytes.
				console.warn(`Warning: Failed to process GIF image ${img.src}:`, error)
			}
		}

		fetchedImages.push({
			...img,
			data: imageBuffer,
		})
	}
	return fetchedImages
}

/**
 * Checks whether `which` can locate the given command.
 *
 * @param cmd Command name, interpolated into a shell command line.
 * @returns `true` when `which` exits successfully, `false` on any failure.
 */
async function commandExists(cmd: string): Promise<boolean> {
	let found = true
	try {
		await execAsync(`which ${cmd}`)
	} catch {
		found = false
	}
	return found
}

/**
 * Reads an image's pixel dimensions via sharp and reports its byte size.
 *
 * @param buffer Encoded image bytes.
 * @returns Width and height (0 when sharp reports none) and `buffer.length` as `size`.
 */
async function getImageDimensions(
	buffer: Buffer,
): Promise<{ width: number; height: number; size: number }> {
	const { width, height } = await sharp(buffer).metadata()
	const size = buffer.length
	return { width: width || 0, height: height || 0, size }
}

/**
 * Merges the downloaded images vertically into one or more PNGs, puts each
 * merged PNG on the macOS clipboard via `osascript`, and sends Cmd+V after
 * each one.
 *
 * Images are packed into groups limited by image count, total height and total
 * byte size; each group becomes one merged image.
 *
 * @param images Downloaded images, in the order they should be stacked.
 * @throws Error when `pbcopy` or `osascript` is unavailable, when an image
 *         cannot be read by sharp, or when copying to the clipboard fails.
 */
async function addImagesToClipboard(
	images: (Image & { data: Buffer })[],
): Promise<void> {
	if (images.length === 0) return

	const hasPbcopy = await commandExists("pbcopy")
	const hasOsascript = await commandExists("osascript")
	if (!hasPbcopy) {
		throw new Error(
			"'pbcopy' command not found. This tool works on macOS only by default.",
		)
	}
	if (!hasOsascript) {
		throw new Error(
			"'osascript' command not found. Required to set clipboard with images.",
		)
	}

	const MAX_HEIGHT = 8000
	const MAX_SIZE_BYTES = 30 * 1024 * 1024 // 30MB
	const MAX_IMAGES_PER_GROUP = 6 // maximum number of images per group

	// An image together with the dimensions measured once, up front.
	type MeasuredImage = { data: Buffer; width: number; height: number }

	// True when stderr contains any line that is not a "WARNING:" line.
	const hasErrorOutput = (stderr: string | undefined): boolean => {
		const output = stderr?.trim()
		if (!output) return false
		return output.split("\n").some((line) => !line.includes("WARNING:"))
	}

	const tempDir = "/tmp/mcp-fetch-images"
	await execAsync(`mkdir -p ${tempDir} && rm -f ${tempDir}/*.png`)

	// Stacks one group into a single PNG, copies it to the clipboard and pastes it.
	const processGroup = async (group: MeasuredImage[]) => {
		if (group.length === 0) return

		// Each image is placed directly below the previous one; after the loop
		// `top` equals the total canvas height.
		let top = 0
		const layers = group.map((entry) => {
			const layer = { input: entry.data, top, left: 0 }
			top += entry.height
			return layer
		})

		const mergedImagePath = `${tempDir}/merged_${Date.now()}.png`
		await sharp({
			create: {
				width: Math.max(...group.map((entry) => entry.width)),
				height: top,
				channels: 4,
				background: { r: 255, g: 255, b: 255, alpha: 1 },
			},
		})
			.composite(layers)
			.png()
			.toFile(mergedImagePath)

		const { stderr } = await execAsync(
			`osascript -e 'set the clipboard to (read (POSIX file "${mergedImagePath}") as «class PNGf»)'`,
		)
		if (hasErrorOutput(stderr)) {
			throw new Error("Failed to copy merged image to clipboard.")
		}

		// Pause before and after the keystroke so the clipboard and the paste
		// have time to settle.
		await sleep(500)
		const pasteScript = `osascript -e 'tell application "System Events" to keystroke "v" using command down'`
		const { stderr: pasteStderr } = await execAsync(pasteScript)
		if (hasErrorOutput(pasteStderr)) {
			// A failed paste is not fatal: the image is still on the clipboard.
			console.warn("Failed to paste merged image.")
		}
		await sleep(500)
	}

	try {
		// Pack images into groups and process each group as soon as it is full.
		let currentGroup: MeasuredImage[] = []
		let currentHeight = 0
		let currentSize = 0

		for (const img of images) {
			const { width, height, size } = await getImageDimensions(img.data)
			const entry: MeasuredImage = { data: img.data, width, height }

			if (
				currentGroup.length >= MAX_IMAGES_PER_GROUP ||
				currentHeight + height > MAX_HEIGHT ||
				currentSize + size > MAX_SIZE_BYTES
			) {
				// Flush the current group and start a new one with this image.
				await processGroup(currentGroup)
				currentGroup = [entry]
				currentHeight = height
				currentSize = size
			} else {
				currentGroup.push(entry)
				currentHeight += height
				currentSize += size
			}
		}

		// Process whatever is left in the last group.
		await processGroup(currentGroup)
	} finally {
		// Always remove the temp directory, even when a step above threw.
		try {
			await execAsync(`rm -rf ${tempDir}`)
		} catch (error) {
			console.warn(`Warning: Failed to remove ${tempDir}:`, error)
		}
	}
}

/** Outcome of fetching a URL, before truncation and final formatting. */
interface FetchResult {
	// Markdown of the page, an `<e>…</e>` message, or the raw response text.
	content: string
	// Status text placed in front of the content in the tool response; may be empty.
	prefix: string
	// Sources of the images downloaded for the page; only set on the HTML path.
	imageUrls?: string[]
}

/**
 * Fetches a URL and, for HTML pages, converts the page to markdown and pushes
 * its images to the clipboard.
 *
 * Image problems never discard the extracted text: if images cannot be
 * downloaded or copied, the markdown is still returned and the failure is
 * reported in `prefix`.
 *
 * @param url       Address to fetch.
 * @param userAgent Value sent as the `User-Agent` header.
 * @param forceRaw  When true, HTML is returned as-is instead of being simplified.
 * @returns The content plus a status prefix and, for HTML pages, the image sources.
 * @throws Error when the response status is not OK.
 */
async function fetchUrl(
	url: string,
	userAgent: string,
	forceRaw = false,
): Promise<FetchResult> {
	const response = await fetch(url, {
		headers: { "User-Agent": userAgent },
	})

	if (!response.ok) {
		throw new Error(`Failed to fetch ${url} - status code ${response.status}`)
	}

	const contentType = response.headers.get("content-type") || ""
	const text = await response.text()
	// Check the header first (case-insensitively) so the body is only scanned
	// when the header does not already settle the question.
	const isHtml =
		contentType.toLowerCase().includes("text/html") ||
		text.toLowerCase().includes("<html")

	if (!isHtml || forceRaw) {
		return {
			content: text,
			prefix: `Content type ${contentType} cannot be simplified to markdown, but here is the raw content:\n`,
		}
	}

	const result = extractContentFromHtml(text, url)
	if (typeof result === "string") {
		return {
			content: result,
			prefix: "",
		}
	}

	const { markdown, images } = result

	let fetchedImages: (Image & { data: Buffer })[]
	try {
		fetchedImages = await fetchImages(images)
	} catch (err) {
		// Keep the text even though the images could not be downloaded.
		return {
			content: markdown,
			prefix: `Found ${images.length} images but failed to download them.\nError: ${err instanceof Error ? err.message : String(err)}\n`,
			imageUrls: images.map((img) => img.src),
		}
	}
	const imageUrls = fetchedImages.map((img) => img.src)

	if (fetchedImages.length === 0) {
		return {
			content: markdown,
			prefix: "",
			imageUrls,
		}
	}

	try {
		await addImagesToClipboard(fetchedImages)
		return {
			content: markdown,
			prefix: `Found and processed ${fetchedImages.length} images. Images have been merged vertically (max 6 images per group) and copied to your clipboard. Please paste (Cmd+V) to combine with the retrieved content.\n`,
			imageUrls,
		}
	} catch (err) {
		return {
			content: markdown,
			prefix: `Found ${fetchedImages.length} images but failed to copy them to the clipboard.\nError: ${err instanceof Error ? err.message : String(err)}\n`,
			imageUrls,
		}
	}
}

// Server setup
// MCP server instance; it advertises only the `tools` capability.
// NOTE(review): the version reported here is "1.0.0" while package.json in this
// repository declares 0.8.11 — confirm which one is intended.
const server = new Server(
	{
		name: "mcp-fetch",
		version: "1.0.0",
	},
	{
		capabilities: {
			tools: {},
		},
	},
)

// Type used for the second parameter of the request handlers in this file.
// Only `signal` is declared, and neither handler currently reads it.
interface RequestHandlerExtra {
	signal: AbortSignal
}

// tools/list: advertise the single `fetch` tool together with the JSON schema
// generated from FetchArgsSchema.
server.setRequestHandler(
	ListToolsSchema,
	async (request: { method: "tools/list" }, extra: RequestHandlerExtra) => ({
		tools: [
			{
				name: "fetch",
				description:
					"Retrieves URLs from the Internet and extracts their content as markdown. If images are found, they are merged vertically (max 6 images per group, max height 8000px, max size 30MB per group) and copied to the clipboard of the user's host machine. You will need to paste (Cmd+V) to insert the images.",
				inputSchema: zodToJsonSchema(FetchArgsSchema),
			},
		],
	}),
)

// tools/call: validates the arguments, fetches the page and returns the
// requested window of its content as a single text item. Any failure is
// reported as a text item with `isError: true` instead of being thrown.
server.setRequestHandler(
	CallToolSchema,
	async (
		request: {
			method: "tools/call"
			params: { name: string; arguments?: Record<string, unknown> }
		},
		extra: RequestHandlerExtra,
	) => {
		try {
			const { name, arguments: args } = request.params

			if (name !== "fetch") {
				throw new Error(`Unknown tool: ${name}`)
			}

			const parsed = FetchArgsSchema.safeParse(args)
			if (!parsed.success) {
				throw new Error(`Invalid arguments: ${parsed.error}`)
			}
			const { url, maxLength, startIndex, raw } = parsed.data

			const { content, prefix, imageUrls } = await fetchUrl(
				url,
				DEFAULT_USER_AGENT_AUTONOMOUS,
				raw,
			)

			// Always honour startIndex (previously it was ignored whenever the
			// content was shorter than maxLength), and only announce truncation
			// when content actually remains beyond the returned window.
			const nextIndex = startIndex + maxLength
			let finalContent: string
			if (startIndex > 0 && startIndex >= content.length) {
				finalContent = "<e>No more content available.</e>"
			} else {
				finalContent = content.slice(startIndex, nextIndex)
				if (nextIndex < content.length) {
					// The hint names the parameter exactly as the schema does.
					finalContent += `\n\n<e>Content truncated. Call the fetch tool with a startIndex of ${nextIndex} to get more content.</e>`
				}
			}

			let imagesSection = ""
			if (imageUrls && imageUrls.length > 0) {
				imagesSection =
					"\n\nImages found in article:\n" +
					imageUrls.map((imageUrl) => `- ${imageUrl}`).join("\n")
			}

			return {
				content: [
					{
						type: "text",
						text: `${prefix}Contents of ${url}:\n${finalContent}${imagesSection}`,
					},
				],
			}
		} catch (error) {
			return {
				content: [
					{
						type: "text",
						text: `Error: ${error instanceof Error ? error.message : String(error)}`,
					},
				],
				isError: true,
			}
		}
	},
)

// Start server
/** Connects the server to a stdio transport. */
async function runServer() {
	await server.connect(new StdioServerTransport())
}

// A startup failure is reported on stderr and ends the process with exit code 1.
runServer().catch((reason) => {
	process.stderr.write(`Fatal error running server: ${reason}\n`)
	process.exit(1)
})

```