# Directory Structure ``` ├── .gitignore ├── .vscode │ └── settings.json ├── biome.json ├── dist │ └── index.js ├── index.ts ├── package-lock.json ├── package.json ├── README.md ├── src │ └── utils │ └── htmlToMarkdown.ts └── tsconfig.json ``` # Files -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` node_modules/ .env ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown # Wikipedia MCP Server A Model Context Protocol (MCP) server for searching and retrieving Wikipedia articles. ## Overview This MCP server enables language models to search Wikipedia and retrieve article content programmatically using the Model Context Protocol. It provides a structured interface for AI assistants to access Wikipedia knowledge. ## Features - **Search Wikipedia**: Find articles matching specific search terms - **Read Full Articles**: Retrieve complete Wikipedia articles by title or page ID - **Markdown Conversion**: All article content is automatically converted from HTML to Markdown ## Add it to your MCP Client Start it via this CLI command: ``` npx wikipedia-mcp ``` Most AI tools support a JSON-based configuration for MCP servers looking like this: ```json { "mcpServers": { "Wikipedia": { "command": "npx", "args": ["-y", "wikipedia-mcp"] } } } ``` ``` -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- ```json { "editor.defaultFormatter": "biomejs.biome" } ``` -------------------------------------------------------------------------------- /biome.json: -------------------------------------------------------------------------------- ```json { "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json", 
"vcs": { "enabled": true, "clientKind": "git", "useIgnoreFile": false }, "files": { "ignoreUnknown": false, "ignore": [] }, "formatter": { "enabled": true, "indentStyle": "tab" }, "organizeImports": { "enabled": true }, "linter": { "enabled": true, "rules": { "recommended": true } }, "javascript": { "formatter": { "quoteStyle": "double" } } } ``` -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- ```json { "compilerOptions": { "target": "ES2022", // Or a newer target if needed "module": "NodeNext", // Use NodeNext for modern Node.js ESM support "outDir": "./dist", "rootDir": "./", // Assuming index.ts is at the root "strict": true, "esModuleInterop": true, "skipLibCheck": true, "forceConsistentCasingInFileNames": true, "moduleResolution": "NodeNext", // Required with module: NodeNext // Adding declaration to generate type definitions "declaration": true }, "include": ["index.ts", "src/**/*.ts"], // Include your main entry point "exclude": ["node_modules", "dist"] } ``` -------------------------------------------------------------------------------- /src/utils/htmlToMarkdown.ts: -------------------------------------------------------------------------------- ```typescript
import TurndownService, { type TagName } from "turndown";

/**
 * Tags that are dropped from the output altogether: they are registered with
 * Turndown's `remove`, so matching elements contribute nothing to the Markdown.
 */
const ELEMENTS_TO_REMOVE: TagName[] = [
  "script",
  "style",
  "link",
  "meta",
  "iframe",
  "noscript",
  "object",
  "embed",
];

/** Converter shared by all calls; created on first use. */
let sharedTurndownService: TurndownService | undefined;

/**
 * Returns the shared Turndown converter, building and configuring it once.
 *
 * The configuration never varies between calls, so reusing one instance avoids
 * re-registering the same rules for every converted snippet or article.
 */
function getTurndownService(): TurndownService {
  if (sharedTurndownService === undefined) {
    const service = new TurndownService({
      headingStyle: "atx",
      codeBlockStyle: "fenced",
      emDelimiter: "*",
    });
    // `remove` accepts a list of tag names, so one call covers every tag.
    service.remove(ELEMENTS_TO_REMOVE);
    sharedTurndownService = service;
  }
  return sharedTurndownService;
}

/**
 * Converts HTML content to Markdown with proper cleaning
 *
 * Headings are emitted in ATX style (`#`), code blocks are fenced, and
 * emphasis uses `*`. Elements listed in `ELEMENTS_TO_REMOVE` are stripped.
 *
 * @param htmlContent The HTML content to convert
 * @returns Clean markdown string
 */
export function convertHtmlToMarkdown(htmlContent: string): string {
  return getTurndownService().turndown(htmlContent);
}
``` -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- ```json { "name": "wikipedia-mcp", "version": "1.0.3", "description": "MCP server for Wikipedia", "license": "MIT", "author": "Tim Jünemann", "homepage": "https://github.com/timjuenemann/wikipedia-mcp", "bugs": "https://github.com/timjuenemann/wikipedia-mcp/issues", "type": "module", "bin": { "wikipedia-mcp": "dist/index.js" }, "files": [ "dist" ], "scripts": { "release": "npm run build && npm publish", "build": "esbuild index.ts --bundle --platform=node --target=node22 --format=esm --outfile=dist/index.js", "build:inspect": "npm run build && npx @modelcontextprotocol/[email protected] node dist/index.js", "watch": "esbuild index.ts --bundle --platform=node --target=node22 --format=esm --outfile=dist/index.js --watch" }, "devDependencies": { "@types/node": "^22.14.1", "@types/turndown": "^5.0.5", "esbuild": "^0.25.2", "typescript": "^5.8.3", "zod": "^3.24.3" }, "dependencies": { "@modelcontextprotocol/sdk": "^1.10.0", "turndown": "^7.2.0" } } ``` -------------------------------------------------------------------------------- /index.ts: -------------------------------------------------------------------------------- ```typescript #!/usr/bin/env node import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; import { z } from "zod"; import { convertHtmlToMarkdown } from "./src/utils/htmlToMarkdown.js"; // Constants const USER_AGENT = "WikipediaMCPServer/1.0.0 (https://github.com/timjuenemann/wikipedia-mcp)"; const WIKIPEDIA_API_URL = "https://en.wikipedia.org/w/api.php"; // Interfaces interface WikipediaSearchResult { title: string; snippet: string; pageid: number; } // Helper functions /** * Generic function to fetch data from Wikipedia API 
*/ async function fetchFromWikipedia(params: Record<string, string>) { const url = new URL(WIKIPEDIA_API_URL); // Set common parameters url.searchParams.set("format", "json"); // Add all provided parameters to the URL for (const [key, value] of Object.entries(params)) { url.searchParams.set(key, value); } const response = await fetch(url, { headers: { "User-Agent": USER_AGENT, }, }); if (!response.ok) { throw new Error(`Wikipedia API request failed: ${response.statusText}`); } return response.json(); } /** * Search Wikipedia for articles matching a query */ async function searchWikipedia( query: string ): Promise<{ type: "text"; text: string }[]> { const data = await fetchFromWikipedia({ action: "query", list: "search", srsearch: query, }); return (data?.query?.search || []).map((item: WikipediaSearchResult) => { const markdownSnippet = convertHtmlToMarkdown(item.snippet || ""); // Format the result to include title and link in the text const formattedText = `**${item.title}**\n\n${markdownSnippet}…\n\nArticle link: https://en.wikipedia.org/?curid=${item.pageid}`; return { type: "text", text: formattedText, }; }); } /** * Get a Wikipedia article by title or page ID */ async function getWikipediaArticle(options: { title?: string; pageId?: number; }): Promise<string> { const { title, pageId } = options; const params: Record<string, string> = { action: "parse", prop: "text", formatversion: "2", // Request format version 2 for simpler output }; if (pageId) { params.pageid = String(pageId); } else if (title) { params.page = title; } else { throw new Error("Either title or pageId must be provided"); } const data = await fetchFromWikipedia(params); if (data.error) { throw new Error(`Error reading article: ${data.error.info}`); } const articleText = data?.parse?.text; if (!articleText) { throw new Error("Could not find content for the specified article"); } return convertHtmlToMarkdown(articleText); } // Create MCP server const server = new McpServer({ name: "Wikipedia", 
description: "Search and retrieve Wikipedia articles", version: "1.0.0", });

/**
 * Produces the message text for a value caught in a `catch` clause.
 *
 * @param error The caught value, which is not guaranteed to be an `Error`.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

// Add a Wikipedia search tool
server.tool(
  "search",
  { query: z.string().describe("The search term for Wikipedia") },
  async ({ query }) => {
    try {
      const results = await searchWikipedia(query);

      if (results.length === 0) {
        return { content: [{ type: "text", text: "No results found." }] };
      }

      return { content: results };
    } catch (error) {
      console.error("Error fetching from Wikipedia:", error);
      return {
        content: [
          {
            type: "text",
            text: `Error searching Wikipedia: ${describeError(error)}`,
          },
        ],
        // Mark the result as a failure so clients do not treat it as content.
        isError: true,
      };
    }
  }
);

// Add a Wikipedia article reading tool
server.tool(
  "readArticle",
  {
    title: z
      .string()
      .optional()
      .describe("The title of the Wikipedia article to read"),
    pageId: z
      .number()
      .optional()
      .describe("The page ID of the Wikipedia article to read"),
  },
  async ({ title, pageId }) => {
    // Validate that either title or pageId is provided
    if (!title && !pageId) {
      return {
        content: [
          {
            type: "text",
            text: "Error: Either title or pageId must be provided.",
          },
        ],
        isError: true,
      };
    }

    try {
      const markdown = await getWikipediaArticle({ title, pageId });
      return { content: [{ type: "text", text: markdown }] };
    } catch (error) {
      console.error("Error fetching from Wikipedia:", error);
      return {
        content: [
          {
            type: "text",
            text: `Error reading Wikipedia article: ${describeError(error)}`,
          },
        ],
        // Same failure flag as the validation branch above.
        isError: true,
      };
    }
  }
);

/**
 * Connects the MCP server to a stdio transport.
 */
async function runServer() {
  const transport = new StdioServerTransport();
  await server.connect(transport);
}

// Log a startup failure and exit with a non-zero status.
runServer().catch((error) => {
  console.error("Fatal error running server:", error);
  process.exit(1);
});
```