# Directory Structure
```
├── .gitignore
├── .npmignore
├── build
│ ├── examples
│ │ ├── CursorAdapter.js
│ │ └── CursorCommands.js
│ ├── index.js
│ ├── services
│ │ ├── CacheService.js
│ │ ├── CursorIntegration.js
│ │ └── ScraperService.js
│ ├── types
│ │ └── index.js
│ └── utils
│ ├── extractors.js
│ ├── github.js
│ └── packageRepository.js
├── cdugo-docs-fetcher-mcp-1.0.0.tgz
├── Dockerfile
├── docs-fetcher-mcp-install.js
├── install.js
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── smithery.yaml
├── src
│ ├── index.ts
│ ├── services
│ │ ├── CacheService.ts
│ │ └── ScraperService.ts
│ ├── types
│ │ └── index.ts
│ └── utils
│ ├── extractors.ts
│ └── packageRepository.ts
└── tsconfig.json
```
# Files
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
# Dependency directories
node_modules/
# Environment variables
.env
# Logs
logs
*.log
npm-debug.log*
# OS-specific files
.DS_Store
Thumbs.db
# Editor directories and files
.idea/
.vscode/
*.swp
*.swo
```
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
```
# Source files
src/
# Development files
.git/
.gitignore
tsconfig.json
Dockerfile
smithery.yaml
# Node modules
node_modules/
# IDE files
.vscode/
.idea/
# Logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# OS specific files
.DS_Store
Thumbs.db
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
# 📚 DocsFetcher MCP Server
[](https://smithery.ai/server/@cdugo/mcp-get-docs)
[](https://www.npmjs.com/package/@cdugo/docs-fetcher-mcp)
[](https://www.npmjs.com/package/@cdugo/docs-fetcher-mcp)
An MCP server that fetches package documentation from multiple language ecosystems for LLMs like Claude without requiring API keys.
<a href="https://glama.ai/mcp/servers/8yfwtryuc5">
<img width="380" height="200" src="https://glama.ai/mcp/servers/8yfwtryuc5/badge" alt="DocsFetcher Server MCP server" />
</a>
## ✨ Features
- 🌐 Supports multiple programming languages (JavaScript, Python, Java, .NET, Ruby, PHP, Rust, Go, Swift)
- 📦 Fetches documentation for packages by name or URL
- 🔍 Crawls documentation sites to extract comprehensive information
- 📄 Extracts README, API docs, code examples, and repository info
- 🧠 Provides structured data for LLM summarization
- 💬 Includes specialized prompts for documentation analysis
- 🔑 **No API key required** - works natively with Claude Desktop and Cursor IDE
## 🚀 Installation
### Claude Desktop
1. Open Claude Desktop → Settings → Developer
2. Click "Edit Config" and add:
```json
{
"mcpServers": {
"docsFetcher": {
"command": "npx",
"args": [
"-y",
"@smithery/cli@latest",
"run",
"@cdugo/mcp-get-docs",
"--config",
"'{}'"
]
}
}
}
```
### Cursor IDE Configuration
1. Open Cursor IDE → Settings → MCP → Add New MCP Server
2. Add:
```json
Name: docsFetcher
Command: npx -y @smithery/cli@latest run @cdugo/mcp-get-docs --config "{}"
```
#### Prerequisites
- 📋 Node.js 18 or later
## 🏃♂️ Running Locally
```bash
git clone https://github.com/cdugo/package-documentation-mcp
cd package-documentation-mcp
npm install
npm run build
```
Once installed, you can run the server locally with:
```bash
# From the project root directory
npm start
```
For development with auto-restart on file changes:
```bash
npm run dev
```
The server will start on the default port (usually 3000). You should see output like:
```
🚀 DocsFetcher MCP Server running!
📋 Ready to fetch documentation
```
To specify a custom port:
```bash
PORT=8080 npm start
```
## 🛠️ Available Tools
1. **fetch-url-docs**: 🔗 Fetch docs from a specific URL
2. **fetch-package-docs**: 📦 Fetch docs for a package with optional language specification
3. **fetch-library-docs**: 🧠 Smart tool that works with either package name or URL
4. **fetch-multilingual-docs**: 🌍 Fetch docs for a package across multiple language ecosystems
## 📝 Available Prompts
1. **summarize-library-docs**: 📚 Create a comprehensive library summary
2. **explain-dependency-error**: 🐛 Generate dependency error explanations
## 💡 Example Queries
### Basic Library Information
- "What is Express.js and how do I use it?"
- "Tell me about the React library"
- "How do I use requests in Python?"
### Multi-language Support
- "Show me documentation for lodash in JavaScript"
- "Compare pandas in Python and data.table in R"
### Using Tools
- "@fetch-package-docs with packageName='express' and language='javascript'"
- "@fetch-package-docs with packageName='requests' and language='python'"
- "@fetch-multilingual-docs with packageName='http' and languages=['javascript', 'python', 'rust']"
### Using Prompts
- "@summarize-library-docs with libraryName='express'"
- "@explain-dependency-error with packageName='dotenv'"
## ❓ Troubleshooting
### Local Installation
- **Server not showing up**: ✅ Verify absolute path in configuration
- **Connection errors**: 🔄 Restart Claude Desktop or Cursor IDE
- **Fetch failures**: ⚠️ Some packages may have non-standard documentation
- **Language support**: 🌐 If a language isn't working, try using the package's direct URL
## 📄 License
MIT
```
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
```dockerfile
FROM node:18-alpine
WORKDIR /app
# Copy package files
COPY package*.json ./
# Install dependencies
RUN npm install --ignore-scripts
# Copy application code
COPY . .
# Build the application
RUN npm run build
# Command will be provided by smithery.yaml
CMD ["node", "build/index.js"]
```
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
```json
{
"compilerOptions": {
"target": "ES2022",
"module": "Node16",
"moduleResolution": "Node16",
"outDir": "./build",
"rootDir": "./src",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true
},
"include": ["src/**/*"],
"exclude": ["node_modules"]
}
```
--------------------------------------------------------------------------------
/src/types/index.ts:
--------------------------------------------------------------------------------
```typescript
// Types for processed documentation page
/**
 * Result of fetching and parsing one documentation page.
 * Instances are built by the scraper and serialized to JSON by the cache.
 */
export interface ProcessedPage {
/** URL the page was fetched from. */
url: string;
/** Text of the page's <title> element. */
title: string;
/** HTML of the main content container, or of <body> when none is found. */
content: string;
/** Same-host links judged relevant; used as crawl candidates. */
links: string[];
/** Code snippets found on the page. */
codeExamples: CodeExample[];
/** Heading-based API entries found on the page. */
apiSignatures: APISignature[];
/** ISO-8601 creation time; the cache uses it to expire entries. */
timestamp: string;
}
// Interface for code examples
/** A code snippet extracted from a documentation page. */
export interface CodeExample {
/** Trimmed text content of the code element. */
code: string;
/** Lower-cased language name, or an empty string when it could not be detected. */
language: string;
/** Text of a nearby heading or paragraph; empty when none was found. */
description: string;
}
// Interface for API signatures
/** An API entry derived from a heading and the content that follows it. */
export interface APISignature {
/** Whitespace-normalized heading text. */
name: string;
/** Text of the first code/signature element under the heading; may be empty. */
signature: string;
/** Text of the first paragraph under the heading; may be empty. */
description: string;
}
```
--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------
```yaml
# Smithery.ai configuration
name: docs-fetcher-mcp
description: MCP server that fetches library documentation
version: 1.0.0
startCommand:
type: stdio
configSchema:
# JSON Schema defining the configuration options for the MCP.
type: object
properties:
cacheDirectory:
type: string
description: Directory to store cached documentation (optional)
required: []
commandFunction: |-
(config) => {
const env = {};
if (config.cacheDirectory) {
env.CACHE_DIR = config.cacheDirectory;
}
return {
command: 'node',
args: ['build/index.js'],
env
};
}
```
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
```json
{
"name": "@cdugo/docs-fetcher-mcp",
"version": "1.0.0",
"description": "MCP server that fetches package documentation from multiple language ecosystems for LLMs like Claude without requiring API keys",
"main": "build/index.js",
"type": "module",
"bin": {
"docs-fetcher-mcp": "./build/index.js",
"docs-fetcher-mcp-install": "./docs-fetcher-mcp-install.js"
},
"scripts": {
"build": "tsc && chmod +x build/index.js && chmod +x docs-fetcher-mcp-install.js",
"dev": "tsc --watch",
"start": "node build/index.js",
"install-server": "node install.js",
"prepublishOnly": "npm run build",
"smithery": "npx -y @smithery/cli@latest run @cdugo/mcp-get-docs --config \"{}\""
},
"keywords": [
"mcp",
"documentation",
"claude",
"cursor",
"docs-fetcher",
"smithery",
"package-docs",
"library-docs"
],
"author": "cdugo",
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/cdugo/package-documentation-mcp"
},
"homepage": "https://smithery.ai/server/@cdugo/mcp-get-docs/tools",
"bugs": {
"url": "https://github.com/cdugo/package-documentation-mcp/issues"
},
"dependencies": {
"@modelcontextprotocol/sdk": "^1.6.0",
"cheerio": "^1.0.0",
"node-fetch": "^3.3.2",
"zod": "^3.22.4"
},
"devDependencies": {
"@types/node": "^20.10.6",
"typescript": "^5.3.3"
},
"engines": {
"node": ">=18.0.0"
},
"files": [
"build/",
"install.js",
"docs-fetcher-mcp-install.js",
"README.md",
"LICENSE"
],
"publishConfig": {
"access": "public"
}
}
```
--------------------------------------------------------------------------------
/src/services/CacheService.ts:
--------------------------------------------------------------------------------
```typescript
import * as fs from "fs";
import * as path from "path";
import * as os from "os";
import { ProcessedPage } from "../types/index.js";
// Directory that holds cached pages. Honors the CACHE_DIR environment variable
// (which smithery.yaml sets from its `cacheDirectory` option) and otherwise
// falls back to a folder inside the OS temp directory.
const DOCS_CACHE_DIR =
  process.env.CACHE_DIR?.trim() || path.join(os.tmpdir(), "docs-fetcher-cache");

// Make sure the cache directory exists. `recursive: true` makes this a no-op
// when it is already there. A failure is logged instead of thrown so that an
// unusable cache directory only disables caching rather than crashing startup.
try {
  fs.mkdirSync(DOCS_CACHE_DIR, { recursive: true });
} catch (error) {
  console.error(`Error creating cache directory ${DOCS_CACHE_DIR}:`, error);
}

/** Maximum age of a cache entry before it is treated as expired (24 hours). */
const CACHE_TTL_MS = 24 * 60 * 60 * 1000;

/**
 * File-system cache for processed documentation pages, one JSON file per URL.
 */
export class CacheService {
  constructor() {}

  /**
   * Map a URL to the path of its cache file.
   *
   * The key uses the URL-safe base64 alphabet: standard base64 may contain
   * "/", which `path.join` would interpret as a directory separator and which
   * made such URLs impossible to cache.
   * @param url URL of the page
   * @returns Absolute path of the cache file for that URL
   */
  private getCachePath(url: string): string {
    const cacheKey = Buffer.from(url).toString("base64url");
    return path.join(DOCS_CACHE_DIR, `${cacheKey}.json`);
  }

  /**
   * Get a page from the cache
   * @param url URL of the page to retrieve
   * @returns The cached page, or null if not found, unreadable or expired
   */
  public getPage(url: string): ProcessedPage | null {
    const cachePath = this.getCachePath(url);
    if (!fs.existsSync(cachePath)) {
      return null;
    }

    try {
      const cacheData = fs.readFileSync(cachePath, "utf-8");
      const cachedPage = JSON.parse(cacheData) as ProcessedPage;

      // An unparsable timestamp yields NaN, which fails the comparison below
      // and is therefore treated as expired.
      const cacheAge = Date.now() - new Date(cachedPage.timestamp).getTime();
      if (cacheAge < CACHE_TTL_MS) {
        return cachedPage;
      }
    } catch (error) {
      console.error(`Error reading cache for ${url}:`, error);
    }

    return null;
  }

  /**
   * Save a page to the cache. Write failures are logged, never thrown.
   * @param url URL of the page to cache
   * @param page Page data to store
   */
  public setPage(url: string, page: ProcessedPage): void {
    const cachePath = this.getCachePath(url);
    try {
      fs.writeFileSync(cachePath, JSON.stringify(page, null, 2), "utf-8");
    } catch (error) {
      console.error(`Error writing cache for ${url}:`, error);
    }
  }
}

// Export a singleton instance
export const cacheService = new CacheService();
```
--------------------------------------------------------------------------------
/src/utils/packageRepository.ts:
--------------------------------------------------------------------------------
```typescript
import fetch from "node-fetch";
/**
 * Tell whether a string can be parsed as an absolute URL.
 * @param str Candidate string
 * @returns true when the `URL` constructor accepts the string, false otherwise
 */
export function isUrl(str: string): boolean {
  let parsed: URL | undefined;
  try {
    parsed = new URL(str);
  } catch {
    // The constructor throws for anything that is not an absolute URL.
    parsed = undefined;
  }
  return parsed !== undefined;
}
/**
 * Build the npmjs.com page URL for a package.
 * @param packageName Name of the npm package (inserted verbatim, not encoded)
 * @returns URL of the package page on npmjs.com
 */
export function getNpmPackageUrl(packageName: string): string {
  const npmPackageBase = "https://www.npmjs.com/package/";
  return npmPackageBase + packageName;
}
/**
 * Build the package page URL on the registry that matches a language.
 *
 * The language is matched case-insensitively after trimming, against a set of
 * aliases per registry. Unknown languages fall back to npm.
 * @param packageName Name of the package (appended verbatim to the registry URL)
 * @param language Programming language or registry name; defaults to "javascript"
 * @returns URL of the package page on the selected registry
 */
export function getPackageUrl(
  packageName: string,
  language = "javascript"
): string {
  const npmBase = "https://www.npmjs.com/package/";

  // Each entry pairs the accepted aliases with the registry's URL prefix.
  const registries: ReadonlyArray<readonly [readonly string[], string]> = [
    // JavaScript/TypeScript
    [["javascript", "js", "typescript", "ts", "node", "nodejs", "npm"], npmBase],
    // Python
    [["python", "py", "pypi"], "https://pypi.org/project/"],
    // Java
    [["java", "maven"], "https://mvnrepository.com/artifact/"],
    // .NET
    [["dotnet", ".net", "csharp", "c#", "nuget"], "https://www.nuget.org/packages/"],
    // Ruby
    [["ruby", "gem", "rubygem", "rubygems"], "https://rubygems.org/gems/"],
    // PHP
    [["php", "composer", "packagist"], "https://packagist.org/packages/"],
    // Rust
    [["rust", "cargo", "crate", "crates"], "https://crates.io/crates/"],
    // Go
    [["go", "golang"], "https://pkg.go.dev/"],
    // Swift
    [["swift", "cocoapods"], "https://cocoapods.org/pods/"],
  ];

  const wanted = language.toLowerCase().trim();
  for (const [aliases, prefix] of registries) {
    if (aliases.includes(wanted)) {
      return prefix + packageName;
    }
  }

  // Default to npm
  return npmBase + packageName;
}
/**
 * Convert a package.json style repository URL that points at GitHub into a
 * browsable https URL.
 *
 * Handles the `git+` prefix, `git://`, `ssh://git@github.com/` and
 * `git@github.com:` forms, and removes only a trailing `.git` (optionally
 * followed by a `#ref`), so names such as `user.github.io` stay intact.
 */
function normalizeGitHubRepoUrl(rawUrl: string): string {
  return rawUrl
    .trim()
    .replace(/^git\+/, "")
    .replace(/^git:\/\//, "https://")
    .replace(/^ssh:\/\/git@github\.com[:/]/, "https://github.com/")
    .replace(/^git@github\.com:/, "https://github.com/")
    .replace(/\.git\/?(#.*)?$/, "");
}

/**
 * Get the GitHub repository URL for an npm package
 *
 * Looks the package up in the npm registry and inspects its `repository`
 * field (object with `url`, or plain string) and then its `homepage` field.
 * @param packageName Name of the npm package
 * @returns GitHub repository URL, or null if the registry lookup fails or no
 * GitHub URL is present
 */
export async function getGitHubRepoUrl(
  packageName: string
): Promise<string | null> {
  try {
    const response = await fetch(`https://registry.npmjs.org/${packageName}`);
    if (!response.ok) {
      // Unknown package or registry error: there is no metadata to inspect.
      return null;
    }

    const data = (await response.json()) as {
      repository?: unknown;
      homepage?: unknown;
    } | null;

    // Try to get GitHub URL from repository field
    const repository = data?.repository;
    let repoUrl: unknown;
    if (typeof repository === "string") {
      repoUrl = repository;
    } else if (typeof repository === "object" && repository !== null) {
      repoUrl = (repository as { url?: unknown }).url;
    }
    if (typeof repoUrl === "string" && repoUrl.includes("github.com")) {
      return normalizeGitHubRepoUrl(repoUrl);
    }

    // Try to get GitHub URL from homepage field
    const homepage = data?.homepage;
    if (typeof homepage === "string" && homepage.includes("github.com")) {
      return homepage;
    }

    return null;
  } catch (error) {
    console.error(`Error fetching GitHub repo URL for ${packageName}:`, error);
    return null;
  }
}
/**
 * Extract library name from URL
 *
 * Recognizes package pages of npm, PyPI, NuGet, RubyGems, Packagist,
 * crates.io, pkg.go.dev, CocoaPods and MVNRepository, plus GitHub repository
 * URLs. Query strings and fragments are ignored, scoped npm names keep their
 * scope (`@scope/name`), and a trailing `.git` is dropped from GitHub
 * repository names.
 *
 * For registries other than npm only the first path segment after the
 * registry prefix is used (e.g. the vendor on Packagist or the host on
 * pkg.go.dev).
 * @param url URL to extract library name from
 * @returns Library name, or the input unchanged when no name can be derived
 */
export function extractLibraryName(url: string): string {
  // The name lives in the path; drop "?query" and "#fragment" first.
  const location = url.split(/[?#]/)[0] ?? url;

  const npmMarker = "npmjs.com/package/";
  const registryMarkers = [
    npmMarker,
    "pypi.org/project/",
    "nuget.org/packages/",
    "rubygems.org/gems/",
    "packagist.org/packages/",
    "crates.io/crates/",
    "pkg.go.dev/",
    "cocoapods.org/pods/",
    "mvnrepository.com/artifact/",
  ];

  for (const marker of registryMarkers) {
    const markerIndex = location.indexOf(marker);
    if (markerIndex === -1) {
      continue;
    }

    const segments = location.slice(markerIndex + marker.length).split("/");
    let name = segments[0] ?? "";
    // Scoped npm packages span two path segments: "@scope/name".
    if (marker === npmMarker && name.startsWith("@") && segments[1]) {
      name = `${name}/${segments[1]}`;
    }
    return name || url;
  }

  const gitHubMarker = "github.com/";
  const gitHubIndex = location.indexOf(gitHubMarker);
  if (gitHubIndex !== -1) {
    // Path layout is "<owner>/<repo>/..."; the repository is the second segment.
    const repo = location.slice(gitHubIndex + gitHubMarker.length).split("/")[1];
    if (repo) {
      return repo.replace(/\.git$/, "");
    }
  }

  return url;
}
```
--------------------------------------------------------------------------------
/install.js:
--------------------------------------------------------------------------------
```javascript
#!/usr/bin/env node
import { execSync } from 'child_process';
import fs from 'fs';
import path from 'path';
import os from 'os';
import readline from 'readline';
import { fileURLToPath } from 'url';
// Get the current file's directory path
// (derived from import.meta.url via fileURLToPath, since this file is an ES module)
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Shared readline interface: the prompt helper reads answers from it and main() closes it.
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout
});
// Print the installer banner.
console.log('\n🚀 DocsFetcher MCP Server Installer\n');
/**
 * Print the client-selection menu and wait for the user's answer.
 * @returns {Promise<string>} The raw line the user typed
 */
async function promptForClient() {
  const menuLines = [
    '\nWhere would you like to install this MCP server?',
    '1. Claude Desktop',
    '2. Cursor IDE',
    '3. Both',
    '4. Skip configuration'
  ];

  return new Promise((resolve) => {
    for (const line of menuLines) {
      console.log(line);
    }
    // readline passes the answer to the callback; resolve the promise with it.
    rl.question('\nEnter your choice (1-4): ', (answer) => resolve(answer));
  });
}
/**
 * Locate the directory in which Claude Desktop keeps its configuration file.
 * The location depends on the operating system: `%APPDATA%\Claude` on Windows,
 * `~/Library/Application Support/Claude` on macOS, and the XDG config
 * directory elsewhere.
 * @returns {string} Absolute path of the Claude Desktop config directory
 */
function getClaudeConfigDir() {
  switch (process.platform) {
    case 'win32':
      return path.join(
        process.env.APPDATA || path.join(os.homedir(), 'AppData', 'Roaming'),
        'Claude'
      );
    case 'darwin':
      return path.join(os.homedir(), 'Library', 'Application Support', 'Claude');
    default:
      return path.join(
        process.env.XDG_CONFIG_HOME || path.join(os.homedir(), '.config'),
        'Claude'
      );
  }
}

/**
 * Add (or replace) the `docsFetcher` entry in an MCP client config file,
 * creating the directory and file when they do not exist yet. Other settings
 * already present in the file are preserved.
 * @param {string} configDir Directory containing the config file
 * @param {string} configFileName Name of the JSON config file
 * @param {object} serverEntry Value to store under `mcpServers.docsFetcher`
 * @returns {string} Path of the file that was written
 * @throws When the existing file is not valid JSON or not a JSON object, so a
 * damaged user config is never overwritten
 */
function registerDocsFetcher(configDir, configFileName, serverEntry) {
  fs.mkdirSync(configDir, { recursive: true });
  const configPath = path.join(configDir, configFileName);

  // Read existing config or create new one
  let config = { mcpServers: {} };
  if (fs.existsSync(configPath)) {
    config = JSON.parse(fs.readFileSync(configPath, 'utf8'));
    if (config === null || typeof config !== 'object' || Array.isArray(config)) {
      throw new Error(`${configPath} does not contain a JSON object`);
    }
    if (!config.mcpServers) config.mcpServers = {};
  }

  config.mcpServers.docsFetcher = serverEntry;
  fs.writeFileSync(configPath, JSON.stringify(config, null, 2));
  return configPath;
}

/**
 * Build the project, link it globally and optionally register the server
 * with Claude Desktop and/or Cursor IDE.
 *
 * All build steps run in the directory that contains this script, so the
 * installer works no matter which directory it is started from. On an
 * unrecoverable failure the process exit code is set to 1.
 */
async function main() {
  // This script sits in the project root.
  const projectRoot = __dirname;

  try {
    // Build the project
    console.log('📦 Building the project...');
    execSync('npm install && npm run build', { stdio: 'inherit', cwd: projectRoot });

    // Install globally (best effort: a failure here does not abort the installer)
    console.log('\n🌐 Installing the server globally...');
    try {
      execSync('npm link', { stdio: 'inherit', cwd: projectRoot });
      console.log('✅ Successfully installed globally! You can now run "docs-fetcher-mcp" from anywhere.');
    } catch (error) {
      console.error('❌ Failed to install globally. You may need to run with sudo/administrator privileges.');
      console.error('Error:', error.message);
    }

    // Configure clients
    const clientChoice = String(await promptForClient()).trim();
    const configureClaudeDesktop = ['1', '3'].includes(clientChoice);
    const configureCursorIDE = ['2', '3'].includes(clientChoice);

    // Absolute path to the executable
    const serverPath = path.join(projectRoot, 'build', 'index.js');
    const serverEntry = { command: 'node', args: [serverPath] };

    // Configure Claude Desktop
    if (configureClaudeDesktop) {
      console.log('\n🔧 Configuring Claude Desktop...');
      try {
        const savedTo = registerDocsFetcher(
          getClaudeConfigDir(),
          'claude_desktop_config.json',
          serverEntry
        );
        console.log('✅ Claude Desktop configured successfully!');
        console.log(`📄 Configuration saved to: ${savedTo}`);
      } catch (error) {
        console.error('❌ Failed to configure Claude Desktop');
        console.error('Error:', error.message);
      }
    }

    // Configure Cursor IDE
    if (configureCursorIDE) {
      console.log('\n🔧 Configuring Cursor IDE...');
      try {
        // NOTE(review): confirm that Cursor actually reads ~/.cursor/cursor_config.json;
        // its MCP documentation may name a different file.
        const savedTo = registerDocsFetcher(
          path.join(os.homedir(), '.cursor'),
          'cursor_config.json',
          serverEntry
        );
        console.log('✅ Cursor IDE configured successfully!');
        console.log(`📄 Configuration saved to: ${savedTo}`);
      } catch (error) {
        console.error('❌ Failed to configure Cursor IDE');
        console.error('Error:', error.message);
      }
    }

    console.log('\n🎉 Installation complete!');
    console.log('\nNext steps:');
    if (configureClaudeDesktop) {
      console.log('- Restart Claude Desktop to apply changes');
    }
    if (configureCursorIDE) {
      console.log('- Restart Cursor IDE to apply changes');
    }
    console.log('\nThank you for installing DocsFetcher MCP Server! 🙏');
  } catch (error) {
    console.error('❌ Installation failed:');
    console.error(error);
    // Let calling scripts detect the failure.
    process.exitCode = 1;
  } finally {
    rl.close();
  }
}

main();
```
--------------------------------------------------------------------------------
/src/utils/extractors.ts:
--------------------------------------------------------------------------------
```typescript
import * as cheerio from "cheerio";
import { APISignature, CodeExample } from "../types/index.js";
/**
 * Extract relevant links from HTML content
 *
 * A link is kept when it is an http(s) URL on the same hostname as the page
 * and either its path or its text contains a documentation keyword, or its
 * path contains the library name. Fragments are removed before comparing and
 * storing, so "page#a" and "page#b" produce a single entry and anchors into
 * the current page are never returned.
 * @param html HTML content
 * @param baseUrl Base URL of the page (must be an absolute URL)
 * @param libraryName Name of the library; an empty name matches nothing
 * @returns Array of unique, fragment-free absolute URLs
 * @throws TypeError when baseUrl is not a valid absolute URL
 */
export function extractRelevantLinks(
  html: string,
  baseUrl: string,
  libraryName: string
): string[] {
  const $ = cheerio.load(html);
  const links = new Set<string>();

  const baseUrlObj = new URL(baseUrl);
  baseUrlObj.hash = "";
  // Normalized form of the current page, used to filter out self-links.
  const currentPage = baseUrlObj.href;

  const libraryNameLower = libraryName.toLowerCase().trim();

  // Keywords that indicate important documentation pages
  const apiKeywords = [
    "api",
    "reference",
    "doc",
    "guide",
    "tutorial",
    "example",
    "usage",
    "getting-started",
    "introduction",
    "started",
  ];

  $("a[href]").each((_, element) => {
    const href = $(element).attr("href");
    if (!href) return;

    // Convert relative URLs to absolute
    let target: URL;
    try {
      target = new URL(href, baseUrl);
    } catch {
      // Ignore invalid URLs
      return;
    }

    // Only include http(s) links from the same hostname
    if (target.protocol !== "http:" && target.protocol !== "https:") return;
    if (target.hostname !== baseUrlObj.hostname) return;

    // The fragment never reaches the server, so it does not identify a page.
    target.hash = "";
    const pageUrl = target.href;
    if (pageUrl === currentPage) return;

    const linkText = $(element).text().toLowerCase();
    const linkPath = target.pathname.toLowerCase();

    // Check if link contains relevant keywords
    const mentionsKeyword = apiKeywords.some(
      (keyword) => linkPath.includes(keyword) || linkText.includes(keyword)
    );
    // `includes("")` is always true, so an empty name must not count as a match.
    const mentionsLibrary =
      libraryNameLower !== "" && linkPath.includes(libraryNameLower);

    if (mentionsKeyword || mentionsLibrary) {
      links.add(pageUrl);
    }
  });

  return Array.from(links);
}
/**
 * Derive a language name from a `class` attribute value.
 *
 * Understands prefixed classes such as `language-ts`, `lang-py`, `syntax-rb`
 * and `highlight-source-js`, and the bare class tokens `js`/`javascript`/`jsx`
 * and `ts`/`typescript`/`tsx`. Whole tokens are compared, so classes like
 * `hljs`, `json` or `contents` are not mistaken for JavaScript/TypeScript.
 * @returns The detected language, or "" when the class gives no hint
 */
function detectLanguageFromClass(className: string): string {
  const prefixed = className.match(
    /(language|lang|syntax|highlight-source)-(\w+)/i
  );
  if (prefixed?.[2]) {
    return prefixed[2];
  }

  const tokens = className.toLowerCase().split(/\s+/);
  if (tokens.some((t) => t === "js" || t === "javascript" || t === "jsx")) {
    return "javascript";
  }
  if (tokens.some((t) => t === "ts" || t === "typescript" || t === "tsx")) {
    return "typescript";
  }
  return "";
}

/**
 * Extract code examples from HTML content
 *
 * Every snippet is reported once: elements nested inside a `<pre>`/`<code>`
 * are covered by their outermost code element, and wrapper elements (for
 * example `.highlight` containers) are skipped in favour of the code elements
 * they contain. Snippets shorter than 10 characters are ignored.
 * @param html HTML content
 * @returns Array of code examples
 */
export function extractCodeExamples(html: string): CodeExample[] {
  const $ = cheerio.load(html);
  const examples: CodeExample[] = [];

  $(
    'pre code, pre, code, .highlight, .code-example, [class*="code"], [class*="example"]'
  ).each((_, element) => {
    const $elem = $(element);

    // Skip anything nested in a <pre>/<code>; the outer element already covers it.
    if ($elem.parents("pre, code").length > 0) {
      return;
    }

    // Skip wrappers that merely contain code elements; those are visited themselves.
    const isCodeTag = element.name === "pre" || element.name === "code";
    if (!isCodeTag && $elem.find("pre, code").length > 0) {
      return;
    }

    const code = $elem.text().trim();
    if (!code || code.length < 10) return; // Skip very short code blocks

    // Language hints may sit on the element itself, on the <code> inside a
    // <pre>, or on the wrapper around it — check them in that order.
    const classCandidates = [
      $elem.attr("class"),
      $elem.children("code").first().attr("class"),
      $elem.parent().attr("class"),
    ];
    let language = "";
    for (const candidate of classCandidates) {
      if (!candidate) continue;
      language = detectLanguageFromClass(candidate);
      if (language) break;
    }

    if (!language) {
      language =
        $elem.attr("data-language") ||
        $elem.attr("data-lang") ||
        $elem.attr("language") ||
        $elem.attr("lang") ||
        "";
    }

    // Try to find a description for this code block
    let description = "";
    let $heading = $elem.prev("h1, h2, h3, h4, h5, h6, p");
    if ($heading.length > 0) {
      description = $heading.text().trim();
    } else {
      // Look for a heading in the parent element
      $heading = $elem.parent().find("h1, h2, h3, h4, h5, h6").first();
      if ($heading.length > 0) {
        description = $heading.text().trim();
      }
    }

    examples.push({
      code,
      language: language.toLowerCase(),
      description,
    });
  });

  return examples;
}
/**
 * Extract API signatures from HTML content
 *
 * Each heading (h1–h6) becomes a candidate entry. Its signature is the first
 * following sibling code/signature element and its description the first
 * following sibling paragraph, provided no other heading sits in between.
 * Headings longer than 100 characters, or mentioning "introduction" or
 * "getting started", are skipped, as are headings with neither signature nor
 * description.
 * @param html HTML content
 * @param libraryName Name of the library (currently unused)
 * @returns Array of API signatures
 */
export function extractAPISignatures(
  html: string,
  libraryName: string
): APISignature[] {
  const $ = cheerio.load(html);
  const headingSelector = "h1, h2, h3, h4, h5, h6";
  const collapseWhitespace = (text: string): string =>
    text.replace(/\s+/g, " ").trim();

  const found: APISignature[] = [];

  for (const node of $(headingSelector).toArray()) {
    const $heading = $(node);
    const name = collapseWhitespace($heading.text());
    const lowered = name.toLowerCase();

    // Skip very long headings or common sections
    const isSkipped =
      name.length > 100 ||
      lowered.includes("introduction") ||
      lowered.includes("getting started");
    if (isSkipped) {
      continue;
    }

    // Text of the first later sibling matching `selector`, but only when this
    // heading is the closest heading before it; otherwise an empty string.
    const textOfFirstOwned = (selector: string): string => {
      const $candidate = $heading.nextAll(selector).first();
      const belongsHere =
        $candidate.length > 0 &&
        $candidate.prevAll(headingSelector).first().is($heading);
      return belongsHere ? collapseWhitespace($candidate.text()) : "";
    };

    const signature = textOfFirstOwned(
      "pre, code, .signature, .function-signature"
    );
    const description = textOfFirstOwned("p");

    // Only add if we have either a signature or description
    if (!signature && !description) {
      continue;
    }

    found.push({ name, signature, description });
  }

  return found;
}
```
--------------------------------------------------------------------------------
/docs-fetcher-mcp-install.js:
--------------------------------------------------------------------------------
```javascript
#!/usr/bin/env node
import { execSync } from 'child_process';
import fs from 'fs';
import path from 'path';
import os from 'os';
import readline from 'readline';
import { fileURLToPath } from 'url';
// Get the current file's directory path
// (derived from import.meta.url via fileURLToPath, since this file is an ES module)
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Shared readline interface: the prompt helpers read answers from it and main() closes it.
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout
});
// Print the installer banner.
console.log('\n🚀 DocsFetcher MCP Server Installer\n');
/**
 * Print the client-selection menu and wait for the user's answer.
 * @returns {Promise<string>} The raw line the user typed
 */
async function promptForClient() {
  const menuLines = [
    '\nWhere would you like to configure this MCP server?',
    '1. Claude Desktop',
    '2. Cursor IDE',
    '3. Both',
    '4. Skip configuration'
  ];

  return new Promise((resolve) => {
    for (const line of menuLines) {
      console.log(line);
    }
    // readline passes the answer to the callback; resolve the promise with it.
    rl.question('\nEnter your choice (1-4): ', (answer) => resolve(answer));
  });
}
/**
 * Print the installation-type menu and wait for the user's answer.
 * @returns {Promise<string>} The raw line the user typed
 */
async function promptForInstallationType() {
  const menuLines = [
    '\nHow would you like to configure the MCP server?',
    '1. Use Smithery deployment (recommended)',
    '2. Use local npm installation'
  ];

  return new Promise((resolve) => {
    for (const line of menuLines) {
      console.log(line);
    }
    // readline passes the answer to the callback; resolve the promise with it.
    rl.question('\nEnter your choice (1-2): ', (answer) => resolve(answer));
  });
}
/**
 * Locate the directory in which Claude Desktop keeps its configuration file.
 * The location depends on the operating system: `%APPDATA%\Claude` on Windows,
 * `~/Library/Application Support/Claude` on macOS, and the XDG config
 * directory elsewhere.
 * @returns {string} Absolute path of the Claude Desktop config directory
 */
function getClaudeConfigDir() {
  switch (process.platform) {
    case 'win32':
      return path.join(
        process.env.APPDATA || path.join(os.homedir(), 'AppData', 'Roaming'),
        'Claude'
      );
    case 'darwin':
      return path.join(os.homedir(), 'Library', 'Application Support', 'Claude');
    default:
      return path.join(
        process.env.XDG_CONFIG_HOME || path.join(os.homedir(), '.config'),
        'Claude'
      );
  }
}

/**
 * Add (or replace) the `docsFetcher` entry in an MCP client config file,
 * creating the directory and file when they do not exist yet. Other settings
 * already present in the file are preserved.
 * @param {string} configDir Directory containing the config file
 * @param {string} configFileName Name of the JSON config file
 * @param {object} serverEntry Value to store under `mcpServers.docsFetcher`
 * @returns {string} Path of the file that was written
 * @throws When the existing file is not valid JSON or not a JSON object, so a
 * damaged user config is never overwritten
 */
function registerDocsFetcher(configDir, configFileName, serverEntry) {
  fs.mkdirSync(configDir, { recursive: true });
  const configPath = path.join(configDir, configFileName);

  // Read existing config or create new one
  let config = { mcpServers: {} };
  if (fs.existsSync(configPath)) {
    config = JSON.parse(fs.readFileSync(configPath, 'utf8'));
    if (config === null || typeof config !== 'object' || Array.isArray(config)) {
      throw new Error(`${configPath} does not contain a JSON object`);
    }
    if (!config.mcpServers) config.mcpServers = {};
  }

  config.mcpServers.docsFetcher = serverEntry;
  fs.writeFileSync(configPath, JSON.stringify(config, null, 2));
  return configPath;
}

/**
 * Ask which clients to configure and how, then write the `docsFetcher`
 * server entry into the selected clients' config files.
 *
 * Any answer other than 1, 2 or 3 to the first question skips configuration.
 * On an unexpected failure the process exit code is set to 1.
 */
async function main() {
  try {
    // Configure clients
    const clientChoice = String(await promptForClient()).trim();
    const configureClaudeDesktop = ['1', '3'].includes(clientChoice);
    const configureCursorIDE = ['2', '3'].includes(clientChoice);

    if (!configureClaudeDesktop && !configureCursorIDE) {
      console.log('\n⏭️ Skipping configuration...');
      // The readline interface is closed by the finally block below.
      return;
    }

    const installationType = String(await promptForInstallationType()).trim();
    const useSmithery = installationType === '1';

    // Entry stored under mcpServers.docsFetcher; the same for every client.
    const serverEntry = useSmithery
      ? { url: "https://smithery.ai/server/@cdugo/mcp-get-docs/tools" }
      : {
          command: "npx",
          args: [
            "-y",
            "@smithery/cli@latest",
            "run",
            "@cdugo/mcp-get-docs",
            "--config",
            "'{}'",
          ]
        };

    // Configure Claude Desktop
    if (configureClaudeDesktop) {
      console.log('\n🔧 Configuring Claude Desktop...');
      try {
        const savedTo = registerDocsFetcher(
          getClaudeConfigDir(),
          'claude_desktop_config.json',
          serverEntry
        );
        console.log('✅ Claude Desktop configured successfully!');
        console.log(`📄 Configuration saved to: ${savedTo}`);
      } catch (error) {
        console.error('❌ Failed to configure Claude Desktop');
        console.error('Error:', error.message);
      }
    }

    // Configure Cursor IDE
    if (configureCursorIDE) {
      console.log('\n🔧 Configuring Cursor IDE...');
      try {
        // NOTE(review): confirm that Cursor actually reads ~/.cursor/cursor_config.json;
        // its MCP documentation may name a different file.
        const savedTo = registerDocsFetcher(
          path.join(os.homedir(), '.cursor'),
          'cursor_config.json',
          serverEntry
        );
        console.log('✅ Cursor IDE configured successfully!');
        console.log(`📄 Configuration saved to: ${savedTo}`);
      } catch (error) {
        console.error('❌ Failed to configure Cursor IDE');
        console.error('Error:', error.message);
      }
    }

    console.log('\n🎉 Configuration complete!');
    console.log('\nNext steps:');
    if (configureClaudeDesktop) {
      console.log('- Restart Claude Desktop to apply changes');
    }
    if (configureCursorIDE) {
      console.log('- Restart Cursor IDE to apply changes');
    }
    console.log('\nThank you for installing DocsFetcher MCP Server! 🙏');
  } catch (error) {
    console.error('❌ Configuration failed:');
    console.error(error);
    // Let calling scripts detect the failure.
    process.exitCode = 1;
  } finally {
    rl.close();
  }
}

main();
```
--------------------------------------------------------------------------------
/src/services/ScraperService.ts:
--------------------------------------------------------------------------------
```typescript
import fetch from "node-fetch";
import * as cheerio from "cheerio";
import { ProcessedPage } from "../types/index.js";
import { cacheService } from "./CacheService.js";
import {
extractRelevantLinks,
extractCodeExamples,
extractAPISignatures,
} from "../utils/extractors.js";
import { extractLibraryName } from "../utils/packageRepository.js";
export class ScraperService {
/**
 * Fetch and process a documentation page
 *
 * Returns the cached copy when one is available (unless `skipCache` is set).
 * Otherwise downloads the page, extracts its title, relevant links, code
 * examples, API signatures and main content, stores the result in the cache
 * and returns it. Responses with a non-2xx status are rejected, so error
 * pages are neither returned nor cached.
 * @param url URL to process
 * @param libraryName Name of the library
 * @param skipCache Whether to skip the cache lookup (the result is still cached)
 * @returns Processed page or null if failed
 */
public async fetchAndProcessPage(
  url: string,
  libraryName: string,
  skipCache = false
): Promise<ProcessedPage | null> {
  try {
    // Check cache first
    if (!skipCache) {
      const cachedPage = cacheService.getPage(url);
      if (cachedPage) {
        console.error(`Using cached version of ${url}`);
        return cachedPage;
      }
    }

    // NOTE(review): progress is logged with console.error, presumably to keep
    // stdout free for the stdio transport — confirm.
    console.error(`Fetching documentation from ${url}`);
    const response = await fetch(url);
    if (!response.ok) {
      // A 404/500 body is not documentation; do not parse or cache it.
      console.error(
        `Failed to fetch ${url}: HTTP ${response.status} ${response.statusText}`
      );
      return null;
    }
    const html = await response.text();

    // Parse HTML using cheerio
    const $ = cheerio.load(html);

    // Remove script and style elements
    $("script, style, noscript, iframe").remove();

    // Extract basic metadata
    const title = $("title").text();

    // Extract links for crawling
    const links = extractRelevantLinks(html, url, libraryName);

    // Extract code examples and API signatures
    const codeExamples = extractCodeExamples(html);
    const apiSignatures = extractAPISignatures(html, libraryName);

    // Extract main content
    const mainContent =
      $("main, article, .readme, .content, .documentation, #readme").html() ||
      "";

    // Fall back to the whole body if no main content container was found
    const content = mainContent || $("body").html() || "";

    // Create the processed page
    const processedPage: ProcessedPage = {
      url,
      title,
      content,
      links,
      codeExamples,
      apiSignatures,
      timestamp: new Date().toISOString(),
    };

    // Cache the page
    cacheService.setPage(url, processedPage);

    return processedPage;
  } catch (error) {
    console.error(`Error processing ${url}:`, error);
    return null;
  }
}
/**
 * Crawl documentation pages starting from a URL
 *
 * Pages are visited breadth-first, following the links reported for each
 * processed page, until `maxPages` pages have been processed or no URLs are
 * left. URLs that differ only in their "#fragment" are treated as the same
 * page, because the fragment is never sent to the server. Pages that fail to
 * load are skipped and do not count towards `maxPages`.
 * @param startUrl Starting URL for crawling
 * @param libraryName Name of the library
 * @param maxPages Maximum number of pages to crawl
 * @param skipCache Whether to skip the cache
 * @returns Array of processed pages
 */
public async crawlDocumentation(
  startUrl: string,
  libraryName: string,
  maxPages = 5,
  skipCache = false
): Promise<ProcessedPage[]> {
  const withoutFragment = (rawUrl: string): string =>
    rawUrl.split("#")[0] ?? rawUrl;

  // Every URL that has been queued or visited, keyed without its fragment.
  // A single set replaces the separate visited set and the linear queue scan.
  const seenUrls = new Set<string>([withoutFragment(startUrl)]);
  const processedPages: ProcessedPage[] = [];
  const urlsToVisit: string[] = [startUrl];

  while (urlsToVisit.length > 0 && processedPages.length < maxPages) {
    const currentUrl = urlsToVisit.shift();
    if (currentUrl === undefined) {
      break;
    }

    const processedPage = await this.fetchAndProcessPage(
      currentUrl,
      libraryName,
      skipCache
    );
    if (!processedPage) {
      continue;
    }

    processedPages.push(processedPage);

    // Add new URLs to visit
    for (const link of processedPage.links) {
      const pageKey = withoutFragment(link);
      if (!seenUrls.has(pageKey)) {
        seenUrls.add(pageKey);
        urlsToVisit.push(link);
      }
    }
  }

  return processedPages;
}
/**
* Fetch library documentation
* @param url URL or package name
* @param maxPages Maximum number of pages to crawl
* @returns Compiled markdown document
*/
public async fetchLibraryDocumentation(
url: string,
maxPages = 5
): Promise<string> {
try {
// If input is not a URL, assume it's a package name
if (!url.startsWith("http")) {
url = `https://www.npmjs.com/package/${url}`;
}
// Extract library name from URL
const libraryName = extractLibraryName(url);
// Crawl documentation
const pages = await this.crawlDocumentation(url, libraryName, maxPages);
if (pages.length === 0) {
throw new Error(`Failed to fetch documentation from ${url}`);
}
// Compile documentation into a single markdown document
const documentation = this.compileDocumentation(pages, libraryName);
// Include instructions for using the prompt
const promptInstructions = `
---
🔍 For better summarization, use the "summarize-library-docs" prompt with:
- libraryName: "${libraryName}"
- documentation: <the content above>
Example: @summarize-library-docs with libraryName="${libraryName}"
`;
return documentation + promptInstructions;
} catch (error) {
console.error(`Error fetching URL content:`, error);
// Extract library name from URL
const libraryName = extractLibraryName(url);
const errorMessage = `Error fetching URL content: ${
error instanceof Error ? error.message : String(error)
}`;
// Include error-specific prompt instructions
const promptInstructions = `
---
🔍 For information about this library despite the fetch error, use the "summarize-library-docs" prompt with:
- libraryName: "${libraryName}"
- errorStatus: "${error instanceof Error ? error.message : String(error)}"
Example: @summarize-library-docs with libraryName="${libraryName}" and errorStatus="fetch failed"
`;
return errorMessage + promptInstructions;
}
}
/**
* Compile processed pages into a single markdown document
* @param pages Array of processed pages
* @param libraryName Name of the library
* @returns Compiled markdown document
*/
private compileDocumentation(
pages: ProcessedPage[],
libraryName: string
): string {
const $ = cheerio.load("");
// Create a title for the documentation
let result = `# ${libraryName} Documentation\n\n`;
// Add metadata
result += `## 📋 Documentation Overview\n\n`;
result += `Library Name: ${libraryName}\n`;
result += `Pages Analyzed: ${pages.length}\n`;
result += `Generated: ${new Date().toISOString()}\n\n`;
// Add table of contents
result += `## 📑 Table of Contents\n\n`;
pages.forEach((page, index) => {
result += `${index + 1}. [${page.title}](#${page.title
.toLowerCase()
.replace(/[^a-z0-9]+/g, "-")})\n`;
});
result += `\n`;
// Process each page
pages.forEach((page, index) => {
// Add page header
result += `## ${page.title}\n\n`;
result += `Source: ${page.url}\n\n`;
// Process page content
const pageContent = cheerio.load(page.content);
// Extract headings and their content
const headings = pageContent("h1, h2, h3, h4, h5, h6");
if (headings.length > 0) {
headings.each((_, heading) => {
const level = parseInt(heading.name.replace("h", ""));
const headingText = pageContent(heading).text().trim();
// Add heading
result += `${"#".repeat(level + 1)} ${headingText}\n\n`;
// Get content until next heading
let content = "";
let next = pageContent(heading).next();
while (next.length && !next.is("h1, h2, h3, h4, h5, h6")) {
if (next.is("p, ul, ol, pre, code, table")) {
content += pageContent.html(next) + "\n\n";
}
next = next.next();
}
// Add content
if (content) {
const contentText = $("<div>").html(content).text();
result += `${contentText}\n\n`;
}
});
} else {
// If no headings, just add the whole content
const contentText = $("<div>").html(page.content).text();
result += `${contentText}\n\n`;
}
// Add code examples if available
if (page.codeExamples.length > 0) {
result += `### Code Examples\n\n`;
page.codeExamples.forEach((example) => {
if (example.description) {
result += `#### ${example.description}\n\n`;
}
result += `\`\`\`${example.language}\n${example.code}\n\`\`\`\n\n`;
});
}
// Add API signatures if available
if (page.apiSignatures.length > 0) {
result += `### API Reference\n\n`;
page.apiSignatures.forEach((api) => {
result += `#### ${api.name}\n\n`;
if (api.signature) {
result += `\`\`\`\n${api.signature}\n\`\`\`\n\n`;
}
if (api.description) {
result += `${api.description}\n\n`;
}
});
}
// Add separator between pages
if (index < pages.length - 1) {
result += `---\n\n`;
}
});
// Add instructions for the LLM at the end
result += `## 📌 Instructions for Summarization\n\n`;
result += `1. Provide a concise overview of what this library/package does\n`;
result += `2. Highlight key features and functionality\n`;
result += `3. Include basic usage examples when available\n`;
result += `4. Format the response for readability\n`;
result += `5. If any part of the documentation is unclear, mention this\n`;
result += `6. Include installation instructions if available\n`;
return result;
}
}
// Export a singleton instance: modules importing `scraperService` all use
// this one ScraperService object instead of constructing their own.
export const scraperService = new ScraperService();
```
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
```typescript
#!/usr/bin/env node
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
import * as fs from "fs";
import * as path from "path";
import * as os from "os";
import {
isUrl,
getNpmPackageUrl,
getGitHubRepoUrl,
getPackageUrl,
} from "./utils/packageRepository.js";
import { scraperService } from "./services/ScraperService.js";
// Documentation is stored in a dedicated folder inside the OS temp directory
const DOCS_DIR = path.join(os.tmpdir(), "docs-fetcher-mcp");

// Create that folder (and any missing parents) when it is not present yet
const docsDirMissing = !fs.existsSync(DOCS_DIR);
if (docsDirMissing) {
  fs.mkdirSync(DOCS_DIR, { recursive: true });
}
// Instantiate the MCP server on which the prompts and tools below are registered
const serverInfo = { name: "DocsFetcher", version: "1.0.0" };
const server = new McpServer(serverInfo);
// Define a prompt template for summarizing library documentation.
//
// `documentation` is optional: when fetching failed there is nothing to pass,
// and the caller supplies `errorStatus` instead. In that case (or when neither
// argument is given) the prompt asks for an answer from prior knowledge.
server.prompt(
  "summarize-library-docs",
  {
    libraryName: z.string().describe("Name of the library to summarize"),
    documentation: z
      .string()
      .optional()
      .describe("The raw documentation content"),
    errorStatus: z.string().optional().describe("Error status if applicable"),
  },
  ({ libraryName, documentation, errorStatus }) => {
    // A non-empty errorStatus always wins, even if documentation was passed too
    if (errorStatus || documentation === undefined) {
      const reason = errorStatus || "no documentation was provided";
      return {
        messages: [
          {
            role: "user",
            content: {
              type: "text",
              text: `I was trying to learn about the ${libraryName} library, but there was an error fetching the documentation: ${reason}. Can you tell me what you know about it based on your training?`,
            },
          },
        ],
      };
    }

    return {
      messages: [
        {
          role: "user",
          content: {
            type: "text",
            text: `I need to understand the ${libraryName} library. Here's the raw documentation:
${documentation}
Please summarize this documentation for me with:
1. A brief overview of what the library does
2. Key features and capabilities
3. Basic installation and usage examples
4. Any important API methods or patterns
5. Common use cases
Focus on the most important information that would help me understand and start using this library.`,
          },
        },
      ],
    };
  }
);
// Define a prompt for exploring dependency errors.
//
// `documentation` is optional: the error branch does not use it, so a caller
// that could not fetch any documentation can pass `errorStatus` alone. When
// neither is given the prompt falls back to the same knowledge-based question.
server.prompt(
  "explain-dependency-error",
  {
    packageName: z
      .string()
      .describe("The package causing the dependency error"),
    documentation: z
      .string()
      .optional()
      .describe("The package documentation"),
    errorStatus: z.string().optional().describe("Error status if applicable"),
  },
  ({ packageName, documentation, errorStatus }) => {
    // A non-empty errorStatus always wins, even if documentation was passed too
    if (errorStatus || documentation === undefined) {
      const reason = errorStatus || "no documentation was provided";
      return {
        messages: [
          {
            role: "user",
            content: {
              type: "text",
              text: `I'm getting a dependency error for the '${packageName}' package. There was an issue fetching the detailed documentation: ${reason}. Can you explain what this package does, how to install it properly, and why I might be seeing an error?`,
            },
          },
        ],
      };
    }

    return {
      messages: [
        {
          role: "user",
          content: {
            type: "text",
            text: `I'm getting a dependency error for the '${packageName}' package. Here's the documentation:
${documentation}
Based on this information, please:
1. Explain what this package does
2. Show me how to properly install it
3. Tell me common reasons why I might be getting a dependency error
4. Provide a simple example of how to use it correctly`,
          },
        },
      ],
    };
  }
);
// Tool: fetch documentation given the URL of a library's documentation.
// The input must be a syntactically valid URL (enforced by the zod schema).
server.tool(
  "fetch-url-docs",
  {
    url: z.string().url().describe("URL of the library documentation to fetch"),
  },
  async ({ url }) => {
    console.error(`Fetching documentation from URL: ${url}`);

    let text: string;
    try {
      text = await scraperService.fetchLibraryDocumentation(url);
    } catch (error) {
      // Report the failure as a tool error instead of letting it propagate
      console.error("Error fetching URL content:", error);
      const reason = error instanceof Error ? error.message : String(error);
      return {
        content: [
          {
            type: "text",
            text: `Error fetching URL content: ${reason}`,
          },
        ],
        isError: true,
      };
    }

    return {
      content: [
        {
          type: "text",
          text,
        },
      ],
    };
  }
);
// Tool: fetch documentation for a package name, resolving the package page
// for the given language via getPackageUrl ("javascript" when omitted).
server.tool(
  "fetch-package-docs",
  {
    packageName: z
      .string()
      .describe("Name of the package to fetch documentation for"),
    language: z
      .string()
      .optional()
      .describe(
        "Programming language or repository type (e.g., javascript, python, java, dotnet)"
      ),
  },
  async ({ packageName, language = "javascript" }) => {
    console.error(
      `Fetching documentation for package: ${packageName} (${language})`
    );

    let text: string;
    try {
      const resolvedUrl = getPackageUrl(packageName, language);
      console.error(`Using package URL: ${resolvedUrl}`);
      text = await scraperService.fetchLibraryDocumentation(resolvedUrl);
    } catch (error) {
      // Covers both URL resolution and fetching
      console.error("Error fetching package content:", error);
      const reason = error instanceof Error ? error.message : String(error);
      return {
        content: [
          {
            type: "text",
            text: `Error fetching package documentation: ${reason}`,
          },
        ],
        isError: true,
      };
    }

    return {
      content: [
        {
          type: "text",
          text,
        },
      ],
    };
  }
);
// Tool: fetch documentation from either a package name or a URL.
// URLs are used as they are; anything else is resolved through getPackageUrl
// for the given language ("javascript" when omitted).
server.tool(
  "fetch-library-docs",
  {
    library: z
      .string()
      .describe(
        "Name of the package or URL of the library documentation to fetch"
      ),
    language: z
      .string()
      .optional()
      .describe(
        "Programming language or repository type if providing a package name (e.g., javascript, python, java, dotnet)"
      ),
  },
  async ({ library, language = "javascript" }) => {
    // An explicitly empty language string produces no "(…)" suffix
    const languageSuffix = language ? `(${language})` : "";
    console.error(
      `Fetching documentation for library: ${library} ${languageSuffix}`
    );

    let text: string;
    try {
      const target = isUrl(library)
        ? library
        : getPackageUrl(library, language);
      text = await scraperService.fetchLibraryDocumentation(target);
    } catch (error) {
      console.error("Error fetching library documentation:", error);
      const reason = error instanceof Error ? error.message : String(error);
      return {
        content: [
          {
            type: "text",
            text: `Error fetching library documentation: ${reason}`,
          },
        ],
        isError: true,
      };
    }

    return {
      content: [
        {
          type: "text",
          text,
        },
      ],
    };
  }
);
// Tool to fetch documentation from multiple language repositories at once.
//
// Each requested language is tried in order. The response starts with a
// per-language success/failure summary, followed by the documentation of the
// first language that could actually be fetched. If none succeeds, a tool
// error is returned.
server.tool(
  "fetch-multilingual-docs",
  {
    packageName: z
      .string()
      .describe("Name of the package to fetch documentation for"),
    languages: z
      .array(z.string())
      .describe(
        "List of programming languages or repository types to check (e.g., javascript, python, java)"
      ),
  },
  async ({ packageName, languages }) => {
    type FetchOutcome =
      | { success: true; url: string; content: string }
      | { success: false; error: string };

    // scraperService.fetchLibraryDocumentation does not throw when a download
    // fails; it returns a text starting with this prefix. Such a result must
    // count as a failure, otherwise the first language would always "win".
    const SCRAPER_ERROR_PREFIX = "Error fetching URL content:";

    console.error(
      `Fetching documentation for package: ${packageName} across languages: ${languages.join(
        ", "
      )}`
    );

    // Keyed by language; a Map avoids clashes with Object.prototype names
    const outcomes = new Map<string, FetchOutcome>();

    for (const language of languages) {
      try {
        console.error(`Trying ${language} repository...`);
        const packageUrl = getPackageUrl(packageName, language);
        const documentationContent =
          await scraperService.fetchLibraryDocumentation(packageUrl);

        if (documentationContent.startsWith(SCRAPER_ERROR_PREFIX)) {
          // Only the first line is the error itself; the rest is prompt advice
          const [firstLine = documentationContent] =
            documentationContent.split("\n", 1);
          console.error(`Error fetching ${language} documentation:`, firstLine);
          outcomes.set(language, { success: false, error: firstLine });
          continue;
        }

        outcomes.set(language, {
          url: packageUrl,
          success: true,
          content: documentationContent,
        });
      } catch (error) {
        console.error(`Error fetching ${language} documentation:`, error);
        outcomes.set(language, {
          success: false,
          error: error instanceof Error ? error.message : String(error),
        });
      }
    }

    // The first requested language with usable documentation provides the content
    const bestLanguage = languages.find(
      (language) => outcomes.get(language)?.success === true
    );
    const best =
      bestLanguage === undefined ? undefined : outcomes.get(bestLanguage);

    if (bestLanguage === undefined || best === undefined || !best.success) {
      return {
        content: [
          {
            type: "text",
            text: `Failed to fetch documentation for ${packageName} in any of the requested languages: ${languages.join(
              ", "
            )}.`,
          },
        ],
        isError: true,
      };
    }

    // Include a summary of all language results
    const summaryLines = [
      `## Documentation Search Results for '${packageName}'`,
      "",
    ];
    for (const language of languages) {
      const outcome = outcomes.get(language);
      if (outcome === undefined) {
        continue; // cannot happen: every requested language has an entry
      }
      summaryLines.push(
        outcome.success
          ? `✅ **${language}**: Successfully fetched documentation from ${outcome.url}`
          : `❌ **${language}**: Failed - ${outcome.error}`
      );
    }
    summaryLines.push(
      "",
      `---`,
      "",
      `# Documentation Content (from ${bestLanguage} repository)`,
      ""
    );

    return {
      content: [
        {
          type: "text",
          text: summaryLines.join("\n") + best.content,
        },
      ],
    };
  }
);
// Connect the server to a StdioServerTransport; a failed connection is fatal:
// the error is logged and the process exits with status 1.
const transport = new StdioServerTransport();
const exitOnServerError = (error: Error): void => {
  console.error("Server error:", error);
  process.exit(1);
};
server.connect(transport).catch(exitOnServerError);
```