# Directory Structure
```
├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── src
│   ├── api-client.ts
│   ├── handler-registry.ts
│   ├── handlers
│   │   ├── add-documentation.ts
│   │   ├── base-handler.ts
│   │   ├── clear-queue.ts
│   │   ├── extract-urls.ts
│   │   ├── index.ts
│   │   ├── list-queue.ts
│   │   ├── list-sources.ts
│   │   ├── remove-documentation.ts
│   │   ├── run-queue.ts
│   │   └── search-documentation.ts
│   ├── index.ts
│   ├── tools
│   │   ├── base-tool.ts
│   │   ├── clear-queue.ts
│   │   ├── extract-urls.ts
│   │   ├── index.ts
│   │   ├── list-queue.ts
│   │   ├── list-sources.ts
│   │   ├── remove-documentation.ts
│   │   ├── run-queue.ts
│   │   └── search-documentation.ts
│   └── types.ts
└── tsconfig.json
```
# Files
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
1 | # Dependencies
2 | node_modules/
3 | .pnp/
4 | .pnp.js
5 |
6 | # Build output
7 | build/
8 | dist/
9 | *.tsbuildinfo
10 |
11 | # Environment variables
12 | .env
13 | .env.local
14 | .env.development.local
15 | .env.test.local
16 | .env.production.local
17 |
18 | # Logs
19 | logs/
20 | *.log
21 | npm-debug.log*
22 | yarn-debug.log*
23 | yarn-error.log*
24 |
25 | # Editor directories and files
26 | .idea/
27 | .vscode/
28 | *.swp
29 | *.swo
30 | .DS_Store
31 |
32 | # Test coverage
33 | coverage/
34 |
35 | # Local documentation files
36 | INTERNAL.TXT
37 | queue.txt
38 | MCPguide.txt
39 |
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
1 | # RAG Documentation MCP Server
2 |
3 | An MCP server implementation that provides tools for retrieving and processing documentation through vector search, enabling AI assistants to augment their responses with relevant documentation context.
4 |
5 | <a href="https://glama.ai/mcp/servers/54hsrjhmq9"><img width="380" height="200" src="https://glama.ai/mcp/servers/54hsrjhmq9/badge" alt="mcp-ragdocs MCP server" /></a>
6 |
7 | ## Features
8 |
9 | - Vector-based documentation search and retrieval
10 | - Support for multiple documentation sources
11 | - Semantic search capabilities
12 | - Automated documentation processing
13 | - Real-time context augmentation for LLMs
14 |
15 | ## Tools
16 |
17 | ### search_documentation
18 | Search through stored documentation using natural language queries. Returns matching excerpts with context, ranked by relevance.
19 |
20 | **Inputs:**
21 | - `query` (string): The text to search for in the documentation. Can be a natural language query, specific terms, or code snippets.
22 | - `limit` (number, optional): Maximum number of results to return (1-20, default: 5). Higher limits provide more comprehensive results but may take longer to process.
23 |
24 | ### list_sources
25 | List all documentation sources currently stored in the system. Returns a comprehensive list of all indexed documentation including source URLs, titles, and last update times. Use this to understand what documentation is available for searching or to verify if specific sources have been indexed.
26 |
27 | ### extract_urls
28 | Extract and analyze all URLs from a given web page. This tool crawls the specified webpage, identifies all hyperlinks, and optionally adds them to the processing queue.
29 |
30 | **Inputs:**
31 | - `url` (string): The complete URL of the webpage to analyze (must include protocol, e.g., https://). The page must be publicly accessible.
32 | - `add_to_queue` (boolean, optional): If true, automatically add extracted URLs to the processing queue for later indexing. Use with caution on large sites to avoid excessive queuing.
33 |
34 | ### remove_documentation
35 | Remove specific documentation sources from the system by their URLs. The removal is permanent and will affect future search results.
36 |
37 | **Inputs:**
38 | - `urls` (string[]): Array of URLs to remove from the database. Each URL must exactly match the URL used when the documentation was added.
39 |
40 | ### list_queue
41 | List all URLs currently waiting in the documentation processing queue. Shows pending documentation sources that will be processed when run_queue is called. Use this to monitor queue status, verify URLs were added correctly, or check processing backlog.
42 |
43 | ### run_queue
44 | Process and index all URLs currently in the documentation queue. Each URL is processed sequentially with per-URL error handling; failed URLs are recorded and reported in the final summary instead of stopping the run. Processing continues until the queue is empty or an unrecoverable error occurs.
45 |
46 | ### clear_queue
47 | Remove all pending URLs from the documentation processing queue. Use this to reset the queue when you want to start fresh, remove unwanted URLs, or cancel pending processing. This operation is immediate and permanent - URLs will need to be re-added if you want to process them later.
48 |
49 | ## Usage
50 |
51 | The RAG Documentation tool is designed for:
52 |
53 | - Enhancing AI responses with relevant documentation
54 | - Building documentation-aware AI assistants
55 | - Creating context-aware tooling for developers
56 | - Implementing semantic documentation search
57 | - Augmenting existing knowledge bases
58 |
59 | ## Configuration
60 |
61 | ### Usage with Claude Desktop
62 |
63 | Add this to your `claude_desktop_config.json`:
64 |
65 | ```json
66 | {
67 | "mcpServers": {
68 | "rag-docs": {
69 | "command": "npx",
70 | "args": [
71 | "-y",
72 | "@hannesrudolph/mcp-ragdocs"
73 | ],
74 | "env": {
75 | "OPENAI_API_KEY": "",
76 | "QDRANT_URL": "",
77 | "QDRANT_API_KEY": ""
78 | }
79 | }
80 | }
81 | }
82 | ```
83 |
84 | You'll need to provide values for the following environment variables:
85 | - `OPENAI_API_KEY`: Your OpenAI API key for embeddings generation
86 | - `QDRANT_URL`: URL of your Qdrant vector database instance
87 | - `QDRANT_API_KEY`: API key for authenticating with Qdrant
88 |
89 | ## License
90 |
91 | This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software, subject to the terms and conditions of the MIT License. For more details, please see the LICENSE file in the project repository.
92 |
93 | ## Acknowledgments
94 |
95 | This project is a fork of [qpd-v/mcp-ragdocs](https://github.com/qpd-v/mcp-ragdocs), originally developed by qpd-v. The original project provided the foundation for this implementation.
96 |
```
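
For illustration, here is a minimal sketch of calling these tools from an MCP client over stdio, assuming the `Client` and `StdioClientTransport` classes exported by `@modelcontextprotocol/sdk` 1.x; the client name, query text, and environment handling are placeholders and are not part of this repository:

```typescript
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';

// Hypothetical client wiring; Claude Desktop performs the equivalent of this
// based on the claude_desktop_config.json entry shown above.
async function main() {
  const transport = new StdioClientTransport({
    command: 'npx',
    args: ['-y', '@hannesrudolph/mcp-ragdocs'],
    env: {
      OPENAI_API_KEY: process.env.OPENAI_API_KEY ?? '',
      QDRANT_URL: process.env.QDRANT_URL ?? '',
      QDRANT_API_KEY: process.env.QDRANT_API_KEY ?? '',
    },
  });

  const client = new Client(
    { name: 'example-client', version: '0.0.0' },
    { capabilities: {} }
  );
  await client.connect(transport);

  // Call the search_documentation tool; arguments follow its inputSchema.
  const result = await client.callTool({
    name: 'search_documentation',
    arguments: { query: 'How do I configure the vector database?', limit: 5 },
  });
  console.log(result.content);

  await client.close();
}

main().catch(console.error);
```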
--------------------------------------------------------------------------------
/src/tools/index.ts:
--------------------------------------------------------------------------------
```typescript
1 | export * from './search-documentation.js';
2 | export * from './list-sources.js';
3 | export * from './extract-urls.js';
4 | export * from './remove-documentation.js';
5 | export * from './list-queue.js';
6 | export * from './run-queue.js';
7 | export * from './clear-queue.js';
```
--------------------------------------------------------------------------------
/src/handlers/index.ts:
--------------------------------------------------------------------------------
```typescript
1 | export * from './base-handler.js';
2 | export * from './add-documentation.js';
3 | export * from './search-documentation.js';
4 | export * from './list-sources.js';
5 | export * from './extract-urls.js';
6 | export * from './remove-documentation.js';
7 | export * from './list-queue.js';
8 | export * from './run-queue.js';
9 | export * from './clear-queue.js';
```
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "compilerOptions": {
3 | "target": "ES2022",
4 | "module": "Node16",
5 | "moduleResolution": "Node16",
6 | "outDir": "./build",
7 | "rootDir": "./src",
8 | "strict": true,
9 | "esModuleInterop": true,
10 | "skipLibCheck": true,
11 | "forceConsistentCasingInFileNames": true
12 | },
13 | "include": ["src/**/*"],
14 | "exclude": ["node_modules"]
15 | }
16 |
```
--------------------------------------------------------------------------------
/src/handlers/clear-queue.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { Server } from '@modelcontextprotocol/sdk/server/index.js';
2 | import { ApiClient } from '../api-client.js';
3 | import { ClearQueueTool } from '../tools/clear-queue.js';
4 |
5 | export class ClearQueueHandler extends ClearQueueTool {
6 | constructor(server: Server, apiClient: ApiClient) {
7 | super();
8 | }
9 |
10 | async handle(args: any) {
11 | return this.execute(args);
12 | }
13 | }
```
--------------------------------------------------------------------------------
/src/handlers/base-handler.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { Server } from '@modelcontextprotocol/sdk/server/index.js';
2 | import { ApiClient } from '../api-client.js';
3 | import { McpToolResponse } from '../types.js';
4 |
5 | export abstract class BaseHandler {
6 | protected server: Server;
7 | protected apiClient: ApiClient;
8 |
9 | constructor(server: Server, apiClient: ApiClient) {
10 | this.server = server;
11 | this.apiClient = apiClient;
12 | }
13 |
14 | protected abstract handle(args: any): Promise<McpToolResponse>;
15 | }
```
--------------------------------------------------------------------------------
/src/tools/base-tool.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { ToolDefinition, McpToolResponse } from '../types.js';
2 |
3 | export abstract class BaseTool {
4 | abstract get definition(): ToolDefinition;
5 | abstract execute(args: unknown): Promise<McpToolResponse>;
6 |
7 | protected formatResponse(data: unknown): McpToolResponse {
8 | return {
9 | content: [
10 | {
11 | type: 'text',
12 | text: JSON.stringify(data, null, 2),
13 | },
14 | ],
15 | };
16 | }
17 |
18 | protected handleError(error: any): McpToolResponse {
19 | return {
20 | content: [
21 | {
22 | type: 'text',
23 | text: `Error: ${error}`,
24 | },
25 | ],
26 | isError: true,
27 | };
28 | }
29 | }
```
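
As a usage sketch of this base class, a hypothetical `EchoTool` is shown below; the tool name and behavior are illustrative only and do not exist in this repository:

```typescript
import { BaseTool } from './base-tool.js';
import { ToolDefinition, McpToolResponse } from '../types.js';

// Hypothetical example: a trivial tool that echoes its input back,
// showing how definition/execute pair with formatResponse/handleError.
export class EchoTool extends BaseTool {
  get definition(): ToolDefinition {
    return {
      name: 'echo',
      description: 'Echo the provided message back to the caller',
      inputSchema: {
        type: 'object',
        properties: {
          message: { type: 'string', description: 'Text to echo' },
        },
        required: ['message'],
      },
    };
  }

  async execute(args: unknown): Promise<McpToolResponse> {
    try {
      const { message } = args as { message: string };
      return this.formatResponse({ echoed: message });
    } catch (error) {
      return this.handleError(error);
    }
  }
}
```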
--------------------------------------------------------------------------------
/src/types.ts:
--------------------------------------------------------------------------------
```typescript
1 | export interface DocumentChunk {
2 | text: string;
3 | url: string;
4 | title: string;
5 | timestamp: string;
6 | }
7 |
8 | export interface DocumentPayload extends DocumentChunk {
9 | _type: 'DocumentChunk';
10 | [key: string]: unknown;
11 | }
12 |
13 | export function isDocumentPayload(payload: unknown): payload is DocumentPayload {
14 | if (!payload || typeof payload !== 'object') return false;
15 | const p = payload as Partial<DocumentPayload>;
16 | return (
17 | p._type === 'DocumentChunk' &&
18 | typeof p.text === 'string' &&
19 | typeof p.url === 'string' &&
20 | typeof p.title === 'string' &&
21 | typeof p.timestamp === 'string'
22 | );
23 | }
24 |
25 | export interface ToolDefinition {
26 | name: string;
27 | description: string;
28 | inputSchema: {
29 | type: string;
30 | properties: Record<string, any>;
31 | required: string[];
32 | };
33 | }
34 |
35 | export interface McpToolResponse {
36 | content: Array<{
37 | type: string;
38 | text: string;
39 | }>;
40 | isError?: boolean;
41 | }
```
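
A small sketch of how the `isDocumentPayload` guard is meant to narrow an untyped Qdrant payload before use; the sample payload values are made up for illustration:

```typescript
import { isDocumentPayload } from './types.js';

// Illustrative only: a payload shaped like what Qdrant might return.
const payload: unknown = {
  _type: 'DocumentChunk',
  text: 'Example chunk text',
  url: 'https://example.com/docs',
  title: 'Example Docs',
  timestamp: new Date().toISOString(),
};

if (isDocumentPayload(payload)) {
  // Inside this branch the payload is typed as DocumentPayload,
  // so these fields can be accessed without casts.
  console.log(`${payload.title} (${payload.url})`);
}
```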
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
```typescript
1 | #!/usr/bin/env node
2 | import { Server } from '@modelcontextprotocol/sdk/server/index.js';
3 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
4 | import { ApiClient } from './api-client.js';
5 | import { HandlerRegistry } from './handler-registry.js';
6 |
7 | class RagDocsServer {
8 | private server: Server;
9 | private apiClient: ApiClient;
10 | private handlerRegistry: HandlerRegistry;
11 |
12 | constructor() {
13 | this.server = new Server(
14 | {
15 | name: 'mcp-ragdocs',
16 | version: '0.1.0',
17 | },
18 | {
19 | capabilities: {
20 | tools: {},
21 | },
22 | }
23 | );
24 |
25 | this.apiClient = new ApiClient();
26 | this.handlerRegistry = new HandlerRegistry(this.server, this.apiClient);
27 |
28 | // Error handling
29 | this.server.onerror = (error) => console.error('[MCP Error]', error);
30 | process.on('SIGINT', async () => {
31 | await this.cleanup();
32 | process.exit(0);
33 | });
34 | }
35 |
36 | private async cleanup() {
37 | await this.apiClient.cleanup();
38 | await this.server.close();
39 | }
40 |
41 | async run() {
42 | const transport = new StdioServerTransport();
43 | await this.server.connect(transport);
44 | console.error('RAG Docs MCP server running on stdio');
45 | }
46 | }
47 |
48 | const server = new RagDocsServer();
49 | server.run().catch(console.error);
```
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "name": "@hannesrudolph/mcp-ragdocs",
3 | "version": "1.1.0",
4 | "description": "An MCP server for semantic documentation search and retrieval using vector databases to augment LLM capabilities.",
5 | "private": false,
6 | "type": "module",
7 | "bin": {
8 | "@hannesrudolph/mcp-ragdocs": "./build/index.js"
9 | },
10 | "files": [
11 | "build",
12 | "README.md",
13 | "LICENSE"
14 | ],
15 | "scripts": {
16 | "build": "tsc && node -e \"require('fs').chmodSync('build/index.js', '755')\"",
17 | "prepare": "npm run build",
18 | "watch": "tsc --watch",
19 | "inspector": "npx @modelcontextprotocol/inspector build/index.js",
20 | "start": "node build/index.js"
21 | },
22 | "keywords": [
23 | "mcp",
24 | "model-context-protocol",
25 | "rag",
26 | "documentation",
27 | "vector-database",
28 | "qdrant",
29 | "claude",
30 | "llm"
31 | ],
32 | "author": "hannesrudolph",
33 | "license": "MIT",
34 | "repository": {
35 | "type": "git",
36 | "url": "git+https://github.com/hannesrudolph/mcp-ragdocs.git"
37 | },
38 | "bugs": {
39 | "url": "https://github.com/hannesrudolph/mcp-ragdocs/issues"
40 | },
41 | "homepage": "https://github.com/hannesrudolph/mcp-ragdocs#readme",
42 | "dependencies": {
43 | "@azure/openai": "2.0.0",
44 | "@modelcontextprotocol/sdk": "1.0.3",
45 | "@qdrant/js-client-rest": "1.12.0",
46 | "axios": "1.7.9",
47 | "cheerio": "1.0.0",
48 | "openai": "4.76.2",
49 | "playwright": "1.49.1"
50 | },
51 | "devDependencies": {
52 | "@types/node": "^20.17.10",
53 | "ts-node": "^10.9.2",
54 | "typescript": "^5.7.2"
55 | },
56 | "publishConfig": {
57 | "access": "public"
58 | }
59 | }
60 |
```
--------------------------------------------------------------------------------
/src/handlers/list-queue.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { Server } from '@modelcontextprotocol/sdk/server/index.js';
2 | import { ApiClient } from '../api-client.js';
3 | import { BaseHandler } from './base-handler.js';
4 | import fs from 'fs/promises';
5 | import path from 'path';
6 | import { fileURLToPath } from 'url';
7 |
8 | // Get current directory in ES modules
9 | const __filename = fileURLToPath(import.meta.url);
10 | const __dirname = path.dirname(__filename);
11 | const QUEUE_FILE = path.join(__dirname, '..', '..', 'queue.txt');
12 |
13 | export class ListQueueHandler extends BaseHandler {
14 | constructor(server: Server, apiClient: ApiClient) {
15 | super(server, apiClient);
16 | }
17 |
18 | async handle(_args: any) {
19 | try {
20 | // Check if queue file exists
21 | try {
22 | await fs.access(QUEUE_FILE);
23 | } catch {
24 | return {
25 | content: [
26 | {
27 | type: 'text',
28 | text: 'Queue is empty (queue file does not exist)',
29 | },
30 | ],
31 | };
32 | }
33 |
34 | // Read queue file
35 | const content = await fs.readFile(QUEUE_FILE, 'utf-8');
36 | const urls = content.split('\n').filter(url => url.trim() !== '');
37 |
38 | if (urls.length === 0) {
39 | return {
40 | content: [
41 | {
42 | type: 'text',
43 | text: 'Queue is empty',
44 | },
45 | ],
46 | };
47 | }
48 |
49 | return {
50 | content: [
51 | {
52 | type: 'text',
53 | text: `Queue contains ${urls.length} URLs:\n${urls.join('\n')}`,
54 | },
55 | ],
56 | };
57 | } catch (error) {
58 | return {
59 | content: [
60 | {
61 | type: 'text',
62 | text: `Failed to read queue: ${error}`,
63 | },
64 | ],
65 | isError: true,
66 | };
67 | }
68 | }
69 | }
```
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
```markdown
1 | # Changelog
2 |
3 | ## [1.1.0] - 2024-03-14
4 |
5 | ### Initial Feature Addition
6 | - Implemented new clear_queue tool for queue management
7 | - Created src/tools/clear-queue.ts with core functionality
8 | - Added handler in src/handlers/clear-queue.ts
9 | - Integrated with existing queue management system
10 | - Added tool exports and registration
11 |
12 | ### Code Organization
13 | - Improved tool ordering in handler-registry.ts
14 | - Moved remove_documentation before extract_urls
15 | - Enhanced logical grouping of related tools
16 | - Updated imports to match new ordering
17 |
18 | ### Documentation Enhancement Phase 1
19 | - Enhanced tool descriptions in handler-registry.ts:
20 | 1. search_documentation
21 | - Added natural language query support details
22 | - Clarified result ranking and context
23 | - Improved limit parameter documentation
24 | 2. list_sources
25 | - Added details about indexed documentation
26 | - Clarified source information returned
27 | 3. extract_urls
28 | - Enhanced URL crawling explanation
29 | - Added queue integration details
30 | - Clarified URL validation requirements
31 | 4. remove_documentation
32 | - Added permanence warning
33 | - Clarified URL matching requirements
34 | 5. list_queue
35 | - Added queue monitoring details
36 | - Clarified status checking capabilities
37 | 6. run_queue
38 | - Added processing behavior details
39 | - Documented error handling
40 | 7. clear_queue
41 | - Detailed queue clearing behavior
42 | - Added permanence warnings
43 | - Documented URL re-adding requirements
44 |
45 | ### Documentation Enhancement Phase 2
46 | - Updated README.md
47 | - Removed add_documentation and queue_documentation tools
48 | - Updated tool descriptions to match handler-registry.ts
49 | - Added parameter format requirements
50 | - Enhanced usage guidance
```
--------------------------------------------------------------------------------
/src/tools/list-queue.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { BaseTool } from './base-tool.js';
2 | import { ToolDefinition, McpToolResponse } from '../types.js';
3 | import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
4 | import fs from 'fs/promises';
5 | import path from 'path';
6 |
7 | const QUEUE_FILE = path.join(process.cwd(), 'queue.txt');
8 |
9 | export class ListQueueTool extends BaseTool {
10 | constructor() {
11 | super();
12 | }
13 |
14 | get definition(): ToolDefinition {
15 | return {
16 | name: 'list_queue',
17 | description: 'List all URLs currently in the documentation processing queue',
18 | inputSchema: {
19 | type: 'object',
20 | properties: {},
21 | required: [],
22 | },
23 | };
24 | }
25 |
26 | async execute(_args: any): Promise<McpToolResponse> {
27 | try {
28 | // Check if queue file exists
29 | try {
30 | await fs.access(QUEUE_FILE);
31 | } catch {
32 | return {
33 | content: [
34 | {
35 | type: 'text',
36 | text: 'Queue is empty (queue file does not exist)',
37 | },
38 | ],
39 | };
40 | }
41 |
42 | // Read queue file
43 | const content = await fs.readFile(QUEUE_FILE, 'utf-8');
44 | const urls = content.split('\n').filter(url => url.trim() !== '');
45 |
46 | if (urls.length === 0) {
47 | return {
48 | content: [
49 | {
50 | type: 'text',
51 | text: 'Queue is empty',
52 | },
53 | ],
54 | };
55 | }
56 |
57 | return {
58 | content: [
59 | {
60 | type: 'text',
61 | text: `Queue contains ${urls.length} URLs:\n${urls.join('\n')}`,
62 | },
63 | ],
64 | };
65 | } catch (error) {
66 | return {
67 | content: [
68 | {
69 | type: 'text',
70 | text: `Failed to read queue: ${error}`,
71 | },
72 | ],
73 | isError: true,
74 | };
75 | }
76 | }
77 | }
```
--------------------------------------------------------------------------------
/src/tools/clear-queue.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { BaseTool } from './base-tool.js';
2 | import { ToolDefinition, McpToolResponse } from '../types.js';
3 | import fs from 'fs/promises';
4 | import path from 'path';
5 | import { fileURLToPath } from 'url';
6 |
7 | // Get current directory in ES modules
8 | const __filename = fileURLToPath(import.meta.url);
9 | const __dirname = path.dirname(__filename);
10 | const QUEUE_FILE = path.join(__dirname, '..', '..', 'queue.txt');
11 |
12 | export class ClearQueueTool extends BaseTool {
13 | get definition(): ToolDefinition {
14 | return {
15 | name: 'clear_queue',
16 | description: 'Clear all URLs from the queue',
17 | inputSchema: {
18 | type: 'object',
19 | properties: {},
20 | required: [],
21 | },
22 | };
23 | }
24 |
25 | async execute(_args: any): Promise<McpToolResponse> {
26 | try {
27 | // Check if queue file exists
28 | try {
29 | await fs.access(QUEUE_FILE);
30 | } catch {
31 | return {
32 | content: [
33 | {
34 | type: 'text',
35 | text: 'Queue is already empty (queue file does not exist)',
36 | },
37 | ],
38 | };
39 | }
40 |
41 | // Read current queue to get count of URLs being cleared
42 | const content = await fs.readFile(QUEUE_FILE, 'utf-8');
43 | const urlCount = content.split('\n').filter(url => url.trim() !== '').length;
44 |
45 | // Clear the queue by emptying the file
46 | await fs.writeFile(QUEUE_FILE, '');
47 |
48 | return {
49 | content: [
50 | {
51 | type: 'text',
52 | text: `Queue cleared successfully. Removed ${urlCount} URL${urlCount === 1 ? '' : 's'} from the queue.`,
53 | },
54 | ],
55 | };
56 | } catch (error) {
57 | return {
58 | content: [
59 | {
60 | type: 'text',
61 | text: `Failed to clear queue: ${error}`,
62 | },
63 | ],
64 | isError: true,
65 | };
66 | }
67 | }
68 | }
```
--------------------------------------------------------------------------------
/src/handlers/search-documentation.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
2 | import { BaseHandler } from './base-handler.js';
3 | import { McpToolResponse, isDocumentPayload } from '../types.js';
4 |
5 | const COLLECTION_NAME = 'documentation';
6 |
7 | export class SearchDocumentationHandler extends BaseHandler {
8 | async handle(args: any): Promise<McpToolResponse> {
9 | if (!args.query || typeof args.query !== 'string') {
10 | throw new McpError(ErrorCode.InvalidParams, 'Query is required');
11 | }
12 |
13 | const limit = args.limit || 5;
14 |
15 | try {
16 | const queryEmbedding = await this.apiClient.getEmbeddings(args.query);
17 |
18 | const searchResults = await this.apiClient.qdrantClient.search(COLLECTION_NAME, {
19 | vector: queryEmbedding,
20 | limit,
21 | with_payload: true,
22 | with_vector: false, // Optimize network transfer by not retrieving vectors
23 | score_threshold: 0.7, // Only return relevant results
24 | });
25 |
26 | const formattedResults = searchResults.map(result => {
27 | if (!isDocumentPayload(result.payload)) {
28 | throw new Error('Invalid payload type');
29 | }
30 | return `[${result.payload.title}](${result.payload.url})\nScore: ${result.score.toFixed(3)}\nContent: ${result.payload.text}\n`;
31 | }).join('\n---\n');
32 |
33 | return {
34 | content: [
35 | {
36 | type: 'text',
37 | text: formattedResults || 'No results found matching the query.',
38 | },
39 | ],
40 | };
41 | } catch (error) {
42 | if (error instanceof Error) {
43 | if (error.message.includes('unauthorized')) {
44 | throw new McpError(
45 | ErrorCode.InvalidRequest,
46 | 'Failed to authenticate with Qdrant cloud while searching'
47 | );
48 | } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) {
49 | throw new McpError(
50 | ErrorCode.InternalError,
51 | 'Connection to Qdrant cloud failed while searching'
52 | );
53 | }
54 | }
55 | return {
56 | content: [
57 | {
58 | type: 'text',
59 | text: `Search failed: ${error}`,
60 | },
61 | ],
62 | isError: true,
63 | };
64 | }
65 | }
66 | }
```
--------------------------------------------------------------------------------
/src/handlers/remove-documentation.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
2 | import { BaseHandler } from './base-handler.js';
3 | import { McpToolResponse } from '../types.js';
4 |
5 | const COLLECTION_NAME = 'documentation';
6 |
7 | export class RemoveDocumentationHandler extends BaseHandler {
8 | async handle(args: any): Promise<McpToolResponse> {
9 | if (!args.urls || !Array.isArray(args.urls) || args.urls.length === 0) {
10 | throw new McpError(ErrorCode.InvalidParams, 'urls must be a non-empty array');
11 | }
12 |
13 | if (!args.urls.every((url: string) => typeof url === 'string')) {
14 | throw new McpError(ErrorCode.InvalidParams, 'All URLs must be strings');
15 | }
16 |
17 | try {
18 | // Delete using filter to match any of the provided URLs
19 | const result = await this.apiClient.qdrantClient.delete(COLLECTION_NAME, {
20 | filter: {
21 | should: args.urls.map((url: string) => ({
22 | key: 'url',
23 | match: { value: url }
24 | }))
25 | },
26 | wait: true // Ensure deletion is complete before responding
27 | });
28 |
29 | if (!['acknowledged', 'completed'].includes(result.status)) {
30 | throw new Error('Delete operation failed');
31 | }
32 |
33 | return {
34 | content: [
35 | {
36 | type: 'text',
37 | text: `Successfully removed documentation from ${args.urls.length} source${args.urls.length > 1 ? 's' : ''}: ${args.urls.join(', ')}`,
38 | },
39 | ],
40 | };
41 | } catch (error) {
42 | if (error instanceof Error) {
43 | if (error.message.includes('unauthorized')) {
44 | throw new McpError(
45 | ErrorCode.InvalidRequest,
46 | 'Failed to authenticate with Qdrant cloud while removing documentation'
47 | );
48 | } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) {
49 | throw new McpError(
50 | ErrorCode.InternalError,
51 | 'Connection to Qdrant cloud failed while removing documentation'
52 | );
53 | }
54 | }
55 | return {
56 | content: [
57 | {
58 | type: 'text',
59 | text: `Failed to remove documentation: ${error}`,
60 | },
61 | ],
62 | isError: true,
63 | };
64 | }
65 | }
66 | }
```
--------------------------------------------------------------------------------
/src/tools/list-sources.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { BaseTool } from './base-tool.js';
2 | import { ToolDefinition, McpToolResponse, isDocumentPayload } from '../types.js';
3 | import { ApiClient } from '../api-client.js';
4 | import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
5 |
6 | const COLLECTION_NAME = 'documentation';
7 |
8 | export class ListSourcesTool extends BaseTool {
9 | private apiClient: ApiClient;
10 |
11 | constructor(apiClient: ApiClient) {
12 | super();
13 | this.apiClient = apiClient;
14 | }
15 |
16 | get definition(): ToolDefinition {
17 | return {
18 | name: 'list_sources',
19 | description: 'List all documentation sources currently stored',
20 | inputSchema: {
21 | type: 'object',
22 | properties: {},
23 | required: [],
24 | },
25 | };
26 | }
27 |
28 | async execute(args: any): Promise<McpToolResponse> {
29 | try {
30 | // Use pagination for better performance with large datasets
31 | const pageSize = 100;
32 | let offset: string | null = null;
33 | const sources = new Set<string>();
34 |
35 | while (true) {
36 | const scroll = await this.apiClient.qdrantClient.scroll(COLLECTION_NAME, {
37 | with_payload: true,
38 | with_vector: false, // Optimize network transfer
39 | limit: pageSize,
40 | offset,
41 | });
42 |
43 | if (scroll.points.length === 0) break;
44 |
45 | for (const point of scroll.points) {
46 | if (isDocumentPayload(point.payload)) {
47 | sources.add(`${point.payload.title} (${point.payload.url})`);
48 | }
49 | }
50 |
51 | if (scroll.points.length < pageSize) break;
52 | offset = scroll.points[scroll.points.length - 1].id as string;
53 | }
54 |
55 | return {
56 | content: [
57 | {
58 | type: 'text',
59 | text: Array.from(sources).join('\n') || 'No documentation sources found in the cloud collection.',
60 | },
61 | ],
62 | };
63 | } catch (error) {
64 | if (error instanceof Error) {
65 | if (error.message.includes('unauthorized')) {
66 | throw new McpError(
67 | ErrorCode.InvalidRequest,
68 | 'Failed to authenticate with Qdrant cloud while listing sources'
69 | );
70 | } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) {
71 | throw new McpError(
72 | ErrorCode.InternalError,
73 | 'Connection to Qdrant cloud failed while listing sources'
74 | );
75 | }
76 | }
77 | return {
78 | content: [
79 | {
80 | type: 'text',
81 | text: `Failed to list sources: ${error}`,
82 | },
83 | ],
84 | isError: true,
85 | };
86 | }
87 | }
88 | }
```
--------------------------------------------------------------------------------
/src/handlers/run-queue.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { Server } from '@modelcontextprotocol/sdk/server/index.js';
2 | import { ApiClient } from '../api-client.js';
3 | import { BaseHandler } from './base-handler.js';
4 | import { McpToolResponse } from '../types.js';
5 | import { AddDocumentationHandler } from './add-documentation.js';
6 | import fs from 'fs/promises';
7 | import path from 'path';
8 | import { fileURLToPath } from 'url';
9 |
10 | // Get current directory in ES modules
11 | const __filename = fileURLToPath(import.meta.url);
12 | const __dirname = path.dirname(__filename);
13 | const QUEUE_FILE = path.join(__dirname, '..', '..', 'queue.txt');
14 |
15 | export class RunQueueHandler extends BaseHandler {
16 | private addDocHandler: AddDocumentationHandler;
17 |
18 | constructor(server: Server, apiClient: ApiClient) {
19 | super(server, apiClient);
20 | this.addDocHandler = new AddDocumentationHandler(server, apiClient);
21 | }
22 |
23 | async handle(_args: any): Promise<McpToolResponse> {
24 | try {
25 | // Check if queue file exists
26 | try {
27 | await fs.access(QUEUE_FILE);
28 | } catch {
29 | return {
30 | content: [
31 | {
32 | type: 'text',
33 | text: 'Queue is empty (queue file does not exist)',
34 | },
35 | ],
36 | };
37 | }
38 |
39 | let processedCount = 0;
40 | let failedCount = 0;
41 | const failedUrls: string[] = [];
42 |
43 | while (true) {
44 | // Read current queue
45 | const content = await fs.readFile(QUEUE_FILE, 'utf-8');
46 | const urls = content.split('\n').filter(url => url.trim() !== '');
47 |
48 | if (urls.length === 0) {
49 | break; // Queue is empty
50 | }
51 |
52 | const currentUrl = urls[0]; // Get first URL
53 |
54 | try {
55 | // Process the URL using add_documentation handler
56 | await this.addDocHandler.handle({ url: currentUrl });
57 | processedCount++;
58 | } catch (error) {
59 | failedCount++;
60 | failedUrls.push(currentUrl);
61 | console.error(`Failed to process URL ${currentUrl}:`, error);
62 | }
63 |
64 | // Remove the processed URL from queue
65 | const remainingUrls = urls.slice(1);
66 | await fs.writeFile(QUEUE_FILE, remainingUrls.join('\n') + (remainingUrls.length > 0 ? '\n' : ''));
67 | }
68 |
69 | let resultText = `Queue processing complete.\nProcessed: ${processedCount} URLs\nFailed: ${failedCount} URLs`;
70 | if (failedUrls.length > 0) {
71 | resultText += `\n\nFailed URLs:\n${failedUrls.join('\n')}`;
72 | }
73 |
74 | return {
75 | content: [
76 | {
77 | type: 'text',
78 | text: resultText,
79 | },
80 | ],
81 | };
82 | } catch (error) {
83 | return {
84 | content: [
85 | {
86 | type: 'text',
87 | text: `Failed to process queue: ${error}`,
88 | },
89 | ],
90 | isError: true,
91 | };
92 | }
93 | }
94 | }
```
--------------------------------------------------------------------------------
/src/tools/search-documentation.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { BaseTool } from './base-tool.js';
2 | import { ToolDefinition, McpToolResponse, isDocumentPayload } from '../types.js';
3 | import { ApiClient } from '../api-client.js';
4 | import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
5 |
6 | const COLLECTION_NAME = 'documentation';
7 |
8 | export class SearchDocumentationTool extends BaseTool {
9 | private apiClient: ApiClient;
10 |
11 | constructor(apiClient: ApiClient) {
12 | super();
13 | this.apiClient = apiClient;
14 | }
15 |
16 | get definition(): ToolDefinition {
17 | return {
18 | name: 'search_documentation',
19 | description: 'Search through stored documentation',
20 | inputSchema: {
21 | type: 'object',
22 | properties: {
23 | query: {
24 | type: 'string',
25 | description: 'Search query',
26 | },
27 | limit: {
28 | type: 'number',
29 | description: 'Maximum number of results to return',
30 | default: 5,
31 | },
32 | },
33 | required: ['query'],
34 | },
35 | };
36 | }
37 |
38 | async execute(args: any): Promise<McpToolResponse> {
39 | if (!args.query || typeof args.query !== 'string') {
40 | throw new McpError(ErrorCode.InvalidParams, 'Query is required');
41 | }
42 |
43 | const limit = args.limit || 5;
44 |
45 | try {
46 | const queryEmbedding = await this.apiClient.getEmbeddings(args.query);
47 |
48 | const searchResults = await this.apiClient.qdrantClient.search(COLLECTION_NAME, {
49 | vector: queryEmbedding,
50 | limit,
51 | with_payload: true,
52 | with_vector: false, // Optimize network transfer by not retrieving vectors
53 | score_threshold: 0.7, // Only return relevant results
54 | });
55 |
56 | const formattedResults = searchResults.map(result => {
57 | if (!isDocumentPayload(result.payload)) {
58 | throw new Error('Invalid payload type');
59 | }
60 | return `[${result.payload.title}](${result.payload.url})\nScore: ${result.score.toFixed(3)}\nContent: ${result.payload.text}\n`;
61 | }).join('\n---\n');
62 |
63 | return {
64 | content: [
65 | {
66 | type: 'text',
67 | text: formattedResults || 'No results found matching the query.',
68 | },
69 | ],
70 | };
71 | } catch (error) {
72 | if (error instanceof Error) {
73 | if (error.message.includes('unauthorized')) {
74 | throw new McpError(
75 | ErrorCode.InvalidRequest,
76 | 'Failed to authenticate with Qdrant cloud while searching'
77 | );
78 | } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) {
79 | throw new McpError(
80 | ErrorCode.InternalError,
81 | 'Connection to Qdrant cloud failed while searching'
82 | );
83 | }
84 | }
85 | return {
86 | content: [
87 | {
88 | type: 'text',
89 | text: `Search failed: ${error}`,
90 | },
91 | ],
92 | isError: true,
93 | };
94 | }
95 | }
96 | }
```
--------------------------------------------------------------------------------
/src/tools/remove-documentation.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { BaseTool } from './base-tool.js';
2 | import { ToolDefinition, McpToolResponse } from '../types.js';
3 | import { ApiClient } from '../api-client.js';
4 | import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
5 |
6 | const COLLECTION_NAME = 'documentation';
7 |
8 | export class RemoveDocumentationTool extends BaseTool {
9 | private apiClient: ApiClient;
10 |
11 | constructor(apiClient: ApiClient) {
12 | super();
13 | this.apiClient = apiClient;
14 | }
15 |
16 | get definition(): ToolDefinition {
17 | return {
18 | name: 'remove_documentation',
19 | description: 'Remove one or more documentation sources by their URLs',
20 | inputSchema: {
21 | type: 'object',
22 | properties: {
23 | urls: {
24 | type: 'array',
25 | items: {
26 | type: 'string',
27 | description: 'URL of a documentation source to remove'
28 | },
29 | description: 'Array of URLs to remove. Can be a single URL or multiple URLs.',
30 | minItems: 1
31 | }
32 | },
33 | required: ['urls'],
34 | },
35 | };
36 | }
37 |
38 | async execute(args: { urls: string[] }): Promise<McpToolResponse> {
39 | if (!Array.isArray(args.urls) || args.urls.length === 0) {
40 | throw new McpError(ErrorCode.InvalidParams, 'At least one URL is required');
41 | }
42 |
43 | if (!args.urls.every(url => typeof url === 'string')) {
44 | throw new McpError(ErrorCode.InvalidParams, 'All URLs must be strings');
45 | }
46 |
47 | try {
48 | // Delete using filter to match any of the provided URLs
49 | const result = await this.apiClient.qdrantClient.delete(COLLECTION_NAME, {
50 | filter: {
51 | should: args.urls.map(url => ({
52 | key: 'url',
53 | match: { value: url }
54 | }))
55 | },
56 | wait: true
57 | });
58 |
59 | if (!['acknowledged', 'completed'].includes(result.status)) {
60 | throw new Error('Delete operation failed');
61 | }
62 |
63 | return {
64 | content: [
65 | {
66 | type: 'text',
67 | text: `Successfully removed documentation from ${args.urls.length} source${args.urls.length > 1 ? 's' : ''}: ${args.urls.join(', ')}`,
68 | },
69 | ],
70 | };
71 | } catch (error) {
72 | if (error instanceof Error) {
73 | if (error.message.includes('unauthorized')) {
74 | throw new McpError(
75 | ErrorCode.InvalidRequest,
76 | 'Failed to authenticate with Qdrant cloud while removing documentation'
77 | );
78 | } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) {
79 | throw new McpError(
80 | ErrorCode.InternalError,
81 | 'Connection to Qdrant cloud failed while removing documentation'
82 | );
83 | }
84 | }
85 | return {
86 | content: [
87 | {
88 | type: 'text',
89 | text: `Failed to remove documentation: ${error}`,
90 | },
91 | ],
92 | isError: true,
93 | };
94 | }
95 | }
96 | }
```
--------------------------------------------------------------------------------
/src/handlers/extract-urls.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
2 | import { BaseHandler } from './base-handler.js';
3 | import { McpToolResponse } from '../types.js';
4 | import * as cheerio from 'cheerio';
5 | import fs from 'fs/promises';
6 | import path from 'path';
7 | import { fileURLToPath } from 'url';
8 |
9 | // Get current directory in ES modules
10 | const __filename = fileURLToPath(import.meta.url);
11 | const __dirname = path.dirname(__filename);
12 | const QUEUE_FILE = path.join(__dirname, '..', '..', 'queue.txt');
13 |
14 | export class ExtractUrlsHandler extends BaseHandler {
15 | async handle(args: any): Promise<McpToolResponse> {
16 | if (!args.url || typeof args.url !== 'string') {
17 | throw new McpError(ErrorCode.InvalidParams, 'URL is required');
18 | }
19 |
20 | await this.apiClient.initBrowser();
21 | const page = await this.apiClient.browser.newPage();
22 |
23 | try {
24 | const baseUrl = new URL(args.url);
25 | const basePath = baseUrl.pathname.split('/').slice(0, 3).join('/'); // Get the base path (e.g., /3/ for Python docs)
26 |
27 | await page.goto(args.url, { waitUntil: 'networkidle' });
28 | const content = await page.content();
29 | const $ = cheerio.load(content);
30 | const urls = new Set<string>();
31 |
32 | $('a[href]').each((_, element) => {
33 | const href = $(element).attr('href');
34 | if (href) {
35 | try {
36 | const url = new URL(href, args.url);
37 | // Only include URLs from the same documentation section
38 | if (url.hostname === baseUrl.hostname &&
39 | url.pathname.startsWith(basePath) &&
40 | !url.hash &&
41 | !url.href.endsWith('#')) {
42 | urls.add(url.href);
43 | }
44 | } catch (e) {
45 | // Ignore invalid URLs
46 | }
47 | }
48 | });
49 |
50 | const urlArray = Array.from(urls);
51 |
52 | if (args.add_to_queue) {
53 | try {
54 | // Ensure queue file exists
55 | try {
56 | await fs.access(QUEUE_FILE);
57 | } catch {
58 | await fs.writeFile(QUEUE_FILE, '');
59 | }
60 |
61 | // Append URLs to queue
62 | const urlsToAdd = urlArray.join('\n') + (urlArray.length > 0 ? '\n' : '');
63 | await fs.appendFile(QUEUE_FILE, urlsToAdd);
64 |
65 | return {
66 | content: [
67 | {
68 | type: 'text',
69 | text: `Successfully added ${urlArray.length} URLs to the queue`,
70 | },
71 | ],
72 | };
73 | } catch (error) {
74 | return {
75 | content: [
76 | {
77 | type: 'text',
78 | text: `Failed to add URLs to queue: ${error}`,
79 | },
80 | ],
81 | isError: true,
82 | };
83 | }
84 | }
85 |
86 | return {
87 | content: [
88 | {
89 | type: 'text',
90 | text: urlArray.join('\n') || 'No URLs found on this page.',
91 | },
92 | ],
93 | };
94 | } catch (error) {
95 | return {
96 | content: [
97 | {
98 | type: 'text',
99 | text: `Failed to extract URLs: ${error}`,
100 | },
101 | ],
102 | isError: true,
103 | };
104 | } finally {
105 | await page.close();
106 | }
107 | }
108 | }
```
--------------------------------------------------------------------------------
/src/api-client.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { QdrantClient } from '@qdrant/js-client-rest';
2 | import OpenAI from 'openai';
3 | import { chromium } from 'playwright';
4 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
5 |
6 | // Environment variables for configuration
7 | const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
8 | const QDRANT_URL = process.env.QDRANT_URL;
9 | const QDRANT_API_KEY = process.env.QDRANT_API_KEY;
10 |
11 | if (!QDRANT_URL) {
12 | throw new Error('QDRANT_URL environment variable is required for cloud storage');
13 | }
14 |
15 | if (!QDRANT_API_KEY) {
16 | throw new Error('QDRANT_API_KEY environment variable is required for cloud storage');
17 | }
18 |
19 | export class ApiClient {
20 | qdrantClient: QdrantClient;
21 | openaiClient?: OpenAI;
22 | browser: any;
23 |
24 | constructor() {
25 | // Initialize Qdrant client with cloud configuration
26 | this.qdrantClient = new QdrantClient({
27 | url: QDRANT_URL,
28 | apiKey: QDRANT_API_KEY,
29 | });
30 |
31 | // Initialize OpenAI client if API key is provided
32 | if (OPENAI_API_KEY) {
33 | this.openaiClient = new OpenAI({
34 | apiKey: OPENAI_API_KEY,
35 | });
36 | }
37 | }
38 |
39 | async initBrowser() {
40 | if (!this.browser) {
41 | this.browser = await chromium.launch();
42 | }
43 | }
44 |
45 | async cleanup() {
46 | if (this.browser) {
47 | await this.browser.close();
48 | }
49 | }
50 |
51 | async getEmbeddings(text: string): Promise<number[]> {
52 | if (!this.openaiClient) {
53 | throw new McpError(
54 | ErrorCode.InvalidRequest,
55 | 'OpenAI API key not configured'
56 | );
57 | }
58 |
59 | try {
60 | const response = await this.openaiClient.embeddings.create({
61 | model: 'text-embedding-ada-002',
62 | input: text,
63 | });
64 | return response.data[0].embedding;
65 | } catch (error) {
66 | throw new McpError(
67 | ErrorCode.InternalError,
68 | `Failed to generate embeddings: ${error}`
69 | );
70 | }
71 | }
72 |
73 | async initCollection(COLLECTION_NAME: string) {
74 | try {
75 | const collections = await this.qdrantClient.getCollections();
76 | const exists = collections.collections.some(c => c.name === COLLECTION_NAME);
77 |
78 | if (!exists) {
79 | await this.qdrantClient.createCollection(COLLECTION_NAME, {
80 | vectors: {
81 | size: 1536, // OpenAI ada-002 embedding size
82 | distance: 'Cosine',
83 | },
84 | // Add optimized settings for cloud deployment
85 | optimizers_config: {
86 | default_segment_number: 2,
87 | memmap_threshold: 20000,
88 | },
89 | replication_factor: 2,
90 | });
91 | }
92 | } catch (error) {
93 | if (error instanceof Error) {
94 | if (error.message.includes('unauthorized')) {
95 | throw new McpError(
96 | ErrorCode.InvalidRequest,
97 | 'Failed to authenticate with Qdrant cloud. Please check your API key.'
98 | );
99 | } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) {
100 | throw new McpError(
101 | ErrorCode.InternalError,
102 | 'Failed to connect to Qdrant cloud. Please check your QDRANT_URL.'
103 | );
104 | }
105 | }
106 | throw new McpError(
107 | ErrorCode.InternalError,
108 | `Failed to initialize Qdrant cloud collection: ${error}`
109 | );
110 | }
111 | }
112 | }
```
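
A standalone usage sketch of `ApiClient` outside the MCP server, assuming `QDRANT_URL`, `QDRANT_API_KEY`, and `OPENAI_API_KEY` are set in the environment; the query text is a placeholder, and the `documentation` collection name mirrors the one used by the handlers:

```typescript
import { ApiClient } from './api-client.js';

// Hypothetical standalone usage; the handlers perform these same steps
// when adding and searching documentation.
async function demo() {
  const apiClient = new ApiClient();

  // Ensure the 'documentation' collection exists with the expected vector size.
  await apiClient.initCollection('documentation');

  // Embed a query and run a vector search against the collection.
  const vector = await apiClient.getEmbeddings('How do I install the package?');
  const results = await apiClient.qdrantClient.search('documentation', {
    vector,
    limit: 3,
    with_payload: true,
  });
  console.log(results.map(r => r.score));

  await apiClient.cleanup();
}

demo().catch(console.error);
```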
--------------------------------------------------------------------------------
/src/tools/run-queue.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { BaseTool } from './base-tool.js';
2 | import { ToolDefinition, McpToolResponse } from '../types.js';
3 | import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
4 | import fs from 'fs/promises';
5 | import path from 'path';
6 | import { fileURLToPath } from 'url';
7 | import { ApiClient } from '../api-client.js';
8 | import { AddDocumentationHandler } from '../handlers/add-documentation.js';
9 | import { Server } from '@modelcontextprotocol/sdk/server/index.js';
10 |
11 | // Get current directory in ES modules
12 | const __filename = fileURLToPath(import.meta.url);
13 | const __dirname = path.dirname(__filename);
14 | const QUEUE_FILE = path.join(__dirname, '..', '..', 'queue.txt');
15 |
16 | export class RunQueueTool extends BaseTool {
17 | private apiClient: ApiClient;
18 | private addDocHandler: AddDocumentationHandler;
19 |
20 | constructor(apiClient: ApiClient) {
21 | super();
22 | this.apiClient = apiClient;
23 | // Create a temporary server instance just for the handler
24 | const tempServer = new Server(
25 | { name: 'temp', version: '0.0.0' },
26 | { capabilities: { tools: {} } }
27 | );
28 | this.addDocHandler = new AddDocumentationHandler(tempServer, apiClient);
29 | }
30 |
31 | get definition(): ToolDefinition {
32 | return {
33 | name: 'run_queue',
34 | description: 'Process URLs from the queue one at a time until complete',
35 | inputSchema: {
36 | type: 'object',
37 | properties: {},
38 | required: [],
39 | },
40 | };
41 | }
42 |
43 | async execute(_args: any): Promise<McpToolResponse> {
44 | try {
45 | // Check if queue file exists
46 | try {
47 | await fs.access(QUEUE_FILE);
48 | } catch {
49 | return {
50 | content: [
51 | {
52 | type: 'text',
53 | text: 'Queue is empty (queue file does not exist)',
54 | },
55 | ],
56 | };
57 | }
58 |
59 | let processedCount = 0;
60 | let failedCount = 0;
61 | const failedUrls: string[] = [];
62 |
63 | while (true) {
64 | // Read current queue
65 | const content = await fs.readFile(QUEUE_FILE, 'utf-8');
66 | const urls = content.split('\n').filter(url => url.trim() !== '');
67 |
68 | if (urls.length === 0) {
69 | break; // Queue is empty
70 | }
71 |
72 | const currentUrl = urls[0]; // Get first URL
73 |
74 | try {
75 | // Process the URL using the handler
76 | await this.addDocHandler.handle({ url: currentUrl });
77 | processedCount++;
78 | } catch (error) {
79 | failedCount++;
80 | failedUrls.push(currentUrl);
81 | console.error(`Failed to process URL ${currentUrl}:`, error);
82 | }
83 |
84 | // Remove the processed URL from queue
85 | const remainingUrls = urls.slice(1);
86 | await fs.writeFile(QUEUE_FILE, remainingUrls.join('\n') + (remainingUrls.length > 0 ? '\n' : ''));
87 | }
88 |
89 | let resultText = `Queue processing complete.\nProcessed: ${processedCount} URLs\nFailed: ${failedCount} URLs`;
90 | if (failedUrls.length > 0) {
91 | resultText += `\n\nFailed URLs:\n${failedUrls.join('\n')}`;
92 | }
93 |
94 | return {
95 | content: [
96 | {
97 | type: 'text',
98 | text: resultText,
99 | },
100 | ],
101 | };
102 | } catch (error) {
103 | return {
104 | content: [
105 | {
106 | type: 'text',
107 | text: `Failed to process queue: ${error}`,
108 | },
109 | ],
110 | isError: true,
111 | };
112 | }
113 | }
114 | }
115 |
```
--------------------------------------------------------------------------------
/src/tools/extract-urls.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { BaseTool } from './base-tool.js';
2 | import { ToolDefinition, McpToolResponse } from '../types.js';
3 | import { ApiClient } from '../api-client.js';
4 | import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
5 | import * as cheerio from 'cheerio';
6 | import fs from 'fs/promises';
7 | import path from 'path';
8 | import { fileURLToPath } from 'url';
9 |
10 | // Get current directory in ES modules
11 | const __filename = fileURLToPath(import.meta.url);
12 | const __dirname = path.dirname(__filename);
13 | const QUEUE_FILE = path.join(__dirname, '..', '..', 'queue.txt');
14 |
15 | export class ExtractUrlsTool extends BaseTool {
16 | private apiClient: ApiClient;
17 |
18 | constructor(apiClient: ApiClient) {
19 | super();
20 | this.apiClient = apiClient;
21 | }
22 |
23 | get definition(): ToolDefinition {
24 | return {
25 | name: 'extract_urls',
26 | description: 'Extract all URLs from a given web page',
27 | inputSchema: {
28 | type: 'object',
29 | properties: {
30 | url: {
31 | type: 'string',
32 | description: 'URL of the page to extract URLs from',
33 | },
34 | add_to_queue: {
35 | type: 'boolean',
36 | description: 'If true, automatically add extracted URLs to the queue',
37 | default: false,
38 | },
39 | },
40 | required: ['url'],
41 | },
42 | };
43 | }
44 |
45 | async execute(args: any): Promise<McpToolResponse> {
46 | if (!args.url || typeof args.url !== 'string') {
47 | throw new McpError(ErrorCode.InvalidParams, 'URL is required');
48 | }
49 |
50 | await this.apiClient.initBrowser();
51 | const page = await this.apiClient.browser.newPage();
52 |
53 | try {
54 | await page.goto(args.url, { waitUntil: 'networkidle' });
55 | const content = await page.content();
56 | const $ = cheerio.load(content);
57 | const urls = new Set<string>();
58 |
59 | $('a[href]').each((_, element) => {
60 | const href = $(element).attr('href');
61 | if (href) {
62 | try {
63 | const url = new URL(href, args.url);
64 | // Only include URLs from the same domain to avoid external links
65 | if (url.origin === new URL(args.url).origin && !url.hash && !url.href.endsWith('#')) {
66 | urls.add(url.href);
67 | }
68 | } catch (e) {
69 | // Ignore invalid URLs
70 | }
71 | }
72 | });
73 |
74 | const urlArray = Array.from(urls);
75 |
76 | if (args.add_to_queue) {
77 | try {
78 | // Ensure queue file exists
79 | try {
80 | await fs.access(QUEUE_FILE);
81 | } catch {
82 | await fs.writeFile(QUEUE_FILE, '');
83 | }
84 |
85 | // Append URLs to queue
86 | const urlsToAdd = urlArray.join('\n') + (urlArray.length > 0 ? '\n' : '');
87 | await fs.appendFile(QUEUE_FILE, urlsToAdd);
88 |
89 | return {
90 | content: [
91 | {
92 | type: 'text',
93 | text: `Successfully added ${urlArray.length} URLs to the queue`,
94 | },
95 | ],
96 | };
97 | } catch (error) {
98 | return {
99 | content: [
100 | {
101 | type: 'text',
102 | text: `Failed to add URLs to queue: ${error}`,
103 | },
104 | ],
105 | isError: true,
106 | };
107 | }
108 | }
109 |
110 | return {
111 | content: [
112 | {
113 | type: 'text',
114 | text: urlArray.join('\n') || 'No URLs found on this page.',
115 | },
116 | ],
117 | };
118 | } catch (error) {
119 | return {
120 | content: [
121 | {
122 | type: 'text',
123 | text: `Failed to extract URLs: ${error}`,
124 | },
125 | ],
126 | isError: true,
127 | };
128 | } finally {
129 | await page.close();
130 | }
131 | }
132 | }
```
--------------------------------------------------------------------------------
/src/handlers/add-documentation.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
2 | import { BaseHandler } from './base-handler.js';
3 | import { DocumentChunk, McpToolResponse } from '../types.js';
4 | import * as cheerio from 'cheerio';
5 | import crypto from 'crypto';
6 |
7 | const COLLECTION_NAME = 'documentation';
8 |
9 | export class AddDocumentationHandler extends BaseHandler {
10 | async handle(args: any): Promise<McpToolResponse> {
11 | if (!args.url || typeof args.url !== 'string') {
12 | throw new McpError(ErrorCode.InvalidParams, 'URL is required');
13 | }
14 |
15 | try {
16 | const chunks = await this.fetchAndProcessUrl(args.url);
17 |
18 | // Batch process chunks for better performance
19 | const batchSize = 100;
20 | for (let i = 0; i < chunks.length; i += batchSize) {
21 | const batch = chunks.slice(i, i + batchSize);
22 | const points = await Promise.all(
23 | batch.map(async (chunk) => {
24 | const embedding = await this.apiClient.getEmbeddings(chunk.text);
25 | return {
26 | id: this.generatePointId(),
27 | vector: embedding,
28 | payload: {
29 | ...chunk,
30 | _type: 'DocumentChunk' as const,
31 | } as Record<string, unknown>,
32 | };
33 | })
34 | );
35 |
36 | try {
37 | await this.apiClient.qdrantClient.upsert(COLLECTION_NAME, {
38 | wait: true,
39 | points,
40 | });
41 | } catch (error) {
42 | if (error instanceof Error) {
43 | if (error.message.includes('unauthorized')) {
44 | throw new McpError(
45 | ErrorCode.InvalidRequest,
46 | 'Failed to authenticate with Qdrant cloud while adding documents'
47 | );
48 | } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) {
49 | throw new McpError(
50 | ErrorCode.InternalError,
51 | 'Connection to Qdrant cloud failed while adding documents'
52 | );
53 | }
54 | }
55 | throw error;
56 | }
57 | }
58 |
59 | return {
60 | content: [
61 | {
62 | type: 'text',
63 | text: `Successfully added documentation from ${args.url} (${chunks.length} chunks processed in ${Math.ceil(chunks.length / batchSize)} batches)`,
64 | },
65 | ],
66 | };
67 | } catch (error) {
68 | if (error instanceof McpError) {
69 | throw error;
70 | }
71 | return {
72 | content: [
73 | {
74 | type: 'text',
75 | text: `Failed to add documentation: ${error}`,
76 | },
77 | ],
78 | isError: true,
79 | };
80 | }
81 | }
82 |
83 | private async fetchAndProcessUrl(url: string): Promise<DocumentChunk[]> {
84 | await this.apiClient.initBrowser();
85 | const page = await this.apiClient.browser.newPage();
86 |
87 | try {
88 | await page.goto(url, { waitUntil: 'networkidle' });
89 | const content = await page.content();
90 | const $ = cheerio.load(content);
91 |
92 | // Remove script tags, style tags, and comments
93 | $('script').remove();
94 | $('style').remove();
95 | $('noscript').remove();
96 |
97 | // Extract main content
98 | const title = $('title').text() || url;
99 | const mainContent = $('main, article, .content, .documentation, body').text();
100 |
101 | // Split content into chunks
102 | const chunks = this.chunkText(mainContent, 1000);
103 |
104 | return chunks.map(chunk => ({
105 | text: chunk,
106 | url,
107 | title,
108 | timestamp: new Date().toISOString(),
109 | }));
110 | } catch (error) {
111 | throw new McpError(
112 | ErrorCode.InternalError,
113 | `Failed to fetch URL ${url}: ${error}`
114 | );
115 | } finally {
116 | await page.close();
117 | }
118 | }
119 |
120 | private chunkText(text: string, maxChunkSize: number): string[] {
121 | const words = text.split(/\s+/);
122 | const chunks: string[] = [];
123 | let currentChunk: string[] = [];
124 |
125 | for (const word of words) {
126 | currentChunk.push(word);
127 | const currentLength = currentChunk.join(' ').length;
128 |
129 | if (currentLength >= maxChunkSize) {
130 | chunks.push(currentChunk.join(' '));
131 | currentChunk = [];
132 | }
133 | }
134 |
135 | if (currentChunk.length > 0) {
136 | chunks.push(currentChunk.join(' '));
137 | }
138 |
139 | return chunks;
140 | }
141 |
142 | private generatePointId(): string {
143 | return crypto.randomBytes(16).toString('hex');
144 | }
145 | }
```
--------------------------------------------------------------------------------
/src/handlers/list-sources.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
2 | import { BaseHandler } from './base-handler.js';
3 | import { McpToolResponse, isDocumentPayload } from '../types.js';
4 |
5 | const COLLECTION_NAME = 'documentation';
6 |
7 | interface Source {
8 | title: string;
9 | url: string;
10 | }
11 |
12 | interface GroupedSources {
13 | [domain: string]: {
14 | [subdomain: string]: Source[];
15 | };
16 | }
17 |
18 | export class ListSourcesHandler extends BaseHandler {
19 | private groupSourcesByDomainAndSubdomain(sources: Source[]): GroupedSources {
20 | const grouped: GroupedSources = {};
21 |
22 | for (const source of sources) {
23 | try {
24 | const url = new URL(source.url);
25 | const domain = url.hostname;
26 | const pathParts = url.pathname.split('/').filter(p => p);
27 | const subdomain = pathParts[0] || '/';
28 |
29 | if (!grouped[domain]) {
30 | grouped[domain] = {};
31 | }
32 | if (!grouped[domain][subdomain]) {
33 | grouped[domain][subdomain] = [];
34 | }
35 | grouped[domain][subdomain].push(source);
36 | } catch (error) {
37 | console.error(`Invalid URL: ${source.url}`);
38 | }
39 | }
40 |
41 | return grouped;
42 | }
43 |
44 | private formatGroupedSources(grouped: GroupedSources): string {
45 | const output: string[] = [];
46 | let domainCounter = 1;
47 |
48 | for (const [domain, subdomains] of Object.entries(grouped)) {
49 | output.push(`${domainCounter}. ${domain}`);
50 |
51 |       // Deduplicate this domain's sources by URL (a Map keyed by URL keeps one entry each)
52 | const uniqueSources = new Map<string, Source>();
53 | for (const sources of Object.values(subdomains)) {
54 | for (const source of sources) {
55 | uniqueSources.set(source.url, source);
56 | }
57 | }
58 |
59 | // Convert to array and sort
60 | const sortedSources = Array.from(uniqueSources.values())
61 | .sort((a, b) => a.title.localeCompare(b.title));
62 |
63 |       // Number each source under its domain (1.1, 1.2, ...)
64 | sortedSources.forEach((source, index) => {
65 | output.push(`${domainCounter}.${index + 1}. ${source.title} (${source.url})`);
66 | });
67 |
68 | output.push(''); // Add blank line between domains
69 | domainCounter++;
70 | }
71 |
72 | return output.join('\n');
73 | }
74 |
75 | async handle(): Promise<McpToolResponse> {
76 | try {
77 | await this.apiClient.initCollection(COLLECTION_NAME);
78 |
79 | const pageSize = 100;
80 | let offset = null;
81 | const sources: Source[] = [];
82 |
83 | while (true) {
84 | const scroll = await this.apiClient.qdrantClient.scroll(COLLECTION_NAME, {
85 | with_payload: true,
86 | with_vector: false,
87 | limit: pageSize,
88 | offset,
89 | });
90 |
91 | if (scroll.points.length === 0) break;
92 |
93 | for (const point of scroll.points) {
94 | if (point.payload && typeof point.payload === 'object' && 'url' in point.payload && 'title' in point.payload) {
95 |           const payload = point.payload as { url: string; title: string };
96 | sources.push({
97 | title: payload.title,
98 | url: payload.url
99 | });
100 | }
101 | }
102 |
103 | if (scroll.points.length < pageSize) break;
104 | offset = scroll.points[scroll.points.length - 1].id;
105 | }
106 |
107 | if (sources.length === 0) {
108 | return {
109 | content: [
110 | {
111 | type: 'text',
112 | text: 'No documentation sources found.',
113 | },
114 | ],
115 | };
116 | }
117 |
118 | const grouped = this.groupSourcesByDomainAndSubdomain(sources);
119 | const formattedOutput = this.formatGroupedSources(grouped);
120 |
121 | return {
122 | content: [
123 | {
124 | type: 'text',
125 | text: formattedOutput,
126 | },
127 | ],
128 | };
129 | } catch (error) {
130 | if (error instanceof Error) {
131 | if (error.message.includes('unauthorized')) {
132 | throw new McpError(
133 | ErrorCode.InvalidRequest,
134 | 'Failed to authenticate with Qdrant cloud while listing sources'
135 | );
136 | } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) {
137 | throw new McpError(
138 | ErrorCode.InternalError,
139 | 'Connection to Qdrant cloud failed while listing sources'
140 | );
141 | }
142 | }
143 | return {
144 | content: [
145 | {
146 | type: 'text',
147 | text: `Failed to list sources: ${error}`,
148 | },
149 | ],
150 | isError: true,
151 | };
152 | }
153 | }
154 | }
```
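
For reference, `formatGroupedSources` produces a numbered listing like the hypothetical example below (domains and titles are made up): each domain gets a top-level number, each deduplicated source under it gets a `domain.index` number, and domains are separated by a blank line.

```
1. docs.example.com
1.1. Getting Started (https://docs.example.com/guide/getting-started)
1.2. HTTP API Reference (https://docs.example.com/reference/http-api)

2. lib.example.org
2.1. Installation (https://lib.example.org/docs/install)
```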
--------------------------------------------------------------------------------
/src/handler-registry.ts:
--------------------------------------------------------------------------------
```typescript
1 | import {
2 | CallToolRequestSchema,
3 | ErrorCode,
4 | ListToolsRequestSchema,
5 | McpError,
6 | } from '@modelcontextprotocol/sdk/types.js';
7 | import { Server } from '@modelcontextprotocol/sdk/server/index.js';
8 | import { ApiClient } from './api-client.js';
9 | import { ToolDefinition } from './types.js';
10 | import {
11 | AddDocumentationHandler,
12 | SearchDocumentationHandler,
13 | ListSourcesHandler,
14 | RemoveDocumentationHandler,
15 | ExtractUrlsHandler,
16 | ListQueueHandler,
17 | RunQueueHandler,
18 | ClearQueueHandler,
19 | } from './handlers/index.js';
20 |
21 | const COLLECTION_NAME = 'documentation';
22 |
23 | export class HandlerRegistry {
24 | private server: Server;
25 | private apiClient: ApiClient;
26 | private handlers: Map<string, any>;
27 |
28 | constructor(server: Server, apiClient: ApiClient) {
29 | this.server = server;
30 | this.apiClient = apiClient;
31 | this.handlers = new Map();
32 | this.setupHandlers();
33 | this.registerHandlers();
34 | }
35 |
36 | private setupHandlers() {
37 | this.handlers.set('add_documentation', new AddDocumentationHandler(this.server, this.apiClient));
38 | this.handlers.set('search_documentation', new SearchDocumentationHandler(this.server, this.apiClient));
39 | this.handlers.set('list_sources', new ListSourcesHandler(this.server, this.apiClient));
40 | this.handlers.set('remove_documentation', new RemoveDocumentationHandler(this.server, this.apiClient));
41 | this.handlers.set('extract_urls', new ExtractUrlsHandler(this.server, this.apiClient));
42 | this.handlers.set('list_queue', new ListQueueHandler(this.server, this.apiClient));
43 | this.handlers.set('run_queue', new RunQueueHandler(this.server, this.apiClient));
44 | this.handlers.set('clear_queue', new ClearQueueHandler(this.server, this.apiClient));
45 | }
46 |
47 | private registerHandlers() {
48 | this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
49 | tools: [
50 | {
51 | name: 'search_documentation',
52 | description: 'Search through stored documentation using natural language queries. Use this tool to find relevant information across all stored documentation sources. Returns matching excerpts with context, ranked by relevance. Useful for finding specific information, code examples, or related documentation.',
53 | inputSchema: {
54 | type: 'object',
55 | properties: {
56 | query: {
57 | type: 'string',
58 | description: 'The text to search for in the documentation. Can be a natural language query, specific terms, or code snippets.',
59 | },
60 | limit: {
61 | type: 'number',
62 | description: 'Maximum number of results to return (1-20). Higher limits provide more comprehensive results but may take longer to process. Default is 5.',
63 | default: 5,
64 | },
65 | },
66 | required: ['query'],
67 | },
68 | } as ToolDefinition,
69 | {
70 | name: 'list_sources',
71 | description: 'List all documentation sources currently stored in the system. Returns a comprehensive list of all indexed documentation including source URLs, titles, and last update times. Use this to understand what documentation is available for searching or to verify if specific sources have been indexed.',
72 | inputSchema: {
73 | type: 'object',
74 | properties: {},
75 | },
76 | } as ToolDefinition,
77 | {
78 | name: 'extract_urls',
79 | description: 'Extract and analyze all URLs from a given web page. This tool crawls the specified webpage, identifies all hyperlinks, and optionally adds them to the processing queue. Useful for discovering related documentation pages, API references, or building a documentation graph. Handles various URL formats and validates links before extraction.',
80 | inputSchema: {
81 | type: 'object',
82 | properties: {
83 | url: {
84 | type: 'string',
85 | description: 'The complete URL of the webpage to analyze (must include protocol, e.g., https://). The page must be publicly accessible.',
86 | },
87 | add_to_queue: {
88 | type: 'boolean',
89 | description: 'If true, automatically add extracted URLs to the processing queue for later indexing. This enables recursive documentation discovery. Use with caution on large sites to avoid excessive queuing.',
90 | default: false,
91 | },
92 | },
93 | required: ['url'],
94 | },
95 | } as ToolDefinition,
96 | {
97 | name: 'remove_documentation',
98 | description: 'Remove specific documentation sources from the system by their URLs. Use this tool to clean up outdated documentation, remove incorrect sources, or manage the documentation collection. The removal is permanent and will affect future search results. Supports removing multiple URLs in a single operation.',
99 | inputSchema: {
100 | type: 'object',
101 | properties: {
102 | urls: {
103 | type: 'array',
104 | items: {
105 | type: 'string',
106 | description: 'The complete URL of the documentation source to remove. Must exactly match the URL used when the documentation was added.',
107 | },
108 | description: 'Array of URLs to remove from the database',
109 | },
110 | },
111 | required: ['urls'],
112 | },
113 | } as ToolDefinition,
114 | {
115 | name: 'list_queue',
116 | description: 'List all URLs currently waiting in the documentation processing queue. Shows pending documentation sources that will be processed when run_queue is called. Use this to monitor queue status, verify URLs were added correctly, or check processing backlog. Returns URLs in the order they will be processed.',
117 | inputSchema: {
118 | type: 'object',
119 | properties: {},
120 | },
121 | } as ToolDefinition,
122 | {
123 | name: 'run_queue',
124 | description: 'Process and index all URLs currently in the documentation queue. Each URL is processed sequentially, with proper error handling and retry logic. Progress updates are provided as processing occurs. Use this after adding new URLs to ensure all documentation is indexed and searchable. Long-running operations will process until the queue is empty or an unrecoverable error occurs.',
125 | inputSchema: {
126 | type: 'object',
127 | properties: {},
128 | },
129 | } as ToolDefinition,
130 | {
131 | name: 'clear_queue',
132 | description: 'Remove all pending URLs from the documentation processing queue. Use this to reset the queue when you want to start fresh, remove unwanted URLs, or cancel pending processing. This operation is immediate and permanent - URLs will need to be re-added if you want to process them later. Returns the number of URLs that were cleared from the queue.',
133 | inputSchema: {
134 | type: 'object',
135 | properties: {},
136 | },
137 | } as ToolDefinition,
138 | ],
139 | }));
140 |
141 | this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
142 | await this.apiClient.initCollection(COLLECTION_NAME);
143 |
144 | const handler = this.handlers.get(request.params.name);
145 | if (!handler) {
146 | throw new McpError(
147 | ErrorCode.MethodNotFound,
148 | `Unknown tool: ${request.params.name}`
149 | );
150 | }
151 |
152 | const response = await handler.handle(request.params.arguments);
153 | return {
154 | _meta: {},
155 | ...response
156 | };
157 | });
158 | }
159 | }
```
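
The registry above answers `tools/list` and routes each `tools/call` request to the matching handler in its map. The sketch below shows how a client might exercise that flow over stdio using the MCP SDK's client classes; the build path, client metadata, and exact client helper methods are assumptions that can vary with the SDK version, so treat it as illustrative rather than canonical.

```typescript
// Hypothetical client-side sketch: connect to this server over stdio and call
// one of the registered tools. Paths and client details are assumptions.
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';

async function main() {
  // Assumes the compiled server entry point lives at build/index.js.
  const transport = new StdioClientTransport({
    command: 'node',
    args: ['build/index.js'],
  });

  const client = new Client(
    { name: 'ragdocs-example-client', version: '0.0.1' },
    { capabilities: {} }
  );
  await client.connect(transport);

  // tools/list is answered by the ListToolsRequestSchema handler above.
  const { tools } = await client.listTools();
  console.log(tools.map(t => t.name));

  // tools/call is routed through the CallToolRequestSchema handler, which
  // looks up 'search_documentation' in the handlers map.
  const result = await client.callTool({
    name: 'search_documentation',
    arguments: { query: 'how do I configure the vector store?', limit: 3 },
  });
  console.log(result.content);

  await client.close();
}

main().catch(console.error);
```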