# Directory Structure
```
├── .npmignore
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── src
│ ├── api-client.ts
│ ├── embeddings.ts
│ ├── handlers
│ │ ├── add-documentation.ts
│ │ ├── base-handler.ts
│ │ ├── list-documentation.ts
│ │ ├── search-documentation.ts
│ │ └── test-embeddings.ts
│ ├── index.ts
│ ├── tools
│ │ ├── add-documentation.ts
│ │ ├── content-fetcher.ts
│ │ ├── list-utils.ts
│ │ ├── qdrant-client.ts
│ │ ├── search-utils.ts
│ │ ├── text-chunker.ts
│ │ └── url-processor.ts
│ ├── types
│ │ └── ollama.d.ts
│ └── types.ts
└── tsconfig.json
```
# Files
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
```
src/
ref/
.clinecontext
.clinelearn
.clinerules
ragdocs_plan.md
tsconfig.json
.git
.gitignore
node_modules/
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
# RagDocs MCP Server
A Model Context Protocol (MCP) server that provides RAG (Retrieval-Augmented Generation) capabilities using Qdrant vector database and Ollama/OpenAI embeddings. This server enables semantic search and management of documentation through vector similarity.
## Features
- Add documentation with metadata
- Semantic search through documents
- List and organize documentation
- Delete documents
- Support for both Ollama (free) and OpenAI (paid) embeddings
- Automatic text chunking and embedding generation
- Vector storage with Qdrant
## Prerequisites
- Node.js 16 or higher
- One of the following Qdrant setups:
- Local instance using Docker (free)
- Qdrant Cloud account with API key (managed service)
- One of the following for embeddings:
- Ollama running locally (default, free)
- OpenAI API key (optional, paid)
## Available Tools
### 1. add_document
Add a document to the RAG system.
Parameters:
- `url` (required): Document URL/identifier
- `content` (required): Document content
- `metadata` (optional): Document metadata
- `title`: Document title
- `contentType`: Content type (e.g., "text/markdown")
### 2. search_documents
Search through stored documents using semantic similarity.
Parameters:
- `query` (required): Natural language search query
- `options` (optional):
- `limit`: Maximum number of results (1-20, default: 5)
- `scoreThreshold`: Minimum similarity score (0-1, default: 0.7)
- `filters`:
- `domain`: Filter by domain
- `hasCode`: Filter for documents containing code
- `after`: Filter for documents after date (ISO format)
- `before`: Filter for documents before date (ISO format)
### 3. list_documents
List all stored documents with pagination and grouping options.
Parameters (all optional):
- `page`: Page number (default: 1)
- `pageSize`: Number of documents per page (1-100, default: 20)
- `groupByDomain`: Group documents by domain (default: false)
- `sortBy`: Sort field ("timestamp", "title", or "domain")
- `sortOrder`: Sort order ("asc" or "desc")
### 4. delete_document
Delete a document from the RAG system.
Parameters:
- `url` (required): URL of the document to delete
## Installation
```bash
npm install -g @mcpservers/ragdocs
```
## MCP Server Configuration
```json
{
"mcpServers": {
"ragdocs": {
"command": "node",
"args": ["@mcpservers/ragdocs"],
"env": {
"QDRANT_URL": "http://127.0.0.1:6333",
"EMBEDDING_PROVIDER": "ollama"
}
}
}
}
```
Using Qdrant Cloud:
```json
{
"mcpServers": {
"ragdocs": {
"command": "node",
"args": ["@mcpservers/ragdocs"],
"env": {
"QDRANT_URL": "https://your-cluster-url.qdrant.tech",
"QDRANT_API_KEY": "your-qdrant-api-key",
"EMBEDDING_PROVIDER": "ollama"
}
}
}
}
```
Using OpenAI:
```json
{
"mcpServers": {
"ragdocs": {
"command": "node",
"args": ["@mcpservers/ragdocs"],
"env": {
"QDRANT_URL": "http://127.0.0.1:6333",
"EMBEDDING_PROVIDER": "openai",
"OPENAI_API_KEY": "your-api-key"
}
}
}
}
```
## Local Qdrant with Docker
```bash
docker run -d --name qdrant -p 6333:6333 -p 6334:6334 qdrant/qdrant
```
## Environment Variables
- `QDRANT_URL`: URL of your Qdrant instance
- For local: "http://127.0.0.1:6333" (default)
- For cloud: "https://your-cluster-url.qdrant.tech"
- `QDRANT_API_KEY`: API key for Qdrant Cloud (required when using cloud instance)
- `EMBEDDING_PROVIDER`: Choice of embedding provider ("ollama" or "openai", default: "ollama")
- `OPENAI_API_KEY`: OpenAI API key (required if using OpenAI)
- `EMBEDDING_MODEL`: Model to use for embeddings
- For Ollama: defaults to "nomic-embed-text"
- For OpenAI: defaults to "text-embedding-3-small"
## License
Apache License 2.0
```
--------------------------------------------------------------------------------
/src/types/ollama.d.ts:
--------------------------------------------------------------------------------
```typescript
// Minimal ambient type declarations for the 'ollama' npm package.
// Only the embeddings API surface used by this project is declared here.
declare module 'ollama' {
  /** Request body for ollama.embeddings(). */
  export interface EmbeddingsRequest {
    model: string;                  // embedding model name, e.g. "nomic-embed-text"
    prompt: string;                 // text to embed
    options?: Record<string, any>;  // provider-specific tuning options
  }

  /** Response from ollama.embeddings(). */
  export interface EmbeddingsResponse {
    embedding: number[];  // embedding vector for the prompt
  }

  // Default export: the ollama client object (embeddings endpoint only).
  const ollama: {
    embeddings(request: EmbeddingsRequest): Promise<EmbeddingsResponse>;
  };
  export default ollama;
}
```
--------------------------------------------------------------------------------
/src/handlers/base-handler.ts:
--------------------------------------------------------------------------------
```typescript
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { ApiClient } from '../api-client.js';
import { ToolResult } from '../types.js';
/**
 * Common base class for all MCP tool handlers.
 *
 * Stores the MCP server instance and the shared ApiClient as protected
 * parameter properties so that concrete handlers only need to implement
 * handle().
 */
export abstract class BaseHandler {
  constructor(
    protected readonly server: Server,
    protected readonly apiClient: ApiClient
  ) {}

  /**
   * Executes the tool with the given (already JSON-decoded) arguments and
   * returns a ToolResult. Some implementations also throw McpError for
   * invalid input.
   */
  abstract handle(args: any): Promise<ToolResult>;
}
```
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
```json
{
"compilerOptions": {
"target": "es2022",
"module": "es2022",
"moduleResolution": "node",
"outDir": "build",
"rootDir": "src",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"resolveJsonModule": true
},
"include": ["src/**/*"],
"exclude": ["node_modules"]
}
```
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
```json
{
"name": "@mcpservers/ragdocs",
"version": "1.0.0",
"type": "module",
"main": "build/index.js",
"bin": {
"mcp-ragdocs": "build/index.js"
},
"files": [
"build/**/*"
],
"scripts": {
"build": "tsc && node -e \"require('fs').chmodSync('build/index.js', '755')\"",
"prepublishOnly": "npm run build",
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [
"mcp",
"rag",
"documentation",
"search",
"embeddings"
],
"author": "bossying",
"license": "Apache-2.0",
"description": "MCP server for RAG-based document search and management",
"homepage": "https://github.com/heltonteixeira/ragdocs",
"repository": {
"type": "git",
"url": "git+https://github.com/heltonteixeira/ragdocs.git"
},
"dependencies": {
"@modelcontextprotocol/sdk": "^1.0.4",
"@qdrant/js-client-rest": "^1.12.0",
"axios": "^1.7.9",
"cheerio": "^1.0.0",
"ollama": "^0.5.11",
"openai": "^4.77.0",
"playwright": "^1.49.1"
},
"devDependencies": {
"typescript": "^5.7.2"
}
}
```
--------------------------------------------------------------------------------
/src/types.ts:
--------------------------------------------------------------------------------
```typescript
import { DocumentMetadata } from './tools/qdrant-client.js';
/**
 * A document to be ingested: its identifier URL, raw content, and any
 * caller-supplied subset of the stored metadata.
 */
export interface Document {
  url: string;
  content: string;
  metadata: Partial<DocumentMetadata>;
}
/** One chunk of a document, as stored alongside its embedding. */
export interface DocumentChunk {
  text: string;
  url: string;
  title: string;
  timestamp: string;
}

/** Qdrant point payload for a chunk: a tagged DocumentChunk plus extras. */
export interface DocumentPayload extends DocumentChunk {
  _type: 'DocumentChunk';
  [key: string]: unknown;
}

/**
 * Runtime type guard for payloads read back from Qdrant. Accepts only
 * objects tagged `_type: 'DocumentChunk'` whose core chunk fields are all
 * strings.
 */
export function isDocumentPayload(payload: unknown): payload is DocumentPayload {
  if (typeof payload !== 'object' || payload === null) {
    return false;
  }
  const candidate = payload as Partial<DocumentPayload>;
  if (candidate._type !== 'DocumentChunk') {
    return false;
  }
  const coreFields = [candidate.text, candidate.url, candidate.title, candidate.timestamp];
  return coreFields.every((field) => typeof field === 'string');
}
/** Optional knobs for semantic search (validated in tools/search-utils). */
export interface SearchOptions {
  limit?: number;           // maximum number of results
  scoreThreshold?: number;  // minimum similarity score (0-1)
  filters?: {
    domain?: string;        // restrict to a single domain
    hasCode?: boolean;      // restrict to documents containing code
    after?: string;         // ISO date lower bound
    before?: string;        // ISO date upper bound
  };
}

/** JSON-schema-style description of an MCP tool, as advertised to clients. */
export interface ToolDefinition {
  name: string;
  description: string;
  inputSchema: {
    type: string;
    properties: Record<string, any>;
    required: string[];
  };
}

/** Result envelope returned by every tool handler. */
export interface ToolResult {
  content: Array<{
    type: string;  // content kind, e.g. 'text'
    text: string;
  }>;
  isError?: boolean;  // set when the text describes a failure
}

/** Server configuration for Qdrant and embeddings. */
export interface RagDocsConfig {
  qdrantUrl: string;
  qdrantApiKey?: string;  // required only for Qdrant Cloud
  openaiApiKey: string;
  collectionName: string;
}
```
--------------------------------------------------------------------------------
/src/handlers/list-documentation.ts:
--------------------------------------------------------------------------------
```typescript
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { BaseHandler } from './base-handler.js';
import { QdrantWrapper } from '../tools/qdrant-client.js';
import { ListOptions, ListResult, ListUtils } from '../tools/list-utils.js';
import { ToolResult } from '../types.js';
import { ApiClient } from '../api-client.js';
export class ListDocumentationHandler extends BaseHandler {
protected server: Server;
protected apiClient: ApiClient;
constructor(server: Server, apiClient: ApiClient) {
super(server, apiClient);
this.server = server;
this.apiClient = apiClient;
}
async handle(args: ListOptions): Promise<ToolResult> {
try {
// Ensure Qdrant is initialized
await this.apiClient.qdrant.initializeCollection();
// Set default values
const page = args.page || 1;
const pageSize = args.pageSize || 20;
const sortBy = args.sortBy || 'timestamp';
const sortOrder = args.sortOrder || 'desc';
// Get documents with pagination
const { total, documents } = await this.apiClient.qdrant.listDocuments({
offset: (page - 1) * pageSize,
limit: pageSize,
sortBy,
sortOrder,
});
// Calculate pagination details
const { totalPages } = ListUtils.getPaginationDetails(total, page, pageSize);
// Sort documents if needed
const sortedDocs = ListUtils.sortDocuments(documents, sortBy, sortOrder);
// Group by domain if requested
const groupedDocs = args.groupByDomain
? ListUtils.groupByDomain(sortedDocs)
: [{ documents: sortedDocs }];
// Prepare result
const result: ListResult = {
total,
page,
pageSize,
totalPages,
documents: groupedDocs,
};
// Format as markdown
const markdown = ListUtils.formatAsMarkdown(result);
return {
content: [
{
type: 'text',
text: markdown,
},
],
};
} catch (error) {
return {
content: [
{
type: 'text',
text: `Failed to list documentation: ${(error as Error).message}`,
},
],
isError: true,
};
}
}
}
```
--------------------------------------------------------------------------------
/src/handlers/search-documentation.ts:
--------------------------------------------------------------------------------
```typescript
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import { BaseHandler } from './base-handler.js';
import { QdrantWrapper } from '../tools/qdrant-client.js';
import { EmbeddingService } from '../embeddings.js';
import {
SearchOptions,
SearchResult,
validateSearchOptions,
extractSnippet,
normalizeScore,
formatResultsAsMarkdown,
} from '../tools/search-utils.js';
/** Arguments accepted by the search_documents tool. */
interface SearchDocumentationArgs {
  query: string;           // natural-language search query (must be non-empty)
  options?: SearchOptions; // optional limit / score threshold / filters
}
/**
 * Handler for the search_documents tool: embeds the query, runs a vector
 * similarity search against Qdrant, and renders the hits as markdown.
 */
export class SearchDocumentationHandler extends BaseHandler {
  private qdrant: QdrantWrapper;
  private embeddings: EmbeddingService;

  constructor(
    qdrant: QdrantWrapper,
    embeddings: EmbeddingService,
    ...args: ConstructorParameters<typeof BaseHandler>
  ) {
    super(...args);
    this.qdrant = qdrant;
    this.embeddings = embeddings;
  }

  async handle(args: SearchDocumentationArgs) {
    // Reject empty or whitespace-only queries up front.
    if (!args.query?.trim()) {
      throw new McpError(
        ErrorCode.InvalidRequest,
        'Query string is required'
      );
    }

    // Options are only validated when the caller supplied them.
    if (args.options) {
      validateSearchOptions(args.options);
    }

    try {
      // Embed the query text (progress logging goes to stderr).
      console.error('Generating embeddings for query:', args.query);
      const queryVector = await this.embeddings.generateEmbeddings(args.query);

      console.error('Searching for similar documents...');
      const hits = await this.qdrant.searchSimilar(queryVector, args.options);

      // Shape raw hits into SearchResult records with snippets and 0-1 scores.
      const formatted: SearchResult[] = hits.map((hit) => {
        const metadata = {
          contentType: hit.contentType,
          wordCount: hit.wordCount,
          hasCode: hit.hasCode,
          chunkIndex: hit.chunkIndex,
          totalChunks: hit.totalChunks,
        };
        return {
          url: hit.url,
          title: hit.title,
          domain: hit.domain,
          timestamp: hit.timestamp,
          score: normalizeScore(hit.score),
          snippet: extractSnippet(hit.content),
          metadata,
        };
      });

      return {
        content: [
          {
            type: 'text',
            text: formatResultsAsMarkdown(formatted),
          },
        ],
      };
    } catch (error) {
      console.error('Search error:', error);
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to search documentation: ${error}`
      );
    }
  }
}
```
--------------------------------------------------------------------------------
/src/handlers/test-embeddings.ts:
--------------------------------------------------------------------------------
```typescript
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { BaseHandler } from './base-handler.js';
import { ApiClient } from '../api-client.js';
import { ToolResult } from '../types.js';
import { EmbeddingService } from '../embeddings.js';
// Collection whose vector size must match the configured embedding model.
const COLLECTION_NAME = 'documentation';

/**
 * Handler for the embedding-configuration test tool.
 *
 * Generates an embedding for the supplied text using the requested
 * provider/model, then constructs a new ApiClient with that configuration
 * and (re)initializes the Qdrant collection so its vector size matches.
 *
 * The super-only constructor from the original was redundant and has been
 * removed; BaseHandler's constructor is inherited unchanged.
 *
 * NOTE(review): the freshly constructed ApiClient is only used to call
 * initCollection() and is then discarded — it does not replace the server's
 * active client, despite the original comment saying it would. Confirm
 * whether that is intended.
 */
export class TestEmbeddingsHandler extends BaseHandler {
  async handle(args: any): Promise<ToolResult> {
    if (!args.text || typeof args.text !== 'string') {
      throw new McpError(ErrorCode.InvalidParams, 'Text is required');
    }

    try {
      // Build a throwaway embedding service with the requested configuration.
      const tempEmbeddingService = EmbeddingService.createFromConfig({
        provider: args.provider || 'ollama',
        apiKey: args.apiKey,
        model: args.model
      });

      const embedding = await tempEmbeddingService.generateEmbeddings(args.text);

      const provider = args.provider || 'ollama';
      const model = args.model || (provider === 'ollama' ? 'nomic-embed-text' : 'text-embedding-3-small');

      // Recreate the collection so its vector size matches the tested model.
      const newApiClient = new ApiClient({
        embeddingConfig: {
          provider: args.provider || 'ollama',
          apiKey: args.apiKey,
          model: args.model
        },
        qdrantUrl: process.env.QDRANT_URL,
        qdrantApiKey: process.env.QDRANT_API_KEY
      });
      await newApiClient.initCollection(COLLECTION_NAME);

      return {
        content: [
          {
            type: 'text',
            text: `Successfully configured ${provider} embeddings (${model}).\nVector size: ${embedding.length}\nQdrant collection updated to match new vector size.`,
          },
        ],
      };
    } catch (error) {
      // Test failures are reported through the tool result, not thrown.
      return {
        content: [
          {
            type: 'text',
            text: `Failed to test embeddings: ${error}`,
          },
        ],
        isError: true,
      };
    }
  }
}
// JSON schema for the embedding-test tool's input, advertised to MCP clients.
// `as const` keeps literal types (e.g. the enum values) for type derivation.
export const testEmbeddingsSchema = {
  type: 'object',
  properties: {
    text: {
      type: 'string',
      description: 'Text to generate embeddings for',
    },
    provider: {
      type: 'string',
      description: 'Embedding provider to use (ollama or openai)',
      enum: ['ollama', 'openai'],
      default: 'ollama',
    },
    apiKey: {
      type: 'string',
      description: 'OpenAI API key (required if provider is openai)',
    },
    model: {
      type: 'string',
      description: 'Model to use for embeddings',
    },
  },
  required: ['text'],
} as const;
```
--------------------------------------------------------------------------------
/src/tools/search-utils.ts:
--------------------------------------------------------------------------------
```typescript
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import { DocumentMetadata } from './qdrant-client.js';
/** A single search hit returned to the client. */
export interface SearchResult {
  url: string;
  title: string;
  domain: string;
  timestamp: string;  // document timestamp (parsed with new Date() for display)
  score: number;      // similarity score normalized to [0, 1] (see normalizeScore)
  snippet: string;    // short excerpt from the matched chunk
  metadata: Partial<DocumentMetadata>;
}

/** Optional search parameters; bounds enforced by validateSearchOptions. */
export interface SearchOptions {
  limit?: number;           // maximum results (1-20)
  scoreThreshold?: number;  // minimum similarity (0-1)
  filters?: {
    domain?: string;        // restrict to one domain
    hasCode?: boolean;      // only documents containing code
    after?: string;         // ISO date lower bound
    before?: string;        // ISO date upper bound
  };
}
/**
 * Extracts a snippet of roughly `maxLength` characters from the middle of
 * the content, expanding outward to whole-word boundaries and adding
 * ellipses on any side that was truncated.
 */
export function extractSnippet(content: string, maxLength: number = 300): string {
  // Short content needs no trimming.
  if (content.length <= maxLength) {
    return content;
  }

  // Start from a window of maxLength centred on the middle of the text.
  const center = Math.floor(content.length / 2);
  const halfWindow = Math.floor(maxLength / 2);
  let from = Math.max(0, center - halfWindow);
  let to = Math.min(content.length, center + halfWindow);

  // Grow the window until both edges sit on whitespace (never split a word).
  while (from > 0 && /\S/.test(content[from - 1])) {
    from -= 1;
  }
  while (to < content.length && /\S/.test(content[to])) {
    to += 1;
  }

  const core = content.slice(from, to).trim();
  const prefix = from > 0 ? '...' : '';
  const suffix = to < content.length ? '...' : '';
  return prefix + core + suffix;
}
/**
 * Maps a raw Qdrant cosine-similarity score from [-1, 1] onto [0, 1].
 */
export function normalizeScore(score: number): number {
  // Shift the cosine range up by one, then halve: -1 -> 0, 0 -> 0.5, 1 -> 1.
  const shifted = score + 1;
  return shifted / 2;
}
/**
 * Renders search results as a numbered markdown list separated by
 * horizontal rules. Returns a fixed message when there are no results.
 */
export function formatResultsAsMarkdown(results: SearchResult[]): string {
  if (results.length === 0) {
    return 'No matching documents found.';
  }

  const sections = results.map((result, index) => {
    const matchPercent = (result.score * 100).toFixed(1);
    const dateText = new Date(result.timestamp).toLocaleDateString();
    // Joined with '\n' this reproduces the original template literal exactly
    // (leading and trailing newline included).
    return [
      '',
      `### ${index + 1}. ${result.title} (${matchPercent}% match)`,
      `**URL:** ${result.url}`,
      `**Domain:** ${result.domain}`,
      `**Date:** ${dateText}`,
      result.snippet,
      '',
    ].join('\n');
  });

  return sections.join('\n---\n');
}
/**
 * Validates user-supplied search options, throwing an InvalidRequest
 * McpError on the first violation found. Returns nothing on success.
 */
export function validateSearchOptions(options: SearchOptions): void {
  const { limit, scoreThreshold, filters } = options;

  if (limit !== undefined && (limit < 1 || limit > 20)) {
    throw new McpError(
      ErrorCode.InvalidRequest,
      'Limit must be between 1 and 20'
    );
  }

  if (scoreThreshold !== undefined && (scoreThreshold < 0 || scoreThreshold > 1)) {
    throw new McpError(
      ErrorCode.InvalidRequest,
      'Score threshold must be between 0 and 1'
    );
  }

  // Date filters must at least parse; empty strings are skipped (falsy).
  if (filters?.after && isNaN(Date.parse(filters.after))) {
    throw new McpError(ErrorCode.InvalidRequest, 'Invalid after date format');
  }
  if (filters?.before && isNaN(Date.parse(filters.before))) {
    throw new McpError(ErrorCode.InvalidRequest, 'Invalid before date format');
  }
}
```
--------------------------------------------------------------------------------
/src/tools/list-utils.ts:
--------------------------------------------------------------------------------
```typescript
import { DocumentMetadata } from './qdrant-client.js';
/** Pagination, sorting, and grouping options for listing documents. */
export interface ListOptions {
  page?: number;      // 1-based page number
  pageSize?: number;  // documents per page
  groupByDomain?: boolean;
  sortBy?: 'timestamp' | 'title' | 'domain';
  sortOrder?: 'asc' | 'desc';
}

/** One page of listing results plus pagination bookkeeping. */
export interface ListResult {
  total: number;  // total documents across all pages
  page: number;
  pageSize: number;
  totalPages: number;
  documents: DocumentGroup[];
}

/** Documents optionally grouped under a common domain. */
export interface DocumentGroup {
  domain?: string;  // absent when grouping by domain was not requested
  documents: DocumentMetadata[];
}
/**
 * Static helpers for paginating, sorting, grouping, and rendering the
 * stored-document listing.
 */
export class ListUtils {
  /**
   * Groups documents by domain, preserving first-seen domain order.
   */
  static groupByDomain(documents: DocumentMetadata[]): DocumentGroup[] {
    const groupedMap = new Map<string, DocumentMetadata[]>();
    for (const doc of documents) {
      const existing = groupedMap.get(doc.domain);
      if (existing) {
        existing.push(doc);
      } else {
        groupedMap.set(doc.domain, [doc]);
      }
    }
    return Array.from(groupedMap.entries()).map(([domain, docs]) => ({
      domain,
      documents: docs
    }));
  }

  /**
   * Returns a new array sorted by the given field and direction; the input
   * array is not mutated.
   */
  static sortDocuments(
    documents: DocumentMetadata[],
    sortBy: 'timestamp' | 'title' | 'domain' = 'timestamp',
    sortOrder: 'asc' | 'desc' = 'desc'
  ): DocumentMetadata[] {
    return [...documents].sort((a, b) => {
      let comparison: number;
      switch (sortBy) {
        case 'timestamp':
          comparison = new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime();
          break;
        case 'title':
          comparison = a.title.localeCompare(b.title);
          break;
        case 'domain':
          comparison = a.domain.localeCompare(b.domain);
          break;
        default:
          comparison = 0;
      }
      return sortOrder === 'desc' ? -comparison : comparison;
    });
  }

  /**
   * Renders a ListResult as markdown: a pagination header followed by
   * bulleted document entries, optionally under per-domain headings.
   */
  static formatAsMarkdown(result: ListResult): string {
    const lines: string[] = [];

    lines.push(`# Documentation List`);
    lines.push(`Page ${result.page} of ${result.totalPages} (${result.total} total documents)\n`);

    for (const group of result.documents) {
      // Domain heading is present only when grouping was requested.
      if (group.domain) {
        lines.push(`## ${group.domain}`);
      }
      for (const doc of group.documents) {
        const date = new Date(doc.timestamp).toLocaleDateString();
        lines.push(`- [${doc.title}](${doc.url})`);
        lines.push(` - Added: ${date}`);
        lines.push(` - Type: ${doc.contentType}`);
        lines.push(` - Words: ${doc.wordCount}`);
        if (doc.hasCode) {
          lines.push(` - Contains code snippets`);
        }
        lines.push(``);
      }
    }

    return lines.join('\n');
  }

  /**
   * Calculates pagination details for a collection of `total` documents.
   *
   * Fix over the original: totalPages is clamped to at least 1, so an empty
   * collection yields page 1 with offset 0 instead of totalPages = 0 and a
   * negative offset (-pageSize).
   */
  static getPaginationDetails(
    total: number,
    page: number = 1,
    pageSize: number = 20
  ): { offset: number; limit: number; totalPages: number } {
    const totalPages = Math.max(1, Math.ceil(total / pageSize));
    // Clamp the requested page into [1, totalPages].
    const currentPage = Math.min(Math.max(1, page), totalPages);
    const offset = (currentPage - 1) * pageSize;
    return {
      offset,
      limit: pageSize,
      totalPages
    };
  }
}
```
--------------------------------------------------------------------------------
/src/embeddings.ts:
--------------------------------------------------------------------------------
```typescript
import ollama from 'ollama';
import OpenAI from 'openai';
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
/** Common contract implemented by embedding backends (Ollama, OpenAI). */
export interface EmbeddingProvider {
  generateEmbeddings(text: string): Promise<number[]>;  // embed a single text
  getVectorSize(): number;  // dimensionality of the vectors this provider produces
}
/**
 * Embedding provider backed by a locally running Ollama instance.
 */
export class OllamaProvider implements EmbeddingProvider {
  // Embedding dimensionality of known Ollama models, used by getVectorSize().
  private static readonly MODEL_DIMENSIONS: Record<string, number> = {
    'nomic-embed-text': 768,
    'mxbai-embed-large': 1024,
    'all-minilm': 384,
  };

  private model: string;

  constructor(model: string = 'nomic-embed-text') {
    this.model = model;
  }

  /**
   * Generates an embedding vector for the given text via Ollama.
   * Throws McpError(InternalError) on any provider failure.
   */
  async generateEmbeddings(text: string): Promise<number[]> {
    try {
      console.error('Generating Ollama embeddings for text:', text.substring(0, 50) + '...');
      const response = await ollama.embeddings({
        model: this.model,
        prompt: text
      });
      console.error('Successfully generated Ollama embeddings with size:', response.embedding.length);
      return response.embedding;
    } catch (error) {
      console.error('Ollama embedding error:', error);
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to generate embeddings with Ollama: ${error}`
      );
    }
  }

  /**
   * Returns the embedding dimensionality for the configured model.
   *
   * Fix over the original, which returned a hard-coded 768 regardless of the
   * configured model. Unknown models still fall back to 768
   * (nomic-embed-text's size) — TODO(review): confirm dimensions for any
   * other models used in deployment.
   */
  getVectorSize(): number {
    return OllamaProvider.MODEL_DIMENSIONS[this.model] ?? 768;
  }
}
/**
 * Embedding provider backed by the OpenAI embeddings API.
 */
export class OpenAIProvider implements EmbeddingProvider {
  // Embedding dimensionality of known OpenAI models, used by getVectorSize().
  private static readonly MODEL_DIMENSIONS: Record<string, number> = {
    'text-embedding-3-small': 1536,
    'text-embedding-3-large': 3072,
    'text-embedding-ada-002': 1536,
  };

  private client: OpenAI;
  private model: string;

  constructor(apiKey: string, model: string = 'text-embedding-3-small') {
    this.client = new OpenAI({ apiKey });
    this.model = model;
  }

  /**
   * Generates an embedding vector for the given text via OpenAI.
   * Throws McpError(InternalError) on any provider failure.
   */
  async generateEmbeddings(text: string): Promise<number[]> {
    try {
      console.error('Generating OpenAI embeddings for text:', text.substring(0, 50) + '...');
      const response = await this.client.embeddings.create({
        model: this.model,
        input: text,
      });
      const embedding = response.data[0].embedding;
      console.error('Successfully generated OpenAI embeddings with size:', embedding.length);
      return embedding;
    } catch (error) {
      console.error('OpenAI embedding error:', error);
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to generate embeddings with OpenAI: ${error}`
      );
    }
  }

  /**
   * Returns the embedding dimensionality for the configured model.
   *
   * Fix over the original, which returned a hard-coded 1536 even for
   * text-embedding-3-large (3072-dimensional). Unknown models fall back to
   * 1536 (the size of both default-era models).
   */
  getVectorSize(): number {
    return OpenAIProvider.MODEL_DIMENSIONS[this.model] ?? 1536;
  }
}
/**
 * Thin facade over a concrete EmbeddingProvider, plus a factory that builds
 * the right provider from configuration.
 */
export class EmbeddingService {
  constructor(private readonly provider: EmbeddingProvider) {}

  /** Delegates embedding generation to the underlying provider. */
  async generateEmbeddings(text: string): Promise<number[]> {
    return this.provider.generateEmbeddings(text);
  }

  /** Dimensionality of vectors produced by the underlying provider. */
  getVectorSize(): number {
    return this.provider.getVectorSize();
  }

  /**
   * Builds an EmbeddingService from configuration. Ollama needs no API key;
   * OpenAI requires one and otherwise an McpError(InvalidRequest) is thrown.
   */
  static createFromConfig(config: {
    provider: 'ollama' | 'openai';
    apiKey?: string;
    model?: string;
  }): EmbeddingService {
    if (config.provider === 'ollama') {
      return new EmbeddingService(new OllamaProvider(config.model));
    }

    if (config.provider === 'openai') {
      if (!config.apiKey) {
        throw new McpError(
          ErrorCode.InvalidRequest,
          'OpenAI API key is required'
        );
      }
      return new EmbeddingService(new OpenAIProvider(config.apiKey, config.model));
    }

    throw new McpError(
      ErrorCode.InvalidRequest,
      `Unknown embedding provider: ${config.provider}`
    );
  }
}
```
--------------------------------------------------------------------------------
/src/tools/url-processor.ts:
--------------------------------------------------------------------------------
```typescript
import { URL } from 'url';
/** Error thrown when a URL cannot be parsed or processed. */
export class URLProcessingError extends Error {
  constructor(message: string) {
    super(message);
    this.name = 'URLProcessingError';
  }
}

/** Result of validating and normalizing a URL. */
export interface ProcessedURL {
  originalUrl: string;    // the input string, untouched
  normalizedUrl: string;  // canonical form used as the document key
  domain: string;         // lowercased hostname
  path: string;           // pathname as parsed (not normalized)
  isValid: boolean;       // always true; invalid URLs throw instead
}

export class URLProcessor {
  // Matches an explicit scheme prefix such as "https://" or "ftp://".
  private static readonly SCHEME_PATTERN = /^[a-zA-Z][a-zA-Z\d+\-.]*:\/\//;

  /**
   * Validates and normalizes a URL, extracting key components.
   *
   * Fix over the original: scheme detection used startsWith('http'), which
   * missed uppercase schemes ("HTTP://...", which got a second "https://"
   * prefixed) and matched strings that merely begin with the letters "http".
   * An explicit scheme pattern is used instead; "https://" is assumed only
   * when no scheme is present.
   *
   * @param urlString The URL string to process
   * @returns ProcessedURL object containing normalized URL and metadata
   * @throws URLProcessingError if URL is invalid
   */
  static processURL(urlString: string): ProcessedURL {
    try {
      const trimmedUrl = urlString.trim();

      // Assume https only when no explicit scheme is present.
      const urlWithProtocol = URLProcessor.SCHEME_PATTERN.test(trimmedUrl)
        ? trimmedUrl
        : `https://${trimmedUrl}`;

      const url = new URL(urlWithProtocol);
      const normalizedUrl = this.normalizeURL(url);

      return {
        originalUrl: urlString,
        normalizedUrl,
        domain: url.hostname.toLowerCase(),
        path: url.pathname,
        isValid: true,
      };
    } catch (error) {
      throw new URLProcessingError(
        `Invalid URL "${urlString}": ${(error as Error).message}`
      );
    }
  }

  /**
   * Normalizes a URL: lowercases the host, sorts query parameters, and
   * strips a trailing slash from non-root paths.
   *
   * Fix over the original: the WHATWG URL parser already removes a port
   * that is the default for the scheme, so any port still present is
   * non-default and must be preserved. The original dropped 80/443
   * unconditionally, mangling e.g. https://host:80.
   */
  private static normalizeURL(url: URL): string {
    const hostname = url.hostname.toLowerCase();
    const port = url.port; // already '' for scheme-default ports

    // Deterministic query order so equivalent URLs compare equal.
    const searchParams = new URLSearchParams([...url.searchParams].sort());
    const search = searchParams.toString();

    // Remove trailing slash except for the root path.
    let path = url.pathname;
    if (path.length > 1 && path.endsWith('/')) {
      path = path.slice(0, -1);
    }

    let normalizedUrl = `${url.protocol}//${hostname}`;
    if (port) normalizedUrl += `:${port}`;
    normalizedUrl += path;
    if (search) normalizedUrl += `?${search}`;
    if (url.hash) normalizedUrl += url.hash;
    return normalizedUrl;
  }

  /**
   * Checks whether a URL string is an absolute http(s) URL.
   * @param urlString URL to validate
   * @returns true if the URL parses and uses http: or https:
   */
  static isValidWebPage(urlString: string): boolean {
    try {
      const { protocol } = new URL(urlString);
      return protocol === 'http:' || protocol === 'https:';
    } catch {
      return false;
    }
  }

  /**
   * Extracts the root domain from a URL.
   * @param urlString URL to process
   * @returns Root domain string
   * @throws URLProcessingError for unparseable URLs
   */
  static extractRootDomain(urlString: string): string {
    try {
      const { hostname } = new URL(urlString);
      const parts = hostname.split('.');
      if (parts.length <= 2) return hostname;

      // Heuristic: short second-level + top-level labels imply a compound
      // TLD (co.uk, com.au), so keep three labels in that case.
      const sld = parts[parts.length - 2];
      const tld = parts[parts.length - 1];
      if (sld.length <= 3 && tld.length <= 3 && parts.length > 2) {
        return parts.slice(-3).join('.');
      }
      return parts.slice(-2).join('.');
    } catch {
      throw new URLProcessingError(`Cannot extract domain from invalid URL: ${urlString}`);
    }
  }
}
```
--------------------------------------------------------------------------------
/src/tools/add-documentation.ts:
--------------------------------------------------------------------------------
```typescript
import OpenAI from 'openai';
import { URLProcessor, URLProcessingError } from './url-processor.js';
import { ContentFetcher, ContentFetchError } from './content-fetcher.js';
import { TextChunker } from './text-chunker.js';
import { QdrantWrapper, QdrantError } from './qdrant-client.js';
/** Error carrying the pipeline step that failed while adding a document. */
export class AddDocumentationError extends Error {
  constructor(message: string, public readonly step: string) {
    super(message);
    this.name = 'AddDocumentationError';
  }
}

/** Summary returned after a document has been successfully ingested. */
export interface AddDocumentationResult {
  url: string;        // normalized URL the document was stored under
  title: string;      // title extracted from the fetched content
  chunks: number;     // number of chunks stored
  wordCount: number;  // word count reported by the content fetcher
}

/**
 * End-to-end pipeline for ingesting a document by URL: validate the URL,
 * fetch and chunk the content, embed each chunk with OpenAI, and store the
 * chunks in Qdrant. Re-adding an existing URL replaces the stored version.
 *
 * NOTE(review): this tool embeds with 'text-embedding-ada-002' while
 * src/embeddings.ts defaults to 'text-embedding-3-small'. Both are
 * 1536-dimensional, but mixing models in one collection makes similarity
 * scores inconsistent — confirm which model is intended.
 */
export class AddDocumentationTool {
  private openai: OpenAI;
  private qdrant: QdrantWrapper;

  constructor(openaiApiKey: string, qdrantUrl?: string) {
    if (!openaiApiKey) {
      throw new Error('OpenAI API key is required');
    }
    this.openai = new OpenAI({
      apiKey: openaiApiKey,
    });
    this.qdrant = new QdrantWrapper(qdrantUrl);
  }

  /**
   * Adds a document to the RAG system
   * @param url URL of the document to add
   * @returns Result of the operation
   * @throws URLProcessingError, ContentFetchError, QdrantError, or
   *         AddDocumentationError (with the failing step attached)
   */
  async addDocument(url: string): Promise<AddDocumentationResult> {
    try {
      // Fail fast if the vector store is unreachable.
      const isHealthy = await this.qdrant.isHealthy();
      if (!isHealthy) {
        throw new AddDocumentationError(
          'Qdrant server is not available',
          'health_check'
        );
      }

      // Initialize collection if needed
      await this.qdrant.initializeCollection();

      // Validate and canonicalize the URL; the normalized form is the key.
      const processedUrl = URLProcessor.processURL(url);
      if (!processedUrl.isValid) {
        throw new AddDocumentationError('Invalid URL format', 'url_validation');
      }

      // Replace semantics: drop any previously stored version first.
      const exists = await this.qdrant.documentExists(processedUrl.normalizedUrl);
      if (exists) {
        // Remove existing document before adding new version
        await this.qdrant.removeDocument(processedUrl.normalizedUrl);
      }

      // Fetch content
      const content = await ContentFetcher.fetchContent(processedUrl.normalizedUrl);

      // Chunk content
      const chunks = TextChunker.chunkText(content.content, {
        maxChunkSize: 1500, // Leave room for metadata in context window
        minChunkSize: 100,
        overlap: 200,
        respectCodeBlocks: true,
      });

      // Generate embeddings for each chunk
      const embeddings = await this.generateEmbeddings(
        chunks.map(chunk => chunk.content)
      );

      // Store in Qdrant
      await this.qdrant.storeDocumentChunks(chunks, embeddings, {
        url: processedUrl.normalizedUrl,
        title: content.title,
        domain: processedUrl.domain,
        timestamp: content.timestamp,
        contentType: content.metadata.contentType,
        wordCount: content.metadata.wordCount,
        hasCode: content.metadata.hasCode,
      });

      return {
        url: processedUrl.normalizedUrl,
        title: content.title,
        chunks: chunks.length,
        wordCount: content.metadata.wordCount,
      };
    } catch (error) {
      // Re-throw known pipeline errors unchanged; wrap anything else.
      if (
        error instanceof URLProcessingError ||
        error instanceof ContentFetchError ||
        error instanceof QdrantError ||
        error instanceof AddDocumentationError
      ) {
        throw error;
      }
      throw new AddDocumentationError(
        `Unexpected error: ${(error as Error).message}`,
        'unknown'
      );
    }
  }

  /**
   * Generates embeddings for text chunks using OpenAI's API
   * @param chunks Array of text chunks
   * @returns Array of embeddings
   * @throws AddDocumentationError (step 'embedding_generation') on API failure
   */
  private async generateEmbeddings(chunks: string[]): Promise<number[][]> {
    try {
      // Single batched API call for all chunks.
      const response = await this.openai.embeddings.create({
        model: 'text-embedding-ada-002',
        input: chunks,
      });
      return response.data.map(item => item.embedding);
    } catch (error) {
      throw new AddDocumentationError(
        `Failed to generate embeddings: ${(error as Error).message}`,
        'embedding_generation'
      );
    }
  }
}
```
--------------------------------------------------------------------------------
/src/handlers/add-documentation.ts:
--------------------------------------------------------------------------------
```typescript
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { BaseHandler } from './base-handler.js';
import { ApiClient } from '../api-client.js';
import { DocumentChunk, ToolResult } from '../types.js';
import * as cheerio from 'cheerio';
import crypto from 'crypto';
const COLLECTION_NAME = 'documentation';
const BATCH_SIZE = 100;

/**
 * MCP tool handler that fetches a web page, strips boilerplate, chunks the
 * text, embeds each chunk, and upserts the results into the Qdrant
 * "documentation" collection in batches of BATCH_SIZE.
 */
export class AddDocumentationHandler extends BaseHandler {
  constructor(server: Server, apiClient: ApiClient) {
    super(server, apiClient);
  }

  /**
   * Handles the add-documentation tool call.
   * @param args Tool arguments; requires a string `url`
   * @returns ToolResult reporting success, or an error result on failure
   * @throws McpError InvalidParams when `url` is missing, and for known
   *         Qdrant auth/connection failures
   */
  async handle(args: any): Promise<ToolResult> {
    if (!args.url || typeof args.url !== 'string') {
      throw new McpError(ErrorCode.InvalidParams, 'URL is required');
    }
    try {
      const chunks = await this.fetchAndProcessUrl(args.url);
      // Batch process chunks for better performance
      for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
        const batch = chunks.slice(i, i + BATCH_SIZE);
        const points = await Promise.all(
          batch.map(async (chunk) => {
            const embedding = await this.apiClient.getEmbeddings(chunk.text);
            return {
              id: this.generatePointId(),
              vector: embedding,
              payload: {
                ...chunk,
                _type: 'DocumentChunk' as const,
              } as Record<string, unknown>,
            };
          })
        );
        try {
          await this.apiClient.qdrantClient.upsert(COLLECTION_NAME, {
            wait: true,
            points,
          });
        } catch (error) {
          // Translate common Qdrant transport failures into clearer MCP errors.
          if (error instanceof Error) {
            if (error.message.includes('unauthorized')) {
              throw new McpError(
                ErrorCode.InvalidRequest,
                'Failed to authenticate with Qdrant cloud while adding documents'
              );
            } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) {
              throw new McpError(
                ErrorCode.InternalError,
                'Connection to Qdrant cloud failed while adding documents'
              );
            }
          }
          throw error;
        }
      }
      return {
        content: [
          {
            type: 'text',
            text: `Successfully added documentation from ${args.url} (${chunks.length} chunks processed in ${Math.ceil(chunks.length / BATCH_SIZE)} batches)`,
          },
        ],
      };
    } catch (error) {
      if (error instanceof McpError) {
        throw error;
      }
      return {
        content: [
          {
            type: 'text',
            text: `Failed to add documentation: ${error}`,
          },
        ],
        isError: true,
      };
    }
  }

  /**
   * Renders the page with the shared headless browser, strips scripts and
   * styles, extracts the main text, and splits it into ~1000-char chunks.
   * @param url Page to fetch
   * @returns DocumentChunk array (text + url + title + timestamp)
   * @throws McpError InternalError when the page cannot be fetched
   */
  private async fetchAndProcessUrl(url: string): Promise<DocumentChunk[]> {
    await this.apiClient.initBrowser();
    const page = await this.apiClient.browser.newPage();
    try {
      await page.goto(url, { waitUntil: 'networkidle' });
      const content = await page.content();
      const $ = cheerio.load(content);
      // Remove script tags, style tags, and comments
      $('script').remove();
      $('style').remove();
      $('noscript').remove();
      // Extract main content
      const title = $('title').text() || url;
      const mainContent = $('main, article, .content, .documentation, body').text();
      // Split content into chunks
      const chunks = this.chunkText(mainContent, 1000);
      return chunks.map(chunk => ({
        text: chunk,
        url,
        title,
        timestamp: new Date().toISOString(),
      }));
    } catch (error) {
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to fetch URL ${url}: ${error}`
      );
    } finally {
      await page.close();
    }
  }

  /**
   * Splits text into word-boundary chunks of at least `maxChunkSize`
   * characters (a chunk closes on the word that crosses the threshold).
   */
  private chunkText(text: string, maxChunkSize: number): string[] {
    const words = text.split(/\s+/);
    const chunks: string[] = [];
    let currentChunk: string[] = [];
    // Track the joined length incrementally instead of re-joining the whole
    // chunk on every word (avoids accidental O(n^2) on large pages).
    let currentLength = 0;
    for (const word of words) {
      // +1 accounts for the joining space before every word but the first.
      currentLength += word.length + (currentChunk.length > 0 ? 1 : 0);
      currentChunk.push(word);
      if (currentLength >= maxChunkSize) {
        chunks.push(currentChunk.join(' '));
        currentChunk = [];
        currentLength = 0;
      }
    }
    if (currentChunk.length > 0) {
      chunks.push(currentChunk.join(' '));
    }
    return chunks;
  }

  /**
   * Generates a unique Qdrant point ID.
   * Qdrant only accepts unsigned integers or UUID strings as point IDs;
   * the previous 32-char hex string (randomBytes(16).toString('hex')) is
   * not a valid UUID and is rejected by the server.
   */
  private generatePointId(): string {
    return crypto.randomUUID();
  }
}
```
--------------------------------------------------------------------------------
/src/tools/content-fetcher.ts:
--------------------------------------------------------------------------------
```typescript
import axios, { AxiosError } from 'axios';
import * as cheerio from 'cheerio';
/**
 * Error thrown when fetching or processing a URL's content fails.
 * Carries the offending URL for caller-side reporting.
 */
export class ContentFetchError extends Error {
  public readonly url: string;

  constructor(message: string, url: string) {
    super(message);
    this.url = url;
    this.name = 'ContentFetchError';
  }
}
/**
 * Result of fetching and processing a single URL.
 */
export interface FetchedContent {
  /** URL the content was fetched from. */
  url: string;
  /** Page title (from <title>, first <h1>, or 'Untitled Document'). */
  title: string;
  /** Cleaned main text; code blocks are wrapped in ``` fences. */
  content: string;
  /** ISO-8601 timestamp of when processing completed. */
  timestamp: string;
  metadata: {
    /** Hostname extracted from the URL. */
    domain: string;
    /** MIME type of the fetched resource (currently always 'text/html'). */
    contentType: string;
    /** Number of whitespace-separated words in `content`. */
    wordCount: number;
    /** True if the page appears to contain code (pre/code tags or backticks). */
    hasCode: boolean;
  };
}
/**
 * Fetches HTML pages over HTTP (with retries for transient failures) and
 * extracts their main textual content plus lightweight metadata.
 */
export class ContentFetcher {
  private static readonly TIMEOUT = 30000; // 30 seconds
  private static readonly MAX_RETRIES = 3;
  private static readonly RETRY_DELAY = 1000; // 1 second

  /**
   * Fetches and processes content from a URL.
   * Transient failures are retried up to MAX_RETRIES times with a fixed
   * delay; 404s and unsupported content types fail immediately.
   * @param url URL to fetch content from
   * @returns Processed content with metadata
   * @throws ContentFetchError on permanent failure or retry exhaustion
   */
  static async fetchContent(url: string): Promise<FetchedContent> {
    let retries = 0;
    let lastError: Error | null = null;
    while (retries < this.MAX_RETRIES) {
      try {
        const response = await axios.get(url, {
          timeout: this.TIMEOUT,
          maxRedirects: 5,
          headers: {
            'User-Agent': 'Mozilla/5.0 (compatible; RagDocsBot/1.0)',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9',
            'Accept-Language': 'en-US,en;q=0.5',
          },
        });
        const contentType = response.headers['content-type'] || '';
        if (!contentType.includes('html')) {
          throw new ContentFetchError('Unsupported content type: ' + contentType, url);
        }
        return this.processHtmlContent(url, response.data);
      } catch (error) {
        lastError = error as Error;
        // Non-retryable: our own errors (e.g. unsupported content type) are
        // permanent — rethrow immediately instead of retrying and masking
        // them behind the generic retry-exhausted message below.
        if (error instanceof ContentFetchError) {
          throw error;
        }
        if (error instanceof AxiosError && error.response?.status === 404) {
          throw new ContentFetchError('Page not found', url);
        }
        retries++;
        if (retries < this.MAX_RETRIES) {
          await new Promise(resolve => setTimeout(resolve, this.RETRY_DELAY));
        }
      }
    }
    throw new ContentFetchError(
      `Failed to fetch content after ${this.MAX_RETRIES} attempts: ${lastError?.message}`,
      url
    );
  }

  /**
   * Processes HTML content to extract relevant text and metadata.
   * @param url Original URL
   * @param html Raw HTML content
   * @returns Processed content with metadata
   */
  private static processHtmlContent(url: string, html: string): FetchedContent {
    const $ = cheerio.load(html);
    // Remove unwanted elements
    this.removeUnwantedElements($);
    // Extract title
    const title = $('title').text().trim() ||
      $('h1').first().text().trim() ||
      'Untitled Document';
    // Extract main content
    const mainContent = this.extractMainContent($);
    // Check for code blocks
    const hasCode = $('pre, code').length > 0 ||
      mainContent.includes('```') ||
      /\`[^\`]+\`/.test(mainContent);
    // Count words
    const wordCount = mainContent.split(/\s+/).filter(Boolean).length;
    return {
      url,
      title,
      content: mainContent,
      timestamp: new Date().toISOString(),
      metadata: {
        domain: new URL(url).hostname,
        contentType: 'text/html',
        wordCount,
        hasCode,
      },
    };
  }

  /**
   * Removes non-content elements (scripts, nav, ads, comments, …) in place.
   * @param $ Cheerio instance
   */
  private static removeUnwantedElements($: cheerio.CheerioAPI): void {
    // Remove common non-content elements
    const selectorsToRemove = [
      'script',
      'style',
      'nav',
      'header',
      'footer',
      'iframe',
      '.advertisement',
      '.ads',
      '#comments',
      '.comments',
      '.social-share',
      '.related-posts',
      'aside',
    ];
    $(selectorsToRemove.join(', ')).remove();
  }

  /**
   * Extracts main content from the HTML: tries common content containers,
   * falls back to <body>, and fences pre/code elements with ``` markers.
   * @param $ Cheerio instance
   * @returns Extracted and cleaned content
   */
  private static extractMainContent($: cheerio.CheerioAPI): string {
    // Try to find main content container
    const mainSelectors = [
      'article',
      'main',
      '.main-content',
      '#main-content',
      '.post-content',
      '.article-content',
      '.entry-content',
    ];
    let $content = $();
    for (const selector of mainSelectors) {
      $content = $(selector);
      if ($content.length > 0) break;
    }
    // Fallback to body if no main content container found
    if ($content.length === 0) {
      $content = $('body');
    }
    // Extract text content
    const text = $content
      .find('h1, h2, h3, h4, h5, h6, p, li, pre, code')
      .map((_, el) => {
        const $el = $(el);
        // Preserve code blocks
        if ($el.is('pre, code')) {
          return '\n```\n' + $el.text() + '\n```\n';
        }
        return $el.text();
      })
      .get()
      .join('\n')
      .trim();
    // Clean up the text
    return this.cleanText(text);
  }

  /**
   * Cleans extracted text content while preserving line structure.
   * @param text Raw text content
   * @returns Cleaned text
   */
  private static cleanText(text: string): string {
    return text
      .replace(/[\r\n]+/g, '\n')   // Normalize line endings
      .replace(/\n\s+\n/g, '\n\n') // Remove excess whitespace between paragraphs
      // Only collapse spaces/tabs here: the previous /\s+/ also matched '\n',
      // flattening the text to one line and defeating the split below.
      .replace(/[ \t]+/g, ' ')
      .split('\n')                 // Split into lines
      .map(line => line.trim())    // Trim each line
      .filter(Boolean)             // Remove empty lines
      .join('\n')                  // Rejoin with newlines
      .trim();                     // Final trim
  }
}
```
--------------------------------------------------------------------------------
/src/tools/text-chunker.ts:
--------------------------------------------------------------------------------
```typescript
/** Tunables controlling how text is split into chunks. */
export interface ChunkOptions {
  /** Hard upper bound (chars) before a new chunk is started. */
  maxChunkSize: number;
  /** A chunk is only closed early once it reaches this size (chars). */
  minChunkSize: number;
  /** Approximate number of characters carried over between chunks. */
  overlap: number;
  /** Keep fenced code blocks as single chunks when they fit. */
  respectCodeBlocks?: boolean;
}

/** One chunk of the input text plus its location metadata. */
export interface TextChunk {
  content: string;
  /** Sequential chunk index within the document. */
  index: number;
  metadata: {
    startPosition: number;
    endPosition: number;
    isCodeBlock?: boolean;
  };
}

/**
 * Splits text into overlapping chunks, respecting sentence boundaries and
 * (optionally) keeping fenced ``` code blocks intact.
 */
export class TextChunker {
  private static readonly DEFAULT_OPTIONS: ChunkOptions = {
    maxChunkSize: 1000,
    minChunkSize: 100,
    overlap: 200,
    respectCodeBlocks: true,
  };

  /**
   * Splits text into chunks while preserving context and natural boundaries.
   * @param text Text to split into chunks
   * @param options Chunking options (merged over defaults and validated)
   * @returns Array of text chunks with metadata
   * @throws Error when the merged options are inconsistent (see validateOptions)
   */
  static chunkText(text: string, options?: Partial<ChunkOptions>): TextChunk[] {
    // Run options through validation — previously validateOptions existed
    // but was never called, so inconsistent options were silently accepted.
    const opts = this.validateOptions(options ?? {});
    const chunks: TextChunk[] = [];
    // First, separate code blocks from regular text
    const segments = this.separateCodeBlocks(text);
    let currentPosition = 0;
    let chunkIndex = 0;
    for (const segment of segments) {
      if (segment.isCodeBlock && opts.respectCodeBlocks) {
        // Keep code blocks as single chunks if they're not too large
        if (segment.content.length <= opts.maxChunkSize * 1.5) {
          chunks.push({
            content: segment.content,
            index: chunkIndex++,
            metadata: {
              startPosition: currentPosition,
              endPosition: currentPosition + segment.content.length,
              isCodeBlock: true,
            },
          });
          currentPosition += segment.content.length;
          continue;
        }
      }
      // Process regular text or large code blocks
      const segmentChunks = this.chunkSegment(
        segment.content,
        opts,
        currentPosition,
        chunkIndex,
        segment.isCodeBlock
      );
      chunks.push(...segmentChunks);
      chunkIndex += segmentChunks.length;
      currentPosition += segment.content.length;
    }
    return chunks;
  }

  /**
   * Separates fenced ``` code blocks from regular text.
   * @param text Input text
   * @returns Array of text segments with code block flags
   */
  private static separateCodeBlocks(text: string): Array<{ content: string; isCodeBlock: boolean }> {
    const segments: Array<{ content: string; isCodeBlock: boolean }> = [];
    const codeBlockRegex = /```[\s\S]*?```/g;
    let lastIndex = 0;
    let match: RegExpExecArray | null;
    while ((match = codeBlockRegex.exec(text)) !== null) {
      // Add text before code block
      if (match.index > lastIndex) {
        segments.push({
          content: text.slice(lastIndex, match.index),
          isCodeBlock: false,
        });
      }
      // Add code block
      segments.push({
        content: match[0],
        isCodeBlock: true,
      });
      lastIndex = match.index + match[0].length;
    }
    // Add remaining text
    if (lastIndex < text.length) {
      segments.push({
        content: text.slice(lastIndex),
        isCodeBlock: false,
      });
    }
    return segments;
  }

  /**
   * Chunks a single segment of text, carrying ~overlap/10 words of the
   * previous chunk into the next one for context.
   * @param text Text segment to chunk
   * @param options Chunking options
   * @param startPosition Starting position in original text
   * @param startIndex Starting chunk index
   * @param isCodeBlock Whether this is a code block
   * @returns Array of chunks
   */
  private static chunkSegment(
    text: string,
    options: ChunkOptions,
    startPosition: number,
    startIndex: number,
    isCodeBlock: boolean
  ): TextChunk[] {
    const chunks: TextChunk[] = [];
    let currentChunk = '';
    let currentPosition = 0;
    // Split into sentences/paragraphs first
    const blocks = isCodeBlock
      ? [text] // Keep code blocks together
      : text
          .split(/(?<=\.|\?|\!|\n)\s+/)
          .filter(Boolean)
          .map(block => block.trim());
    for (const block of blocks) {
      // If adding this block would exceed max size, start new chunk
      if (
        currentChunk &&
        currentChunk.length + block.length > options.maxChunkSize &&
        currentChunk.length >= options.minChunkSize
      ) {
        chunks.push({
          content: currentChunk,
          index: startIndex + chunks.length,
          metadata: {
            startPosition: startPosition + currentPosition - currentChunk.length,
            endPosition: startPosition + currentPosition,
            isCodeBlock,
          },
        });
        // Start new chunk with overlap
        const words = currentChunk.split(/\s+/);
        const overlapWords = words.slice(-Math.ceil(options.overlap / 10)); // Approximate words for overlap
        currentChunk = overlapWords.join(' ') + ' ' + block;
      } else {
        currentChunk = currentChunk
          ? currentChunk + ' ' + block
          : block;
      }
      currentPosition += block.length + 1; // +1 for the space
    }
    // Add final chunk if not empty
    if (currentChunk) {
      chunks.push({
        content: currentChunk,
        index: startIndex + chunks.length,
        metadata: {
          startPosition: startPosition + currentPosition - currentChunk.length,
          endPosition: startPosition + currentPosition,
          isCodeBlock,
        },
      });
    }
    return chunks;
  }

  /**
   * Validates chunk options merged over defaults.
   * @param options User-provided options
   * @returns Validated options
   * @throws Error when sizes are non-positive, max < min, or overlap >= max
   */
  private static validateOptions(options: Partial<ChunkOptions>): ChunkOptions {
    const opts = { ...this.DEFAULT_OPTIONS, ...options };
    if (opts.maxChunkSize < opts.minChunkSize) {
      throw new Error('maxChunkSize must be greater than minChunkSize');
    }
    if (opts.overlap >= opts.maxChunkSize) {
      throw new Error('overlap must be less than maxChunkSize');
    }
    if (opts.minChunkSize <= 0 || opts.maxChunkSize <= 0 || opts.overlap < 0) {
      throw new Error('chunk sizes and overlap must be positive numbers');
    }
    return opts;
  }
}
```
--------------------------------------------------------------------------------
/src/api-client.ts:
--------------------------------------------------------------------------------
```typescript
import { QdrantClient } from '@qdrant/js-client-rest';
import { chromium } from 'playwright';
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import { EmbeddingService } from './embeddings.js';
import { QdrantWrapper } from './tools/qdrant-client.js';
import { Document } from './types.js';
/** Shape of the collection configuration returned by Qdrant. */
export interface QdrantCollectionConfig {
  params: {
    vectors: {
      /** Dimensionality of stored vectors. */
      size: number;
      /** Distance metric name (e.g. 'Cosine'). */
      distance: string;
    };
  };
}

/** Subset of Qdrant's collection info used to verify vector size. */
export interface QdrantCollectionInfo {
  config: QdrantCollectionConfig;
}
/**
 * Facade over the embedding service, the Qdrant wrapper, and a headless
 * browser used for page rendering. Also manages the lifecycle of the
 * Qdrant collection (creation, vector-size verification, recreation).
 */
export class ApiClient {
  qdrantClient: QdrantClient;
  private embeddingService: EmbeddingService;
  readonly qdrant: QdrantWrapper;
  // Playwright Browser instance; lazily created by initBrowser().
  browser: any;

  constructor(config: {
    embeddingConfig: {
      provider: 'ollama' | 'openai';
      apiKey?: string;
      model?: string;
    };
    qdrantUrl?: string;
    qdrantApiKey?: string;
  }) {
    this.embeddingService = EmbeddingService.createFromConfig(config.embeddingConfig);
    this.qdrant = new QdrantWrapper(config.qdrantUrl, config.qdrantApiKey);
    this.qdrantClient = this.qdrant.client;
  }

  /** Lazily launches the headless browser on first use. */
  async initBrowser() {
    if (!this.browser) {
      this.browser = await chromium.launch();
    }
  }

  /**
   * Closes the browser if one was launched. Clears the reference so a
   * later initBrowser() can relaunch — previously the closed instance was
   * kept, leaving the client unusable after cleanup().
   */
  async cleanup() {
    if (this.browser) {
      await this.browser.close();
      this.browser = undefined;
    }
  }

  /** Generates an embedding vector for the given text. */
  async getEmbeddings(text: string): Promise<number[]> {
    return this.embeddingService.generateEmbeddings(text);
  }

  get embeddings(): EmbeddingService {
    return this.embeddingService;
  }

  /**
   * Ensures the named collection exists with the vector size required by
   * the current embedding model, recreating it on mismatch.
   * @throws McpError with a specific message for auth/connection failures
   */
  async initCollection(collectionName: string) {
    try {
      const collections = await this.qdrantClient.getCollections();
      const exists = collections.collections.some(c => c.name === collectionName);
      const requiredVectorSize = this.embeddingService.getVectorSize();
      if (!exists) {
        console.error(`Creating new collection with vector size ${requiredVectorSize}`);
        await this.createCollection(collectionName, requiredVectorSize);
        return;
      }
      // Verify vector size of existing collection
      const collectionInfo = await this.qdrantClient.getCollection(collectionName) as QdrantCollectionInfo;
      const currentVectorSize = collectionInfo.config?.params?.vectors?.size;
      if (!currentVectorSize) {
        console.error('Could not determine current vector size, recreating collection...');
        await this.recreateCollection(collectionName, requiredVectorSize);
        return;
      }
      if (currentVectorSize !== requiredVectorSize) {
        console.error(`Vector size mismatch: collection=${currentVectorSize}, required=${requiredVectorSize}`);
        await this.recreateCollection(collectionName, requiredVectorSize);
      }
    } catch (error) {
      if (error instanceof Error) {
        if (error.message.includes('unauthorized')) {
          throw new McpError(
            ErrorCode.InvalidRequest,
            'Failed to authenticate with Qdrant. Please check your API key.'
          );
        } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) {
          throw new McpError(
            ErrorCode.InternalError,
            'Failed to connect to Qdrant. Please check your QDRANT_URL.'
          );
        }
      }
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to initialize Qdrant collection: ${error}`
      );
    }
  }

  /** Creates the collection and the payload indexes used for filtering. */
  private async createCollection(collectionName: string, vectorSize: number) {
    await this.qdrantClient.createCollection(collectionName, {
      vectors: {
        size: vectorSize,
        distance: 'Cosine',
      },
      optimizers_config: {
        default_segment_number: 2,
        memmap_threshold: 20000,
      },
      replication_factor: 2,
    });
    // Create indexes for efficient filtering
    await this.qdrantClient.createPayloadIndex(collectionName, {
      field_name: 'url',
      field_schema: 'keyword',
    });
    await this.qdrantClient.createPayloadIndex(collectionName, {
      field_name: 'timestamp',
      field_schema: 'datetime',
    });
  }

  /** Drops and recreates the collection (destroys existing data). */
  private async recreateCollection(collectionName: string, vectorSize: number) {
    try {
      console.error('Recreating collection with new vector size...');
      await this.qdrantClient.deleteCollection(collectionName);
      await this.createCollection(collectionName, vectorSize);
      console.error(`Collection recreated with new vector size ${vectorSize}`);
    } catch (error) {
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to recreate collection: ${error}`
      );
    }
  }

  /** Returns true when the Qdrant server responds to a collections listing. */
  async isHealthy(): Promise<boolean> {
    try {
      await this.qdrantClient.getCollections();
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Adds a single document: rejects duplicates, embeds the content, and
   * stores it as one chunk with derived metadata.
   * @throws McpError InvalidRequest when the URL already exists,
   *         InternalError for any other failure
   */
  async addDocument(doc: Document): Promise<void> {
    try {
      // Check if document already exists
      if (await this.qdrant.documentExists(doc.url)) {
        throw new McpError(
          ErrorCode.InvalidRequest,
          `Document with URL ${doc.url} already exists`
        );
      }
      // Generate embeddings for the content
      const embedding = await this.embeddingService.generateEmbeddings(doc.content);
      // Store document in Qdrant
      await this.qdrant.storeDocumentChunks(
        [{
          content: doc.content,
          index: 0,
          metadata: {
            startPosition: 0,
            endPosition: doc.content.length,
            isCodeBlock: /```/.test(doc.content)
          }
        }],
        [embedding],
        {
          url: doc.url,
          title: doc.metadata.title || '',
          domain: new URL(doc.url).hostname,
          timestamp: new Date().toISOString(),
          contentType: doc.metadata.contentType || 'text/plain',
          wordCount: doc.content.split(/\s+/).length,
          hasCode: /```|\bfunction\b|\bclass\b|\bconst\b|\blet\b|\bvar\b/.test(doc.content),
        }
      );
    } catch (error) {
      // Preserve intentional MCP errors (e.g. the duplicate-URL
      // InvalidRequest above) instead of re-wrapping them as InternalError
      // and losing the error code callers rely on.
      if (error instanceof McpError) {
        throw error;
      }
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to add document: ${error}`
      );
    }
  }

  /** Removes a document (all chunks) by URL. */
  async deleteDocument(url: string): Promise<void> {
    try {
      await this.qdrant.removeDocument(url);
    } catch (error) {
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to delete document: ${error}`
      );
    }
  }
}
```
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
```typescript
#!/usr/bin/env node
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { CallToolRequestSchema, ListToolsRequestSchema, McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import axios from 'axios';
import { ApiClient } from './api-client.js';
import { SearchDocumentationHandler } from './handlers/search-documentation.js';
import { ListDocumentationHandler } from './handlers/list-documentation.js';
import { ListOptions } from './tools/list-utils.js';
import { Document } from './types.js';
// Force using IP address to avoid hostname resolution issues
const QDRANT_URL = process.env.QDRANT_URL || 'http://127.0.0.1:6333';
const QDRANT_API_KEY = process.env.QDRANT_API_KEY;
const EMBEDDING_PROVIDER = process.env.EMBEDDING_PROVIDER || 'ollama';
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
// Test connection with direct axios call first, so startup fails fast with
// a clear error before any client objects are constructed.
// NOTE: all logging goes to stderr — stdout is reserved for the MCP stdio
// transport.
try {
  const response = await axios.get(`${QDRANT_URL}/collections`);
  console.error('Successfully connected to Qdrant:', response.data);
} catch (error) {
  console.error('Failed to connect to Qdrant:', error);
  throw new McpError(
    ErrorCode.InternalError,
    'Failed to establish initial connection to Qdrant server'
  );
}
// Shared client used by all tool handlers. Embeddings default to the local
// Ollama model; OpenAI is used only when EMBEDDING_PROVIDER=openai.
const client = new ApiClient({
  qdrantUrl: QDRANT_URL,
  qdrantApiKey: QDRANT_API_KEY,
  embeddingConfig: {
    provider: EMBEDDING_PROVIDER as 'ollama' | 'openai',
    apiKey: OPENAI_API_KEY,
    model: EMBEDDING_PROVIDER === 'ollama' ? 'nomic-embed-text' : 'text-embedding-3-small'
  }
});
try {
  // Initialize Qdrant collection
  await client.qdrant.initializeCollection();
  console.error('Successfully initialized Qdrant collection');
} catch (error) {
  console.error('Failed to initialize Qdrant collection:', error);
  throw error;
}
/**
 * MCP server exposing the RAG documentation tools (add_document,
 * search_documents, delete_document, list_documents) over stdio.
 * Tool schemas are declared in setupToolHandlers and dispatched to the
 * module-level `client` and per-tool handler classes.
 */
class RagDocsServer {
  private server: Server;
  constructor() {
    this.server = new Server(
      {
        name: 'ragdocs',
        version: '0.1.0',
      },
      {
        capabilities: {
          tools: {},
        },
      }
    );
    this.setupToolHandlers();
    // Log MCP-level errors to stderr (stdout carries the protocol stream).
    this.server.onerror = (error) => console.error('[MCP Error]', error);
  }
  // Registers the tool list (with JSON schemas) and the call dispatcher.
  private setupToolHandlers() {
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: [
        {
          name: 'add_document',
          description: 'Add a document to the RAG system',
          inputSchema: {
            type: 'object',
            properties: {
              url: { type: 'string', description: 'Document URL' },
              content: { type: 'string', description: 'Document content' },
              metadata: {
                type: 'object',
                properties: {
                  title: { type: 'string', description: 'Document title' },
                  contentType: { type: 'string', description: 'Content type (e.g., text/plain, text/markdown)' },
                },
                additionalProperties: true,
              },
            },
            required: ['url', 'content'],
          },
        },
        {
          name: 'search_documents',
          description: 'Search for documents using semantic similarity',
          inputSchema: {
            type: 'object',
            properties: {
              query: {
                type: 'string',
                description: 'Natural language search query'
              },
              options: {
                type: 'object',
                description: 'Search options',
                properties: {
                  limit: {
                    type: 'number',
                    description: 'Maximum number of results (1-20)',
                    minimum: 1,
                    maximum: 20
                  },
                  scoreThreshold: {
                    type: 'number',
                    description: 'Minimum similarity score (0-1)',
                    minimum: 0,
                    maximum: 1
                  },
                  filters: {
                    type: 'object',
                    description: 'Optional filters',
                    properties: {
                      domain: {
                        type: 'string',
                        description: 'Filter by domain'
                      },
                      hasCode: {
                        type: 'boolean',
                        description: 'Filter for documents containing code'
                      },
                      after: {
                        type: 'string',
                        description: 'Filter for documents after date (ISO format)'
                      },
                      before: {
                        type: 'string',
                        description: 'Filter for documents before date (ISO format)'
                      }
                    }
                  }
                }
              }
            },
            required: ['query'],
          },
        },
        {
          name: 'delete_document',
          description: 'Delete a document from the RAG system',
          inputSchema: {
            type: 'object',
            properties: {
              url: { type: 'string', description: 'Document URL to delete' },
            },
            required: ['url'],
          },
        },
        {
          name: 'list_documents',
          description: 'List all stored documents with pagination and grouping options',
          inputSchema: {
            type: 'object',
            properties: {
              page: {
                type: 'number',
                description: 'Page number (default: 1)',
                minimum: 1
              },
              pageSize: {
                type: 'number',
                description: 'Number of documents per page (default: 20)',
                minimum: 1,
                maximum: 100
              },
              groupByDomain: {
                type: 'boolean',
                description: 'Group documents by domain (default: false)'
              },
              sortBy: {
                type: 'string',
                description: 'Sort field (default: timestamp)',
                enum: ['timestamp', 'title', 'domain']
              },
              sortOrder: {
                type: 'string',
                description: 'Sort order (default: desc)',
                enum: ['asc', 'desc']
              }
            }
          }
        },
      ],
    }));
    // Dispatch tool calls by name; unexpected errors are converted into an
    // isError tool result rather than crashing the server.
    this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
      try {
        switch (request.params.name) {
          case 'add_document': {
            // Validate required string fields before building the Document.
            const args = request.params.arguments as Record<string, unknown>;
            if (!args || typeof args.url !== 'string' || typeof args.content !== 'string') {
              throw new Error('Invalid document format: url and content must be strings');
            }
            const doc: Document = {
              url: args.url,
              content: args.content,
              metadata: (args.metadata as Record<string, unknown>) || {}
            };
            await client.addDocument(doc);
            return {
              content: [{ type: 'text', text: `Document ${doc.url} added successfully` }],
            };
          }
          case 'search_documents': {
            const { query, options } = request.params.arguments as {
              query: string;
              options?: {
                limit?: number;
                scoreThreshold?: number;
                filters?: {
                  domain?: string;
                  hasCode?: boolean;
                  after?: string;
                  before?: string;
                };
              };
            };
            // Delegate semantic search to the dedicated handler.
            const searchHandler = new SearchDocumentationHandler(
              client.qdrant,
              client.embeddings,
              this.server,
              client
            );
            return await searchHandler.handle({ query, options });
          }
          case 'delete_document': {
            const { url } = request.params.arguments as { url: string };
            await client.deleteDocument(url);
            return {
              content: [{ type: 'text', text: `Document ${url} deleted successfully` }],
            };
          }
          case 'list_documents': {
            const args = request.params.arguments as ListOptions;
            const listHandler = new ListDocumentationHandler(this.server, client);
            return await listHandler.handle(args || {});
          }
          default:
            throw new Error(`Unknown tool: ${request.params.name}`);
        }
      } catch (error) {
        const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
        console.error('[Tool Error]', errorMessage);
        return {
          content: [{ type: 'text', text: `Error: ${errorMessage}` }],
          isError: true,
        };
      }
    });
  }
  // Connects the server to the stdio transport and keeps it running.
  async run() {
    const transport = new StdioServerTransport();
    await this.server.connect(transport);
    console.error('RagDocs MCP server running on stdio');
  }
}
// Start the MCP server; startup failures are logged rather than rethrown.
const server = new RagDocsServer();
server.run().catch(console.error);
```
--------------------------------------------------------------------------------
/src/tools/qdrant-client.ts:
--------------------------------------------------------------------------------
```typescript
import { QdrantClient } from '@qdrant/js-client-rest';
import { TextChunk } from './text-chunker.js';
/** Payload metadata stored alongside each document chunk in Qdrant. */
export interface DocumentMetadata {
  /** Source document URL (shared by all of the document's chunks). */
  url: string;
  title: string;
  /** Hostname of the source URL. */
  domain: string;
  /** ISO-8601 timestamp of when the document was processed. */
  timestamp: string;
  contentType: string;
  wordCount: number;
  hasCode: boolean;
  /** Zero-based position of this chunk within the document. */
  chunkIndex: number;
  /** Total number of chunks the document was split into. */
  totalChunks: number;
}
/** Error raised when an operation against the Qdrant server fails. */
export class QdrantError extends Error {
  constructor(message: string) {
    super(message);
    this.name = 'QdrantError';
  }
}
/**
 * Thin wrapper around the Qdrant REST client for the 'documentation'
 * collection: initialization, chunk storage, existence checks, deletion,
 * listing, and vector-similarity search.
 */
export class QdrantWrapper {
  public client: QdrantClient;
  private readonly collectionName = 'documentation';
  private readonly vectorSize = 768; // Ollama nomic-embed-text size

  constructor(url?: string, apiKey?: string) {
    this.client = new QdrantClient({
      // Default to localhost, matching index.ts and the documented local
      // Docker setup. The previous fallback ('http://10.1.1.199:6333') was
      // a hard-coded private LAN address left over from development.
      url: url || 'http://127.0.0.1:6333',
      apiKey: apiKey,
      timeout: 10000 // Add timeout to help debug connection issues
    });
  }

  /**
   * Initializes the Qdrant collection if it doesn't exist, including the
   * payload indexes used for filtering (url, domain, timestamp).
   * @throws QdrantError when initialization fails
   */
  async initializeCollection(): Promise<void> {
    try {
      const collections = await this.client.getCollections();
      const exists = collections.collections.some(c => c.name === this.collectionName);
      if (!exists) {
        await this.client.createCollection(this.collectionName, {
          vectors: {
            size: this.vectorSize,
            distance: 'Cosine',
          },
          optimizers_config: {
            default_segment_number: 2,
          },
          replication_factor: 1,
        });
        // Create indexes for efficient filtering
        await this.client.createPayloadIndex(this.collectionName, {
          field_name: 'url',
          field_schema: 'keyword',
        });
        await this.client.createPayloadIndex(this.collectionName, {
          field_name: 'domain',
          field_schema: 'keyword',
        });
        await this.client.createPayloadIndex(this.collectionName, {
          field_name: 'timestamp',
          field_schema: 'datetime',
        });
      }
    } catch (error) {
      console.error('Qdrant initialization error:', error);
      if (error instanceof Error) {
        console.error('Error details:', {
          name: error.name,
          message: error.message,
          stack: error.stack
        });
      }
      throw new QdrantError(
        `Failed to initialize Qdrant collection: ${error instanceof Error ? error.message : String(error)}`
      );
    }
  }

  /**
   * Stores document chunks in the Qdrant collection. Point IDs are derived
   * from (url, chunk index), so re-adding the same URL overwrites in place.
   * @param chunks Text chunks to store
   * @param embeddings Corresponding embeddings for each chunk
   * @param metadata Document metadata
   * @throws QdrantError on length mismatch or upsert failure
   */
  async storeDocumentChunks(
    chunks: TextChunk[],
    embeddings: number[][],
    metadata: Omit<DocumentMetadata, 'chunkIndex' | 'totalChunks'>
  ): Promise<void> {
    if (chunks.length !== embeddings.length) {
      throw new QdrantError('Number of chunks does not match number of embeddings');
    }
    try {
      const points = chunks.map((chunk, index) => ({
        id: this.generatePointId(metadata.url, chunk.index),
        vector: embeddings[index],
        payload: {
          ...metadata,
          content: chunk.content,
          chunkIndex: chunk.index,
          totalChunks: chunks.length,
          chunkMetadata: chunk.metadata,
        },
      }));
      await this.client.upsert(this.collectionName, {
        wait: true,
        points,
      });
    } catch (error) {
      throw new QdrantError(
        `Failed to store document chunks: ${(error as Error).message}`
      );
    }
  }

  /**
   * Checks if a document already exists in the collection.
   * @param url Document URL
   * @returns true if document exists
   */
  async documentExists(url: string): Promise<boolean> {
    try {
      const response = await this.client.scroll(this.collectionName, {
        filter: {
          must: [
            {
              key: 'url',
              match: {
                value: url,
              },
            },
          ],
        },
        limit: 1,
      });
      return response.points.length > 0;
    } catch (error) {
      throw new QdrantError(
        `Failed to check document existence: ${(error as Error).message}`
      );
    }
  }

  /**
   * Removes a document and all its chunks from the collection.
   * @param url Document URL
   */
  async removeDocument(url: string): Promise<void> {
    try {
      await this.client.delete(this.collectionName, {
        filter: {
          must: [
            {
              key: 'url',
              match: {
                value: url,
              },
            },
          ],
        },
        wait: true,
      });
    } catch (error) {
      throw new QdrantError(
        `Failed to remove document: ${(error as Error).message}`
      );
    }
  }

  /**
   * Generates a deterministic point ID for a chunk so that re-ingesting a
   * URL upserts over its previous points.
   * NOTE(review): this folds the string into a 32-bit hash and takes
   * Math.abs, so distinct (url, chunk) pairs can collide and silently
   * overwrite each other; changing the scheme would orphan existing data,
   * so it is left as-is but worth migrating to a UUID derivation.
   * @param url Document URL
   * @param chunkIndex Chunk index
   * @returns Unique point ID
   */
  private generatePointId(url: string, chunkIndex: number): number {
    // Create a hash of the URL + chunk index
    const str = `${url}:${chunkIndex}`;
    let hash = 0;
    for (let i = 0; i < str.length; i++) {
      const char = str.charCodeAt(i);
      hash = ((hash << 5) - hash) + char;
      hash = hash & hash; // Convert to 32-bit integer
    }
    return Math.abs(hash);
  }

  /**
   * Gets the health status of the Qdrant server.
   * @returns true if server is healthy
   */
  async isHealthy(): Promise<boolean> {
    try {
      await this.client.getCollections();
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Lists all documents with pagination support. Only each document's
   * chunk 0 is returned to avoid per-chunk duplicates.
   * NOTE(review): `sortBy`/`sortOrder` are accepted but not applied here,
   * and Qdrant's scroll `offset` is a point-ID cursor rather than a numeric
   * skip — confirm pagination behavior against the Qdrant scroll API.
   * @param options Listing options including pagination and filtering
   * @returns Array of document metadata with pagination info
   */
  async listDocuments(options: {
    offset?: number;
    limit?: number;
    domain?: string;
    sortBy?: 'timestamp' | 'title' | 'domain';
    sortOrder?: 'asc' | 'desc';
  } = {}): Promise<{ total: number; documents: DocumentMetadata[] }> {
    const filter: any = {
      must: [
        {
          key: 'chunkIndex',
          match: { value: 0 }, // Only get first chunk to avoid duplicates
        },
      ],
    };
    if (options.domain) {
      filter.must.push({
        key: 'domain',
        match: { value: options.domain },
      });
    }
    try {
      // Get total count first
      const countResponse = await this.client.count(this.collectionName, {
        filter,
      });
      // Then get paginated results
      const response = await this.client.scroll(this.collectionName, {
        filter,
        limit: options.limit || 20,
        offset: options.offset || 0,
        with_payload: true,
        with_vector: false,
      });
      const documents = response.points.map(point => {
        const payload = point.payload as any;
        return {
          url: String(payload.url),
          title: String(payload.title),
          domain: String(payload.domain),
          timestamp: String(payload.timestamp),
          contentType: String(payload.contentType),
          wordCount: Number(payload.wordCount),
          hasCode: Boolean(payload.hasCode),
          chunkIndex: Number(payload.chunkIndex),
          totalChunks: Number(payload.totalChunks),
        };
      });
      return {
        total: countResponse.count,
        documents,
      };
    } catch (error) {
      throw new QdrantError(
        `Failed to list documents: ${(error as Error).message}`
      );
    }
  }

  /**
   * Performs a semantic search using vector similarity.
   * @param queryVector Query embedding vector
   * @param options Search options (limit, score threshold, payload filters)
   * @returns Array of search results with scores
   * @throws QdrantError on search failure or malformed result payloads
   */
  async searchSimilar(
    queryVector: number[],
    options: {
      limit?: number;
      scoreThreshold?: number;
      filters?: {
        domain?: string;
        hasCode?: boolean;
        after?: string;
        before?: string;
      };
    } = {}
  ): Promise<Array<DocumentMetadata & { score: number; content: string }>> {
    const limit = options.limit || 5;
    const scoreThreshold = options.scoreThreshold || 0.7;
    const filter: any = { must: [] };
    // Add filters if specified
    if (options.filters?.domain) {
      filter.must.push({
        key: 'domain',
        match: { value: options.filters.domain },
      });
    }
    if (options.filters?.hasCode !== undefined) {
      filter.must.push({
        key: 'hasCode',
        match: { value: options.filters.hasCode },
      });
    }
    if (options.filters?.after) {
      filter.must.push({
        key: 'timestamp',
        range: { gte: options.filters.after },
      });
    }
    if (options.filters?.before) {
      filter.must.push({
        key: 'timestamp',
        range: { lte: options.filters.before },
      });
    }
    try {
      const response = await this.client.search(this.collectionName, {
        vector: queryVector,
        limit: Math.ceil(limit * 1.5), // Request extra results for post-filtering
        score_threshold: scoreThreshold,
        filter: filter.must.length > 0 ? filter : undefined,
        with_payload: true,
      });
      return response
        .map(hit => {
          const payload = hit.payload as any;
          if (!payload || typeof payload !== 'object') {
            throw new QdrantError('Invalid payload structure in search result');
          }
          // Extract and validate required fields
          const result = {
            score: hit.score || 0,
            url: String(payload.url),
            title: String(payload.title),
            domain: String(payload.domain),
            timestamp: String(payload.timestamp),
            contentType: String(payload.contentType),
            wordCount: Number(payload.wordCount),
            hasCode: Boolean(payload.hasCode),
            chunkIndex: Number(payload.chunkIndex),
            totalChunks: Number(payload.totalChunks),
            content: String(payload.content),
          };
          // Validate all fields are present and of correct type
          if (Object.values(result).some(v => v === undefined)) {
            throw new QdrantError('Missing required fields in search result');
          }
          return result;
        })
        .slice(0, limit); // Return only requested number of results
    } catch (error) {
      throw new QdrantError(
        `Failed to perform search: ${(error as Error).message}`
      );
    }
  }
}
```