# Directory Structure
```
├── .npmignore
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── src
│ ├── api-client.ts
│ ├── embeddings.ts
│ ├── handlers
│ │ ├── add-documentation.ts
│ │ ├── base-handler.ts
│ │ ├── list-documentation.ts
│ │ ├── search-documentation.ts
│ │ └── test-embeddings.ts
│ ├── index.ts
│ ├── tools
│ │ ├── add-documentation.ts
│ │ ├── content-fetcher.ts
│ │ ├── list-utils.ts
│ │ ├── qdrant-client.ts
│ │ ├── search-utils.ts
│ │ ├── text-chunker.ts
│ │ └── url-processor.ts
│ ├── types
│ │ └── ollama.d.ts
│ └── types.ts
└── tsconfig.json
```
# Files
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
```
src/
ref/
.clinecontext
.clinelearn
.clinerules
ragdocs_plan.md
tsconfig.json
.git
.gitignore
node_modules/
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
# RagDocs MCP Server
A Model Context Protocol (MCP) server that provides RAG (Retrieval-Augmented Generation) capabilities using Qdrant vector database and Ollama/OpenAI embeddings. This server enables semantic search and management of documentation through vector similarity.
## Features
- Add documentation with metadata
- Semantic search through documents
- List and organize documentation
- Delete documents
- Support for both Ollama (free) and OpenAI (paid) embeddings
- Automatic text chunking and embedding generation
- Vector storage with Qdrant
## Prerequisites
- Node.js 16 or higher
- One of the following Qdrant setups:
- Local instance using Docker (free)
- Qdrant Cloud account with API key (managed service)
- One of the following for embeddings:
- Ollama running locally (default, free)
- OpenAI API key (optional, paid)
## Available Tools
### 1. add_document
Add a document to the RAG system.
Parameters:
- `url` (required): Document URL/identifier
- `content` (required): Document content
- `metadata` (optional): Document metadata
- `title`: Document title
- `contentType`: Content type (e.g., "text/markdown")
### 2. search_documents
Search through stored documents using semantic similarity.
Parameters:
- `query` (required): Natural language search query
- `options` (optional):
- `limit`: Maximum number of results (1-20, default: 5)
- `scoreThreshold`: Minimum similarity score (0-1, default: 0.7)
- `filters`:
- `domain`: Filter by domain
- `hasCode`: Filter for documents containing code
- `after`: Filter for documents after date (ISO format)
- `before`: Filter for documents before date (ISO format)
### 3. list_documents
List all stored documents with pagination and grouping options.
Parameters (all optional):
- `page`: Page number (default: 1)
- `pageSize`: Number of documents per page (1-100, default: 20)
- `groupByDomain`: Group documents by domain (default: false)
- `sortBy`: Sort field ("timestamp", "title", or "domain")
- `sortOrder`: Sort order ("asc" or "desc")
### 4. delete_document
Delete a document from the RAG system.
Parameters:
- `url` (required): URL of the document to delete
## Installation
```bash
npm install -g @mcpservers/ragdocs
```
## MCP Server Configuration
```json
{
"mcpServers": {
"ragdocs": {
"command": "node",
"args": ["@mcpservers/ragdocs"],
"env": {
"QDRANT_URL": "http://127.0.0.1:6333",
"EMBEDDING_PROVIDER": "ollama"
}
}
}
}
```
Using Qdrant Cloud:
```json
{
"mcpServers": {
"ragdocs": {
"command": "node",
"args": ["@mcpservers/ragdocs"],
"env": {
"QDRANT_URL": "https://your-cluster-url.qdrant.tech",
"QDRANT_API_KEY": "your-qdrant-api-key",
"EMBEDDING_PROVIDER": "ollama"
}
}
}
}
```
Using OpenAI:
```json
{
"mcpServers": {
"ragdocs": {
"command": "node",
"args": ["@mcpservers/ragdocs"],
"env": {
"QDRANT_URL": "http://127.0.0.1:6333",
"EMBEDDING_PROVIDER": "openai",
"OPENAI_API_KEY": "your-api-key"
}
}
}
}
```
## Local Qdrant with Docker
```bash
docker run -d --name qdrant -p 6333:6333 -p 6334:6334 qdrant/qdrant
```
## Environment Variables
- `QDRANT_URL`: URL of your Qdrant instance
- For local: "http://127.0.0.1:6333" (default)
- For cloud: "https://your-cluster-url.qdrant.tech"
- `QDRANT_API_KEY`: API key for Qdrant Cloud (required when using cloud instance)
- `EMBEDDING_PROVIDER`: Choice of embedding provider ("ollama" or "openai", default: "ollama")
- `OPENAI_API_KEY`: OpenAI API key (required if using OpenAI)
- `EMBEDDING_MODEL`: Model to use for embeddings
- For Ollama: defaults to "nomic-embed-text"
- For OpenAI: defaults to "text-embedding-3-small"
## License
Apache License 2.0
```
--------------------------------------------------------------------------------
/src/types/ollama.d.ts:
--------------------------------------------------------------------------------
```typescript
// Minimal ambient type declarations for the 'ollama' npm package.
// Only the embeddings API surface used by this project is declared here.
declare module 'ollama' {
  /** Request body for ollama.embeddings(). */
  export interface EmbeddingsRequest {
    model: string;                  // embedding model name, e.g. "nomic-embed-text"
    prompt: string;                 // text to embed
    options?: Record<string, any>;  // provider-specific tuning options
  }

  /** Response from ollama.embeddings(). */
  export interface EmbeddingsResponse {
    embedding: number[];  // embedding vector for the prompt
  }

  // Default export: the ollama client object (embeddings endpoint only).
  const ollama: {
    embeddings(request: EmbeddingsRequest): Promise<EmbeddingsResponse>;
  };
  export default ollama;
}
```
--------------------------------------------------------------------------------
/src/handlers/base-handler.ts:
--------------------------------------------------------------------------------
```typescript
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { ApiClient } from '../api-client.js';
import { ToolResult } from '../types.js';
/**
 * Common base class for all MCP tool handlers.
 *
 * Stores the MCP server instance and the shared ApiClient as protected
 * parameter properties so that concrete handlers only need to implement
 * handle().
 */
export abstract class BaseHandler {
  constructor(
    protected readonly server: Server,
    protected readonly apiClient: ApiClient
  ) {}

  /**
   * Executes the tool with the given (already JSON-decoded) arguments and
   * returns a ToolResult. Some implementations also throw McpError for
   * invalid input.
   */
  abstract handle(args: any): Promise<ToolResult>;
}
```
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
```json
{
"compilerOptions": {
"target": "es2022",
"module": "es2022",
"moduleResolution": "node",
"outDir": "build",
"rootDir": "src",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"resolveJsonModule": true
},
"include": ["src/**/*"],
"exclude": ["node_modules"]
}
```
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
```json
{
"name": "@mcpservers/ragdocs",
"version": "1.0.0",
"type": "module",
"main": "build/index.js",
"bin": {
"mcp-ragdocs": "build/index.js"
},
"files": [
"build/**/*"
],
"scripts": {
"build": "tsc && node -e \"require('fs').chmodSync('build/index.js', '755')\"",
"prepublishOnly": "npm run build",
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [
"mcp",
"rag",
"documentation",
"search",
"embeddings"
],
"author": "bossying",
"license": "Apache-2.0",
"description": "MCP server for RAG-based document search and management",
"homepage": "https://github.com/heltonteixeira/ragdocs",
"repository": {
"type": "git",
"url": "git+https://github.com/heltonteixeira/ragdocs.git"
},
"dependencies": {
"@modelcontextprotocol/sdk": "^1.0.4",
"@qdrant/js-client-rest": "^1.12.0",
"axios": "^1.7.9",
"cheerio": "^1.0.0",
"ollama": "^0.5.11",
"openai": "^4.77.0",
"playwright": "^1.49.1"
},
"devDependencies": {
"typescript": "^5.7.2"
}
}
```
--------------------------------------------------------------------------------
/src/types.ts:
--------------------------------------------------------------------------------
```typescript
import { DocumentMetadata } from './tools/qdrant-client.js';
/**
 * A document to be ingested: its identifier URL, raw content, and any
 * caller-supplied subset of the stored metadata.
 */
export interface Document {
  url: string;
  content: string;
  metadata: Partial<DocumentMetadata>;
}
/** One chunk of a document, as stored alongside its embedding. */
export interface DocumentChunk {
  text: string;
  url: string;
  title: string;
  timestamp: string;
}

/** Qdrant point payload for a chunk: a tagged DocumentChunk plus extras. */
export interface DocumentPayload extends DocumentChunk {
  _type: 'DocumentChunk';
  [key: string]: unknown;
}

/**
 * Runtime type guard for payloads read back from Qdrant. Accepts only
 * objects tagged `_type: 'DocumentChunk'` whose core chunk fields are all
 * strings.
 */
export function isDocumentPayload(payload: unknown): payload is DocumentPayload {
  if (typeof payload !== 'object' || payload === null) {
    return false;
  }
  const candidate = payload as Partial<DocumentPayload>;
  if (candidate._type !== 'DocumentChunk') {
    return false;
  }
  const coreFields = [candidate.text, candidate.url, candidate.title, candidate.timestamp];
  return coreFields.every((field) => typeof field === 'string');
}
/** Optional knobs for semantic search (validated in tools/search-utils). */
export interface SearchOptions {
  limit?: number;           // maximum number of results
  scoreThreshold?: number;  // minimum similarity score (0-1)
  filters?: {
    domain?: string;        // restrict to a single domain
    hasCode?: boolean;      // restrict to documents containing code
    after?: string;         // ISO date lower bound
    before?: string;        // ISO date upper bound
  };
}

/** JSON-schema-style description of an MCP tool, as advertised to clients. */
export interface ToolDefinition {
  name: string;
  description: string;
  inputSchema: {
    type: string;
    properties: Record<string, any>;
    required: string[];
  };
}

/** Result envelope returned by every tool handler. */
export interface ToolResult {
  content: Array<{
    type: string;  // content kind, e.g. 'text'
    text: string;
  }>;
  isError?: boolean;  // set when the text describes a failure
}

/** Server configuration for Qdrant and embeddings. */
export interface RagDocsConfig {
  qdrantUrl: string;
  qdrantApiKey?: string;  // required only for Qdrant Cloud
  openaiApiKey: string;
  collectionName: string;
}
```
--------------------------------------------------------------------------------
/src/handlers/list-documentation.ts:
--------------------------------------------------------------------------------
```typescript
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { BaseHandler } from './base-handler.js';
import { QdrantWrapper } from '../tools/qdrant-client.js';
import { ListOptions, ListResult, ListUtils } from '../tools/list-utils.js';
import { ToolResult } from '../types.js';
import { ApiClient } from '../api-client.js';
export class ListDocumentationHandler extends BaseHandler {
protected server: Server;
protected apiClient: ApiClient;
constructor(server: Server, apiClient: ApiClient) {
super(server, apiClient);
this.server = server;
this.apiClient = apiClient;
}
async handle(args: ListOptions): Promise<ToolResult> {
try {
// Ensure Qdrant is initialized
await this.apiClient.qdrant.initializeCollection();
// Set default values
const page = args.page || 1;
const pageSize = args.pageSize || 20;
const sortBy = args.sortBy || 'timestamp';
const sortOrder = args.sortOrder || 'desc';
// Get documents with pagination
const { total, documents } = await this.apiClient.qdrant.listDocuments({
offset: (page - 1) * pageSize,
limit: pageSize,
sortBy,
sortOrder,
});
// Calculate pagination details
const { totalPages } = ListUtils.getPaginationDetails(total, page, pageSize);
// Sort documents if needed
const sortedDocs = ListUtils.sortDocuments(documents, sortBy, sortOrder);
// Group by domain if requested
const groupedDocs = args.groupByDomain
? ListUtils.groupByDomain(sortedDocs)
: [{ documents: sortedDocs }];
// Prepare result
const result: ListResult = {
total,
page,
pageSize,
totalPages,
documents: groupedDocs,
};
// Format as markdown
const markdown = ListUtils.formatAsMarkdown(result);
return {
content: [
{
type: 'text',
text: markdown,
},
],
};
} catch (error) {
return {
content: [
{
type: 'text',
text: `Failed to list documentation: ${(error as Error).message}`,
},
],
isError: true,
};
}
}
}
```
--------------------------------------------------------------------------------
/src/handlers/search-documentation.ts:
--------------------------------------------------------------------------------
```typescript
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import { BaseHandler } from './base-handler.js';
import { QdrantWrapper } from '../tools/qdrant-client.js';
import { EmbeddingService } from '../embeddings.js';
import {
SearchOptions,
SearchResult,
validateSearchOptions,
extractSnippet,
normalizeScore,
formatResultsAsMarkdown,
} from '../tools/search-utils.js';
/** Arguments accepted by the search_documents tool. */
interface SearchDocumentationArgs {
  query: string;           // natural-language search query (must be non-empty)
  options?: SearchOptions; // optional limit / score threshold / filters
}
/**
 * Handler for the search_documents tool: embeds the query, runs a vector
 * similarity search against Qdrant, and renders the hits as markdown.
 */
export class SearchDocumentationHandler extends BaseHandler {
  private qdrant: QdrantWrapper;
  private embeddings: EmbeddingService;

  constructor(
    qdrant: QdrantWrapper,
    embeddings: EmbeddingService,
    ...args: ConstructorParameters<typeof BaseHandler>
  ) {
    super(...args);
    this.qdrant = qdrant;
    this.embeddings = embeddings;
  }

  async handle(args: SearchDocumentationArgs) {
    // Reject empty or whitespace-only queries up front.
    if (!args.query?.trim()) {
      throw new McpError(
        ErrorCode.InvalidRequest,
        'Query string is required'
      );
    }

    // Options are only validated when the caller supplied them.
    if (args.options) {
      validateSearchOptions(args.options);
    }

    try {
      // Embed the query text (progress logging goes to stderr).
      console.error('Generating embeddings for query:', args.query);
      const queryVector = await this.embeddings.generateEmbeddings(args.query);

      console.error('Searching for similar documents...');
      const hits = await this.qdrant.searchSimilar(queryVector, args.options);

      // Shape raw hits into SearchResult records with snippets and 0-1 scores.
      const formatted: SearchResult[] = hits.map((hit) => {
        const metadata = {
          contentType: hit.contentType,
          wordCount: hit.wordCount,
          hasCode: hit.hasCode,
          chunkIndex: hit.chunkIndex,
          totalChunks: hit.totalChunks,
        };
        return {
          url: hit.url,
          title: hit.title,
          domain: hit.domain,
          timestamp: hit.timestamp,
          score: normalizeScore(hit.score),
          snippet: extractSnippet(hit.content),
          metadata,
        };
      });

      return {
        content: [
          {
            type: 'text',
            text: formatResultsAsMarkdown(formatted),
          },
        ],
      };
    } catch (error) {
      console.error('Search error:', error);
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to search documentation: ${error}`
      );
    }
  }
}
```
--------------------------------------------------------------------------------
/src/handlers/test-embeddings.ts:
--------------------------------------------------------------------------------
```typescript
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { BaseHandler } from './base-handler.js';
import { ApiClient } from '../api-client.js';
import { ToolResult } from '../types.js';
import { EmbeddingService } from '../embeddings.js';
// Collection whose vector size must match the configured embedding model.
const COLLECTION_NAME = 'documentation';

/**
 * Handler for the embedding-configuration test tool.
 *
 * Generates an embedding for the supplied text using the requested
 * provider/model, then constructs a new ApiClient with that configuration
 * and (re)initializes the Qdrant collection so its vector size matches.
 *
 * The super-only constructor from the original was redundant and has been
 * removed; BaseHandler's constructor is inherited unchanged.
 *
 * NOTE(review): the freshly constructed ApiClient is only used to call
 * initCollection() and is then discarded — it does not replace the server's
 * active client, despite the original comment saying it would. Confirm
 * whether that is intended.
 */
export class TestEmbeddingsHandler extends BaseHandler {
  async handle(args: any): Promise<ToolResult> {
    if (!args.text || typeof args.text !== 'string') {
      throw new McpError(ErrorCode.InvalidParams, 'Text is required');
    }

    try {
      // Build a throwaway embedding service with the requested configuration.
      const tempEmbeddingService = EmbeddingService.createFromConfig({
        provider: args.provider || 'ollama',
        apiKey: args.apiKey,
        model: args.model
      });

      const embedding = await tempEmbeddingService.generateEmbeddings(args.text);

      const provider = args.provider || 'ollama';
      const model = args.model || (provider === 'ollama' ? 'nomic-embed-text' : 'text-embedding-3-small');

      // Recreate the collection so its vector size matches the tested model.
      const newApiClient = new ApiClient({
        embeddingConfig: {
          provider: args.provider || 'ollama',
          apiKey: args.apiKey,
          model: args.model
        },
        qdrantUrl: process.env.QDRANT_URL,
        qdrantApiKey: process.env.QDRANT_API_KEY
      });
      await newApiClient.initCollection(COLLECTION_NAME);

      return {
        content: [
          {
            type: 'text',
            text: `Successfully configured ${provider} embeddings (${model}).\nVector size: ${embedding.length}\nQdrant collection updated to match new vector size.`,
          },
        ],
      };
    } catch (error) {
      // Test failures are reported through the tool result, not thrown.
      return {
        content: [
          {
            type: 'text',
            text: `Failed to test embeddings: ${error}`,
          },
        ],
        isError: true,
      };
    }
  }
}
// JSON schema for the embedding-test tool's input, advertised to MCP clients.
// `as const` keeps literal types (e.g. the enum values) for type derivation.
export const testEmbeddingsSchema = {
  type: 'object',
  properties: {
    text: {
      type: 'string',
      description: 'Text to generate embeddings for',
    },
    provider: {
      type: 'string',
      description: 'Embedding provider to use (ollama or openai)',
      enum: ['ollama', 'openai'],
      default: 'ollama',
    },
    apiKey: {
      type: 'string',
      description: 'OpenAI API key (required if provider is openai)',
    },
    model: {
      type: 'string',
      description: 'Model to use for embeddings',
    },
  },
  required: ['text'],
} as const;
```
--------------------------------------------------------------------------------
/src/tools/search-utils.ts:
--------------------------------------------------------------------------------
```typescript
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import { DocumentMetadata } from './qdrant-client.js';
/** A single search hit returned to the client. */
export interface SearchResult {
  url: string;
  title: string;
  domain: string;
  timestamp: string;  // document timestamp (parsed with new Date() for display)
  score: number;      // similarity score normalized to [0, 1] (see normalizeScore)
  snippet: string;    // short excerpt from the matched chunk
  metadata: Partial<DocumentMetadata>;
}

/** Optional search parameters; bounds enforced by validateSearchOptions. */
export interface SearchOptions {
  limit?: number;           // maximum results (1-20)
  scoreThreshold?: number;  // minimum similarity (0-1)
  filters?: {
    domain?: string;        // restrict to one domain
    hasCode?: boolean;      // only documents containing code
    after?: string;         // ISO date lower bound
    before?: string;        // ISO date upper bound
  };
}
/**
 * Extracts a snippet of roughly `maxLength` characters from the middle of
 * the content, expanding outward to whole-word boundaries and adding
 * ellipses on any side that was truncated.
 */
export function extractSnippet(content: string, maxLength: number = 300): string {
  // Short content needs no trimming.
  if (content.length <= maxLength) {
    return content;
  }

  // Start from a window of maxLength centred on the middle of the text.
  const center = Math.floor(content.length / 2);
  const halfWindow = Math.floor(maxLength / 2);
  let from = Math.max(0, center - halfWindow);
  let to = Math.min(content.length, center + halfWindow);

  // Grow the window until both edges sit on whitespace (never split a word).
  while (from > 0 && /\S/.test(content[from - 1])) {
    from -= 1;
  }
  while (to < content.length && /\S/.test(content[to])) {
    to += 1;
  }

  const core = content.slice(from, to).trim();
  const prefix = from > 0 ? '...' : '';
  const suffix = to < content.length ? '...' : '';
  return prefix + core + suffix;
}
/**
 * Maps a raw Qdrant cosine-similarity score from [-1, 1] onto [0, 1].
 */
export function normalizeScore(score: number): number {
  // Shift the cosine range up by one, then halve: -1 -> 0, 0 -> 0.5, 1 -> 1.
  const shifted = score + 1;
  return shifted / 2;
}
/**
 * Renders search results as a numbered markdown list separated by
 * horizontal rules. Returns a fixed message when there are no results.
 */
export function formatResultsAsMarkdown(results: SearchResult[]): string {
  if (results.length === 0) {
    return 'No matching documents found.';
  }

  const sections = results.map((result, index) => {
    const matchPercent = (result.score * 100).toFixed(1);
    const dateText = new Date(result.timestamp).toLocaleDateString();
    // Joined with '\n' this reproduces the original template literal exactly
    // (leading and trailing newline included).
    return [
      '',
      `### ${index + 1}. ${result.title} (${matchPercent}% match)`,
      `**URL:** ${result.url}`,
      `**Domain:** ${result.domain}`,
      `**Date:** ${dateText}`,
      result.snippet,
      '',
    ].join('\n');
  });

  return sections.join('\n---\n');
}
/**
 * Validates user-supplied search options, throwing an InvalidRequest
 * McpError on the first violation found. Returns nothing on success.
 */
export function validateSearchOptions(options: SearchOptions): void {
  const { limit, scoreThreshold, filters } = options;

  if (limit !== undefined && (limit < 1 || limit > 20)) {
    throw new McpError(
      ErrorCode.InvalidRequest,
      'Limit must be between 1 and 20'
    );
  }

  if (scoreThreshold !== undefined && (scoreThreshold < 0 || scoreThreshold > 1)) {
    throw new McpError(
      ErrorCode.InvalidRequest,
      'Score threshold must be between 0 and 1'
    );
  }

  // Date filters must at least parse; empty strings are skipped (falsy).
  if (filters?.after && isNaN(Date.parse(filters.after))) {
    throw new McpError(ErrorCode.InvalidRequest, 'Invalid after date format');
  }
  if (filters?.before && isNaN(Date.parse(filters.before))) {
    throw new McpError(ErrorCode.InvalidRequest, 'Invalid before date format');
  }
}
```
--------------------------------------------------------------------------------
/src/tools/list-utils.ts:
--------------------------------------------------------------------------------
```typescript
import { DocumentMetadata } from './qdrant-client.js';
/** Pagination, sorting, and grouping options for listing documents. */
export interface ListOptions {
  page?: number;      // 1-based page number
  pageSize?: number;  // documents per page
  groupByDomain?: boolean;
  sortBy?: 'timestamp' | 'title' | 'domain';
  sortOrder?: 'asc' | 'desc';
}

/** One page of listing results plus pagination bookkeeping. */
export interface ListResult {
  total: number;  // total documents across all pages
  page: number;
  pageSize: number;
  totalPages: number;
  documents: DocumentGroup[];
}

/** Documents optionally grouped under a common domain. */
export interface DocumentGroup {
  domain?: string;  // absent when grouping by domain was not requested
  documents: DocumentMetadata[];
}
/**
 * Static helpers for paginating, sorting, grouping, and rendering the
 * stored-document listing.
 */
export class ListUtils {
  /**
   * Groups documents by domain, preserving first-seen domain order.
   */
  static groupByDomain(documents: DocumentMetadata[]): DocumentGroup[] {
    const groupedMap = new Map<string, DocumentMetadata[]>();
    for (const doc of documents) {
      const existing = groupedMap.get(doc.domain);
      if (existing) {
        existing.push(doc);
      } else {
        groupedMap.set(doc.domain, [doc]);
      }
    }
    return Array.from(groupedMap.entries()).map(([domain, docs]) => ({
      domain,
      documents: docs
    }));
  }

  /**
   * Returns a new array sorted by the given field and direction; the input
   * array is not mutated.
   */
  static sortDocuments(
    documents: DocumentMetadata[],
    sortBy: 'timestamp' | 'title' | 'domain' = 'timestamp',
    sortOrder: 'asc' | 'desc' = 'desc'
  ): DocumentMetadata[] {
    return [...documents].sort((a, b) => {
      let comparison: number;
      switch (sortBy) {
        case 'timestamp':
          comparison = new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime();
          break;
        case 'title':
          comparison = a.title.localeCompare(b.title);
          break;
        case 'domain':
          comparison = a.domain.localeCompare(b.domain);
          break;
        default:
          comparison = 0;
      }
      return sortOrder === 'desc' ? -comparison : comparison;
    });
  }

  /**
   * Renders a ListResult as markdown: a pagination header followed by
   * bulleted document entries, optionally under per-domain headings.
   */
  static formatAsMarkdown(result: ListResult): string {
    const lines: string[] = [];

    lines.push(`# Documentation List`);
    lines.push(`Page ${result.page} of ${result.totalPages} (${result.total} total documents)\n`);

    for (const group of result.documents) {
      // Domain heading is present only when grouping was requested.
      if (group.domain) {
        lines.push(`## ${group.domain}`);
      }
      for (const doc of group.documents) {
        const date = new Date(doc.timestamp).toLocaleDateString();
        lines.push(`- [${doc.title}](${doc.url})`);
        lines.push(` - Added: ${date}`);
        lines.push(` - Type: ${doc.contentType}`);
        lines.push(` - Words: ${doc.wordCount}`);
        if (doc.hasCode) {
          lines.push(` - Contains code snippets`);
        }
        lines.push(``);
      }
    }

    return lines.join('\n');
  }

  /**
   * Calculates pagination details for a collection of `total` documents.
   *
   * Fix over the original: totalPages is clamped to at least 1, so an empty
   * collection yields page 1 with offset 0 instead of totalPages = 0 and a
   * negative offset (-pageSize).
   */
  static getPaginationDetails(
    total: number,
    page: number = 1,
    pageSize: number = 20
  ): { offset: number; limit: number; totalPages: number } {
    const totalPages = Math.max(1, Math.ceil(total / pageSize));
    // Clamp the requested page into [1, totalPages].
    const currentPage = Math.min(Math.max(1, page), totalPages);
    const offset = (currentPage - 1) * pageSize;
    return {
      offset,
      limit: pageSize,
      totalPages
    };
  }
}
```
--------------------------------------------------------------------------------
/src/embeddings.ts:
--------------------------------------------------------------------------------
```typescript
import ollama from 'ollama';
import OpenAI from 'openai';
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
/** Common contract implemented by embedding backends (Ollama, OpenAI). */
export interface EmbeddingProvider {
  generateEmbeddings(text: string): Promise<number[]>;  // embed a single text
  getVectorSize(): number;  // dimensionality of the vectors this provider produces
}
/**
 * Embedding provider backed by a locally running Ollama instance.
 */
export class OllamaProvider implements EmbeddingProvider {
  // Embedding dimensionality of known Ollama models, used by getVectorSize().
  private static readonly MODEL_DIMENSIONS: Record<string, number> = {
    'nomic-embed-text': 768,
    'mxbai-embed-large': 1024,
    'all-minilm': 384,
  };

  private model: string;

  constructor(model: string = 'nomic-embed-text') {
    this.model = model;
  }

  /**
   * Generates an embedding vector for the given text via Ollama.
   * Throws McpError(InternalError) on any provider failure.
   */
  async generateEmbeddings(text: string): Promise<number[]> {
    try {
      console.error('Generating Ollama embeddings for text:', text.substring(0, 50) + '...');
      const response = await ollama.embeddings({
        model: this.model,
        prompt: text
      });
      console.error('Successfully generated Ollama embeddings with size:', response.embedding.length);
      return response.embedding;
    } catch (error) {
      console.error('Ollama embedding error:', error);
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to generate embeddings with Ollama: ${error}`
      );
    }
  }

  /**
   * Returns the embedding dimensionality for the configured model.
   *
   * Fix over the original, which returned a hard-coded 768 regardless of the
   * configured model. Unknown models still fall back to 768
   * (nomic-embed-text's size) — TODO(review): confirm dimensions for any
   * other models used in deployment.
   */
  getVectorSize(): number {
    return OllamaProvider.MODEL_DIMENSIONS[this.model] ?? 768;
  }
}
/**
 * Embedding provider backed by the OpenAI embeddings API.
 */
export class OpenAIProvider implements EmbeddingProvider {
  // Embedding dimensionality of known OpenAI models, used by getVectorSize().
  private static readonly MODEL_DIMENSIONS: Record<string, number> = {
    'text-embedding-3-small': 1536,
    'text-embedding-3-large': 3072,
    'text-embedding-ada-002': 1536,
  };

  private client: OpenAI;
  private model: string;

  constructor(apiKey: string, model: string = 'text-embedding-3-small') {
    this.client = new OpenAI({ apiKey });
    this.model = model;
  }

  /**
   * Generates an embedding vector for the given text via OpenAI.
   * Throws McpError(InternalError) on any provider failure.
   */
  async generateEmbeddings(text: string): Promise<number[]> {
    try {
      console.error('Generating OpenAI embeddings for text:', text.substring(0, 50) + '...');
      const response = await this.client.embeddings.create({
        model: this.model,
        input: text,
      });
      const embedding = response.data[0].embedding;
      console.error('Successfully generated OpenAI embeddings with size:', embedding.length);
      return embedding;
    } catch (error) {
      console.error('OpenAI embedding error:', error);
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to generate embeddings with OpenAI: ${error}`
      );
    }
  }

  /**
   * Returns the embedding dimensionality for the configured model.
   *
   * Fix over the original, which returned a hard-coded 1536 even for
   * text-embedding-3-large (3072-dimensional). Unknown models fall back to
   * 1536 (the size of both default-era models).
   */
  getVectorSize(): number {
    return OpenAIProvider.MODEL_DIMENSIONS[this.model] ?? 1536;
  }
}
/**
 * Thin facade over a concrete EmbeddingProvider, plus a factory that builds
 * the right provider from configuration.
 */
export class EmbeddingService {
  constructor(private readonly provider: EmbeddingProvider) {}

  /** Delegates embedding generation to the underlying provider. */
  async generateEmbeddings(text: string): Promise<number[]> {
    return this.provider.generateEmbeddings(text);
  }

  /** Dimensionality of vectors produced by the underlying provider. */
  getVectorSize(): number {
    return this.provider.getVectorSize();
  }

  /**
   * Builds an EmbeddingService from configuration. Ollama needs no API key;
   * OpenAI requires one and otherwise an McpError(InvalidRequest) is thrown.
   */
  static createFromConfig(config: {
    provider: 'ollama' | 'openai';
    apiKey?: string;
    model?: string;
  }): EmbeddingService {
    if (config.provider === 'ollama') {
      return new EmbeddingService(new OllamaProvider(config.model));
    }

    if (config.provider === 'openai') {
      if (!config.apiKey) {
        throw new McpError(
          ErrorCode.InvalidRequest,
          'OpenAI API key is required'
        );
      }
      return new EmbeddingService(new OpenAIProvider(config.apiKey, config.model));
    }

    throw new McpError(
      ErrorCode.InvalidRequest,
      `Unknown embedding provider: ${config.provider}`
    );
  }
}
```
--------------------------------------------------------------------------------
/src/tools/url-processor.ts:
--------------------------------------------------------------------------------
```typescript
import { URL } from 'url';
/** Error thrown when a URL cannot be parsed or processed. */
export class URLProcessingError extends Error {
  constructor(message: string) {
    super(message);
    this.name = 'URLProcessingError';
  }
}

/** Result of validating and normalizing a URL. */
export interface ProcessedURL {
  originalUrl: string;    // the input string, untouched
  normalizedUrl: string;  // canonical form used as the document key
  domain: string;         // lowercased hostname
  path: string;           // pathname as parsed (not normalized)
  isValid: boolean;       // always true; invalid URLs throw instead
}

export class URLProcessor {
  // Matches an explicit scheme prefix such as "https://" or "ftp://".
  private static readonly SCHEME_PATTERN = /^[a-zA-Z][a-zA-Z\d+\-.]*:\/\//;

  /**
   * Validates and normalizes a URL, extracting key components.
   *
   * Fix over the original: scheme detection used startsWith('http'), which
   * missed uppercase schemes ("HTTP://...", which got a second "https://"
   * prefixed) and matched strings that merely begin with the letters "http".
   * An explicit scheme pattern is used instead; "https://" is assumed only
   * when no scheme is present.
   *
   * @param urlString The URL string to process
   * @returns ProcessedURL object containing normalized URL and metadata
   * @throws URLProcessingError if URL is invalid
   */
  static processURL(urlString: string): ProcessedURL {
    try {
      const trimmedUrl = urlString.trim();

      // Assume https only when no explicit scheme is present.
      const urlWithProtocol = URLProcessor.SCHEME_PATTERN.test(trimmedUrl)
        ? trimmedUrl
        : `https://${trimmedUrl}`;

      const url = new URL(urlWithProtocol);
      const normalizedUrl = this.normalizeURL(url);

      return {
        originalUrl: urlString,
        normalizedUrl,
        domain: url.hostname.toLowerCase(),
        path: url.pathname,
        isValid: true,
      };
    } catch (error) {
      throw new URLProcessingError(
        `Invalid URL "${urlString}": ${(error as Error).message}`
      );
    }
  }

  /**
   * Normalizes a URL: lowercases the host, sorts query parameters, and
   * strips a trailing slash from non-root paths.
   *
   * Fix over the original: the WHATWG URL parser already removes a port
   * that is the default for the scheme, so any port still present is
   * non-default and must be preserved. The original dropped 80/443
   * unconditionally, mangling e.g. https://host:80.
   */
  private static normalizeURL(url: URL): string {
    const hostname = url.hostname.toLowerCase();
    const port = url.port; // already '' for scheme-default ports

    // Deterministic query order so equivalent URLs compare equal.
    const searchParams = new URLSearchParams([...url.searchParams].sort());
    const search = searchParams.toString();

    // Remove trailing slash except for the root path.
    let path = url.pathname;
    if (path.length > 1 && path.endsWith('/')) {
      path = path.slice(0, -1);
    }

    let normalizedUrl = `${url.protocol}//${hostname}`;
    if (port) normalizedUrl += `:${port}`;
    normalizedUrl += path;
    if (search) normalizedUrl += `?${search}`;
    if (url.hash) normalizedUrl += url.hash;
    return normalizedUrl;
  }

  /**
   * Checks whether a URL string is an absolute http(s) URL.
   * @param urlString URL to validate
   * @returns true if the URL parses and uses http: or https:
   */
  static isValidWebPage(urlString: string): boolean {
    try {
      const { protocol } = new URL(urlString);
      return protocol === 'http:' || protocol === 'https:';
    } catch {
      return false;
    }
  }

  /**
   * Extracts the root domain from a URL.
   * @param urlString URL to process
   * @returns Root domain string
   * @throws URLProcessingError for unparseable URLs
   */
  static extractRootDomain(urlString: string): string {
    try {
      const { hostname } = new URL(urlString);
      const parts = hostname.split('.');
      if (parts.length <= 2) return hostname;

      // Heuristic: short second-level + top-level labels imply a compound
      // TLD (co.uk, com.au), so keep three labels in that case.
      const sld = parts[parts.length - 2];
      const tld = parts[parts.length - 1];
      if (sld.length <= 3 && tld.length <= 3 && parts.length > 2) {
        return parts.slice(-3).join('.');
      }
      return parts.slice(-2).join('.');
    } catch {
      throw new URLProcessingError(`Cannot extract domain from invalid URL: ${urlString}`);
    }
  }
}
```
--------------------------------------------------------------------------------
/src/tools/add-documentation.ts:
--------------------------------------------------------------------------------
```typescript
import OpenAI from 'openai';
import { URLProcessor, URLProcessingError } from './url-processor.js';
import { ContentFetcher, ContentFetchError } from './content-fetcher.js';
import { TextChunker } from './text-chunker.js';
import { QdrantWrapper, QdrantError } from './qdrant-client.js';
/** Error carrying the pipeline step that failed while adding a document. */
export class AddDocumentationError extends Error {
  constructor(message: string, public readonly step: string) {
    super(message);
    this.name = 'AddDocumentationError';
  }
}

/** Summary returned after a document has been successfully ingested. */
export interface AddDocumentationResult {
  url: string;        // normalized URL the document was stored under
  title: string;      // title extracted from the fetched content
  chunks: number;     // number of chunks stored
  wordCount: number;  // word count reported by the content fetcher
}

/**
 * End-to-end pipeline for ingesting a document by URL: validate the URL,
 * fetch and chunk the content, embed each chunk with OpenAI, and store the
 * chunks in Qdrant. Re-adding an existing URL replaces the stored version.
 *
 * NOTE(review): this tool embeds with 'text-embedding-ada-002' while
 * src/embeddings.ts defaults to 'text-embedding-3-small'. Both are
 * 1536-dimensional, but mixing models in one collection makes similarity
 * scores inconsistent — confirm which model is intended.
 */
export class AddDocumentationTool {
  private openai: OpenAI;
  private qdrant: QdrantWrapper;

  constructor(openaiApiKey: string, qdrantUrl?: string) {
    if (!openaiApiKey) {
      throw new Error('OpenAI API key is required');
    }
    this.openai = new OpenAI({
      apiKey: openaiApiKey,
    });
    this.qdrant = new QdrantWrapper(qdrantUrl);
  }

  /**
   * Adds a document to the RAG system
   * @param url URL of the document to add
   * @returns Result of the operation
   * @throws URLProcessingError, ContentFetchError, QdrantError, or
   *         AddDocumentationError (with the failing step attached)
   */
  async addDocument(url: string): Promise<AddDocumentationResult> {
    try {
      // Fail fast if the vector store is unreachable.
      const isHealthy = await this.qdrant.isHealthy();
      if (!isHealthy) {
        throw new AddDocumentationError(
          'Qdrant server is not available',
          'health_check'
        );
      }

      // Initialize collection if needed
      await this.qdrant.initializeCollection();

      // Validate and canonicalize the URL; the normalized form is the key.
      const processedUrl = URLProcessor.processURL(url);
      if (!processedUrl.isValid) {
        throw new AddDocumentationError('Invalid URL format', 'url_validation');
      }

      // Replace semantics: drop any previously stored version first.
      const exists = await this.qdrant.documentExists(processedUrl.normalizedUrl);
      if (exists) {
        // Remove existing document before adding new version
        await this.qdrant.removeDocument(processedUrl.normalizedUrl);
      }

      // Fetch content
      const content = await ContentFetcher.fetchContent(processedUrl.normalizedUrl);

      // Chunk content
      const chunks = TextChunker.chunkText(content.content, {
        maxChunkSize: 1500, // Leave room for metadata in context window
        minChunkSize: 100,
        overlap: 200,
        respectCodeBlocks: true,
      });

      // Generate embeddings for each chunk
      const embeddings = await this.generateEmbeddings(
        chunks.map(chunk => chunk.content)
      );

      // Store in Qdrant
      await this.qdrant.storeDocumentChunks(chunks, embeddings, {
        url: processedUrl.normalizedUrl,
        title: content.title,
        domain: processedUrl.domain,
        timestamp: content.timestamp,
        contentType: content.metadata.contentType,
        wordCount: content.metadata.wordCount,
        hasCode: content.metadata.hasCode,
      });

      return {
        url: processedUrl.normalizedUrl,
        title: content.title,
        chunks: chunks.length,
        wordCount: content.metadata.wordCount,
      };
    } catch (error) {
      // Re-throw known pipeline errors unchanged; wrap anything else.
      if (
        error instanceof URLProcessingError ||
        error instanceof ContentFetchError ||
        error instanceof QdrantError ||
        error instanceof AddDocumentationError
      ) {
        throw error;
      }
      throw new AddDocumentationError(
        `Unexpected error: ${(error as Error).message}`,
        'unknown'
      );
    }
  }

  /**
   * Generates embeddings for text chunks using OpenAI's API
   * @param chunks Array of text chunks
   * @returns Array of embeddings
   * @throws AddDocumentationError (step 'embedding_generation') on API failure
   */
  private async generateEmbeddings(chunks: string[]): Promise<number[][]> {
    try {
      // Single batched API call for all chunks.
      const response = await this.openai.embeddings.create({
        model: 'text-embedding-ada-002',
        input: chunks,
      });
      return response.data.map(item => item.embedding);
    } catch (error) {
      throw new AddDocumentationError(
        `Failed to generate embeddings: ${(error as Error).message}`,
        'embedding_generation'
      );
    }
  }
}
```
--------------------------------------------------------------------------------
/src/handlers/add-documentation.ts:
--------------------------------------------------------------------------------
```typescript
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { BaseHandler } from './base-handler.js';
import { ApiClient } from '../api-client.js';
import { DocumentChunk, ToolResult } from '../types.js';
import * as cheerio from 'cheerio';
import crypto from 'crypto';
const COLLECTION_NAME = 'documentation';
const BATCH_SIZE = 100;

/**
 * MCP tool handler that fetches a web page, strips boilerplate, chunks the
 * text, embeds each chunk, and upserts the results into the Qdrant
 * "documentation" collection in batches of BATCH_SIZE.
 */
export class AddDocumentationHandler extends BaseHandler {
  constructor(server: Server, apiClient: ApiClient) {
    super(server, apiClient);
  }

  /**
   * Handles the add-documentation tool call.
   * @param args Tool arguments; requires a string `url`
   * @returns ToolResult reporting success, or an error result on failure
   * @throws McpError InvalidParams when `url` is missing, and for known
   *         Qdrant auth/connection failures
   */
  async handle(args: any): Promise<ToolResult> {
    if (!args.url || typeof args.url !== 'string') {
      throw new McpError(ErrorCode.InvalidParams, 'URL is required');
    }
    try {
      const chunks = await this.fetchAndProcessUrl(args.url);
      // Batch process chunks for better performance
      for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
        const batch = chunks.slice(i, i + BATCH_SIZE);
        const points = await Promise.all(
          batch.map(async (chunk) => {
            const embedding = await this.apiClient.getEmbeddings(chunk.text);
            return {
              id: this.generatePointId(),
              vector: embedding,
              payload: {
                ...chunk,
                _type: 'DocumentChunk' as const,
              } as Record<string, unknown>,
            };
          })
        );
        try {
          await this.apiClient.qdrantClient.upsert(COLLECTION_NAME, {
            wait: true,
            points,
          });
        } catch (error) {
          // Translate common Qdrant transport failures into clearer MCP errors.
          if (error instanceof Error) {
            if (error.message.includes('unauthorized')) {
              throw new McpError(
                ErrorCode.InvalidRequest,
                'Failed to authenticate with Qdrant cloud while adding documents'
              );
            } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) {
              throw new McpError(
                ErrorCode.InternalError,
                'Connection to Qdrant cloud failed while adding documents'
              );
            }
          }
          throw error;
        }
      }
      return {
        content: [
          {
            type: 'text',
            text: `Successfully added documentation from ${args.url} (${chunks.length} chunks processed in ${Math.ceil(chunks.length / BATCH_SIZE)} batches)`,
          },
        ],
      };
    } catch (error) {
      if (error instanceof McpError) {
        throw error;
      }
      return {
        content: [
          {
            type: 'text',
            text: `Failed to add documentation: ${error}`,
          },
        ],
        isError: true,
      };
    }
  }

  /**
   * Renders the page with the shared headless browser, strips scripts and
   * styles, extracts the main text, and splits it into ~1000-char chunks.
   * @param url Page to fetch
   * @returns DocumentChunk array (text + url + title + timestamp)
   * @throws McpError InternalError when the page cannot be fetched
   */
  private async fetchAndProcessUrl(url: string): Promise<DocumentChunk[]> {
    await this.apiClient.initBrowser();
    const page = await this.apiClient.browser.newPage();
    try {
      await page.goto(url, { waitUntil: 'networkidle' });
      const content = await page.content();
      const $ = cheerio.load(content);
      // Remove script tags, style tags, and comments
      $('script').remove();
      $('style').remove();
      $('noscript').remove();
      // Extract main content
      const title = $('title').text() || url;
      const mainContent = $('main, article, .content, .documentation, body').text();
      // Split content into chunks
      const chunks = this.chunkText(mainContent, 1000);
      return chunks.map(chunk => ({
        text: chunk,
        url,
        title,
        timestamp: new Date().toISOString(),
      }));
    } catch (error) {
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to fetch URL ${url}: ${error}`
      );
    } finally {
      await page.close();
    }
  }

  /**
   * Splits text into word-boundary chunks of at least `maxChunkSize`
   * characters (a chunk closes on the word that crosses the threshold).
   */
  private chunkText(text: string, maxChunkSize: number): string[] {
    const words = text.split(/\s+/);
    const chunks: string[] = [];
    let currentChunk: string[] = [];
    // Track the joined length incrementally instead of re-joining the whole
    // chunk on every word (avoids accidental O(n^2) on large pages).
    let currentLength = 0;
    for (const word of words) {
      // +1 accounts for the joining space before every word but the first.
      currentLength += word.length + (currentChunk.length > 0 ? 1 : 0);
      currentChunk.push(word);
      if (currentLength >= maxChunkSize) {
        chunks.push(currentChunk.join(' '));
        currentChunk = [];
        currentLength = 0;
      }
    }
    if (currentChunk.length > 0) {
      chunks.push(currentChunk.join(' '));
    }
    return chunks;
  }

  /**
   * Generates a unique Qdrant point ID.
   * Qdrant only accepts unsigned integers or UUID strings as point IDs;
   * the previous 32-char hex string (randomBytes(16).toString('hex')) is
   * not a valid UUID and is rejected by the server.
   */
  private generatePointId(): string {
    return crypto.randomUUID();
  }
}
```
--------------------------------------------------------------------------------
/src/tools/content-fetcher.ts:
--------------------------------------------------------------------------------
```typescript
import axios, { AxiosError } from 'axios';
import * as cheerio from 'cheerio';
/**
 * Error thrown when fetching or processing a URL's content fails.
 * Carries the offending URL for caller-side reporting.
 */
export class ContentFetchError extends Error {
  public readonly url: string;

  constructor(message: string, url: string) {
    super(message);
    this.url = url;
    this.name = 'ContentFetchError';
  }
}
/**
 * Result of fetching and processing a single URL.
 */
export interface FetchedContent {
  /** URL the content was fetched from. */
  url: string;
  /** Page title (from <title>, first <h1>, or 'Untitled Document'). */
  title: string;
  /** Cleaned main text; code blocks are wrapped in ``` fences. */
  content: string;
  /** ISO-8601 timestamp of when processing completed. */
  timestamp: string;
  metadata: {
    /** Hostname extracted from the URL. */
    domain: string;
    /** MIME type of the fetched resource (currently always 'text/html'). */
    contentType: string;
    /** Number of whitespace-separated words in `content`. */
    wordCount: number;
    /** True if the page appears to contain code (pre/code tags or backticks). */
    hasCode: boolean;
  };
}
/**
 * Fetches HTML pages over HTTP (with retries for transient failures) and
 * extracts their main textual content plus lightweight metadata.
 */
export class ContentFetcher {
  private static readonly TIMEOUT = 30000; // 30 seconds
  private static readonly MAX_RETRIES = 3;
  private static readonly RETRY_DELAY = 1000; // 1 second

  /**
   * Fetches and processes content from a URL.
   * Transient failures are retried up to MAX_RETRIES times with a fixed
   * delay; 404s and unsupported content types fail immediately.
   * @param url URL to fetch content from
   * @returns Processed content with metadata
   * @throws ContentFetchError on permanent failure or retry exhaustion
   */
  static async fetchContent(url: string): Promise<FetchedContent> {
    let retries = 0;
    let lastError: Error | null = null;
    while (retries < this.MAX_RETRIES) {
      try {
        const response = await axios.get(url, {
          timeout: this.TIMEOUT,
          maxRedirects: 5,
          headers: {
            'User-Agent': 'Mozilla/5.0 (compatible; RagDocsBot/1.0)',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9',
            'Accept-Language': 'en-US,en;q=0.5',
          },
        });
        const contentType = response.headers['content-type'] || '';
        if (!contentType.includes('html')) {
          throw new ContentFetchError('Unsupported content type: ' + contentType, url);
        }
        return this.processHtmlContent(url, response.data);
      } catch (error) {
        lastError = error as Error;
        // Non-retryable: our own errors (e.g. unsupported content type) are
        // permanent — rethrow immediately instead of retrying and masking
        // them behind the generic retry-exhausted message below.
        if (error instanceof ContentFetchError) {
          throw error;
        }
        if (error instanceof AxiosError && error.response?.status === 404) {
          throw new ContentFetchError('Page not found', url);
        }
        retries++;
        if (retries < this.MAX_RETRIES) {
          await new Promise(resolve => setTimeout(resolve, this.RETRY_DELAY));
        }
      }
    }
    throw new ContentFetchError(
      `Failed to fetch content after ${this.MAX_RETRIES} attempts: ${lastError?.message}`,
      url
    );
  }

  /**
   * Processes HTML content to extract relevant text and metadata.
   * @param url Original URL
   * @param html Raw HTML content
   * @returns Processed content with metadata
   */
  private static processHtmlContent(url: string, html: string): FetchedContent {
    const $ = cheerio.load(html);
    // Remove unwanted elements
    this.removeUnwantedElements($);
    // Extract title
    const title = $('title').text().trim() ||
      $('h1').first().text().trim() ||
      'Untitled Document';
    // Extract main content
    const mainContent = this.extractMainContent($);
    // Check for code blocks
    const hasCode = $('pre, code').length > 0 ||
      mainContent.includes('```') ||
      /\`[^\`]+\`/.test(mainContent);
    // Count words
    const wordCount = mainContent.split(/\s+/).filter(Boolean).length;
    return {
      url,
      title,
      content: mainContent,
      timestamp: new Date().toISOString(),
      metadata: {
        domain: new URL(url).hostname,
        contentType: 'text/html',
        wordCount,
        hasCode,
      },
    };
  }

  /**
   * Removes non-content elements (scripts, nav, ads, comments, …) in place.
   * @param $ Cheerio instance
   */
  private static removeUnwantedElements($: cheerio.CheerioAPI): void {
    // Remove common non-content elements
    const selectorsToRemove = [
      'script',
      'style',
      'nav',
      'header',
      'footer',
      'iframe',
      '.advertisement',
      '.ads',
      '#comments',
      '.comments',
      '.social-share',
      '.related-posts',
      'aside',
    ];
    $(selectorsToRemove.join(', ')).remove();
  }

  /**
   * Extracts main content from the HTML: tries common content containers,
   * falls back to <body>, and fences pre/code elements with ``` markers.
   * @param $ Cheerio instance
   * @returns Extracted and cleaned content
   */
  private static extractMainContent($: cheerio.CheerioAPI): string {
    // Try to find main content container
    const mainSelectors = [
      'article',
      'main',
      '.main-content',
      '#main-content',
      '.post-content',
      '.article-content',
      '.entry-content',
    ];
    let $content = $();
    for (const selector of mainSelectors) {
      $content = $(selector);
      if ($content.length > 0) break;
    }
    // Fallback to body if no main content container found
    if ($content.length === 0) {
      $content = $('body');
    }
    // Extract text content
    const text = $content
      .find('h1, h2, h3, h4, h5, h6, p, li, pre, code')
      .map((_, el) => {
        const $el = $(el);
        // Preserve code blocks
        if ($el.is('pre, code')) {
          return '\n```\n' + $el.text() + '\n```\n';
        }
        return $el.text();
      })
      .get()
      .join('\n')
      .trim();
    // Clean up the text
    return this.cleanText(text);
  }

  /**
   * Cleans extracted text content while preserving line structure.
   * @param text Raw text content
   * @returns Cleaned text
   */
  private static cleanText(text: string): string {
    return text
      .replace(/[\r\n]+/g, '\n')   // Normalize line endings
      .replace(/\n\s+\n/g, '\n\n') // Remove excess whitespace between paragraphs
      // Only collapse spaces/tabs here: the previous /\s+/ also matched '\n',
      // flattening the text to one line and defeating the split below.
      .replace(/[ \t]+/g, ' ')
      .split('\n')                 // Split into lines
      .map(line => line.trim())    // Trim each line
      .filter(Boolean)             // Remove empty lines
      .join('\n')                  // Rejoin with newlines
      .trim();                     // Final trim
  }
}
```
--------------------------------------------------------------------------------
/src/tools/text-chunker.ts:
--------------------------------------------------------------------------------
```typescript
/** Tunables controlling how text is split into chunks. */
export interface ChunkOptions {
  /** Hard upper bound (chars) before a new chunk is started. */
  maxChunkSize: number;
  /** A chunk is only closed early once it reaches this size (chars). */
  minChunkSize: number;
  /** Approximate number of characters carried over between chunks. */
  overlap: number;
  /** Keep fenced code blocks as single chunks when they fit. */
  respectCodeBlocks?: boolean;
}

/** One chunk of the input text plus its location metadata. */
export interface TextChunk {
  content: string;
  /** Sequential chunk index within the document. */
  index: number;
  metadata: {
    startPosition: number;
    endPosition: number;
    isCodeBlock?: boolean;
  };
}

/**
 * Splits text into overlapping chunks, respecting sentence boundaries and
 * (optionally) keeping fenced ``` code blocks intact.
 */
export class TextChunker {
  private static readonly DEFAULT_OPTIONS: ChunkOptions = {
    maxChunkSize: 1000,
    minChunkSize: 100,
    overlap: 200,
    respectCodeBlocks: true,
  };

  /**
   * Splits text into chunks while preserving context and natural boundaries.
   * @param text Text to split into chunks
   * @param options Chunking options (merged over defaults and validated)
   * @returns Array of text chunks with metadata
   * @throws Error when the merged options are inconsistent (see validateOptions)
   */
  static chunkText(text: string, options?: Partial<ChunkOptions>): TextChunk[] {
    // Run options through validation — previously validateOptions existed
    // but was never called, so inconsistent options were silently accepted.
    const opts = this.validateOptions(options ?? {});
    const chunks: TextChunk[] = [];
    // First, separate code blocks from regular text
    const segments = this.separateCodeBlocks(text);
    let currentPosition = 0;
    let chunkIndex = 0;
    for (const segment of segments) {
      if (segment.isCodeBlock && opts.respectCodeBlocks) {
        // Keep code blocks as single chunks if they're not too large
        if (segment.content.length <= opts.maxChunkSize * 1.5) {
          chunks.push({
            content: segment.content,
            index: chunkIndex++,
            metadata: {
              startPosition: currentPosition,
              endPosition: currentPosition + segment.content.length,
              isCodeBlock: true,
            },
          });
          currentPosition += segment.content.length;
          continue;
        }
      }
      // Process regular text or large code blocks
      const segmentChunks = this.chunkSegment(
        segment.content,
        opts,
        currentPosition,
        chunkIndex,
        segment.isCodeBlock
      );
      chunks.push(...segmentChunks);
      chunkIndex += segmentChunks.length;
      currentPosition += segment.content.length;
    }
    return chunks;
  }

  /**
   * Separates fenced ``` code blocks from regular text.
   * @param text Input text
   * @returns Array of text segments with code block flags
   */
  private static separateCodeBlocks(text: string): Array<{ content: string; isCodeBlock: boolean }> {
    const segments: Array<{ content: string; isCodeBlock: boolean }> = [];
    const codeBlockRegex = /```[\s\S]*?```/g;
    let lastIndex = 0;
    let match: RegExpExecArray | null;
    while ((match = codeBlockRegex.exec(text)) !== null) {
      // Add text before code block
      if (match.index > lastIndex) {
        segments.push({
          content: text.slice(lastIndex, match.index),
          isCodeBlock: false,
        });
      }
      // Add code block
      segments.push({
        content: match[0],
        isCodeBlock: true,
      });
      lastIndex = match.index + match[0].length;
    }
    // Add remaining text
    if (lastIndex < text.length) {
      segments.push({
        content: text.slice(lastIndex),
        isCodeBlock: false,
      });
    }
    return segments;
  }

  /**
   * Chunks a single segment of text, carrying ~overlap/10 words of the
   * previous chunk into the next one for context.
   * @param text Text segment to chunk
   * @param options Chunking options
   * @param startPosition Starting position in original text
   * @param startIndex Starting chunk index
   * @param isCodeBlock Whether this is a code block
   * @returns Array of chunks
   */
  private static chunkSegment(
    text: string,
    options: ChunkOptions,
    startPosition: number,
    startIndex: number,
    isCodeBlock: boolean
  ): TextChunk[] {
    const chunks: TextChunk[] = [];
    let currentChunk = '';
    let currentPosition = 0;
    // Split into sentences/paragraphs first
    const blocks = isCodeBlock
      ? [text] // Keep code blocks together
      : text
          .split(/(?<=\.|\?|\!|\n)\s+/)
          .filter(Boolean)
          .map(block => block.trim());
    for (const block of blocks) {
      // If adding this block would exceed max size, start new chunk
      if (
        currentChunk &&
        currentChunk.length + block.length > options.maxChunkSize &&
        currentChunk.length >= options.minChunkSize
      ) {
        chunks.push({
          content: currentChunk,
          index: startIndex + chunks.length,
          metadata: {
            startPosition: startPosition + currentPosition - currentChunk.length,
            endPosition: startPosition + currentPosition,
            isCodeBlock,
          },
        });
        // Start new chunk with overlap
        const words = currentChunk.split(/\s+/);
        const overlapWords = words.slice(-Math.ceil(options.overlap / 10)); // Approximate words for overlap
        currentChunk = overlapWords.join(' ') + ' ' + block;
      } else {
        currentChunk = currentChunk
          ? currentChunk + ' ' + block
          : block;
      }
      currentPosition += block.length + 1; // +1 for the space
    }
    // Add final chunk if not empty
    if (currentChunk) {
      chunks.push({
        content: currentChunk,
        index: startIndex + chunks.length,
        metadata: {
          startPosition: startPosition + currentPosition - currentChunk.length,
          endPosition: startPosition + currentPosition,
          isCodeBlock,
        },
      });
    }
    return chunks;
  }

  /**
   * Validates chunk options merged over defaults.
   * @param options User-provided options
   * @returns Validated options
   * @throws Error when sizes are non-positive, max < min, or overlap >= max
   */
  private static validateOptions(options: Partial<ChunkOptions>): ChunkOptions {
    const opts = { ...this.DEFAULT_OPTIONS, ...options };
    if (opts.maxChunkSize < opts.minChunkSize) {
      throw new Error('maxChunkSize must be greater than minChunkSize');
    }
    if (opts.overlap >= opts.maxChunkSize) {
      throw new Error('overlap must be less than maxChunkSize');
    }
    if (opts.minChunkSize <= 0 || opts.maxChunkSize <= 0 || opts.overlap < 0) {
      throw new Error('chunk sizes and overlap must be positive numbers');
    }
    return opts;
  }
}
```
--------------------------------------------------------------------------------
/src/api-client.ts:
--------------------------------------------------------------------------------
```typescript
import { QdrantClient } from '@qdrant/js-client-rest';
import { chromium } from 'playwright';
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import { EmbeddingService } from './embeddings.js';
import { QdrantWrapper } from './tools/qdrant-client.js';
import { Document } from './types.js';
/** Shape of the collection configuration returned by Qdrant. */
export interface QdrantCollectionConfig {
  params: {
    vectors: {
      /** Dimensionality of stored vectors. */
      size: number;
      /** Distance metric name (e.g. 'Cosine'). */
      distance: string;
    };
  };
}

/** Subset of Qdrant's collection info used to verify vector size. */
export interface QdrantCollectionInfo {
  config: QdrantCollectionConfig;
}
/**
 * Facade over the embedding service, the Qdrant wrapper, and a headless
 * browser used for page rendering. Also manages the lifecycle of the
 * Qdrant collection (creation, vector-size verification, recreation).
 */
export class ApiClient {
  qdrantClient: QdrantClient;
  private embeddingService: EmbeddingService;
  readonly qdrant: QdrantWrapper;
  // Playwright Browser instance; lazily created by initBrowser().
  browser: any;

  constructor(config: {
    embeddingConfig: {
      provider: 'ollama' | 'openai';
      apiKey?: string;
      model?: string;
    };
    qdrantUrl?: string;
    qdrantApiKey?: string;
  }) {
    this.embeddingService = EmbeddingService.createFromConfig(config.embeddingConfig);
    this.qdrant = new QdrantWrapper(config.qdrantUrl, config.qdrantApiKey);
    this.qdrantClient = this.qdrant.client;
  }

  /** Lazily launches the headless browser on first use. */
  async initBrowser() {
    if (!this.browser) {
      this.browser = await chromium.launch();
    }
  }

  /**
   * Closes the browser if one was launched. Clears the reference so a
   * later initBrowser() can relaunch — previously the closed instance was
   * kept, leaving the client unusable after cleanup().
   */
  async cleanup() {
    if (this.browser) {
      await this.browser.close();
      this.browser = undefined;
    }
  }

  /** Generates an embedding vector for the given text. */
  async getEmbeddings(text: string): Promise<number[]> {
    return this.embeddingService.generateEmbeddings(text);
  }

  get embeddings(): EmbeddingService {
    return this.embeddingService;
  }

  /**
   * Ensures the named collection exists with the vector size required by
   * the current embedding model, recreating it on mismatch.
   * @throws McpError with a specific message for auth/connection failures
   */
  async initCollection(collectionName: string) {
    try {
      const collections = await this.qdrantClient.getCollections();
      const exists = collections.collections.some(c => c.name === collectionName);
      const requiredVectorSize = this.embeddingService.getVectorSize();
      if (!exists) {
        console.error(`Creating new collection with vector size ${requiredVectorSize}`);
        await this.createCollection(collectionName, requiredVectorSize);
        return;
      }
      // Verify vector size of existing collection
      const collectionInfo = await this.qdrantClient.getCollection(collectionName) as QdrantCollectionInfo;
      const currentVectorSize = collectionInfo.config?.params?.vectors?.size;
      if (!currentVectorSize) {
        console.error('Could not determine current vector size, recreating collection...');
        await this.recreateCollection(collectionName, requiredVectorSize);
        return;
      }
      if (currentVectorSize !== requiredVectorSize) {
        console.error(`Vector size mismatch: collection=${currentVectorSize}, required=${requiredVectorSize}`);
        await this.recreateCollection(collectionName, requiredVectorSize);
      }
    } catch (error) {
      if (error instanceof Error) {
        if (error.message.includes('unauthorized')) {
          throw new McpError(
            ErrorCode.InvalidRequest,
            'Failed to authenticate with Qdrant. Please check your API key.'
          );
        } else if (error.message.includes('ECONNREFUSED') || error.message.includes('ETIMEDOUT')) {
          throw new McpError(
            ErrorCode.InternalError,
            'Failed to connect to Qdrant. Please check your QDRANT_URL.'
          );
        }
      }
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to initialize Qdrant collection: ${error}`
      );
    }
  }

  /** Creates the collection and the payload indexes used for filtering. */
  private async createCollection(collectionName: string, vectorSize: number) {
    await this.qdrantClient.createCollection(collectionName, {
      vectors: {
        size: vectorSize,
        distance: 'Cosine',
      },
      optimizers_config: {
        default_segment_number: 2,
        memmap_threshold: 20000,
      },
      replication_factor: 2,
    });
    // Create indexes for efficient filtering
    await this.qdrantClient.createPayloadIndex(collectionName, {
      field_name: 'url',
      field_schema: 'keyword',
    });
    await this.qdrantClient.createPayloadIndex(collectionName, {
      field_name: 'timestamp',
      field_schema: 'datetime',
    });
  }

  /** Drops and recreates the collection (destroys existing data). */
  private async recreateCollection(collectionName: string, vectorSize: number) {
    try {
      console.error('Recreating collection with new vector size...');
      await this.qdrantClient.deleteCollection(collectionName);
      await this.createCollection(collectionName, vectorSize);
      console.error(`Collection recreated with new vector size ${vectorSize}`);
    } catch (error) {
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to recreate collection: ${error}`
      );
    }
  }

  /** Returns true when the Qdrant server responds to a collections listing. */
  async isHealthy(): Promise<boolean> {
    try {
      await this.qdrantClient.getCollections();
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Adds a single document: rejects duplicates, embeds the content, and
   * stores it as one chunk with derived metadata.
   * @throws McpError InvalidRequest when the URL already exists,
   *         InternalError for any other failure
   */
  async addDocument(doc: Document): Promise<void> {
    try {
      // Check if document already exists
      if (await this.qdrant.documentExists(doc.url)) {
        throw new McpError(
          ErrorCode.InvalidRequest,
          `Document with URL ${doc.url} already exists`
        );
      }
      // Generate embeddings for the content
      const embedding = await this.embeddingService.generateEmbeddings(doc.content);
      // Store document in Qdrant
      await this.qdrant.storeDocumentChunks(
        [{
          content: doc.content,
          index: 0,
          metadata: {
            startPosition: 0,
            endPosition: doc.content.length,
            isCodeBlock: /```/.test(doc.content)
          }
        }],
        [embedding],
        {
          url: doc.url,
          title: doc.metadata.title || '',
          domain: new URL(doc.url).hostname,
          timestamp: new Date().toISOString(),
          contentType: doc.metadata.contentType || 'text/plain',
          wordCount: doc.content.split(/\s+/).length,
          hasCode: /```|\bfunction\b|\bclass\b|\bconst\b|\blet\b|\bvar\b/.test(doc.content),
        }
      );
    } catch (error) {
      // Preserve intentional MCP errors (e.g. the duplicate-URL
      // InvalidRequest above) instead of re-wrapping them as InternalError
      // and losing the error code callers rely on.
      if (error instanceof McpError) {
        throw error;
      }
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to add document: ${error}`
      );
    }
  }

  /** Removes a document (all chunks) by URL. */
  async deleteDocument(url: string): Promise<void> {
    try {
      await this.qdrant.removeDocument(url);
    } catch (error) {
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to delete document: ${error}`
      );
    }
  }
}
```
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
```typescript
#!/usr/bin/env node
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { CallToolRequestSchema, ListToolsRequestSchema, McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import axios from 'axios';
import { ApiClient } from './api-client.js';
import { SearchDocumentationHandler } from './handlers/search-documentation.js';
import { ListDocumentationHandler } from './handlers/list-documentation.js';
import { ListOptions } from './tools/list-utils.js';
import { Document } from './types.js';
// Force using IP address to avoid hostname resolution issues
const QDRANT_URL = process.env.QDRANT_URL || 'http://127.0.0.1:6333';
const QDRANT_API_KEY = process.env.QDRANT_API_KEY;
const EMBEDDING_PROVIDER = process.env.EMBEDDING_PROVIDER || 'ollama';
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
// Test connection with direct axios call first, so startup fails fast with
// a clear error before any client objects are constructed.
// NOTE: all logging goes to stderr — stdout is reserved for the MCP stdio
// transport.
try {
  const response = await axios.get(`${QDRANT_URL}/collections`);
  console.error('Successfully connected to Qdrant:', response.data);
} catch (error) {
  console.error('Failed to connect to Qdrant:', error);
  throw new McpError(
    ErrorCode.InternalError,
    'Failed to establish initial connection to Qdrant server'
  );
}
// Shared client used by all tool handlers. Embeddings default to the local
// Ollama model; OpenAI is used only when EMBEDDING_PROVIDER=openai.
const client = new ApiClient({
  qdrantUrl: QDRANT_URL,
  qdrantApiKey: QDRANT_API_KEY,
  embeddingConfig: {
    provider: EMBEDDING_PROVIDER as 'ollama' | 'openai',
    apiKey: OPENAI_API_KEY,
    model: EMBEDDING_PROVIDER === 'ollama' ? 'nomic-embed-text' : 'text-embedding-3-small'
  }
});
try {
  // Initialize Qdrant collection
  await client.qdrant.initializeCollection();
  console.error('Successfully initialized Qdrant collection');
} catch (error) {
  console.error('Failed to initialize Qdrant collection:', error);
  throw error;
}
/**
 * MCP server exposing the RAG documentation tools (add_document,
 * search_documents, delete_document, list_documents) over stdio.
 * Tool schemas are declared in setupToolHandlers and dispatched to the
 * module-level `client` and per-tool handler classes.
 */
class RagDocsServer {
  private server: Server;
  constructor() {
    this.server = new Server(
      {
        name: 'ragdocs',
        version: '0.1.0',
      },
      {
        capabilities: {
          tools: {},
        },
      }
    );
    this.setupToolHandlers();
    // Log MCP-level errors to stderr (stdout carries the protocol stream).
    this.server.onerror = (error) => console.error('[MCP Error]', error);
  }
  // Registers the tool list (with JSON schemas) and the call dispatcher.
  private setupToolHandlers() {
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: [
        {
          name: 'add_document',
          description: 'Add a document to the RAG system',
          inputSchema: {
            type: 'object',
            properties: {
              url: { type: 'string', description: 'Document URL' },
              content: { type: 'string', description: 'Document content' },
              metadata: {
                type: 'object',
                properties: {
                  title: { type: 'string', description: 'Document title' },
                  contentType: { type: 'string', description: 'Content type (e.g., text/plain, text/markdown)' },
                },
                additionalProperties: true,
              },
            },
            required: ['url', 'content'],
          },
        },
        {
          name: 'search_documents',
          description: 'Search for documents using semantic similarity',
          inputSchema: {
            type: 'object',
            properties: {
              query: {
                type: 'string',
                description: 'Natural language search query'
              },
              options: {
                type: 'object',
                description: 'Search options',
                properties: {
                  limit: {
                    type: 'number',
                    description: 'Maximum number of results (1-20)',
                    minimum: 1,
                    maximum: 20
                  },
                  scoreThreshold: {
                    type: 'number',
                    description: 'Minimum similarity score (0-1)',
                    minimum: 0,
                    maximum: 1
                  },
                  filters: {
                    type: 'object',
                    description: 'Optional filters',
                    properties: {
                      domain: {
                        type: 'string',
                        description: 'Filter by domain'
                      },
                      hasCode: {
                        type: 'boolean',
                        description: 'Filter for documents containing code'
                      },
                      after: {
                        type: 'string',
                        description: 'Filter for documents after date (ISO format)'
                      },
                      before: {
                        type: 'string',
                        description: 'Filter for documents before date (ISO format)'
                      }
                    }
                  }
                }
              }
            },
            required: ['query'],
          },
        },
        {
          name: 'delete_document',
          description: 'Delete a document from the RAG system',
          inputSchema: {
            type: 'object',
            properties: {
              url: { type: 'string', description: 'Document URL to delete' },
            },
            required: ['url'],
          },
        },
        {
          name: 'list_documents',
          description: 'List all stored documents with pagination and grouping options',
          inputSchema: {
            type: 'object',
            properties: {
              page: {
                type: 'number',
                description: 'Page number (default: 1)',
                minimum: 1
              },
              pageSize: {
                type: 'number',
                description: 'Number of documents per page (default: 20)',
                minimum: 1,
                maximum: 100
              },
              groupByDomain: {
                type: 'boolean',
                description: 'Group documents by domain (default: false)'
              },
              sortBy: {
                type: 'string',
                description: 'Sort field (default: timestamp)',
                enum: ['timestamp', 'title', 'domain']
              },
              sortOrder: {
                type: 'string',
                description: 'Sort order (default: desc)',
                enum: ['asc', 'desc']
              }
            }
          }
        },
      ],
    }));
    // Dispatch tool calls by name; unexpected errors are converted into an
    // isError tool result rather than crashing the server.
    this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
      try {
        switch (request.params.name) {
          case 'add_document': {
            // Validate required string fields before building the Document.
            const args = request.params.arguments as Record<string, unknown>;
            if (!args || typeof args.url !== 'string' || typeof args.content !== 'string') {
              throw new Error('Invalid document format: url and content must be strings');
            }
            const doc: Document = {
              url: args.url,
              content: args.content,
              metadata: (args.metadata as Record<string, unknown>) || {}
            };
            await client.addDocument(doc);
            return {
              content: [{ type: 'text', text: `Document ${doc.url} added successfully` }],
            };
          }
          case 'search_documents': {
            const { query, options } = request.params.arguments as {
              query: string;
              options?: {
                limit?: number;
                scoreThreshold?: number;
                filters?: {
                  domain?: string;
                  hasCode?: boolean;
                  after?: string;
                  before?: string;
                };
              };
            };
            // Delegate semantic search to the dedicated handler.
            const searchHandler = new SearchDocumentationHandler(
              client.qdrant,
              client.embeddings,
              this.server,
              client
            );
            return await searchHandler.handle({ query, options });
          }
          case 'delete_document': {
            const { url } = request.params.arguments as { url: string };
            await client.deleteDocument(url);
            return {
              content: [{ type: 'text', text: `Document ${url} deleted successfully` }],
            };
          }
          case 'list_documents': {
            const args = request.params.arguments as ListOptions;
            const listHandler = new ListDocumentationHandler(this.server, client);
            return await listHandler.handle(args || {});
          }
          default:
            throw new Error(`Unknown tool: ${request.params.name}`);
        }
      } catch (error) {
        const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
        console.error('[Tool Error]', errorMessage);
        return {
          content: [{ type: 'text', text: `Error: ${errorMessage}` }],
          isError: true,
        };
      }
    });
  }
  // Connects the server to the stdio transport and keeps it running.
  async run() {
    const transport = new StdioServerTransport();
    await this.server.connect(transport);
    console.error('RagDocs MCP server running on stdio');
  }
}
// Start the MCP server; startup failures are logged rather than rethrown.
const server = new RagDocsServer();
server.run().catch(console.error);
```
--------------------------------------------------------------------------------
/src/tools/qdrant-client.ts:
--------------------------------------------------------------------------------
```typescript
import { QdrantClient } from '@qdrant/js-client-rest';
import { TextChunk } from './text-chunker.js';
/** Payload metadata stored alongside each document chunk in Qdrant. */
export interface DocumentMetadata {
  /** Source document URL (shared by all of the document's chunks). */
  url: string;
  title: string;
  /** Hostname of the source URL. */
  domain: string;
  /** ISO-8601 timestamp of when the document was processed. */
  timestamp: string;
  contentType: string;
  wordCount: number;
  hasCode: boolean;
  /** Zero-based position of this chunk within the document. */
  chunkIndex: number;
  /** Total number of chunks the document was split into. */
  totalChunks: number;
}
/** Error raised when an operation against the Qdrant server fails. */
export class QdrantError extends Error {
  constructor(message: string) {
    super(message);
    this.name = 'QdrantError';
  }
}
/**
 * Thin wrapper around the Qdrant REST client for the 'documentation'
 * collection: initialization, chunk storage, existence checks, deletion,
 * listing, and vector-similarity search.
 */
export class QdrantWrapper {
  public client: QdrantClient;
  private readonly collectionName = 'documentation';
  private readonly vectorSize = 768; // Ollama nomic-embed-text size

  constructor(url?: string, apiKey?: string) {
    this.client = new QdrantClient({
      // Default to localhost, matching index.ts and the documented local
      // Docker setup. The previous fallback ('http://10.1.1.199:6333') was
      // a hard-coded private LAN address left over from development.
      url: url || 'http://127.0.0.1:6333',
      apiKey: apiKey,
      timeout: 10000 // Add timeout to help debug connection issues
    });
  }

  /**
   * Initializes the Qdrant collection if it doesn't exist, including the
   * payload indexes used for filtering (url, domain, timestamp).
   * @throws QdrantError when initialization fails
   */
  async initializeCollection(): Promise<void> {
    try {
      const collections = await this.client.getCollections();
      const exists = collections.collections.some(c => c.name === this.collectionName);
      if (!exists) {
        await this.client.createCollection(this.collectionName, {
          vectors: {
            size: this.vectorSize,
            distance: 'Cosine',
          },
          optimizers_config: {
            default_segment_number: 2,
          },
          replication_factor: 1,
        });
        // Create indexes for efficient filtering
        await this.client.createPayloadIndex(this.collectionName, {
          field_name: 'url',
          field_schema: 'keyword',
        });
        await this.client.createPayloadIndex(this.collectionName, {
          field_name: 'domain',
          field_schema: 'keyword',
        });
        await this.client.createPayloadIndex(this.collectionName, {
          field_name: 'timestamp',
          field_schema: 'datetime',
        });
      }
    } catch (error) {
      console.error('Qdrant initialization error:', error);
      if (error instanceof Error) {
        console.error('Error details:', {
          name: error.name,
          message: error.message,
          stack: error.stack
        });
      }
      throw new QdrantError(
        `Failed to initialize Qdrant collection: ${error instanceof Error ? error.message : String(error)}`
      );
    }
  }

  /**
   * Stores document chunks in the Qdrant collection. Point IDs are derived
   * from (url, chunk index), so re-adding the same URL overwrites in place.
   * @param chunks Text chunks to store
   * @param embeddings Corresponding embeddings for each chunk
   * @param metadata Document metadata
   * @throws QdrantError on length mismatch or upsert failure
   */
  async storeDocumentChunks(
    chunks: TextChunk[],
    embeddings: number[][],
    metadata: Omit<DocumentMetadata, 'chunkIndex' | 'totalChunks'>
  ): Promise<void> {
    if (chunks.length !== embeddings.length) {
      throw new QdrantError('Number of chunks does not match number of embeddings');
    }
    try {
      const points = chunks.map((chunk, index) => ({
        id: this.generatePointId(metadata.url, chunk.index),
        vector: embeddings[index],
        payload: {
          ...metadata,
          content: chunk.content,
          chunkIndex: chunk.index,
          totalChunks: chunks.length,
          chunkMetadata: chunk.metadata,
        },
      }));
      await this.client.upsert(this.collectionName, {
        wait: true,
        points,
      });
    } catch (error) {
      throw new QdrantError(
        `Failed to store document chunks: ${(error as Error).message}`
      );
    }
  }

  /**
   * Checks if a document already exists in the collection.
   * @param url Document URL
   * @returns true if document exists
   */
  async documentExists(url: string): Promise<boolean> {
    try {
      const response = await this.client.scroll(this.collectionName, {
        filter: {
          must: [
            {
              key: 'url',
              match: {
                value: url,
              },
            },
          ],
        },
        limit: 1,
      });
      return response.points.length > 0;
    } catch (error) {
      throw new QdrantError(
        `Failed to check document existence: ${(error as Error).message}`
      );
    }
  }

  /**
   * Removes a document and all its chunks from the collection.
   * @param url Document URL
   */
  async removeDocument(url: string): Promise<void> {
    try {
      await this.client.delete(this.collectionName, {
        filter: {
          must: [
            {
              key: 'url',
              match: {
                value: url,
              },
            },
          ],
        },
        wait: true,
      });
    } catch (error) {
      throw new QdrantError(
        `Failed to remove document: ${(error as Error).message}`
      );
    }
  }

  /**
   * Generates a deterministic point ID for a chunk so that re-ingesting a
   * URL upserts over its previous points.
   * NOTE(review): this folds the string into a 32-bit hash and takes
   * Math.abs, so distinct (url, chunk) pairs can collide and silently
   * overwrite each other; changing the scheme would orphan existing data,
   * so it is left as-is but worth migrating to a UUID derivation.
   * @param url Document URL
   * @param chunkIndex Chunk index
   * @returns Unique point ID
   */
  private generatePointId(url: string, chunkIndex: number): number {
    // Create a hash of the URL + chunk index
    const str = `${url}:${chunkIndex}`;
    let hash = 0;
    for (let i = 0; i < str.length; i++) {
      const char = str.charCodeAt(i);
      hash = ((hash << 5) - hash) + char;
      hash = hash & hash; // Convert to 32-bit integer
    }
    return Math.abs(hash);
  }

  /**
   * Gets the health status of the Qdrant server.
   * @returns true if server is healthy
   */
  async isHealthy(): Promise<boolean> {
    try {
      await this.client.getCollections();
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Lists all documents with pagination support. Only each document's
   * chunk 0 is returned to avoid per-chunk duplicates.
   * NOTE(review): `sortBy`/`sortOrder` are accepted but not applied here,
   * and Qdrant's scroll `offset` is a point-ID cursor rather than a numeric
   * skip — confirm pagination behavior against the Qdrant scroll API.
   * @param options Listing options including pagination and filtering
   * @returns Array of document metadata with pagination info
   */
  async listDocuments(options: {
    offset?: number;
    limit?: number;
    domain?: string;
    sortBy?: 'timestamp' | 'title' | 'domain';
    sortOrder?: 'asc' | 'desc';
  } = {}): Promise<{ total: number; documents: DocumentMetadata[] }> {
    const filter: any = {
      must: [
        {
          key: 'chunkIndex',
          match: { value: 0 }, // Only get first chunk to avoid duplicates
        },
      ],
    };
    if (options.domain) {
      filter.must.push({
        key: 'domain',
        match: { value: options.domain },
      });
    }
    try {
      // Get total count first
      const countResponse = await this.client.count(this.collectionName, {
        filter,
      });
      // Then get paginated results
      const response = await this.client.scroll(this.collectionName, {
        filter,
        limit: options.limit || 20,
        offset: options.offset || 0,
        with_payload: true,
        with_vector: false,
      });
      const documents = response.points.map(point => {
        const payload = point.payload as any;
        return {
          url: String(payload.url),
          title: String(payload.title),
          domain: String(payload.domain),
          timestamp: String(payload.timestamp),
          contentType: String(payload.contentType),
          wordCount: Number(payload.wordCount),
          hasCode: Boolean(payload.hasCode),
          chunkIndex: Number(payload.chunkIndex),
          totalChunks: Number(payload.totalChunks),
        };
      });
      return {
        total: countResponse.count,
        documents,
      };
    } catch (error) {
      throw new QdrantError(
        `Failed to list documents: ${(error as Error).message}`
      );
    }
  }

  /**
   * Performs a semantic search using vector similarity.
   * @param queryVector Query embedding vector
   * @param options Search options (limit, score threshold, payload filters)
   * @returns Array of search results with scores
   * @throws QdrantError on search failure or malformed result payloads
   */
  async searchSimilar(
    queryVector: number[],
    options: {
      limit?: number;
      scoreThreshold?: number;
      filters?: {
        domain?: string;
        hasCode?: boolean;
        after?: string;
        before?: string;
      };
    } = {}
  ): Promise<Array<DocumentMetadata & { score: number; content: string }>> {
    const limit = options.limit || 5;
    const scoreThreshold = options.scoreThreshold || 0.7;
    const filter: any = { must: [] };
    // Add filters if specified
    if (options.filters?.domain) {
      filter.must.push({
        key: 'domain',
        match: { value: options.filters.domain },
      });
    }
    if (options.filters?.hasCode !== undefined) {
      filter.must.push({
        key: 'hasCode',
        match: { value: options.filters.hasCode },
      });
    }
    if (options.filters?.after) {
      filter.must.push({
        key: 'timestamp',
        range: { gte: options.filters.after },
      });
    }
    if (options.filters?.before) {
      filter.must.push({
        key: 'timestamp',
        range: { lte: options.filters.before },
      });
    }
    try {
      const response = await this.client.search(this.collectionName, {
        vector: queryVector,
        limit: Math.ceil(limit * 1.5), // Request extra results for post-filtering
        score_threshold: scoreThreshold,
        filter: filter.must.length > 0 ? filter : undefined,
        with_payload: true,
      });
      return response
        .map(hit => {
          const payload = hit.payload as any;
          if (!payload || typeof payload !== 'object') {
            throw new QdrantError('Invalid payload structure in search result');
          }
          // Extract and validate required fields
          const result = {
            score: hit.score || 0,
            url: String(payload.url),
            title: String(payload.title),
            domain: String(payload.domain),
            timestamp: String(payload.timestamp),
            contentType: String(payload.contentType),
            wordCount: Number(payload.wordCount),
            hasCode: Boolean(payload.hasCode),
            chunkIndex: Number(payload.chunkIndex),
            totalChunks: Number(payload.totalChunks),
            content: String(payload.content),
          };
          // Validate all fields are present and of correct type
          if (Object.values(result).some(v => v === undefined)) {
            throw new QdrantError('Missing required fields in search result');
          }
          return result;
        })
        .slice(0, limit); // Return only requested number of results
    } catch (error) {
      throw new QdrantError(
        `Failed to perform search: ${(error as Error).message}`
      );
    }
  }
}
```