# Directory Structure
```
├── .env.example
├── .gitignore
├── claude_desktop_config.example.json
├── config.ts
├── index.ts
├── jest.config.mjs
├── LICENSE
├── package-lock.json
├── package.json
├── README.md
├── SETUP.md
├── start.ts
├── tools
│   ├── embedFiles.ts
│   ├── ingestBranch.ts
│   ├── processFiles.ts
│   └── queryRepo.ts
├── tsconfig.json
└── utils
    ├── codeSplitter.ts
    ├── db.ts
    ├── filePatternMatcher.ts
    ├── ollamaEmbeddings.ts
    ├── repoConfig.ts
    └── types.ts
```
# Files
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
```
DATA_DIR=/home/user/.config/Claude/data
REPO_CONFIG_DIR=/home/user/.config/Claude/repos
NODE_ENV=development
```
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
# Node.js
node_modules/
npm-debug.log
yarn-debug.log
yarn-error.log
# TypeScript
dist/
*.tsbuildinfo
# Data directories
data/
cache/
repos/
# HuggingFace specific
.transformers/
.cache/
huggingface/
models/
**/temp_test_repos/
# Test temporary files
coverage/
.nyc_output/
junit.xml
# Database files
*.db
*.sqlite
*.sqlite3
# Environment variables
.env
.env.local
.env.development.local
.env.test.local
.env.production.local
# Log files
logs/
*.log
# Editor directories and files
.idea/
.vscode/
*.swp
*.swo
# OS files
.DS_Store
Thumbs.db
# Build files
build/
out/
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
# Code Context MCP Server
A Model Context Protocol (MCP) server for providing code context from local git repositories. This server allows you to:
1. Clone git repositories locally
2. Process branches and files
3. Generate embeddings for code chunks
4. Perform semantic search over code
## Features
- Uses local git repositories instead of GitHub API
- Stores data in SQLite database
- Splits code into semantic chunks
- Generates embeddings for code chunks using Ollama
- Provides semantic search over code
## Prerequisites
- Node.js (v16+)
- Git
- Ollama with an embedding model
## Installation
```bash
# Clone the repository
git clone <repository-url>
cd code-context-mcp
# Install dependencies
npm install
# Build the project
npm run build
```
## Configuration
Set the following environment variables:
- `DATA_DIR`: Directory for the SQLite database (default: `~/.codeContextMcp/data`)
- `REPO_CONFIG_DIR`: Directory for repository metadata and cached clones (default: `~/.codeContextMcp/repos`)
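For example, in a shell profile (paths illustrative):
```bash
export DATA_DIR="$HOME/.codeContextMcp/data"
export REPO_CONFIG_DIR="$HOME/.codeContextMcp/repos"
```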
### Using Ollama
The server uses Ollama to generate embeddings:
```bash
# Install Ollama from https://ollama.ai/
# Pull an embedding model (unclemusclez/jina-embeddings-v2-base-code is recommended)
ollama pull unclemusclez/jina-embeddings-v2-base-code
```
## Usage
### Using with Claude Desktop
Add the following configuration to your Claude Desktop configuration file (`claude_desktop_config.json`):
```json
{
"mcpServers": {
"code-context-mcp": {
"command": "/path/to/your/node",
"args": ["/path/to/code-context-mcp/dist/index.js"]
}
}
}
```
## Tools
The server provides the following tool:
### queryRepo
Clones a repository, processes code, and performs semantic search:
```json
{
"repoUrl": "https://github.com/username/repo.git",
"branch": "main", // Optional - defaults to repository's default branch
"semanticSearch": "Your search query",
"keywordsSearch": ["keyword1", "keyword2"], // Filter results by keywords; pass [] to disable
"filePatterns": ["**/*.ts", "src/*.js"], // Filter files by glob patterns; pass [] to disable
"excludePatterns": ["**/node_modules/**"], // Optional - exclude files by glob patterns
"limit": 10 // Optional - number of results to return, default: 10
}
```
The `branch` parameter is optional; if omitted, the tool uses the repository's default branch.
The `keywordsSearch` parameter filters results to chunks that contain at least one of the specified keywords (case-insensitive matching). Pass an empty array to disable it.
The `filePatterns` and `excludePatterns` parameters filter which files are processed and searched using glob patterns (e.g., `**/*.ts` for all TypeScript files). Pass empty arrays to disable them.
## Database Schema
The server uses SQLite with the following schema:
- `repository`: Stores information about repositories
- `branch`: Stores information about branches
- `file`: Stores information about files
- `branch_file_association`: Associates files with branches
- `file_chunk`: Stores code chunks and their embeddings
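For orientation, a minimal sketch (using the `dbInterface` helper from `utils/db.ts`) of how the tables relate; the branch id here is hypothetical:
```typescript
import dbInterface from "./utils/db.js";

// Follow the foreign keys: branch -> branch_file_association -> file -> file_chunk
const chunks = dbInterface.all(
  `SELECT f.path, fc.chunk_number, fc.content
   FROM file_chunk fc
   JOIN file f ON fc.file_id = f.id
   JOIN branch_file_association bfa ON f.id = bfa.file_id
   WHERE bfa.branch_id = ?`,
  1 // hypothetical branch id
);
```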
## Debugging
### Mac M-series (ARM) Architecture Issues
When installing better-sqlite3 on Mac M-series chips (ARM architecture), you may see errors like "mach-o file, but is an incompatible architecture (have 'x86_64', need 'arm64e' or 'arm64')". This means the compiled binary does not match your CPU architecture. To resolve it:
```bash
# Check your Node.js architecture
node -p "process.arch"
# If it shows 'arm64', but you're still having issues, try:
npm rebuild better-sqlite3 --build-from-source
# Or for a clean install:
npm uninstall better-sqlite3
export npm_config_arch=arm64
export npm_config_target_arch=arm64
npm install better-sqlite3 --build-from-source
```
If you're using Rosetta, make sure your entire toolchain is consistent: the error above indicates that x86_64 binaries were built while your system needs arm64.
For persistent configuration, add to your .zshrc or .bashrc:
```bash
export npm_config_arch=arm64
export npm_config_target_arch=arm64
```
### Testing Ollama Embeddings
```bash
curl http://localhost:11434/api/embed -d '{"model":"unclemusclez/jina-embeddings-v2-base-code","input":"Llamas are members of the camelid family"}'
curl http://127.0.0.1:11434/api/embed -d '{"model":"unclemusclez/jina-embeddings-v2-base-code","input":"Llamas are members of the camelid family"}'
curl http://[::1]:11434/api/embed -d '{"model":"unclemusclez/jina-embeddings-v2-base-code","input":"Llamas are members of the camelid family"}'
```
## License
MIT
```
--------------------------------------------------------------------------------
/utils/types.ts:
--------------------------------------------------------------------------------
```typescript
/**
* Common interfaces and types used across the codebase
*/
/**
* Interface for objects that can send progress notifications
*/
export interface ProgressNotifier {
sendProgress: (progress: number, total: number) => Promise<void>;
}
```
--------------------------------------------------------------------------------
/claude_desktop_config.example.json:
--------------------------------------------------------------------------------
```json
{
"mcpServers": {
"code-context": {
"command": "node",
"args": ["<CLAUDE_CONFIG_DIR>/mcp-servers/code-context-mcp/dist/start.js"],
"env": {
"DATA_DIR": "<CLAUDE_CONFIG_DIR>/data",
"REPO_CONFIG_DIR": "<CLAUDE_CONFIG_DIR>/repos",
"NODE_ENV": "development"
}
}
}
}
```
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
```json
{
"compilerOptions": {
"target": "ES2020",
"module": "NodeNext",
"moduleResolution": "NodeNext",
"esModuleInterop": true,
"strict": true,
"outDir": "./dist",
"rootDir": ".",
"declaration": true,
"skipLibCheck": true,
"isolatedModules": true,
"allowJs": true,
"resolveJsonModule": true,
"forceConsistentCasingInFileNames": true,
"baseUrl": ".",
"paths": {
"*": ["*"]
}
},
"include": [
"./**/*.ts",
"./**/*.mts",
"./tests/**/*.ts"
],
"exclude": [
"node_modules",
"dist",
"repos"
]
}
```
--------------------------------------------------------------------------------
/config.ts:
--------------------------------------------------------------------------------
```typescript
import path from "path";
import os from "os";
// Available models for code embeddings
export const EMBEDDING_MODELS = {
OLLAMA: {
model: "unclemusclez/jina-embeddings-v2-base-code",
contextSize: 8192,
dimensions: 768,
baseUrl: "http://127.0.0.1:11434",
},
};
export const codeContextConfig = {
ENV: process.env.NODE_ENV || "development",
REPO_CONFIG_DIR:
process.env.REPO_CONFIG_DIR ||
path.join(os.homedir(), ".codeContextMcp", "repos"),
BATCH_SIZE: 100,
DATA_DIR:
process.env.DATA_DIR || path.join(os.homedir(), ".codeContextMcp", "data"),
DB_PATH: process.env.DB_PATH || "code_context.db",
EMBEDDING_MODEL: EMBEDDING_MODELS.OLLAMA,
};
export default codeContextConfig;
```
--------------------------------------------------------------------------------
/jest.config.mjs:
--------------------------------------------------------------------------------
```
export default {
preset: 'ts-jest/presets/default-esm',
clearMocks: true,
coverageDirectory: "coverage",
roots: [
"./tests"
],
moduleNameMapper: {
'^(\\.{1,2}/.*)\\.js$': '$1',
},
transform: {
'^.+\\.tsx?$': [
'ts-jest',
{
isolatedModules: true,
useESM: true,
tsconfig: './tsconfig.json'
}
]
},
testEnvironment: 'node',
moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node', 'mjs'],
extensionsToTreatAsEsm: ['.ts', '.mts'],
transformIgnorePatterns: [
'node_modules/(?!(@huggingface)/)'
],
testMatch: [
'**/?(*.)+(spec|test).ts',
'**/tests/*EmbeddingsTest.ts',
'**/tests/githubRepoTest.ts'
],
globals: {
'ts-jest': {
useESM: true,
},
},
setupFilesAfterEnv: ['<rootDir>/tests/setup.ts'],
verbose: true
};
```
--------------------------------------------------------------------------------
/SETUP.md:
--------------------------------------------------------------------------------
```markdown
# Code Context MCP Setup
## Prerequisites
```bash
ollama pull unclemusclez/jina-embeddings-v2-base-code
```
## Install
```bash
npm install
npm run build
```
## Configuration
Copy `claude_desktop_config.example.json` to your Claude Desktop config location:
**Linux/macOS**: `~/.config/Claude/claude_desktop_config.json`
**Windows**: `%APPDATA%\Claude\claude_desktop_config.json`
Replace `<CLAUDE_CONFIG_DIR>` with your actual path:
- Linux/macOS: `/home/username/.config/Claude`
- Windows: `C:\Users\username\AppData\Roaming\Claude`
## Environment
Copy `.env.example` to `.env` and adjust paths if needed.
The `repos/` directory stores per-repository configuration metadata; remote repositories are cached under `repos/cache/`.
For local repositories (file:// URLs), no cloning occurs - files are accessed directly.
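For example (illustrative values), a `query_repo` call against a local repository might look like:
```json
{
  "repoUrl": "file:///home/username/projects/my-repo",
  "semanticSearch": "database schema definition",
  "keywordsSearch": [],
  "filePatterns": [],
  "excludePatterns": []
}
```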
## Test
```bash
npm run start:mcp
```
Restart Claude Desktop.
```
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
```json
{
"name": "@modelcontextprotocol/server-code-context",
"version": "0.1.0",
"description": "MCP server for code context from local git repositories",
"license": "MIT",
"type": "module",
"bin": {
"mcp-server-code-context": "dist/index.js"
},
"files": [
"dist"
],
"scripts": {
"build": "tsc && shx chmod +x dist/*.js",
"watch": "tsc --watch",
"start": "node dist/index.js",
"start:mcp": "node dist/start.js",
"inspect": "npm run build && npx @modelcontextprotocol/inspector node dist/index.js"
},
"dependencies": {
"@langchain/textsplitters": "^0.1.0",
"@modelcontextprotocol/sdk": "1.0.1",
"axios": "^1.8.4",
"better-sqlite3": "^11.9.1",
"express": "^4.21.1",
"simple-git": "^3.20.0",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.23.5"
},
"devDependencies": {
"@types/better-sqlite3": "^7.6.4",
"@types/express": "^5.0.0",
"@types/jest": "^29.5.14",
"@types/minimatch": "^5.1.2",
"@types/node": "^20.10.0",
"jest": "^29.7.0",
"jest-environment-node-single-context": "^29.4.0",
"shx": "^0.3.4",
"ts-jest": "^29.3.0",
"tsx": "^4.19.3",
"typescript": "^5.6.2"
}
}
```
--------------------------------------------------------------------------------
/utils/filePatternMatcher.ts:
--------------------------------------------------------------------------------
```typescript
/**
* Convert a glob pattern to an SQL LIKE pattern
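 * e.g. "**/*.ts" becomes "%/%.ts" and "src/*.js" becomes "src/%.js"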
*/
export function globToSqlPattern(pattern: string): string {
// Handle ** (any depth of directories)
let sqlPattern = pattern.replace(/\*\*/g, '%');
// Handle * (any characters within a directory)
sqlPattern = sqlPattern.replace(/\*/g, '%');
return sqlPattern;
}
/**
 * Create SQL WHERE conditions for file pattern filtering.
 * Note: patterns are interpolated directly into the SQL string rather than
 * bound as parameters, so they must come from trusted input.
 */
export function createFilePatternCondition(
includePatterns: string[] | undefined,
excludePatterns: string[] | undefined
): string {
let conditions = '';
// Include patterns (files must match at least one pattern)
if (includePatterns && includePatterns.length > 0) {
const includeConditions = includePatterns.map(pattern => {
const sqlPattern = globToSqlPattern(pattern);
return `f.path LIKE '${sqlPattern}'`;
});
conditions += ` AND (${includeConditions.join(' OR ')})`;
}
// Exclude patterns (files must not match any pattern)
if (excludePatterns && excludePatterns.length > 0) {
const excludeConditions = excludePatterns.map(pattern => {
const sqlPattern = globToSqlPattern(pattern);
return `f.path NOT LIKE '${sqlPattern}'`;
});
conditions += ` AND (${excludeConditions.join(' AND ')})`;
}
return conditions;
}
```
--------------------------------------------------------------------------------
/start.ts:
--------------------------------------------------------------------------------
```typescript
#!/usr/bin/env node
import { spawn } from 'child_process';
import { existsSync, mkdirSync } from 'fs';
import { dirname, join } from 'path';
import { fileURLToPath } from 'url';
const __dirname = dirname(fileURLToPath(import.meta.url));
const DATA_DIR = process.env.DATA_DIR || join(process.env.HOME!, '.config', 'Claude', 'data');
const REPO_CONFIG_DIR = process.env.REPO_CONFIG_DIR || join(process.env.HOME!, '.config', 'Claude', 'repos');
const NODE_ENV = process.env.NODE_ENV || 'development';
[DATA_DIR, REPO_CONFIG_DIR].forEach(dir => {
if (!existsSync(dir)) {
mkdirSync(dir, { recursive: true, mode: 0o755 });
}
});
process.stderr.write(`Starting Code Context MCP Server\n`);
process.stderr.write(`Data Directory: ${DATA_DIR}\n`);
process.stderr.write(`Repo Config: ${REPO_CONFIG_DIR}\n`);
process.stderr.write(`Node Environment: ${NODE_ENV}\n\n`);
const checkOllama = () => {
try {
const result = spawn('pgrep', ['ollama'], { stdio: 'pipe' });
result.on('exit', (code) => {
if (code !== 0) {
process.stderr.write('Starting Ollama...\n');
spawn('ollama', ['serve'], { detached: true, stdio: 'ignore' }).unref();
setTimeout(() => startMcpServer(), 3000);
} else {
startMcpServer();
}
});
} catch {
startMcpServer();
}
};
const startMcpServer = () => {
const serverPath = join(__dirname, 'index.js');
if (!existsSync(serverPath)) {
process.stderr.write(`Error: MCP server not found at ${serverPath}\n`);
process.stderr.write('Run: npm run build\n');
process.exit(1);
}
process.env.DATA_DIR = DATA_DIR;
process.env.REPO_CONFIG_DIR = REPO_CONFIG_DIR;
process.env.NODE_ENV = NODE_ENV;
const server = spawn('node', [serverPath, ...process.argv.slice(2)], {
stdio: 'inherit',
cwd: __dirname
});
server.on('exit', (code) => process.exit(code || 0));
};
checkOllama();
```
--------------------------------------------------------------------------------
/utils/ollamaEmbeddings.ts:
--------------------------------------------------------------------------------
```typescript
import axios from "axios";
import config from "../config.js";
// Tracks whether the initialization message has already been logged
let apiInitialized = false;
/**
* Generate embeddings for text using Ollama API
* @param texts Array of text strings to embed
* @param embeddingModel Optional model configuration to use
* @returns Promise containing array of embeddings
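 * @example
 * const [vector] = await generateOllamaEmbeddings(["function add(a, b) { return a + b; }"]);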
*/
export async function generateOllamaEmbeddings(
texts: string[],
embeddingModel: {
model: string;
contextSize: number;
dimensions: number;
baseUrl?: string;
} = config.EMBEDDING_MODEL
): Promise<number[][]> {
try {
// Log initialization
if (!apiInitialized) {
console.error(
`Initializing Ollama embeddings with model: ${embeddingModel.model}...`
);
apiInitialized = true;
}
const baseUrl = embeddingModel.baseUrl || "http://127.0.0.1:11434";
const embeddings: number[][] = [];
// Process texts in sequential batches to avoid overwhelming the API
console.error(`Generating embeddings for ${texts.length} chunks...`);
const batchSize = 1000; // Number of texts sent per /api/embed request
for (let i = 0; i < texts.length; i += batchSize) {
const batch = texts.slice(i, i + batchSize);
const response = await axios.post(
`${baseUrl}/api/embed`,
{
model: embeddingModel.model,
input: batch,
options: {
num_ctx: embeddingModel.contextSize,
},
},
{
headers: {
"Content-Type": "application/json",
},
}
);
// Collect the embeddings returned for this batch
embeddings.push(...response.data.embeddings);
}
console.error(`Successfully generated ${embeddings.length} embeddings`);
return embeddings;
} catch (error) {
console.error("Error generating embeddings:", error);
// For testing purposes, return mock embeddings if running in test environment
if (config.ENV === "test") {
console.error("Using mock embeddings for testing");
return texts.map(() => generateMockEmbedding(embeddingModel.dimensions));
}
throw error;
}
}
/**
* Generate a simple mock embedding vector for testing
* @param dimensions The number of dimensions in the embedding vector
* @returns A normalized random vector of the specified dimensions
*/
function generateMockEmbedding(dimensions: number): number[] {
// Create a random vector
const vector = Array.from({ length: dimensions }, () => Math.random() - 0.5);
// Normalize the vector
const magnitude = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
return vector.map((val) => val / magnitude);
}
```
--------------------------------------------------------------------------------
/utils/repoConfig.ts:
--------------------------------------------------------------------------------
```typescript
import { existsSync, writeFileSync, readFileSync, mkdirSync } from 'fs';
import { join, basename } from 'path';
import { createHash } from 'crypto';
import config from '../config.js';
interface RepoConfig {
url: string;
localPath?: string;
lastAccessed: number;
type: 'local' | 'remote' | 'cached';
branch?: string;
}
export class RepositoryConfigManager {
private configDir: string;
constructor() {
this.configDir = config.REPO_CONFIG_DIR;
if (!existsSync(this.configDir)) {
mkdirSync(this.configDir, { recursive: true });
}
}
private getConfigPath(repoUrl: string): string {
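// One JSON config file per repository, named by the MD5 hash of its URL: <configDir>/<md5(url)>.json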
const hash = createHash('md5').update(repoUrl).digest('hex');
return join(this.configDir, `${hash}.json`);
}
private sanitizeLocalPath(repoUrl: string): string | null {
if (repoUrl.startsWith('file://')) {
const localPath = repoUrl.replace('file://', '');
return existsSync(localPath) ? localPath : null;
}
return null;
}
getRepositoryPath(repoUrl: string, branch?: string): { path: string; config: RepoConfig } {
const localPath = this.sanitizeLocalPath(repoUrl);
if (localPath) {
const repoConfig: RepoConfig = {
url: repoUrl,
localPath,
lastAccessed: Date.now(),
type: 'local',
branch
};
this.saveConfig(repoUrl, repoConfig);
return { path: localPath, config: repoConfig };
}
const configPath = this.getConfigPath(repoUrl);
let repoConfig: RepoConfig;
if (existsSync(configPath)) {
try {
repoConfig = JSON.parse(readFileSync(configPath, 'utf8'));
repoConfig.lastAccessed = Date.now();
} catch {
repoConfig = this.createRemoteConfig(repoUrl, branch);
}
} else {
repoConfig = this.createRemoteConfig(repoUrl, branch);
}
this.saveConfig(repoUrl, repoConfig);
return { path: repoConfig.localPath || '', config: repoConfig };
}
private createRemoteConfig(repoUrl: string, branch?: string): RepoConfig {
const repoName = basename(repoUrl.replace('.git', ''));
const cacheDir = join(this.configDir, 'cache');
if (!existsSync(cacheDir)) {
mkdirSync(cacheDir, { recursive: true });
}
return {
url: repoUrl,
localPath: join(cacheDir, repoName),
lastAccessed: Date.now(),
type: 'remote',
branch
};
}
private saveConfig(repoUrl: string, config: RepoConfig): void {
const configPath = this.getConfigPath(repoUrl);
writeFileSync(configPath, JSON.stringify(config, null, 2));
}
isLocalRepository(repoUrl: string): boolean {
return repoUrl.startsWith('file://');
}
needsCloning(repoUrl: string): boolean {
if (this.isLocalRepository(repoUrl)) {
return false;
}
const { config } = this.getRepositoryPath(repoUrl);
return !config.localPath || !existsSync(config.localPath);
}
getRepoType(repoUrl: string): 'local' | 'remote' {
return this.isLocalRepository(repoUrl) ? 'local' : 'remote';
}
}
export const repoConfigManager = new RepositoryConfigManager();
```
--------------------------------------------------------------------------------
/index.ts:
--------------------------------------------------------------------------------
```typescript
#!/usr/bin/env node
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import {
CallToolRequestSchema,
ErrorCode,
ListToolsRequestSchema,
McpError,
} from "@modelcontextprotocol/sdk/types.js";
import { QueryRepoSchema, queryRepo } from "./tools/queryRepo.js";
import { zodToJsonSchema } from "zod-to-json-schema";
import { z } from "zod";
import { ProgressNotifier } from "./utils/types.js";
enum ToolName {
QUERY_REPO = "query_repo",
}
class CodeContextServer {
private server: Server;
constructor() {
this.server = new Server(
{
name: "code-context-mcp",
version: "0.1.0",
},
{
capabilities: {
tools: {},
},
}
);
this.setupToolHandlers();
// Error handling
this.server.onerror = (error) => console.error("[MCP Error]", error);
process.on("SIGINT", async () => {
await this.server.close();
process.exit(0);
});
}
private setupToolHandlers() {
this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
tools: [
{
name: ToolName.QUERY_REPO,
description: "Queries a git repository using semantic and keyword search. Use keywords and file patterns if you want to target specific files or terms",
inputSchema: zodToJsonSchema(QueryRepoSchema),
},
],
}));
this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
const { name, arguments: input } = request.params;
const progressToken = request.params._meta?.progressToken;
switch (name) {
case ToolName.QUERY_REPO:
try {
// Create a progress notifier if we have a progress token
let progressNotifier: ProgressNotifier | undefined;
if (progressToken !== undefined) {
progressNotifier = {
sendProgress: async (progress: number, total: number) => {
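// Scale fractional progress (0..1) to integer units out of total * 100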
await this.server.notification({
method: "notifications/progress",
params: {
progress: Math.floor(progress * 100),
total: total * 100,
progressToken,
},
});
},
};
}
// Get the raw result from queryRepo with progress notifications
const result = await queryRepo(
input as z.infer<typeof QueryRepoSchema>,
progressNotifier
);
// Format the response in Claude's expected structure
return {
content: [
{
type: "text",
text: JSON.stringify(result),
},
],
};
} catch (error) {
console.error("Error in query_repo:", error);
return {
content: [
{
type: "text",
text: `Error executing query: ${error instanceof Error ? error.message : String(error)}`,
},
],
};
}
default:
throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`);
}
});
}
async run() {
const transport = new StdioServerTransport();
await this.server.connect(transport);
console.error("Code Context MCP server running on stdio");
}
}
const server = new CodeContextServer();
server.run().catch(console.error);
```
--------------------------------------------------------------------------------
/utils/db.ts:
--------------------------------------------------------------------------------
```typescript
import Database from "better-sqlite3";
import fs from "fs";
import path from "path";
import config from "../config.js";
// Ensure the data directory exists
const DATA_DIR = config.DATA_DIR;
if (!fs.existsSync(DATA_DIR)) {
fs.mkdirSync(DATA_DIR, { recursive: true });
}
const DB_PATH = path.join(DATA_DIR, "code_context.db");
const db = new Database(DB_PATH);
console.error(`Using db at: ${DB_PATH}`);
// Enable foreign keys
db.pragma("foreign_keys = ON");
// SQL schema for the database
export const SCHEMA_SQL = `
CREATE TABLE IF NOT EXISTS repository (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
path TEXT NOT NULL,
last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE(path)
);
CREATE TABLE IF NOT EXISTS branch (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
repository_id INTEGER NOT NULL,
last_commit_sha TEXT NOT NULL,
status TEXT CHECK(status IN ('pending', 'files_processed', 'embeddings_generated')) DEFAULT 'pending',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (repository_id) REFERENCES repository(id) ON DELETE CASCADE,
UNIQUE(name, repository_id)
);
CREATE TABLE IF NOT EXISTS file (
id INTEGER PRIMARY KEY AUTOINCREMENT,
repository_id INTEGER NOT NULL,
path TEXT NOT NULL,
name TEXT NOT NULL,
sha TEXT NOT NULL,
status TEXT CHECK(status IN ('pending', 'fetched', 'ingested', 'done')) DEFAULT 'pending',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (repository_id) REFERENCES repository(id) ON DELETE CASCADE,
UNIQUE(repository_id, path, sha)
);
CREATE TABLE IF NOT EXISTS branch_file_association (
branch_id INTEGER NOT NULL,
file_id INTEGER NOT NULL,
PRIMARY KEY (branch_id, file_id),
FOREIGN KEY (branch_id) REFERENCES branch(id) ON DELETE CASCADE,
FOREIGN KEY (file_id) REFERENCES file(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS file_chunk (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_id INTEGER NOT NULL,
content TEXT NOT NULL,
chunk_number INTEGER NOT NULL,
embedding TEXT,
model_version TEXT,
token_count INTEGER,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (file_id) REFERENCES file(id) ON DELETE CASCADE,
UNIQUE(file_id, chunk_number)
);
`;
// Initialize the database
export const initializeDatabase = () => {
try {
// Split the schema SQL into individual statements
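// (a naive split on ";" is safe here because SCHEMA_SQL contains no semicolons inside string literals)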
const statements = SCHEMA_SQL.split(";").filter(
(stmt) => stmt.trim().length > 0
);
// Execute each statement
for (const statement of statements) {
db.exec(statement + ";");
}
} catch (error) {
console.error("Error initializing database:", error);
throw error;
}
};
// Helper function to run queries with parameters
const run = (sql: string, params: any = {}) => {
return db.prepare(sql).run(params);
};
// Helper function to get a single row
const get = (sql: string, params: any = {}) => {
return db.prepare(sql).get(params);
};
// Helper function to get all rows
const all = (sql: string, params: any = {}) => {
return db.prepare(sql).all(params);
};
// Define a type for the database operations that can be performed in a transaction
export interface DatabaseOperations {
prepare: (sql: string) => {
run: (params?: any) => any;
get: (params?: any) => any;
all: (params?: any) => any;
};
}
// Create a transaction function that's compatible with the existing code
const transaction = (cb: (dbOps: any) => any): any => {
const runTransaction = db.transaction(cb);
return runTransaction(db);
};
// Define a public interface for our database module
export interface DatabaseInterface {
run: (sql: string, params?: any) => any;
get: (sql: string, params?: any) => any;
all: (sql: string, params?: any) => any;
transaction: (cb: (dbOps: any) => any) => any;
close: () => void;
}
// Initialize the database
initializeDatabase();
// Export the database interface
const dbInterface: DatabaseInterface = {
run,
get,
all,
transaction,
close: () => db.close(),
};
export default dbInterface;
```
--------------------------------------------------------------------------------
/tools/embedFiles.ts:
--------------------------------------------------------------------------------
```typescript
import { z } from "zod";
import dbInterface from "../utils/db.js";
import { generateOllamaEmbeddings } from "../utils/ollamaEmbeddings.js";
import { ProgressNotifier } from "../utils/types.js";
import config from "../config.js";
// Define input schema for embedFiles
export const EmbedFilesSchema = z.object({
repoLocalPath: z.string().describe("Local path to the cloned repository"),
branchId: z.number().describe("Branch ID in the database"),
_meta: z
.object({
progressToken: z.union([z.string(), z.number()]).optional(),
})
.optional(),
});
// Define chunk interface
interface Chunk {
id: number;
content: string;
file_id: number;
}
export async function embedFiles(
input: z.infer<typeof EmbedFilesSchema>,
progressNotifier?: ProgressNotifier
) {
try {
console.error(
`[embedFiles] Starting with parameters: ${JSON.stringify(input)}`
);
// Check if input is defined
if (!input) {
console.error(`[embedFiles] Error: Input parameters are undefined`);
return {
error: {
message: "Input parameters are required for embedFiles tool",
},
};
}
const startTime = Date.now();
const { branchId } = input;
// First check if the branch exists
const branchExists = dbInterface.get(
"SELECT id, status FROM branch WHERE id = ?",
branchId
);
if (!branchExists) {
console.error(`[embedFiles] Error: Branch with ID ${branchId} does not exist`);
return {
error: {
message: `Branch with ID ${branchId} does not exist`,
},
};
}
// Check if there are any files associated with this branch
const fileCount = dbInterface.get(
"SELECT COUNT(*) as count FROM branch_file_association WHERE branch_id = ?",
branchId
);
if (!fileCount || fileCount.count === 0) {
console.error(`[embedFiles] No files found for branch ${branchId}`);
// Still update the branch status
console.error(`[embedFiles] Setting branch status to 'embeddings_generated'`);
dbInterface.run(
"UPDATE branch SET status = 'embeddings_generated' WHERE id = ?",
branchId
);
return { success: true, chunksProcessed: 0 };
}
// Get all chunks that need embeddings
console.error(`[embedFiles] Finding chunks that need embeddings for branch ${branchId}`);
const chunks = dbInterface.all(
`SELECT fc.id, fc.content, f.id as file_id
FROM file_chunk fc
JOIN file f ON fc.file_id = f.id
JOIN branch_file_association bfa ON f.id = bfa.file_id
WHERE bfa.branch_id = ?
AND fc.embedding IS NULL`,
branchId
);
if (chunks.length === 0) {
console.error(`[embedFiles] No chunks need embeddings, skipping`);
// Update branch status even when no chunks need embeddings
console.error(`[embedFiles] Setting branch status to 'embeddings_generated'`);
dbInterface.run(
"UPDATE branch SET status = 'embeddings_generated' WHERE id = ?",
branchId
);
if (progressNotifier) {
await progressNotifier.sendProgress(1, 1);
}
return { success: true, chunksProcessed: 0 };
}
console.error(`[embedFiles] Found ${chunks.length} chunks that need embeddings`);
let processedChunks = 0;
const totalChunks = chunks.length;
const BATCH_SIZE = 100;
// Process chunks in batches of BATCH_SIZE
for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
const batch = chunks.slice(i, i + BATCH_SIZE);
console.error(
`[embedFiles] Processing batch ${Math.floor(i/BATCH_SIZE) + 1}/${Math.ceil(totalChunks/BATCH_SIZE)}`
);
// Generate embeddings for the batch
const chunkContents = batch.map((chunk: Chunk) => chunk.content);
console.error(`[embedFiles] Generating embeddings for ${batch.length} chunks`);
const embeddingStartTime = Date.now();
const embeddings = await generateOllamaEmbeddings(chunkContents);
console.error(
`[embedFiles] Generated embeddings in ${Date.now() - embeddingStartTime}ms`
);
// Store embeddings in transaction
console.error(`[embedFiles] Storing embeddings`);
dbInterface.transaction((db) => {
const updateStmt = db.prepare(
`UPDATE file_chunk
SET embedding = ?, model_version = ?
WHERE id = ?`
);
for (let j = 0; j < batch.length; j++) {
const chunk = batch[j];
const embedding = JSON.stringify(embeddings[j]);
updateStmt.run(embedding, config.EMBEDDING_MODEL.model, chunk.id);
}
});
processedChunks += batch.length;
// Update progress
if (progressNotifier) {
const progress = processedChunks / totalChunks;
await progressNotifier.sendProgress(progress, 1);
}
}
// Update branch status
console.error(`[embedFiles] Setting branch status to 'embeddings_generated'`);
dbInterface.run(
"UPDATE branch SET status = 'embeddings_generated' WHERE id = ?",
branchId
);
console.error(
`[embedFiles] Processed ${processedChunks} chunks in ${
Date.now() - startTime
}ms`
);
return {
success: true,
chunksProcessed: processedChunks
};
} catch (error) {
console.error(`[embedFiles] Error executing tool:`, error);
return {
error: {
message: `Error executing embedFiles tool: ${
error instanceof Error ? error.message : String(error)
}`,
},
};
}
}
```
--------------------------------------------------------------------------------
/tools/ingestBranch.ts:
--------------------------------------------------------------------------------
```typescript
import { z } from "zod";
import { simpleGit } from "simple-git";
import path from "path";
import fs from "fs";
import dbInterface from "../utils/db.js";
import { ProgressNotifier } from "../utils/types.js";
import config from "../config.js";
import { repoConfigManager } from "../utils/repoConfig.js";
// Define input schema for ingestBranch
export const IngestBranchSchema = z.object({
repoUrl: z.string().describe("GitHub repository URL"),
branch: z
.string()
.optional()
.describe("Branch name to query (defaults to repository's default branch)"),
_meta: z
.object({
progressToken: z.union([z.string(), z.number()]).optional(),
})
.optional(),
});
// Define chunk interface
interface Chunk {
content: string;
chunkNumber: number;
tokenCount: number;
}
const cloneRepository = async (
repoUrl: string,
localPath: string
): Promise<string> => {
// Extract repository name from URL
const repoName = path.basename(repoUrl, ".git");
const fullPath = path.join(localPath, repoName);
// Check if repository already exists
if (fs.existsSync(fullPath)) {
console.error(`Repository already exists at ${fullPath}`);
return fullPath;
}
// Clone the repository
console.error(`Cloning repository ${repoUrl} to ${fullPath}`);
const git = simpleGit();
await git.clone(repoUrl, fullPath);
return fullPath;
};
// Modified cloneRepository function wrapper that reports progress
async function cloneRepositoryWithProgress(
repoUrl: string,
reposDir: string,
progressNotifier?: ProgressNotifier
): Promise<string> {
// Send initial progress notification (start of cloning - 0% of the 33%)
if (progressNotifier) {
await progressNotifier.sendProgress(0, 1);
}
// Set up a timer to periodically send progress updates
let progressPercentage = 0;
let isCloning = true;
const progressInterval = 1500; // 1.5 seconds between updates
const maxProgress = 0.30; // Progress up to 30% (reserving 3% for completion)
const progressStep = 0.02; // Increments of 2%
// Create an interval that will send progress updates periodically
let timer: NodeJS.Timeout | null = null;
if (progressNotifier) {
timer = setInterval(async () => {
if (isCloning && progressPercentage < maxProgress) {
progressPercentage += progressStep;
await progressNotifier!.sendProgress(progressPercentage, 1);
}
}, progressInterval);
}
try {
// Start cloning operation
const repoLocalPath = await cloneRepository(repoUrl, reposDir);
// Clone completed
isCloning = false;
// Send completion of cloning phase (33% of total progress)
if (progressNotifier) {
await progressNotifier.sendProgress(0.33, 1);
}
return repoLocalPath;
} finally {
// Clean up the timer when done
if (timer) {
clearInterval(timer);
}
}
}
export async function ingestBranch(
input: z.infer<typeof IngestBranchSchema>,
progressNotifier?: ProgressNotifier
) {
try {
console.error(
`[ingestBranch] Starting with parameters: ${JSON.stringify(input)}`
);
// Check if input is defined
if (!input) {
console.error(`[ingestBranch] Error: Input parameters are undefined`);
return {
error: {
message: "Input parameters are required for ingestBranch tool",
},
};
}
const startTime = Date.now();
const { repoUrl, branch } = input;
// Validate required parameters
if (!repoUrl) {
console.error(`[ingestBranch] Error: Missing required parameter repoUrl`);
return {
error: {
message: "Required parameter (repoUrl) is missing",
},
};
}
// Get repository path using config manager
const { path: repoLocalPath, config: repoConfig } = repoConfigManager.getRepositoryPath(repoUrl, branch);
let actualBranch = branch || "";
console.error(
`[ingestBranch] Processing repository: ${repoUrl}, type: ${repoConfig.type}, branch: ${actualBranch || 'default'}`
);
// Handle repository based on type
if (repoConfig.type === 'local') {
console.error(`[ingestBranch] Using local repository at: ${repoLocalPath}`);
} else {
// Only clone if needed
if (repoConfigManager.needsCloning(repoUrl)) {
console.error(`[ingestBranch] Cloning remote repository to: ${repoLocalPath}`);
await cloneRepositoryWithProgress(repoUrl, path.dirname(repoLocalPath), progressNotifier);
} else {
console.error(`[ingestBranch] Using cached repository at: ${repoLocalPath}`);
}
}
console.error(
`[ingestBranch] Repository ready at: ${repoLocalPath} (${
Date.now() - startTime
}ms)`
);
// Initialize git
const git = simpleGit(repoLocalPath);
// If branch is not specified, get the default branch using git
if (!actualBranch) {
console.error(`[ingestBranch] Branch not specified, getting default branch`);
try {
// Get the default branch name
const defaultBranch = await git.revparse(['--abbrev-ref', 'HEAD']);
actualBranch = defaultBranch;
console.error(`[ingestBranch] Using default branch: ${actualBranch}`);
} catch (error) {
console.error(`[ingestBranch] Error getting default branch:`, error);
// Fallback to 'main' if we can't determine the default branch
actualBranch = "main";
console.error(`[ingestBranch] Falling back to branch: ${actualBranch}`);
}
}
// Checkout the branch
console.error(`[ingestBranch] Checking out branch: ${actualBranch}`);
await git.checkout(actualBranch);
const latestCommit = await git.revparse([actualBranch]);
console.error(`[ingestBranch] Latest commit SHA: ${latestCommit}`);
// Extract repo name from URL
const repoName = path.basename(repoUrl, ".git");
// Check if repo exists in database
console.error(
`[ingestBranch] Checking if repo exists in database: ${repoName}`
);
const repoExists = dbInterface.get(
"SELECT id FROM repository WHERE name = ?",
repoName
);
let repoId;
if (repoExists) {
repoId = repoExists.id;
console.error(
`[ingestBranch] Repository found in database with ID: ${repoId}`
);
} else {
// Register repository
console.error(`[ingestBranch] Registering new repository: ${repoName}`);
const result = dbInterface.run(
"INSERT INTO repository (name, path) VALUES (?, ?)",
[repoName, repoLocalPath]
);
repoId = result.lastInsertRowid;
console.error(`[ingestBranch] Repository registered with ID: ${repoId}`);
}
// Check if branch exists and has the same commit SHA
console.error(`[ingestBranch] Checking if branch exists in database`);
const branchExists = dbInterface.get(
"SELECT id, last_commit_sha, status FROM branch WHERE name = ? AND repository_id = ?",
[actualBranch, repoId]
);
let branchId;
let needsUpdate = false;
if (branchExists) {
branchId = branchExists.id;
console.error(
`[ingestBranch] Branch found in database with ID: ${branchId}`
);
// Step 1: Check if SHA changed
if (branchExists.last_commit_sha !== latestCommit) {
console.error(`[ingestBranch] Commit SHA changed, updating branch: ${branchId}`);
// Update branch commit SHA and set status to 'pending'
dbInterface.run(
"UPDATE branch SET last_commit_sha = ?, status = 'pending' WHERE id = ?",
[latestCommit, branchId]
);
needsUpdate = true;
}
// Step 2: Check if status is not embeddings_generated
if (branchExists.status !== 'embeddings_generated') {
console.error(`[ingestBranch] Branch status is "${branchExists.status}" not "embeddings_generated", needs processing`);
needsUpdate = true;
}
if (!needsUpdate) {
console.error(`[ingestBranch] No changes needed, skipping update`);
}
} else {
// Register the branch
console.error(`[ingestBranch] Registering new branch: ${actualBranch}`);
const result = dbInterface.run(
"INSERT INTO branch (name, repository_id, last_commit_sha, status) VALUES (?, ?, ?, 'pending')",
[actualBranch, repoId, latestCommit]
);
branchId = result.lastInsertRowid;
needsUpdate = true;
console.error(`[ingestBranch] Branch registered with ID: ${branchId}`);
}
// We don't process files directly here, just return the state
// The actual file processing will happen in processFiles.ts
return {
repoLocalPath,
repoId,
branchId,
needsUpdate,
repoName,
actualBranch,
latestCommit
};
} catch (error) {
console.error(`[ingestBranch] Error executing tool:`, error);
return {
error: {
message: `Error executing ingestBranch tool: ${
error instanceof Error ? error.message : String(error)
}`,
},
};
}
}
```
--------------------------------------------------------------------------------
/tools/queryRepo.ts:
--------------------------------------------------------------------------------
```typescript
import { z } from "zod";
import dbInterface from "../utils/db.js";
import { generateOllamaEmbeddings } from "../utils/ollamaEmbeddings.js";
import { createFilePatternCondition } from "../utils/filePatternMatcher.js";
import { ProgressNotifier } from "../utils/types.js";
import { ingestBranch } from "./ingestBranch.js";
import { processFiles } from "./processFiles.js";
// Define input schemas for tools
export const QueryRepoSchema = z.object({
repoUrl: z.string().describe("GitHub repository URL"),
branch: z
.string()
.optional()
.describe("Branch name to query (defaults to repository's default branch)"),
semanticSearch: z.string().describe("Query for semantic search. This search is not exact, it will try to find the most relevant files, it doesn't accept file: or path: prefixes."),
keywordsSearch: z
.array(z.string())
.describe(
"Search to the files that contain at least one of the keywords in this list. Leave empty to disable. This can work in conjunction with the semantic search."
),
filePatterns: z
.array(z.string())
.describe(
"Array of glob patterns to filter files (e.g. '**/*.ts', 'src/*.js'). Use it for a more effective search or to target specific files for example 'somefile.tsx'. Leave empty to disable"
),
excludePatterns: z
.array(z.string())
.optional()
.describe(
"Array of glob patterns to exclude files (e.g. '**/node_modules/**', '**/dist/**'). Use it to exclude files that are not relevant to the search. Leave empty to disable"
),
limit: z.number().optional().describe("Maximum number of results to return"),
_meta: z
.object({
progressToken: z.union([z.string(), z.number()]).optional(),
})
.optional(),
});
// Helper function to create a heartbeat progress notifier
function createHeartbeatNotifier(originalNotifier?: ProgressNotifier, heartbeatMs: number = 2000): {
notifier: ProgressNotifier;
stopHeartbeat: () => void;
} {
if (!originalNotifier) {
return {
notifier: {
sendProgress: async () => {} // No-op if no original notifier
},
stopHeartbeat: () => {}
};
}
let currentProgress = 0;
let currentMax = 1;
let isActive = true;
let lastUpdate = Date.now();
// Heartbeat interval
const intervalId = setInterval(async () => {
if (!isActive) return;
// Only send if it's been more than heartbeatMs since the last update
if (Date.now() - lastUpdate >= heartbeatMs) {
console.error(`[queryRepo] Heartbeat progress: ${currentProgress}/${currentMax}`);
await originalNotifier.sendProgress(currentProgress, currentMax);
}
}, heartbeatMs);
return {
notifier: {
sendProgress: async (progress: number, max: number) => {
currentProgress = progress;
currentMax = max;
lastUpdate = Date.now();
await originalNotifier.sendProgress(progress, max);
}
},
stopHeartbeat: () => {
isActive = false;
clearInterval(intervalId);
}
};
}
export async function queryRepo(
input: z.infer<typeof QueryRepoSchema>,
progressNotifier?: ProgressNotifier
) {
// Create heartbeat notifier that will send regular updates
const { notifier: heartbeatNotifier, stopHeartbeat } = createHeartbeatNotifier(progressNotifier);
try {
console.error(
`[queryRepo] Starting with parameters: ${JSON.stringify(input)}`
);
// Check if input is defined
if (!input) {
console.error(`[queryRepo] Error: Input parameters are undefined`);
return {
error: {
message: "Input parameters are required for queryRepo tool",
},
};
}
const startTime = Date.now();
const {
repoUrl,
branch,
semanticSearch: semanticSearchInput,
keywordsSearch,
limit,
filePatterns,
excludePatterns,
} = input;
// Validate required parameters
if (!repoUrl || (!semanticSearchInput && !keywordsSearch)) {
console.error(`[queryRepo] Error: Missing required parameters`);
return {
error: {
message: "Required parameters (repoUrl, semanticSearch or keywordsSearch) are missing",
},
};
}
let semanticSearch = semanticSearchInput;
if (!semanticSearchInput) {
semanticSearch = keywordsSearch.join(" ");
}
// Initialize progress at start
await heartbeatNotifier.sendProgress(0.05, 1);
// Step 1: Ingest the branch (25% of progress)
console.error(`[queryRepo] Ingesting branch: ${repoUrl}, ${branch || 'default'}`);
const branchResult = await ingestBranch(
{
repoUrl,
branch
},
undefined // Don't pass progress notifier to individual tools
);
// Update progress after branch ingestion
await heartbeatNotifier.sendProgress(0.25, 1);
// Check for error
if ('error' in branchResult) {
console.error(`[queryRepo] Error in ingestBranch:`, branchResult.error);
return { error: branchResult.error };
}
const branchData = branchResult;
// Step 2: Process files if needed (50% of progress)
console.error(`[queryRepo] Processing files for branch: ${branchData.branchId}`);
const filesResult = await processFiles(
{
repoLocalPath: branchData.repoLocalPath,
repoId: branchData.repoId,
branchId: branchData.branchId,
actualBranch: branchData.actualBranch,
needsUpdate: branchData.needsUpdate
},
undefined // Don't pass progress notifier to individual tools
);
// Update progress after file processing
await heartbeatNotifier.sendProgress(0.5, 1);
// Check for error
if ('error' in filesResult) {
console.error(`[queryRepo] Error in processFiles:`, filesResult.error);
return { error: filesResult.error };
}
// Generate embedding for the query
console.error(`[queryRepo] Generating embedding for query: "${semanticSearch}"`);
const queryEmbedStart = Date.now();
const [queryEmbedding] = await generateOllamaEmbeddings([semanticSearch]);
const queryEmbeddingStr = JSON.stringify(queryEmbedding);
console.error(
`[queryRepo] Generated query embedding in ${
Date.now() - queryEmbedStart
}ms`
);
// Update progress after query embedding
await heartbeatNotifier.sendProgress(0.6, 1);
// Search for similar chunks using SQLite's JSON functions for vector similarity
console.error(
`[queryRepo] Searching for similar chunks with limit: ${limit}`
);
const searchStart = Date.now();
// Use a default limit of 10 if undefined
const effectiveLimit = limit === undefined ? 10 : limit;
// Create SQL condition for file pattern filtering
const filePatternCondition = createFilePatternCondition(
filePatterns,
excludePatterns
);
const results = dbInterface.all(
`
SELECT fc.content, f.path, fc.chunk_number,
(SELECT (SELECT SUM(json_extract(value, '$') * json_extract(?, '$[' || key || ']'))
FROM json_each(fc.embedding)
GROUP BY key IS NOT NULL)
)/${queryEmbedding.length} as similarity
FROM file_chunk fc
JOIN file f ON fc.file_id = f.id
JOIN branch_file_association bfa ON f.id = bfa.file_id
WHERE bfa.branch_id = ?
AND fc.embedding IS NOT NULL
${filePatternCondition}
ORDER BY similarity DESC
LIMIT ?
`,
[queryEmbeddingStr, branchData.branchId, effectiveLimit]
);
console.error(
`[queryRepo] Search completed in ${Date.now() - searchStart}ms, found ${
results.length
} results`
);
// Update progress after initial search
await heartbeatNotifier.sendProgress(0.7, 1);
// If no results found, check if embeddings need to be generated
if (results.length === 0) {
console.error(`[queryRepo] No results found, checking if embeddings need to be generated`);
// Check if there are any chunks without embeddings
const chunksWithoutEmbeddings = dbInterface.get(
`SELECT COUNT(*) as count
FROM file_chunk fc
JOIN file f ON fc.file_id = f.id
JOIN branch_file_association bfa ON f.id = bfa.file_id
WHERE bfa.branch_id = ?
AND fc.embedding IS NULL`,
branchData.branchId
);
if (chunksWithoutEmbeddings && chunksWithoutEmbeddings.count > 0) {
console.error(`[queryRepo] Found ${chunksWithoutEmbeddings.count} chunks without embeddings, generating them`);
// Import embedFiles function
const { embedFiles } = await import('./embedFiles.js');
// Generate embeddings (75-90% of progress)
await heartbeatNotifier.sendProgress(0.75, 1);
// Generate embeddings
const embedResult = await embedFiles(
{
repoLocalPath: branchData.repoLocalPath,
branchId: branchData.branchId
},
undefined // Don't pass progress notifier to individual tools
);
// Update progress after embedding generation
await heartbeatNotifier.sendProgress(0.9, 1);
if ('error' in embedResult) {
console.error(`[queryRepo] Error generating embeddings:`, embedResult.error);
return { error: embedResult.error };
}
// Try searching again after generating embeddings
console.error(`[queryRepo] Retrying search after generating embeddings`);
const retryResults = dbInterface.all(
`
SELECT fc.content, f.path, fc.chunk_number,
(SELECT (SELECT SUM(json_extract(value, '$') * json_extract(?, '$[' || key || ']'))
FROM json_each(fc.embedding)
GROUP BY key IS NOT NULL)
)/${queryEmbedding.length} as similarity
FROM file_chunk fc
JOIN file f ON fc.file_id = f.id
JOIN branch_file_association bfa ON f.id = bfa.file_id
WHERE bfa.branch_id = ?
AND fc.embedding IS NOT NULL
${filePatternCondition}
ORDER BY similarity DESC
LIMIT ?
`,
[queryEmbeddingStr, branchData.branchId, effectiveLimit]
);
console.error(
`[queryRepo] Retry search completed, found ${retryResults.length} results`
);
results.push(...retryResults);
}
}
// Filter results by keywords if provided
let filteredResults = results;
if (keywordsSearch && keywordsSearch.length > 0) {
console.error(
`[queryRepo] Filtering results by keywords: ${keywordsSearch.join(", ")}`
);
const keywordFilterStart = Date.now();
// Convert keywords to lowercase for case-insensitive matching
const lowercaseKeywords = keywordsSearch.map((kw) => kw.trim().toLowerCase());
filteredResults = results.filter((result: { content: string }) => {
const content = result.content.toLowerCase();
// Check if the content contains at least one of the keywords
return lowercaseKeywords.some((keyword) => content.includes(keyword));
});
console.error(
`[queryRepo] Keyword filtering completed in ${
Date.now() - keywordFilterStart
}ms, filtered from ${results.length} to ${
filteredResults.length
} results`
);
}
// Update progress to completion
await heartbeatNotifier.sendProgress(1, 1);
const totalTime = Date.now() - startTime;
console.error(`[queryRepo] Tool completed in ${totalTime}ms`);
return {
output: {
success: true,
repoUrl,
branch: branchData.actualBranch,
processingTimeMs: totalTime,
results: filteredResults.map((result: any) => ({
filePath: result.path,
chunkNumber: result.chunk_number,
content: result.content,
similarity: result.similarity,
})),
},
};
} catch (error) {
console.error(`[queryRepo] Error executing tool:`, error);
return {
error: {
message: `Error executing queryRepo tool: ${
error instanceof Error ? error.message : String(error)
}`,
},
};
} finally {
// Always stop the heartbeat when done
stopHeartbeat();
}
}
```
--------------------------------------------------------------------------------
/tools/processFiles.ts:
--------------------------------------------------------------------------------
```typescript
import { z } from "zod";
import dbInterface from "../utils/db.js";
import { ProgressNotifier } from "../utils/types.js";
import { simpleGit } from "simple-git";
import path from "path";
import { extensionToSplitter, splitDocument } from "../utils/codeSplitter.js";
import fs from "fs";
interface RepositoryFile {
path: string;
name: string;
sha: string;
}
interface RepositoryFilesResult {
files: RepositoryFile[];
commitSha: string;
}
interface PendingFile {
id: number;
path: string;
sha: string;
}
// Define input schema for processFiles
export const ProcessFilesSchema = z.object({
repoLocalPath: z.string().describe("Local path to the cloned repository"),
repoId: z.number().describe("Repository ID in the database"),
branchId: z.number().describe("Branch ID in the database"),
actualBranch: z.string().describe("Actual branch name"),
needsUpdate: z.boolean().describe("Whether the branch needs updating"),
_meta: z
.object({
progressToken: z.union([z.string(), z.number()]).optional(),
})
.optional(),
});
/**
* Get the files in a repository branch
* @param repoPath Path to the repository
* @param branchName Name of the branch
* @returns List of files with their metadata
*/
export const getRepositoryFiles = async (
repoPath: string,
branchName: string,
): Promise<RepositoryFilesResult> => {
const git = simpleGit(repoPath);
// Checkout the branch
await git.checkout(branchName);
// Get the latest commit SHA
const latestCommit = await git.revparse([branchName]);
// Get the file tree
const files: RepositoryFile[] = [];
// Use git ls-tree to get all files recursively
const result = await git.raw(["ls-tree", "-r", branchName]);
const stdout = result.toString();
// Parse the output
const lines = stdout.split("\n").filter((line) => line.trim() !== "");
for (const line of lines) {
// Format: <mode> <type> <object> <file>
const [info, filePath] = line.split("\t");
const [, , sha] = info.split(" ");
if (filePath) {
files.push({
path: filePath,
name: path.basename(filePath),
sha,
});
}
}
return { files, commitSha: latestCommit };
};
/**
* Process file content and split into chunks
* @param branchName Branch name
* @param repoPath Repository path
*/
export const processFileContents = async (
branchName: string,
repoPath: string
): Promise<void> => {
const git = simpleGit(repoPath);
// Checkout the branch
await git.checkout(branchName);
// Get repository and branch IDs
const repo = dbInterface.get("SELECT id FROM repository WHERE path = ?", repoPath) as { id: number };
const branch = dbInterface.get(
"SELECT id FROM branch WHERE name = ? AND repository_id = ?",
[branchName, repo.id]
) as { id: number };
// Get all pending files for the branch
const pendingFiles = dbInterface.all(
`SELECT f.id, f.path, f.sha
FROM file f
JOIN branch_file_association bfa ON f.id = bfa.file_id
WHERE f.status = 'pending' AND bfa.branch_id = ?`,
branch.id
) as PendingFile[];
for (const file of pendingFiles) {
console.error(`Processing file: ${file.path}`);
const extension = file.path.split(".").pop()?.toLowerCase();
const splitType = extension ? extensionToSplitter(extension) : "ignore";
if (splitType !== "ignore") {
try {
// Get file content
const filePath = path.join(repoPath, file.path);
// Skip if file doesn't exist (might have been deleted)
if (!fs.existsSync(filePath)) {
console.error(`File ${file.path} doesn't exist, skipping`);
continue;
}
let content = fs.readFileSync(filePath, "utf-8");
// Check for null bytes in the content
if (content.includes("\0")) {
console.error(
`File ${file.path} contains null bytes. Removing them.`
);
content = content.replace(/\0/g, "");
}
// Check if the content is valid UTF-8
try {
new TextDecoder("utf-8", { fatal: true }).decode(
new TextEncoder().encode(content)
);
} catch (e) {
console.error(
`File ${file.path} contains invalid UTF-8 characters. Replacing them.`
);
content = content.replace(/[^\x00-\x7F]/g, ""); // Remove non-ASCII characters
}
// Truncate content if it's too long
const maxLength = 1000000; // Adjust this value based on your database column size
if (content.length > maxLength) {
console.error(
`File ${file.path} content is too long. Truncating to ${maxLength} characters.`
);
content = content.substring(0, maxLength);
}
// Split the document
const chunks = await splitDocument(file.path, content);
// Store chunks in the database using dbInterface.transaction
dbInterface.transaction((db) => {
for (let i = 0; i < chunks.length; i++) {
db.prepare(
`INSERT INTO file_chunk (file_id, content, chunk_number)
VALUES (?, ?, ?)
ON CONFLICT(file_id, chunk_number) DO NOTHING`
).run(file.id, chunks[i].pageContent, i + 1);
}
// Update file status to 'fetched'
db.prepare("UPDATE file SET status = ? WHERE id = ?").run(
"fetched",
file.id
);
});
} catch (error) {
console.error(`Error processing file ${file.path}:`, error);
}
} else {
// Update file status to 'done' for ignored files
dbInterface.run("UPDATE file SET status = ? WHERE id = ?", ["done", file.id]);
}
}
};
export async function processFiles(
input: z.infer<typeof ProcessFilesSchema>,
progressNotifier?: ProgressNotifier
) {
try {
console.error(
`[processFiles] Starting with parameters: ${JSON.stringify(input)}`
);
// Check if input is defined
if (!input) {
console.error(`[processFiles] Error: Input parameters are undefined`);
return {
error: {
message: "Input parameters are required for processFiles tool",
},
};
}
const startTime = Date.now();
const { repoLocalPath, repoId, branchId, actualBranch, needsUpdate } = input;
// Skip if no update is needed
if (!needsUpdate) {
console.error(`[processFiles] No update needed, skipping`);
return {
needsUpdate: false,
filesToProcess: []
};
}
// Process the repository files
console.error(
`[processFiles] Processing repository files (${Date.now() - startTime}ms)`
);
// Get all files in the repository
const { files } = await getRepositoryFiles(repoLocalPath, actualBranch);
console.error(`[processFiles] Found ${files.length} files in repository`);
// Define transaction function
console.error(`[processFiles] Starting file database transaction`);
const syncFiles = (db: any) => {
// Get existing files to compare
const existingFiles = db
.prepare(
`SELECT f.id, f.path, f.sha FROM file f
JOIN branch_file_association bfa ON f.id = bfa.file_id
WHERE bfa.branch_id = ?`
)
.all(branchId);
console.error(
`[processFiles] Found ${existingFiles.length} existing files in database`
);
const existingFileMap = new Map();
for (const file of existingFiles) {
existingFileMap.set(file.path, file);
}
// Track files that need processing
const filesToProcess: any[] = [];
// File counters for logging
let newFiles = 0;
let updatedFiles = 0;
let unchangedFiles = 0;
let removedFiles = 0;
// Process each file
for (const file of files) {
const existingFile = existingFileMap.get(file.path);
existingFileMap.delete(file.path); // Remove from map to track what's left later
if (!existingFile) {
// New file - but first check if it already exists in the database for another branch
const existingFileInDB = db.prepare(
"SELECT id FROM file WHERE repository_id = ? AND path = ? AND sha = ?"
).get(repoId, file.path, file.sha);
let fileId;
if (existingFileInDB) {
// File exists but not associated with this branch
console.error(`[processFiles] File exists in DB but not associated with branch: ${file.path}`);
fileId = existingFileInDB.id;
// Check if the file is already associated with this branch
const associationExists = db.prepare(
"SELECT 1 FROM branch_file_association WHERE branch_id = ? AND file_id = ?"
).get(branchId, fileId);
if (!associationExists) {
// Associate existing file with current branch
db.prepare(
"INSERT INTO branch_file_association (branch_id, file_id) VALUES (?, ?)"
).run(branchId, fileId);
}
} else {
// Truly new file
newFiles++;
const result = db
.prepare(
"INSERT INTO file (repository_id, path, sha, name, status) VALUES (?, ?, ?, ?, 'pending')"
)
.run(repoId, file.path, file.sha, file.name);
fileId = result.lastInsertRowid;
// Associate with branch
db.prepare(
"INSERT INTO branch_file_association (branch_id, file_id) VALUES (?, ?)"
).run(branchId, fileId);
}
filesToProcess.push({
id: fileId,
path: file.path,
name: file.name,
});
} else if (existingFile.sha !== file.sha) {
// Updated file - SHA changed
updatedFiles++;
db.prepare(
"UPDATE file SET sha = ?, status = 'pending' WHERE id = ?"
).run(file.sha, existingFile.id);
filesToProcess.push({
id: existingFile.id,
path: file.path,
name: file.name,
});
} else {
// Unchanged file
unchangedFiles++;
}
}
// Remove files that no longer exist in the branch
for (const [path, file] of existingFileMap.entries()) {
removedFiles++;
db.prepare(
"DELETE FROM branch_file_association WHERE branch_id = ? AND file_id = ?"
).run(branchId, file.id);
// If no other branches reference this file, delete it and its chunks
const fileStillInUse = db
.prepare(
"SELECT 1 FROM branch_file_association WHERE file_id = ? LIMIT 1"
)
.get(file.id);
if (!fileStillInUse) {
// Delete chunks first
db.prepare("DELETE FROM file_chunk WHERE file_id = ?").run(file.id);
// Then delete the file
db.prepare("DELETE FROM file WHERE id = ?").run(file.id);
}
}
console.error(
`[processFiles] Files summary: ${newFiles} new, ${updatedFiles} updated, ${unchangedFiles} unchanged, ${removedFiles} removed`
);
return filesToProcess;
};
// Execute the transaction
console.error(`[processFiles] Executing file processing transaction`);
const filesToProcess = dbInterface.transaction((db) => syncFiles(db));
console.error(
`[processFiles] Transaction completed, processing ${
filesToProcess.length
} files (${Date.now() - startTime}ms)`
);
// Cap the number of files processed in one run to avoid timeouts;
// the current cap is set high enough to be effectively unlimited.
const MAX_FILES_TO_PROCESS = 1000000;
const limitedFiles = filesToProcess.slice(0, MAX_FILES_TO_PROCESS);
if (limitedFiles.length < filesToProcess.length) {
console.error(
`[processFiles] WARNING: Processing only ${limitedFiles.length} of ${filesToProcess.length} files to avoid timeout`
);
}
// Update progress for file processing phase (33% to 66%)
if (progressNotifier) {
await progressNotifier.sendProgress(0.33, 1);
}
// Process file contents to generate chunks for the pending files
console.error(`[processFiles] Processing file contents for branch: ${actualBranch}`);
try {
await processFileContents(actualBranch, repoLocalPath);
console.error(`[processFiles] File contents processed successfully`);
// Update branch status to files_processed
dbInterface.run(
"UPDATE branch SET status = 'files_processed' WHERE id = ?",
branchId
);
// Update progress after file content processing
if (progressNotifier) {
await progressNotifier.sendProgress(0.66, 1);
}
} catch (error) {
console.error(`[processFiles] Error processing file contents:`, error);
}
return {
needsUpdate: true,
filesToProcess: limitedFiles,
repoLocalPath
};
} catch (error) {
console.error(`[processFiles] Error executing tool:`, error);
return {
error: {
message: `Error executing processFiles tool: ${
error instanceof Error ? error.message : String(error)
}`,
},
};
}
}
```
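
A minimal usage sketch for this tool (hypothetical values; in the server the input comes from the ingest step, so this only illustrates the expected input shape):

```typescript
import { processFiles } from "./tools/processFiles";

// Hypothetical IDs and paths: repoId/branchId would come from the
// repository and branch rows created during ingestion.
const result = await processFiles({
  repoLocalPath: "/home/user/.codeContextMcp/repos/example-repo",
  repoId: 1,
  branchId: 1,
  actualBranch: "main",
  needsUpdate: true,
});

if ("error" in result) {
  console.error(result.error?.message);
} else {
  console.log(`${result.filesToProcess?.length ?? 0} files queued for embedding`);
}
```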
--------------------------------------------------------------------------------
/utils/codeSplitter.ts:
--------------------------------------------------------------------------------
```typescript
import {
RecursiveCharacterTextSplitter,
TextSplitter,
} from "@langchain/textsplitters";
import fs from "fs";
class SQLSchemaSplitter extends TextSplitter {
private maxCharacters: number;
constructor(maxCharacters: number) {
super();
this.maxCharacters = maxCharacters;
}
// Helper function to parse INSERT statements
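// e.g. "(1,'a'),(2,'b')" yields ["(1,'a')", "(2,'b')"]; quotes,
// escapes, and nested parentheses are tracked so commas inside
// strings or sub-expressions do not terminate a tuple.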
parseValues(valuesPart: string): string[] {
let valuesArray: string[] = [];
let currentTuple = "";
let nestingLevel = 0;
let inString: boolean = false;
let stringChar = "";
let escapeNext = false;
for (let i = 0; i < valuesPart.length; i++) {
const char = valuesPart[i];
currentTuple += char;
if (escapeNext) {
escapeNext = false;
} else if (char === "\\") {
escapeNext = true;
} else if (char === "'" || char === '"') {
if (inString && char === stringChar) {
inString = false;
} else if (!inString) {
inString = true;
stringChar = char;
}
} else if (!inString) {
if (char === "(") {
nestingLevel += 1;
} else if (char === ")") {
nestingLevel -= 1;
if (nestingLevel === 0) {
valuesArray.push(currentTuple.trim());
currentTuple = "";
// Skip any commas and spaces
while (
i + 1 < valuesPart.length &&
(valuesPart[i + 1] === "," ||
valuesPart[i + 1] === " " ||
valuesPart[i + 1] === "\n")
) {
i++;
}
}
}
}
}
return valuesArray;
}
// Split long INSERT statements
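// e.g. one oversized "INSERT INTO t VALUES (1),(2),...;" becomes
// several complete INSERT statements, each under maxCharacters.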
splitInsertStatement(statement: string): string[] {
const insertIndex = statement.toUpperCase().indexOf("VALUES");
if (insertIndex === -1) {
// Cannot split, return the statement as is
return [statement];
}
const insertIntoPart =
statement.slice(0, insertIndex + "VALUES".length) + " ";
const valuesPart = statement.slice(insertIndex + "VALUES".length);
const valuesArray = this.parseValues(valuesPart);
const insertStatements: string[] = [];
let currentValues = "";
for (const valueTuple of valuesArray) {
const newStatementLength =
insertIntoPart.length + currentValues.length + valueTuple.length + 1; // +1 for ',' or ';'
if (newStatementLength <= this.maxCharacters) {
if (currentValues !== "") {
currentValues += "," + valueTuple;
} else {
currentValues = valueTuple;
}
} else {
// Flush the accumulated values as a complete INSERT statement
if (currentValues !== "") {
insertStatements.push(insertIntoPart + currentValues + ";");
}
currentValues = valueTuple;
}
}
if (currentValues !== "") {
const newStatement = insertIntoPart + currentValues + ";";
insertStatements.push(newStatement);
}
return insertStatements;
}
/**
* Enhanced function to split SQL script into statements while handling various SQL constructs,
* including custom keywords like BBEGI/EEN and EEXCEPTIO/EEN.
*/
splitSQLStatements(text: string): string[] {
const statements: string[] = [];
let currentStatement = "";
let index = 0;
let insideString: boolean = false;
let stringChar = "";
let insideComment = false;
let commentType = "";
let insideFunction = false;
let insideProcedure = false;
let insideView = false;
let insideBlock = false;
let blockLevel = 0;
const upperText = text.toUpperCase();
// Define mappings for custom keywords to standard ones
const beginKeywords = ["BEGIN", "BBEGI", "BEGINN"];
const endKeywords = ["END", "EEN"];
const exceptionKeywords = ["EXCEPTION", "EEXCEPTIO"];
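// blockLevel counts nested BEGIN...END pairs; a function, procedure,
// or anonymous block is emitted as a single statement only when its
// outermost block closes (blockLevel returns to 0).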
while (index < text.length) {
const char = text[index];
const remainingText = upperText.substring(index);
currentStatement += char;
if (insideString) {
if (char === stringChar) {
insideString = false;
} else if (char === "\\") {
// Skip escaped characters
index++;
if (index < text.length) {
currentStatement += text[index];
}
}
} else if (insideComment) {
if (commentType === "--" && (char === "\n" || char === "\r")) {
insideComment = false;
} else if (commentType === "/*" && remainingText.startsWith("*/")) {
insideComment = false;
currentStatement += "/"; // the '*' was already appended above
index += 1; // Skip '/'
}
} else if (char === "'" || char === '"') {
insideString = true;
stringChar = char;
} else if (remainingText.startsWith("/*")) {
insideComment = true;
commentType = "/*";
currentStatement += "/*";
index += 1; // Skip '*'
} else if (remainingText.startsWith("--")) {
insideComment = true;
commentType = "--";
currentStatement += "--";
index += 1; // Skip second '-'
} else if (
!insideFunction &&
!insideProcedure &&
!insideView &&
!insideBlock
) {
if (
remainingText.startsWith("CREATE FUNCTION") ||
remainingText.startsWith("CREATE OR REPLACE FUNCTION")
) {
insideFunction = true;
blockLevel = 0;
} else if (
remainingText.startsWith("CREATE PROCEDURE") ||
remainingText.startsWith("CREATE OR REPLACE PROCEDURE")
) {
insideProcedure = true;
blockLevel = 0;
} else if (
remainingText.startsWith("CREATE VIEW") ||
remainingText.startsWith("CREATE OR REPLACE VIEW")
) {
insideView = true;
} else if (beginKeywords.some((kw) => remainingText.startsWith(kw))) {
// Entering an anonymous block; the keyword itself is consumed by
// the block-level tracking below, which raises blockLevel to 1.
insideBlock = true;
blockLevel = 0;
}
}
if (insideFunction || insideProcedure || insideBlock) {
// Check for BEGIN keywords to increase block level
const matchedBegin = beginKeywords.find((kw) =>
remainingText.startsWith(kw)
);
if (matchedBegin) {
blockLevel++;
index += matchedBegin.length; // 'continue' below skips the loop's index++
currentStatement += matchedBegin.substring(1);
continue;
}
// Check for END keywords to decrease block level
const matchedEnd = endKeywords.find((kw) =>
remainingText.startsWith(kw)
);
if (
matchedEnd &&
(matchedEnd.length === "END".length ||
matchedEnd.length === "END;".length)
) {
blockLevel--;
index += matchedEnd.length; // 'continue' below skips the loop's index++
currentStatement += matchedEnd.substring(1);
if (blockLevel === 0) {
if (insideFunction) {
insideFunction = false;
statements.push(currentStatement.trim());
currentStatement = "";
} else if (insideProcedure) {
insideProcedure = false;
statements.push(currentStatement.trim());
currentStatement = "";
} else if (insideBlock) {
insideBlock = false;
statements.push(currentStatement.trim());
currentStatement = "";
}
}
continue;
}
} else if (insideView) {
if (char === ";") {
insideView = false;
statements.push(currentStatement.trim());
currentStatement = "";
}
} else if (
char === ";" &&
!insideFunction &&
!insideProcedure &&
!insideView &&
!insideBlock
) {
statements.push(currentStatement.trim());
currentStatement = "";
}
index++;
}
if (currentStatement.trim() !== "") {
statements.push(currentStatement.trim());
}
return statements;
}
// Helper method to match keywords from a list at the start of the given text.
// Returns the matched keyword or null.
matchKeyword(text: string, keywords: string[]): string | null {
for (const keyword of keywords) {
if (text.startsWith(keyword)) {
return keyword;
}
}
return null;
}
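// Strategy: INSERT statements are split tuple-by-tuple; any other
// statement longer than maxCharacters falls back to line-based packing.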
async splitText(text: string): Promise<string[]> {
const statements = this.splitSQLStatements(text);
const splits: string[] = [];
for (const statement of statements) {
// Check if the statement is an INSERT statement
if (
statement.toUpperCase().includes("INSERT INTO") &&
statement.toUpperCase().includes("VALUES")
) {
// Split long INSERT statements
const splitInserts = this.splitInsertStatement(statement);
splits.push(...splitInserts);
} else {
// For other statements, check if they are too long
if (statement.length <= this.maxCharacters) {
splits.push(statement);
} else {
// For long statements, split them into chunks
let currentSplit = "";
const lines = statement.split("\n");
for (const line of lines) {
if (currentSplit.length + line.length + 1 <= this.maxCharacters) {
currentSplit += (currentSplit ? "\n" : "") + line;
} else {
if (currentSplit) {
splits.push(currentSplit);
}
currentSplit = line;
}
}
if (currentSplit) {
splits.push(currentSplit);
}
}
}
}
return splits;
}
}
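// Maps a file extension to a splitter family: a langchain language
// preset ("cpp", "js", ...), "sql", plain "text", or "ignore" for
// binary and asset files.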
export function extensionToSplitter(extension: string): string {
if (!extension) {
return "text";
}
const extensionLower = extension.toLowerCase();
switch (extensionLower) {
// C/C++ extensions
case "c++":
case "cpp":
case "c":
case "h":
case "hpp":
case "m":
case "mm":
return "cpp";
// Go
case "go":
return "go";
// Java
case "java":
return "java";
// JavaScript and related
case "js":
case "ts":
case "typescript":
case "tsx":
case "jsx":
case "javascript":
case "json":
case "pbxproj":
return "js";
// YAML and related
case "yaml":
case "yml":
case "toml":
case "ini":
case "cfg":
case "conf":
case "props":
case "env":
case "plist":
case "gemfile":
case "dockerfile":
case "podfile":
case "patch":
return "text";
// Shell scripts and related
case "sh":
case "bash":
case "zsh":
case "fish":
case "bat":
case "cmd":
return "text";
// Properties and XSD
case "properties":
case "xsd":
return "text";
// SQL
case "sql":
return "sql";
// PHP
case "php":
return "php";
// Protocol buffers
case "proto":
return "proto";
// Python
case "py":
case "python":
return "python";
// reStructuredText
case "rst":
return "rst";
// Ruby
case "rb":
case "ruby":
return "ruby";
// Rust
case "rs":
case "rust":
return "rust";
// Scala
case "scala":
return "scala";
// Swift
case "swift":
return "swift";
// Markdown
case "md":
case "markdown":
return "markdown";
// LaTeX
case "tex":
case "latex":
return "latex";
// HTML and related
case "html":
case "htm":
case "xml":
case "xsl":
case "xdt":
case "xcworkspacedata":
case "xcprivacy":
case "xcsettings":
case "xcscheme":
return "html";
// Solidity
case "sol":
case "solidity":
return "sol";
// Text
case "text":
case "txt":
case "lst":
case "reg":
return "text";
// Additional file extensions
case "jpr":
case "jws":
case "iml":
return "html";
case "lock":
case "jpg":
case "jpeg":
case "png":
case "gif":
case "bmp":
case "svg":
case "ico":
case "webp":
case "tiff":
case "bin":
case "exe":
case "dll":
case "so":
case "dylib":
case "obj":
case "o":
case "zip":
case "tar":
case "gz":
case "rar":
case "7z":
case "jar":
case "war":
case "ear":
case "class":
return "ignore";
default:
return "text";
}
}
export const splitDocument = (filename: string, code: string) => {
const extension = filename.split(".").pop();
const splitType = extensionToSplitter(extension || "");
if (splitType === "ignore") {
return [];
}
const CHUNK_SIZE_TOKENS = 7000;
const CHUNK_OVERLAP_TOKENS = 200;
const CHUNK_SIZE_CHARACTERS = CHUNK_SIZE_TOKENS * 3.25;
const CHUNK_OVERLAP_CHARACTERS = CHUNK_OVERLAP_TOKENS * 3.25;
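// Rough heuristic: ~3.25 characters per token for code, so these
// character budgets approximate the token limits above.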
let splitter;
if (splitType !== "text" && splitType !== "sql") {
splitter = RecursiveCharacterTextSplitter.fromLanguage(
splitType as
| "cpp"
| "go"
| "java"
| "js"
| "php"
| "proto"
| "python"
| "rst"
| "ruby"
| "rust"
| "scala"
| "swift"
| "markdown"
| "latex"
| "html"
| "sol",
{
chunkSize: CHUNK_SIZE_CHARACTERS,
chunkOverlap: CHUNK_OVERLAP_CHARACTERS,
}
);
} else if (splitType === "sql") {
splitter = new SQLSchemaSplitter(CHUNK_SIZE_CHARACTERS);
} else {
splitter = new RecursiveCharacterTextSplitter({
chunkSize: CHUNK_SIZE_CHARACTERS,
chunkOverlap: CHUNK_OVERLAP_CHARACTERS,
});
}
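// Prefix every chunk with its file name so each embedded chunk
// carries file-level context on its own.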
return splitter.createDocuments([code], [], {
chunkHeader: `FILE NAME: ${filename}\n\n---\n\n`,
appendChunkOverlapHeader: true,
});
};
```
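
A short sketch of `splitDocument` in use (the file name and snippet are illustrative):

```typescript
import { splitDocument } from "./utils/codeSplitter";

const code = `export function add(a: number, b: number): number {
  return a + b;
}`;

// Returns langchain Document objects; each chunk's pageContent starts
// with the "FILE NAME: ..." header configured above.
const docs = await splitDocument("src/math.ts", code);
for (const doc of docs) {
  console.log(doc.pageContent.slice(0, 80));
}
```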