happyzym/mcp-server-reposearch # codebase.md

# Directory Structure

```
├── .clinerules
├── .github
│   └── workflows
│       └── build.yml
├── .gitignore
├── .reposearchignore
├── package-lock.json
├── package.json
├── README_en.md
├── README.md
├── src
│   ├── index.ts
│   ├── searchEngine.ts
│   ├── types
│   │   └── ignore.d.ts
│   └── types.ts
└── tsconfig.json
```

# Files

--------------------------------------------------------------------------------
/.reposearchignore:
--------------------------------------------------------------------------------

```
# Default .reposearchignore file
# Directories to exclude
node_modules/
.git/
build/
dist/
coverage/
.cache/
.tmp/
temp/
logs/

# Binary and media files
*.exe
*.dll
*.so
*.dylib
*.bin
*.dat
*.db
*.sqlite
*.jpg
*.jpeg
*.png
*.gif
*.ico
*.svg
*.mp3
*.mp4
*.zip
*.tar
*.gz
*.rar
*.7z

# IDE and system files
.DS_Store
Thumbs.db
.idea/
.vscode/
*.swp
*.swo
*.swn
*.bak

# Allow common text files (these re-include files matched by the file patterns above; files inside an excluded directory stay excluded)
!*.txt
!*.md
!*.js
!*.ts
!*.jsx
!*.tsx
!*.json
!*.html
!*.css
!*.scss
!*.less
!*.py
!*.java
!*.c
!*.cpp
!*.h
!*.hpp
!*.rs
!*.go
!*.rb
!*.php
!*.xml
!*.yaml
!*.yml
!*.sh
!*.bash
!*.zsh
!*.fish
!*.conf
!*.ini
!*.properties
!*.env

```

--------------------------------------------------------------------------------
/src/types/ignore.d.ts:
--------------------------------------------------------------------------------

```typescript
/**
 * Minimal ambient typing for the `ignore` package, limited to the two methods
 * this project calls.
 */
declare module 'ignore' {
  /** A set of ignore rules that paths can be tested against. */
  interface Ignore {
    /**
     * Registers one pattern string or a list of patterns.
     * Returns an `Ignore`, so calls can be chained.
     */
    add(pattern: string | string[]): Ignore;
    /** Reports whether `filePath` is excluded by the registered patterns. */
    ignores(filePath: string): boolean;
  }

  /** Factory exposed as the package's default export. */
  function createIgnore(): Ignore;
  export default createIgnore;
}

```

--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------

```json
{
  "compilerOptions": {
    "target": "ES2022",
    "module": "NodeNext",
    "moduleResolution": "NodeNext",
    "esModuleInterop": true,
    "strict": true,
    "outDir": "build",
    "rootDir": "src",
    "declaration": true,
    "sourceMap": true
  },
  "include": ["src/**/*"],
  "exclude": ["node_modules", "build"]
}

```

--------------------------------------------------------------------------------
/README_en.md:
--------------------------------------------------------------------------------

```markdown
[中文](./README.md) | [English](./README_en.md)

An MCP server that provides better content search than Cline's built-in `search_files` tool.

Features:
- [x] control which files are searched with a `.reposearchignore` file, written in gitignore format.
- [x] support regex for searching
- output format control:
  - [x] whether to include the matching line content in the results
  - [ ] include surrounding lines
- [x] token blow-up prevention (results are withheld when the output exceeds a size limit)

Notes:
- Currently you need to instruct Cline, via the system prompt, to stop using `search_files`.
```

--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------

```yaml
# Builds the TypeScript sources and publishes the compiled JavaScript as a
# GitHub release whenever a version tag is pushed.
name: Build and Release

on:
  push:
    tags:
      # Three- and four-part version tags, e.g. v1.2.3 or v1.2.3.4
      - "v*.*.*"
      - "v*.*.*.*"

jobs:
  build:
    runs-on: ubuntu-latest

    # Creating a release and uploading assets needs write access to repository
    # contents; the default GITHUB_TOKEN may be read-only.
    permissions:
      contents: write

    steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Setup Node.js
      uses: actions/setup-node@v4
      with:
        node-version: '20'
        cache: 'npm'

    - name: Install dependencies
      run: npm ci

    - name: Build
      run: npm run build

    # Attaches every compiled .js file from the build output to the release.
    - name: Create Release
      uses: softprops/action-gh-release@v2
      with:
        files: build/*.js
        generate_release_notes: true
```

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------

```json
{
  "name": "mcp-server-reposearch",
  "version": "1.1.0",
  "description": "MCP Server for searching text content in files",
  "main": "build/index.js",
  "type": "module",
  "scripts": {
    "build": "tsc",
    "start": "node build/index.js",
    "dev": "tsc && node build/index.js"
  },
  "keywords": [
    "mcp",
    "search",
    "files"
  ],
  "author": "",
  "license": "ISC",
  "dependencies": {
    "@modelcontextprotocol/sdk": "^1.5.0",
    "ignore": "^7.0.3"
  },
  "devDependencies": {
    "@types/node": "^20.0.0",
    "typescript": "^5.0.0"
  }
}

```

--------------------------------------------------------------------------------
/src/types.ts:
--------------------------------------------------------------------------------

```typescript
/** Options controlling how `searchFiles` matches text and limits its output. */
export interface SearchOptions {
  /** Text to look for; interpreted as a regular expression when `isRegex` is true. */
  query: string;
  /** When not true, regex metacharacters in `query` are escaped and matched literally. */
  isRegex?: boolean;
  /** When not true, matching ignores case. */
  caseSensitive?: boolean;
  /** When true, the pattern is surrounded by `\b` word boundaries. */
  wholeWord?: boolean;
  /** The matching line's text is attached to each result unless this is explicitly `false`. */
  includeContent?: boolean;
  /** Output size cap in bytes; `searchFiles` defaults it to 4096 and treats negative values as "no limit". */
  maxOutputBytes?: number;
}

/** A single match reported by `searchFiles`. */
export interface SearchResult {
  /** Path of the matching file, relative to the searched directory. */
  file: string;
  /** 1-based line number of the match. */
  line: number;
  /** Whitespace-trimmed text of the matching line; omitted when content is disabled. */
  content?: string;
  /** Index of the first matched character within the original (untrimmed) line. */
  matchStart: number;
  /** Index just past the last matched character within the original (untrimmed) line. */
  matchEnd: number;
}

/**
 * Validated arguments of the MCP `search` tool: the directory to search plus
 * the same fields as `SearchOptions`.
 */
export interface SearchToolArgs {
  /** Directory to search in. */
  directory: string;
  /** Text to look for; interpreted as a regular expression when `isRegex` is true. */
  query: string;
  /** Whether `query` is a regular expression rather than literal text. */
  isRegex?: boolean;
  /** Whether matching is case sensitive. */
  caseSensitive?: boolean;
  /** Whether only whole-word matches are reported. */
  wholeWord?: boolean;
  /** Whether the matching line's text is attached to each result. */
  includeContent?: boolean;
  /** Output size cap in bytes. */
  maxOutputBytes?: number;
}

/** Aggregate figures describing one `searchFiles` run. */
export interface SearchSummary {
  /** Total number of matches found, including any that were not returned. */
  matchCount: number;
  /** Sum of the JSON-serialized byte sizes of all matches found. */
  totalBytes: number;
  /** True when a size limit was in effect and `totalBytes` went past it. */
  limitExceeded: boolean;
}

```

--------------------------------------------------------------------------------
/src/searchEngine.ts:
--------------------------------------------------------------------------------

```typescript
import { promises as fs } from 'fs';
import path from 'path';
import createIgnore from 'ignore';
import { SearchOptions, SearchResult, SearchSummary } from './types.js';

/**
 * Decides which files and directories take part in a search, using
 * gitignore-style rules.
 *
 * Rules are kept per base directory. A single instance is shared by every
 * search the process performs, so a one-shot "initialized" flag would make
 * all later searches reuse the rules of whichever directory was searched
 * first. Keying by directory also keeps overlapping searches of different
 * directories from seeing each other's rules.
 */
class FileFilter {
  // Ignore rules already loaded, keyed by the base directory they belong to.
  private readonly rulesByBaseDir = new Map<string, ReturnType<typeof createIgnore>>();

  /**
   * Loads the ignore rules for `baseDir` unless they are already loaded.
   *
   * Rules come from `<baseDir>/.reposearchignore`; when that file cannot be
   * read, a built-in default pattern list is used instead.
   *
   * @param baseDir Root directory of the search.
   */
  async initialize(baseDir: string) {
    if (this.rulesByBaseDir.has(baseDir)) return;

    const rules = createIgnore();
    try {
      // Try to read .reposearchignore file
      const ignoreFilePath = path.join(baseDir, '.reposearchignore');
      const ignoreContent = await fs.readFile(ignoreFilePath, 'utf-8');
      rules.add(ignoreContent);
    } catch {
      // .reposearchignore is missing or unreadable: use default patterns
      const defaultPatterns = [
        'node_modules/',
        '.git/',
        'build/',
        'dist/',
        // Add basic binary file patterns
        '*.exe', '*.dll', '*.so', '*.dylib',
        '*.jpg', '*.jpeg', '*.png', '*.gif',
        '*.mp3', '*.mp4', '*.zip', '*.tar.gz',
        // Allow common text files
        '!*.txt', '!*.md', '!*.js', '!*.ts',
        '!*.jsx', '!*.tsx', '!*.json', '!*.html',
        '!*.css', '!*.scss', '!*.less', '!*.py',
        '!*.java', '!*.c', '!*.cpp', '!*.h',
        '!*.hpp', '!*.rs', '!*.go', '!*.rb',
        '!*.php', '!*.xml', '!*.yaml', '!*.yml'
      ];
      rules.add(defaultPatterns);
    }

    this.rulesByBaseDir.set(baseDir, rules);
  }

  /**
   * Reports whether a path should be searched (files) or descended into
   * (directories).
   *
   * @param filePath Path of the entry being considered.
   * @param rootDir Root directory of the search; selects which rules apply.
   * @param isDirectory Whether `filePath` is a directory.
   * @returns `true` when the entry is not excluded by the rules for `rootDir`.
   *   When no rules have been loaded for `rootDir`, nothing is excluded.
   */
  shouldIncludeFile(filePath: string, rootDir: string, isDirectory: boolean = false): boolean {
    const rules = this.rulesByBaseDir.get(rootDir);
    if (rules === undefined) return true;

    // Always use path relative to the root directory
    let relativePath = path.relative(rootDir, filePath);

    // For directories, ensure path ends with '/' so directory-only patterns apply
    if (isDirectory && !relativePath.endsWith('/')) {
      relativePath += '/';
    }

    return !rules.ignores(relativePath);
  }
}

// Module-level filter instance used by every walkDirectory call.
const fileFilter = new FileFilter();

/**
 * Recursively yields the paths of searchable text files below `currentDir`.
 *
 * Entries excluded by the ignore rules for `rootDir` are skipped, and excluded
 * directories are not descended into. Files that cannot be read, or whose
 * content contains a NUL character, are skipped as well.
 *
 * @param currentDir Directory whose entries are listed in this call.
 * @param rootDir Root of the search; ignore rules and relative paths are
 *   resolved against it.
 * @throws When the root directory itself cannot be listed. Unreadable
 *   subdirectories are skipped, matching how unreadable files are handled.
 */
async function* walkDirectory(currentDir: string, rootDir: string): AsyncGenerator<string> {
  // Make sure the ignore rules for this search root are loaded
  await fileFilter.initialize(rootDir);

  const entries = await fs
    .readdir(currentDir, { withFileTypes: true })
    .catch((error: unknown) => {
      // A broken root must be reported to the caller; a single unreadable
      // subdirectory should not abort the whole search.
      if (currentDir === rootDir) throw error;
      return [];
    });

  for (const entry of entries) {
    const fullPath = path.join(currentDir, entry.name);

    if (entry.isDirectory()) {
      // Check if directory should be included, passing isDirectory flag
      if (fileFilter.shouldIncludeFile(fullPath, rootDir, true)) {
        yield* walkDirectory(fullPath, rootDir);
      }
      continue;
    }

    if (!fileFilter.shouldIncludeFile(fullPath, rootDir, false)) {
      continue;
    }

    try {
      // Basic binary file check: decode as text and look for NUL characters,
      // which typically indicate binary content
      const text = await fs.readFile(fullPath, { encoding: 'utf8', flag: 'r' });
      if (!text.includes('\0')) {
        yield fullPath;
      }
    } catch {
      // If we can't read the file, skip it
      continue;
    }
  }
}

/**
 * Builds the global regular expression used to scan each line.
 *
 * @param options Search options; `query`, `isRegex`, `caseSensitive` and
 *   `wholeWord` are read.
 * @returns A `RegExp` with the `g` flag, plus `i` unless `caseSensitive` is set.
 * @throws SyntaxError when `isRegex` is set and `query` is not a valid pattern.
 */
function createSearchRegex(options: SearchOptions): RegExp {
  const { query, isRegex, caseSensitive, wholeWord } = options;

  // Escape special regex characters for literal search
  let pattern = isRegex ? query : query.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  if (wholeWord) {
    // Group the pattern so the word boundaries apply to the whole expression;
    // without the group, `foo|bar` would become `\bfoo` OR `bar\b`.
    pattern = `\\b(?:${pattern})\\b`;
  }

  return new RegExp(pattern, caseSensitive ? 'g' : 'gi');
}

/**
 * Measures how many bytes a search result occupies once serialized.
 *
 * @param result The match to measure.
 * @returns Length in bytes of the UTF-8 encoded JSON form of `result`.
 */
function calculateResultSize(result: SearchResult): number {
  const serialized = JSON.stringify(result);
  const encoded = Buffer.from(serialized, 'utf8');
  return encoded.byteLength;
}

/**
 * Searches every eligible text file below `directory` for `options.query`.
 *
 * Each line is scanned independently and every match on a line is reported.
 * `matchStart`/`matchEnd` are offsets into the original line, while `content`
 * (when included) is the whitespace-trimmed line.
 *
 * @param directory Root directory of the search.
 * @param options Matching and output options; `maxOutputBytes` defaults to
 *   4096 and any negative value disables the limit.
 * @returns The matches plus a summary. When the size limit is exceeded the
 *   `results` array is empty, but the summary still counts every match found.
 * @throws Any error raised while building the pattern, walking the directory
 *   or reading a file; walk and read errors are logged before being rethrown.
 */
export async function searchFiles(
  directory: string,
  options: SearchOptions
): Promise<{ results: SearchResult[], summary: SearchSummary }> {
  const results: SearchResult[] = [];
  const regex = createSearchRegex(options);
  const maxOutputBytes = options.maxOutputBytes ?? 4096; // Default to 4096 bytes
  const hasLimit = maxOutputBytes >= 0; // Any negative value (e.g. -1) means no limit
  const includeContent = options.includeContent !== false;
  let totalBytes = 0;
  let matchCount = 0;

  try {
    for await (const filePath of walkDirectory(directory, directory)) {
      const content = await fs.readFile(filePath, 'utf-8');
      const relativeFile = path.relative(directory, filePath);
      // Accept both LF and CRLF line endings so a trailing '\r' does not stop
      // end-of-line anchors such as `$` from matching.
      const lines = content.split(/\r?\n/);

      for (const [lineIndex, line] of lines.entries()) {
        let match: RegExpExecArray | null;

        regex.lastIndex = 0; // Reset regex state
        while ((match = regex.exec(line)) !== null) {
          // A zero-length match (empty query, `x*`, `^$`, ...) leaves lastIndex
          // where it was; step past it or this loop would never terminate.
          if (match[0].length === 0) {
            regex.lastIndex++;
          }

          matchCount++;

          const result: SearchResult = {
            file: relativeFile,
            line: lineIndex + 1,
            matchStart: match.index,
            matchEnd: match.index + match[0].length,
          };

          if (includeContent) {
            result.content = line.trim();
          }

          totalBytes += calculateResultSize(result);

          // Keep collecting only while under the limit; counting continues so
          // the summary reports the full size of the result set.
          if (!hasLimit || totalBytes <= maxOutputBytes) {
            results.push(result);
          }
        }
      }
    }
  } catch (error) {
    console.error('Error during file search:', error);
    throw error;
  }

  const summary: SearchSummary = {
    matchCount,
    totalBytes,
    limitExceeded: hasLimit && totalBytes > maxOutputBytes
  };

  // If limit exceeded, clear results to save bandwidth
  if (summary.limitExceeded) {
    return { results: [], summary };
  }

  return { results, summary };
}

```

--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------

```typescript
#!/usr/bin/env node
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
  CallToolRequestSchema,
  ErrorCode,
  ListToolsRequestSchema,
  McpError,
} from '@modelcontextprotocol/sdk/types.js';
import { searchFiles } from './searchEngine.js';
import { SearchToolArgs } from './types.js';

/**
 * MCP server that exposes a single `search` tool over stdio.
 *
 * The tool validates its arguments, delegates to `searchFiles`, and returns
 * either the matches as pretty-printed JSON or a plain-text notice when the
 * output size limit was exceeded.
 */
class RepoSearchServer {
  private readonly server: Server;

  /** Creates the server, registers the tool handlers and installs shutdown handling. */
  constructor() {
    this.server = new Server(
      {
        name: 'mcp-server-reposearch',
        // Keep in sync with the "version" field of package.json
        version: '1.1.0',
      },
      {
        capabilities: {
          tools: {},
        },
      }
    );

    this.setupToolHandlers();

    // Error handling
    this.server.onerror = (error) => console.error('[MCP Error]', error);
    // Close the server before exiting on Ctrl+C
    process.on('SIGINT', async () => {
      await this.server.close();
      process.exit(0);
    });
  }

  /** Registers the handlers that list the `search` tool and execute it. */
  private setupToolHandlers() {
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: [
        {
          name: 'search',
          description: 'Search for text content in files within a directory, provide more features than builtin `search_files` tool. CAREFULLY set the arguments to avoid introducing too much content. You should use this tool if you need to search the content of file in a repo.',
          inputSchema: {
            type: 'object',
            properties: {
              directory: {
                type: 'string',
                description: 'Directory to search in (use absolute path)',
              },
              query: {
                type: 'string',
                description: 'Search query (keyword or regex)',
              },
              isRegex: {
                type: 'boolean',
                description: 'Whether to treat query as regex pattern',
                default: false,
              },
              caseSensitive: {
                type: 'boolean',
                description: 'Whether to match case sensitively',
                default: false,
              },
              wholeWord: {
                type: 'boolean',
                description: 'Whether to match whole words only',
                default: false,
              },
              includeContent: {
                type: 'boolean',
                description: 'Whether to include matching line content in results. When you don\'t need the detailed content, you MUST disable it to save tokens. When you don\'t need the detailed content, you MUST disable it to save tokens. When you don\'t need the detailed content, you MUST disable it to save tokens.',
                default: true,
              },
              maxOutputBytes: {
                type: 'number',
                description: 'Maximum allowed output size in bytes. Default is 4096. Set to -1 for unlimited output.',
                default: 4096,
              }
            },
            required: ['directory', 'query'],
          },
        },
      ],
    }));

    this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
      if (request.params.name !== 'search') {
        throw new McpError(
          ErrorCode.MethodNotFound,
          `Unknown tool: ${request.params.name}`
        );
      }

      const args = request.params.arguments;
      if (!args || typeof args !== 'object') {
        throw new McpError(ErrorCode.InvalidParams, 'Invalid arguments');
      }

      if (!('directory' in args) || typeof args.directory !== 'string') {
        throw new McpError(ErrorCode.InvalidParams, 'Directory must be a string');
      }

      if (!('query' in args) || typeof args.query !== 'string') {
        throw new McpError(ErrorCode.InvalidParams, 'Query must be a string');
      }

      // Optional arguments of the wrong type fall back to the schema defaults
      const searchArgs: SearchToolArgs = {
        directory: args.directory,
        query: args.query,
        isRegex: typeof args.isRegex === 'boolean' ? args.isRegex : false,
        caseSensitive: typeof args.caseSensitive === 'boolean' ? args.caseSensitive : false,
        wholeWord: typeof args.wholeWord === 'boolean' ? args.wholeWord : false,
        includeContent: typeof args.includeContent === 'boolean' ? args.includeContent : true,
        maxOutputBytes: typeof args.maxOutputBytes === 'number' ? args.maxOutputBytes : 4096,
      };

      const { directory, ...options } = searchArgs;

      try {
        const { results, summary } = await searchFiles(directory, options);

        if (summary.limitExceeded) {
          // Report only the totals so the caller can narrow the search
          return {
            content: [
              {
                type: 'text',
                text: `Output size limit exceeded. Found ${summary.matchCount} matches with a total size of ${summary.totalBytes} bytes, which exceeds the limit of ${options.maxOutputBytes} bytes. Try narrowing your search or increasing the maxOutputBytes limit.`,
              },
            ],
          };
        }

        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                results,
                summary: {
                  matchCount: summary.matchCount,
                  totalBytes: summary.totalBytes
                }
              }, null, 2),
            },
          ],
        };
      } catch (error) {
        // Search failures are returned as a tool error result rather than thrown
        return {
          content: [
            {
              type: 'text',
              text: `Search error: ${error instanceof Error ? error.message : String(error)}`,
            },
          ],
          isError: true,
        };
      }
    });
  }

  /** Connects the server to a stdio transport and logs a startup line to stderr. */
  async run() {
    const transport = new StdioServerTransport();
    await this.server.connect(transport);
    console.error('RepoSearch MCP server running on stdio');
  }
}

// Entry point: create the server and start it; a startup failure is logged to stderr.
const server = new RepoSearchServer();
server.run().catch(console.error);

```