This is page 4 of 4. Use http://codebase.md/zilliztech/claude-context?page={x} to view the full context. # Directory Structure ``` ├── .env.example ├── .eslintrc.js ├── .github │ ├── ISSUE_TEMPLATE │ │ └── bug_report.md │ └── workflows │ ├── ci.yml │ └── release.yml ├── .gitignore ├── .npmrc ├── .vscode │ ├── extensions.json │ ├── launch.json │ ├── settings.json │ └── tasks.json ├── assets │ ├── Architecture.png │ ├── claude-context.png │ ├── docs │ │ ├── file-inclusion-flow.png │ │ ├── indexing-flow-diagram.png │ │ └── indexing-sequence-diagram.png │ ├── file_synchronizer.png │ ├── mcp_efficiency_analysis_chart.png │ ├── signup_and_create_cluster.jpeg │ ├── signup_and_get_apikey.png │ ├── vscode-setup.png │ └── zilliz_cloud_dashboard.jpeg ├── build-benchmark.json ├── CONTRIBUTING.md ├── docs │ ├── dive-deep │ │ ├── asynchronous-indexing-workflow.md │ │ └── file-inclusion-rules.md │ ├── getting-started │ │ ├── environment-variables.md │ │ ├── prerequisites.md │ │ └── quick-start.md │ ├── README.md │ └── troubleshooting │ ├── faq.md │ └── troubleshooting-guide.md ├── evaluation │ ├── .python-version │ ├── analyze_and_plot_mcp_efficiency.py │ ├── case_study │ │ ├── django_14170 │ │ │ ├── both_conversation.log │ │ │ ├── both_result.json │ │ │ ├── grep_conversation.log │ │ │ ├── grep_result.json │ │ │ └── README.md │ │ ├── pydata_xarray_6938 │ │ │ ├── both_conversation.log │ │ │ ├── both_result.json │ │ │ ├── grep_conversation.log │ │ │ ├── grep_result.json │ │ │ └── README.md │ │ └── README.md │ ├── client.py │ ├── generate_subset_json.py │ ├── pyproject.toml │ ├── README.md │ ├── retrieval │ │ ├── __init__.py │ │ ├── base.py │ │ └── custom.py │ ├── run_evaluation.py │ ├── servers │ │ ├── __init__.py │ │ ├── edit_server.py │ │ ├── grep_server.py │ │ └── read_server.py │ ├── utils │ │ ├── __init__.py │ │ ├── constant.py │ │ ├── file_management.py │ │ ├── format.py │ │ └── llm_factory.py │ └── uv.lock ├── examples │ ├── basic-usage │ │ ├── index.ts │ │ ├── package.json │ │ └── README.md │ └── README.md ├── LICENSE ├── package.json ├── packages │ ├── chrome-extension │ │ ├── CONTRIBUTING.md │ │ ├── package.json │ │ ├── README.md │ │ ├── src │ │ │ ├── background.ts │ │ │ ├── config │ │ │ │ └── milvusConfig.ts │ │ │ ├── content.ts │ │ │ ├── icons │ │ │ │ ├── icon128.png │ │ │ │ ├── icon16.png │ │ │ │ ├── icon32.png │ │ │ │ └── icon48.png │ │ │ ├── manifest.json │ │ │ ├── milvus │ │ │ │ └── chromeMilvusAdapter.ts │ │ │ ├── options.html │ │ │ ├── options.ts │ │ │ ├── storage │ │ │ │ └── indexedRepoManager.ts │ │ │ ├── stubs │ │ │ │ └── milvus-vectordb-stub.ts │ │ │ ├── styles.css │ │ │ └── vm-stub.js │ │ ├── tsconfig.json │ │ └── webpack.config.js │ ├── core │ │ ├── CONTRIBUTING.md │ │ ├── package.json │ │ ├── README.md │ │ ├── src │ │ │ ├── context.ts │ │ │ ├── embedding │ │ │ │ ├── base-embedding.ts │ │ │ │ ├── gemini-embedding.ts │ │ │ │ ├── index.ts │ │ │ │ ├── ollama-embedding.ts │ │ │ │ ├── openai-embedding.ts │ │ │ │ └── voyageai-embedding.ts │ │ │ ├── index.ts │ │ │ ├── splitter │ │ │ │ ├── ast-splitter.ts │ │ │ │ ├── index.ts │ │ │ │ └── langchain-splitter.ts │ │ │ ├── sync │ │ │ │ ├── merkle.ts │ │ │ │ └── synchronizer.ts │ │ │ ├── types.ts │ │ │ ├── utils │ │ │ │ ├── env-manager.ts │ │ │ │ └── index.ts │ │ │ └── vectordb │ │ │ ├── index.ts │ │ │ ├── milvus-restful-vectordb.ts │ │ │ ├── milvus-vectordb.ts │ │ │ ├── types.ts │ │ │ └── zilliz-utils.ts │ │ └── tsconfig.json │ ├── mcp │ │ ├── CONTRIBUTING.md │ │ ├── package.json │ │ ├── README.md │ │ ├── src │ │ │ ├── config.ts │ │ │ ├── embedding.ts │ │ │ ├── handlers.ts │ │ │ ├── index.ts │ │ │ ├── snapshot.ts │ │ │ ├── sync.ts │ │ │ └── utils.ts │ │ └── tsconfig.json │ └── vscode-extension │ ├── CONTRIBUTING.md │ ├── copy-assets.js │ ├── LICENSE │ ├── package.json │ ├── README.md │ ├── resources │ │ ├── activity_bar.svg │ │ └── icon.png │ ├── src │ │ ├── commands │ │ │ ├── indexCommand.ts │ │ │ ├── searchCommand.ts │ │ │ └── syncCommand.ts │ │ ├── config │ │ │ └── configManager.ts │ │ ├── extension.ts │ │ ├── stubs │ │ │ ├── ast-splitter-stub.js │ │ │ └── milvus-vectordb-stub.js │ │ └── webview │ │ ├── scripts │ │ │ └── semanticSearch.js │ │ ├── semanticSearchProvider.ts │ │ ├── styles │ │ │ └── semanticSearch.css │ │ ├── templates │ │ │ └── semanticSearch.html │ │ └── webviewHelper.ts │ ├── tsconfig.json │ ├── wasm │ │ ├── tree-sitter-c_sharp.wasm │ │ ├── tree-sitter-cpp.wasm │ │ ├── tree-sitter-go.wasm │ │ ├── tree-sitter-java.wasm │ │ ├── tree-sitter-javascript.wasm │ │ ├── tree-sitter-python.wasm │ │ ├── tree-sitter-rust.wasm │ │ ├── tree-sitter-scala.wasm │ │ └── tree-sitter-typescript.wasm │ └── webpack.config.js ├── pnpm-lock.yaml ├── pnpm-workspace.yaml ├── python │ ├── README.md │ ├── test_context.ts │ ├── test_endtoend.py │ └── ts_executor.py ├── README.md ├── scripts │ └── build-benchmark.js └── tsconfig.json ``` # Files -------------------------------------------------------------------------------- /packages/chrome-extension/src/background.ts: -------------------------------------------------------------------------------- ```typescript // Chrome Extension Background Script with Milvus Integration // This replaces the IndexedDB-based storage with Milvus RESTful API import { ChromeMilvusAdapter, CodeChunk } from './milvus/chromeMilvusAdapter'; import { MilvusConfigManager } from './config/milvusConfig'; import { IndexedRepoManager, IndexedRepository } from './storage/indexedRepoManager'; export { }; const EMBEDDING_DIM = 1536; const EMBEDDING_BATCH_SIZE = 100; const MAX_TOKENS_PER_BATCH = 250000; const MAX_CHUNKS_PER_BATCH = 100; // Cosine similarity function function cosSim(a: number[], b: number[]): number { let dot = 0; let normA = 0; let normB = 0; const len = Math.min(a.length, b.length); for (let i = 0; i < len; i++) { dot += a[i] * b[i]; normA += a[i] * a[i]; normB += b[i] * b[i]; } if (normA === 0 || normB === 0) { return 0; } return dot / (Math.sqrt(normA) * Math.sqrt(normB)); } class EmbeddingModel { private static config: { apiKey: string; model: string } | null = null; private static async getConfig(): Promise<{ apiKey: string; model: string }> { if (!this.config) { const config = await MilvusConfigManager.getOpenAIConfig(); if (!config) { throw new Error('OpenAI API key is not configured.'); } this.config = config; } return this.config; } static async embedBatch(texts: string[]): Promise<number[][]> { const config = await this.getConfig(); const response = await fetch('https://api.openai.com/v1/embeddings', { method: 'POST', headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${config.apiKey}`, }, body: JSON.stringify({ model: config.model, input: texts, }), }); if (!response.ok) { const text = await response.text(); throw new Error(`OpenAI API error: ${response.status} - ${text}`); } const json = await response.json(); return json.data.map((d: any) => d.embedding as number[]); } static async embedSingle(text: string): Promise<number[]> { const results = await this.embedBatch([text]); return results[0]; } static async getInstance(_progress_callback: Function | undefined = undefined): Promise<(input: string | string[], options?: any) => Promise<{ data: number[] }>> { return async (input: string | string[], _opts: any = {}): Promise<{ data: number[] }> => { if (Array.isArray(input)) { const embeddings = await this.embedBatch(input); return { data: embeddings.flat() }; } else { const embedding = await this.embedSingle(input); return { data: embedding }; } }; } } class MilvusVectorDB { private adapter: ChromeMilvusAdapter; public readonly repoCollectionName: string; constructor(repoId: string) { this.repoCollectionName = `chrome_repo_${repoId.replace(/[^a-zA-Z0-9]/g, '_')}`; this.adapter = new ChromeMilvusAdapter(this.repoCollectionName); } async initialize(): Promise<void> { try { await this.adapter.initialize(); const exists = await this.adapter.collectionExists(); if (!exists) { await this.adapter.createCollection(EMBEDDING_DIM); } } catch (error) { console.error('Failed to initialize Milvus:', error); throw error; } } async addChunks(chunks: CodeChunk[]): Promise<void> { if (chunks.length === 0) return; try { await this.adapter.insertChunks(chunks); } catch (error) { console.error('Failed to add chunks to Milvus:', error); throw error; } } async searchSimilar(queryVector: number[], limit: number = 20): Promise<CodeChunk[]> { try { const results = await this.adapter.searchSimilar(queryVector, limit, 0.3); return results.map(result => ({ id: result.id, content: result.content, relativePath: result.relativePath, startLine: result.startLine, endLine: result.endLine, fileExtension: result.fileExtension, metadata: result.metadata, score: result.score, // Include score for frontend display vector: [] // Vector not needed for display })); } catch (error) { console.error('Failed to search in Milvus:', error); throw error; } } async clear(): Promise<void> { try { await this.adapter.clearCollection(); // Recreate the collection await this.adapter.createCollection(EMBEDDING_DIM); } catch (error) { console.error('Failed to clear Milvus collection:', error); throw error; } } async getStats(): Promise<{ totalChunks: number } | null> { try { const stats = await this.adapter.getCollectionStats(); return stats ? { totalChunks: stats.totalEntities } : null; } catch (error) { console.error('Failed to get Milvus stats:', error); return null; } } } // Code splitting functionality - using same parameters as VSCode extension function splitCode(content: string, language: string = '', chunkSize: number = 1000, chunkOverlap: number = 200): { content: string; startLine: number; endLine: number }[] { const lines = content.split('\n'); const chunks: { content: string; startLine: number; endLine: number }[] = []; // Simple character-based chunking that approximates LangChain's RecursiveCharacterTextSplitter let currentChunk: string[] = []; let currentSize = 0; let startLine = 1; for (let i = 0; i < lines.length; i++) { const line = lines[i]; const lineSize = line.length + 1; // +1 for newline if (currentSize + lineSize > chunkSize && currentChunk.length > 0) { // Create chunk const chunkContent = currentChunk.join('\n'); chunks.push({ content: chunkContent, startLine: startLine, endLine: startLine + currentChunk.length - 1 }); // Create overlap - use line-based overlap instead of character-based const overlapLines = Math.min( Math.floor(chunkOverlap / (chunkContent.length / currentChunk.length)), currentChunk.length ); const newStartLine = startLine + currentChunk.length - overlapLines; currentChunk = currentChunk.slice(-overlapLines); currentSize = currentChunk.join('\n').length; startLine = newStartLine; } currentChunk.push(line); currentSize += lineSize; } if (currentChunk.length > 0) { chunks.push({ content: currentChunk.join('\n'), startLine: startLine, endLine: startLine + currentChunk.length - 1 }); } return chunks.filter(chunk => chunk.content.trim().length > 0); } // GitHub API helpers (reused from original) async function validateGitHubToken(token: string): Promise<boolean> { try { const response = await fetch('https://api.github.com/user', { headers: { 'Authorization': `Bearer ${token}`, 'Accept': 'application/vnd.github+json', 'X-GitHub-Api-Version': '2022-11-28' } }); if (response.status === 401) { throw new Error('Invalid GitHub token or token has expired'); } if (response.status === 403) { const remainingRequests = response.headers.get('X-RateLimit-Remaining'); if (remainingRequests === '0') { throw new Error('GitHub API rate limit exceeded. Please try again later.'); } throw new Error('GitHub token does not have sufficient permissions'); } return response.ok; } catch (error) { console.error('GitHub token validation failed:', error); throw error; } } async function getGitHubToken(): Promise<string> { return new Promise((resolve, reject) => { chrome.storage.sync.get(['githubToken'], async (items) => { if (chrome.runtime.lastError) { reject(chrome.runtime.lastError); } else if (!items.githubToken) { reject(new Error('GitHub token not found. Please configure your GitHub token in the extension settings.')); } else { try { // Validate token before returning await validateGitHubToken(items.githubToken); resolve(items.githubToken); } catch (error) { reject(new Error(`GitHub token validation failed: ${error instanceof Error ? error.message : 'Unknown error'}`)); } } }); }); } // Check repository access async function checkRepositoryAccess(owner: string, repo: string): Promise<void> { const token = await getGitHubToken(); const apiUrl = `https://api.github.com/repos/${owner}/${repo}`; const response = await fetch(apiUrl, { headers: { 'Authorization': `Bearer ${token}`, 'Accept': 'application/vnd.github+json', 'X-GitHub-Api-Version': '2022-11-28' } }); if (response.status === 404) { throw new Error('Repository not found or you do not have access to it. Please check the repository name and ensure your GitHub token has the necessary permissions.'); } if (response.status === 403) { const remainingRequests = response.headers.get('X-RateLimit-Remaining'); if (remainingRequests === '0') { throw new Error('GitHub API rate limit exceeded. Please try again later.'); } throw new Error('Access forbidden. Your GitHub token may not have sufficient permissions to access this repository.'); } if (!response.ok) { throw new Error(`GitHub API error: ${response.status} - ${await response.text()}`); } } // Rate limiting helper async function handleRateLimit(response: Response): Promise<void> { if (response.status === 403) { const remainingRequests = response.headers.get('X-RateLimit-Remaining'); const resetTime = response.headers.get('X-RateLimit-Reset'); if (remainingRequests === '0' && resetTime) { const resetDate = new Date(parseInt(resetTime) * 1000); const waitTime = resetDate.getTime() - Date.now(); if (waitTime > 0 && waitTime < 3600000) { // Wait up to 1 hour console.log(`Rate limit exceeded. Waiting ${Math.ceil(waitTime / 1000)} seconds...`); await new Promise(resolve => setTimeout(resolve, waitTime + 1000)); } else { throw new Error('GitHub API rate limit exceeded. Please try again later.'); } } } } async function fetchRepoFiles(owner: string, repo: string): Promise<any[]> { const token = await getGitHubToken(); // First get the default branch const repoInfoUrl = `https://api.github.com/repos/${owner}/${repo}`; const repoResponse = await fetch(repoInfoUrl, { headers: { 'Authorization': `Bearer ${token}`, 'Accept': 'application/vnd.github+json', 'X-GitHub-Api-Version': '2022-11-28' } }); if (!repoResponse.ok) { throw new Error(`GitHub API error: ${repoResponse.status} - ${await repoResponse.text()}`); } const repoData = await repoResponse.json(); const defaultBranch = repoData.default_branch || 'main'; const apiUrl = `https://api.github.com/repos/${owner}/${repo}/git/trees/${defaultBranch}?recursive=1`; const response = await fetch(apiUrl, { headers: { 'Authorization': `Bearer ${token}`, 'Accept': 'application/vnd.github+json', 'X-GitHub-Api-Version': '2022-11-28' } }); if (!response.ok) { await handleRateLimit(response); throw new Error(`GitHub API error: ${response.status} - ${await response.text()}`); } const data = await response.json(); return data.tree.filter((item: any) => item.type === 'blob' && item.path.match(/\.(ts|tsx|js|jsx|py|java|cpp|c|h|hpp|cs|go|rs|php|rb|swift|kt|scala|m|mm|md)$/) ); } async function fetchFileContent(owner: string, repo: string, path: string): Promise<string> { const token = await getGitHubToken(); const apiUrl = `https://api.github.com/repos/${owner}/${repo}/contents/${path}`; const response = await fetch(apiUrl, { headers: { 'Authorization': `Bearer ${token}`, 'Accept': 'application/vnd.github+json', 'X-GitHub-Api-Version': '2022-11-28' } }); if (!response.ok) { await handleRateLimit(response); throw new Error(`Failed to fetch file: ${response.status} - ${await response.text()}`); } const data = await response.json(); if (data.content) { return atob(data.content.replace(/\n/g, '')); } throw new Error('File content not available'); } // Main message handlers chrome.runtime.onMessage.addListener((request, sender, sendResponse) => { if (request.action === 'indexRepo') { handleIndexRepo(request, sendResponse); return true; // Keep message channel open } else if (request.action === 'searchCode') { handleSearchCode(request, sendResponse); return true; } else if (request.action === 'clearIndex') { handleClearIndex(request, sendResponse); return true; } else if (request.action === 'testMilvusConnection') { handleTestMilvusConnection(sendResponse); return true; } else if (request.action === 'checkIndexStatus') { handleCheckIndexStatus(request, sendResponse); return true; } else if (request.action === 'getIndexedRepos') { handleGetIndexedRepos(sendResponse); return true; } }); async function handleTestMilvusConnection(sendResponse: Function) { try { console.log('Testing Milvus connection...'); const adapter = new ChromeMilvusAdapter('test_connection'); const connected = await adapter.testConnection(); console.log('Milvus connection test completed successfully'); sendResponse({ success: true, connected: true }); } catch (error) { console.error('Milvus connection test failed:', error); let errorMessage = 'Unknown error'; if (error instanceof Error) { errorMessage = error.message; } else if (typeof error === 'string') { errorMessage = error; } // Provide more specific error messages based on common issues if (errorMessage.includes('fetch')) { errorMessage = 'Network error: Unable to connect to Milvus server. Check address and network connectivity.'; } else if (errorMessage.includes('CORS')) { errorMessage = 'CORS error: Cross-origin request blocked. Check server CORS configuration.'; } else if (errorMessage.includes('401') || errorMessage.includes('unauthorized')) { errorMessage = 'Authentication failed: Check your Milvus token or username/password.'; } else if (errorMessage.includes('404')) { errorMessage = 'Server not found: Check your Milvus server address.'; } sendResponse({ success: false, connected: false, error: errorMessage }); } } async function handleIndexRepo(request: any, sendResponse: Function) { try { const { owner, repo } = request; const repoId = `${owner}/${repo}`; sendResponse({ success: true, message: 'Starting indexing process...' }); // Check repository access first await checkRepositoryAccess(owner, repo); // Initialize Milvus const vectorDB = new MilvusVectorDB(repoId); await vectorDB.initialize(); // Use fixed chunking configuration (same as VSCode extension) const chunkSize = 1000; // Same as VSCode extension default const chunkOverlap = 200; // Same as VSCode extension default // Fetch repository files const files = await fetchRepoFiles(owner, repo); console.log(`Found ${files.length} files to index`); // Process files using core package approach const result = await processFileList(files, owner, repo, repoId, vectorDB, chunkSize, chunkOverlap); // Send completion message chrome.tabs.query({ active: true, currentWindow: true }, (tabs) => { if (tabs[0]?.id) { chrome.tabs.sendMessage(tabs[0].id, { action: 'indexComplete', stats: { indexedFiles: result.processedFiles, totalChunks: result.totalChunks } }); } }); await IndexedRepoManager.addIndexedRepo({ id: repoId, owner, repo, totalFiles: result.processedFiles, totalChunks: result.totalChunks, collectionName: vectorDB.repoCollectionName }); } catch (error) { console.error('Indexing failed:', error); chrome.tabs.query({ active: true, currentWindow: true }, (tabs) => { if (tabs[0]?.id) { chrome.tabs.sendMessage(tabs[0].id, { action: 'indexError', error: error instanceof Error ? error.message : 'Unknown error' }); } }); } } async function processFileList( files: any[], owner: string, repo: string, repoId: string, vectorDB: MilvusVectorDB, chunkSize: number, chunkOverlap: number ): Promise<{ processedFiles: number; totalChunks: number }> { console.log(`🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`); let chunkBuffer: Array<{ chunk: CodeChunk; repoId: string }> = []; let processedFiles = 0; let totalChunks = 0; for (let i = 0; i < files.length; i++) { const file = files[i]; try { const content = await fetchFileContent(owner, repo, file.path); const fileExtension = file.path.split('.').pop() || ''; const chunks = splitCode(content, fileExtension, chunkSize, chunkOverlap); // Log files with many chunks or large content if (chunks.length > 50) { console.warn(`⚠️ File ${file.path} generated ${chunks.length} chunks (${Math.round(content.length / 1024)}KB)`); } else if (content.length > 100000) { console.log(`📄 Large file ${file.path}: ${Math.round(content.length / 1024)}KB -> ${chunks.length} chunks`); } // Add chunks to buffer for (let j = 0; j < chunks.length; j++) { const chunk = chunks[j]; if (chunk.content.trim().length > 10) { const codeChunk: CodeChunk = { id: `${file.path}_chunk_${j}`, content: chunk.content, relativePath: file.path, startLine: chunk.startLine, endLine: chunk.endLine, fileExtension: fileExtension, metadata: JSON.stringify({ repoId, fileSize: file.size, chunkIndex: j }) }; chunkBuffer.push({ chunk: codeChunk, repoId }); totalChunks++; // Process batch when buffer reaches EMBEDDING_BATCH_SIZE if (chunkBuffer.length >= EMBEDDING_BATCH_SIZE) { try { await processChunkBuffer(chunkBuffer, vectorDB); } catch (error) { console.error(`❌ Failed to process chunk batch: ${error}`); } finally { chunkBuffer = []; // Always clear buffer, even on failure } } } } processedFiles++; // Send progress update chrome.tabs.query({ active: true, currentWindow: true }, (tabs) => { if (tabs[0]?.id) { chrome.tabs.sendMessage(tabs[0].id, { action: 'indexProgress', progress: `Indexed ${processedFiles}/${files.length} files (${totalChunks} chunks)` }); } }); } catch (error) { console.warn(`⚠️ Skipping file ${file.path}: ${error}`); } } // Process any remaining chunks in the buffer if (chunkBuffer.length > 0) { console.log(`📝 Processing final batch of ${chunkBuffer.length} chunks`); try { await processChunkBuffer(chunkBuffer, vectorDB); } catch (error) { console.error(`❌ Failed to process final chunk batch: ${error}`); } } return { processedFiles, totalChunks }; } async function processChunkBuffer( chunkBuffer: Array<{ chunk: CodeChunk; repoId: string }>, vectorDB: MilvusVectorDB ): Promise<void> { if (chunkBuffer.length === 0) return; // Extract chunks const chunks = chunkBuffer.map(item => item.chunk); // Estimate tokens (rough estimation: 1 token ≈ 4 characters) const estimatedTokens = chunks.reduce((sum, chunk) => sum + Math.ceil(chunk.content.length / 4), 0); console.log(`🔄 Processing batch of ${chunks.length} chunks (~${estimatedTokens} tokens)`); await processChunkBatch(chunks, vectorDB); } async function processChunkBatch(chunks: CodeChunk[], vectorDB: MilvusVectorDB): Promise<void> { // Generate embedding vectors using batch processing const chunkContents = chunks.map(chunk => chunk.content); const embeddings: number[][] = await EmbeddingModel.embedBatch(chunkContents); // Add embeddings to chunks const chunksWithEmbeddings = chunks.map((chunk, index) => ({ ...chunk, vector: embeddings[index] })); // Store to vector database await vectorDB.addChunks(chunksWithEmbeddings); } async function handleSearchCode(request: any, sendResponse: Function) { try { const { query, owner, repo } = request; const repoId = `${owner}/${repo}`; // Initialize Milvus const vectorDB = new MilvusVectorDB(repoId); await vectorDB.initialize(); // Get query embedding using batch processing (single query) const queryEmbedding = await EmbeddingModel.embedSingle(query); // Search similar chunks const results = await vectorDB.searchSimilar(queryEmbedding, 20); await IndexedRepoManager.updateLastSearchTime(repoId); sendResponse({ success: true, results }); } catch (error) { console.error('Search failed:', error); sendResponse({ success: false, error: error instanceof Error ? error.message : 'Unknown error' }); } } async function handleClearIndex(request: any, sendResponse: Function) { try { const { owner, repo } = request; const repoId = `${owner}/${repo}`; const vectorDB = new MilvusVectorDB(repoId); await vectorDB.initialize(); await vectorDB.clear(); await IndexedRepoManager.removeIndexedRepo(repoId); sendResponse({ success: true, message: 'Index cleared successfully' }); } catch (error) { console.error('Clear index failed:', error); sendResponse({ success: false, error: error instanceof Error ? error.message : 'Unknown error' }); } } async function handleCheckIndexStatus(request: any, sendResponse: Function) { try { const { owner, repo } = request; const repoId = `${owner}/${repo}`; const indexedRepo = await IndexedRepoManager.isRepoIndexed(repoId); if (indexedRepo) { try { const vectorDB = new MilvusVectorDB(repoId); await vectorDB.initialize(); const stats = await vectorDB.getStats(); sendResponse({ success: true, isIndexed: true, indexInfo: indexedRepo, stats }); } catch (milvusError) { await IndexedRepoManager.removeIndexedRepo(repoId); sendResponse({ success: true, isIndexed: false, message: 'Index record found but collection missing, cleaned up storage' }); } } else { sendResponse({ success: true, isIndexed: false }); } } catch (error) { console.error('Check index status failed:', error); sendResponse({ success: false, error: error instanceof Error ? error.message : 'Unknown error' }); } } async function handleGetIndexedRepos(sendResponse: Function) { try { const repos = await IndexedRepoManager.getRecentlyIndexedRepos(20); sendResponse({ success: true, repos }); } catch (error) { console.error('Get indexed repos failed:', error); sendResponse({ success: false, error: error instanceof Error ? error.message : 'Unknown error' }); } } ``` -------------------------------------------------------------------------------- /packages/chrome-extension/src/content.ts: -------------------------------------------------------------------------------- ```typescript export { }; function isRepoHomePage() { // Don't show on GitHub settings pages if (window.location.pathname.startsWith('/settings/')) { return false; } // Matches /user/repo or /user/repo/tree/branch but not /user/repo/issues etc. return /^\/[^/]+\/[^/]+(\/tree\/[^/]+)?\/?$/.test(window.location.pathname); } function injectUI() { if (!isRepoHomePage()) { const existingContainer = document.getElementById('code-search-container'); if (existingContainer) { existingContainer.remove(); } return; } // Attempt to locate GitHub's sidebar first so the search UI aligns with the "About" section const sidebar = document.querySelector('.Layout-sidebar') as HTMLElement | null; // Fallback to repository navigation bar ("Code", "Issues", etc.) if sidebar is not present const repoNav = document.querySelector('nav.UnderlineNav') as HTMLElement | null; const existingContainer = document.getElementById('code-search-container'); if ((sidebar || repoNav) && !existingContainer) { // Check if GitHub token is set chrome.storage.sync.get('githubToken', (data) => { const hasToken = !!data.githubToken; // Prevent duplicate insertion in case multiple async callbacks race if (document.getElementById('code-search-container')) { return; } const container = document.createElement('div'); container.id = 'code-search-container'; container.className = 'Box color-border-muted mb-3'; container.innerHTML = ` <div class="Box-header color-bg-subtle d-flex flex-items-center"> <h2 class="Box-title flex-auto">Code Search</h2> <a href="#" id="open-settings-link" class="Link--muted"> <svg class="octicon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" width="16" height="16"> <path d="M8 0a8.2 8.2 0 0 1 .701.031C9.444.095 9.99.645 10.16 1.29l.288 1.107c.018.066.079.158.212.224.231.114.454.243.668.386.123.082.233.09.299.071l1.103-.303c.644-.176 1.392.021 1.82.63.27.385.506.792.704 1.218.315.675.111 1.422-.364 1.891l-.814.806c-.049.048-.098.147-.088.294.016.257.016.515 0 .772-.01.147.038.246.088.294l.814.806c.475.469.679 1.216.364 1.891a7.977 7.977 0 0 1-.704 1.217c-.428.61-1.176.807-1.82.63l-1.102-.302c-.067-.019-.177-.011-.3.071a5.909 5.909 0 0 1-.668.386c-.133.066-.194.158-.211.224l-.29 1.106c-.168.646-.715 1.196-1.458 1.26a8.006 8.006 0 0 1-1.402 0c-.743-.064-1.289-.614-1.458-1.26l-.289-1.106c-.018-.066-.079-.158-.212-.224a5.738 5.738 0 0 1-.668-.386c-.123-.082-.233-.09-.299-.071l-1.103.303c-.644.176-1.392-.021-1.82-.63a8.12 8.12 0 0 1-.704-1.218c-.315-.675-.111-1.422.363-1.891l.815-.806c.05-.048.098-.147.088-.294a6.214 6.214 0 0 1 0-.772c.01-.147-.038-.246-.088-.294l-.815-.806C.635 6.045.431 5.298.746 4.623a7.92 7.92 0 0 1 .704-1.217c.428-.61 1.176-.807 1.82-.63l1.102.302c.067.019.177.011.3-.071.214-.143.437-.272.668-.386.133-.066.194-.158.211-.224l.29-1.106C6.009.645 6.556.095 7.299.03 7.53.01 7.764 0 8 0Zm-.571 1.525c-.036.003-.108.036-.137.146l-.289 1.105c-.147.561-.549.967-.998 1.189-.173.086-.34.183-.5.29-.417.278-.97.423-1.529.27l-1.103-.303c-.109-.03-.175.016-.195.045-.22.312-.412.644-.573.99-.014.031-.021.11.059.19l.815.806c.411.406.562.957.53 1.456a4.709 4.709 0 0 0 0 .582c.032.499-.119 1.05-.53 1.456l-.815.806c-.081.08-.073.159-.059.19.162.346.353.677.573.989.02.03.085.076.195.046l1.102-.303c.56-.153 1.113-.008 1.53.27.161.107.328.204.501.29.447.222.85.629.997 1.189l.289 1.105c.029.109.101.143.137.146a6.6 6.6 0 0 0 1.142 0c.036-.003.108-.036.137-.146l.289-1.105c.147-.561.549-.967.998-1.189.173-.086.34-.183.5-.29.417-.278.97-.423 1.529-.27l1.103.303c.109.03.175-.016.195-.045.22-.313.411-.644.573-.99.014-.031.021-.11-.059-.19l-.815-.806c-.411-.406-.562-.957-.53-1.456a4.709 4.709 0 0 0 0-.582c-.032-.499.119-1.05.53-1.456l.815-.806c.081-.08.073-.159.059-.19a6.464 6.464 0 0 0-.573-.989c-.02-.03-.085-.076-.195-.046l-1.102.303c-.56.153-1.113.008-1.53-.27a4.44 4.44 0 0 0-.501-.29c-.447-.222-.85-.629-.997-1.189l-.289-1.105c-.029-.11-.101-.143-.137-.146a6.6 6.6 0 0 0-1.142 0ZM11 8a3 3 0 1 1-6 0 3 3 0 0 1 6 0ZM9.5 8a1.5 1.5 0 1 0-3.001.001A1.5 1.5 0 0 0 9.5 8Z"></path> </svg> </a> </div> <div class="Box-body"> ${!hasToken ? ` <div class="flash flash-warn mb-2"> GitHub token not set. <a href="#" id="open-settings-link-warning" class="settings-link">Configure settings</a> </div> ` : ''} <div class="d-flex flex-column"> <div class="form-group"> <div class="d-flex flex-items-center mb-2" id="search-row"> <input type="text" id="search-input" class="form-control input-sm flex-1" placeholder="Search code..." ${!hasToken ? 'disabled' : ''}> <button id="search-btn" class="btn btn-sm ml-2" ${!hasToken ? 'disabled' : ''}> Search </button> </div> <div class="buttons-container"> <button id="index-repo-btn" class="btn btn-sm" ${!hasToken ? 'disabled' : ''}> Index Repository </button> <button id="clear-index-btn" class="btn btn-sm" ${!hasToken ? 'disabled' : ''}> Clear Index </button> <button id="show-recent-btn" class="btn btn-sm Link--muted" ${!hasToken ? 'disabled' : ''}> Recent Repos </button> </div> </div> <div id="recent-repos" class="Box mt-2" style="display:none;"> <div class="Box-header"> <h3 class="Box-title">Recently Indexed Repositories</h3> <button id="close-recent-btn" class="btn-octicon float-right"> <svg class="octicon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" width="16" height="16"> <path d="M3.72 3.72a.75.75 0 0 1 1.06 0L8 6.94l3.22-3.22a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l3.22 3.22a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-3.22 3.22a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L6.94 8 3.72 4.78a.75.75 0 0 1 0-1.06Z"></path> </svg> </button> </div> <div id="recent-repos-list" class="Box-body"> Loading... </div> </div> <div id="search-results" class="Box mt-2" style="display:none;"></div> <div id="indexing-status" class="color-fg-muted text-small mt-2"></div> </div> </div> `; // If sidebar is available, place container at the top; otherwise fallback to below nav bar if (sidebar) { sidebar.prepend(container); } else if (repoNav) { repoNav.parentElement?.insertBefore(container, repoNav.nextSibling); } document.getElementById('index-repo-btn')?.addEventListener('click', startIndexing); document.getElementById('clear-index-btn')?.addEventListener('click', clearIndex); document.getElementById('search-btn')?.addEventListener('click', handleSearch); document.getElementById('show-recent-btn')?.addEventListener('click', showRecentRepos); document.getElementById('close-recent-btn')?.addEventListener('click', hideRecentRepos); document.getElementById('search-input')?.addEventListener('keypress', (e) => { if (e.key === 'Enter') { handleSearch(); } }); // Add event listeners for settings links document.getElementById('open-settings-link')?.addEventListener('click', (e) => { e.preventDefault(); const optionsUrl = chrome.runtime.getURL('options.html'); window.open(optionsUrl, '_blank'); }); document.getElementById('open-settings-link-warning')?.addEventListener('click', (e) => { e.preventDefault(); const optionsUrl = chrome.runtime.getURL('options.html'); window.open(optionsUrl, '_blank'); }); // Check if repository is already indexed automatically checkIndexStatus(); }); } } function startIndexing() { const [owner, repo] = window.location.pathname.slice(1).split('/'); console.log('Start indexing for:', owner, repo); const statusEl = document.getElementById('indexing-status'); if (statusEl) { statusEl.textContent = '🚀 Starting indexing with Milvus...'; statusEl.style.color = '#3b82f6'; } const indexBtn = document.getElementById('index-repo-btn') as HTMLButtonElement; const clearBtn = document.getElementById('clear-index-btn') as HTMLButtonElement; const searchBtn = document.getElementById('search-btn') as HTMLButtonElement; const searchInput = document.getElementById('search-input') as HTMLInputElement; if (indexBtn) { indexBtn.disabled = true; indexBtn.textContent = '⏳ Indexing...'; } if (clearBtn) clearBtn.disabled = true; if (searchBtn) searchBtn.disabled = true; if (searchInput) searchInput.disabled = true; chrome.runtime.sendMessage({ action: 'indexRepo', owner, repo }); } async function checkIndexStatus() { const [owner, repo] = window.location.pathname.slice(1).split('/'); if (!owner || !repo) return; const repoId = `${owner}/${repo}`; const statusEl = document.getElementById('indexing-status'); if (statusEl) statusEl.textContent = 'Checking repository index status...'; try { chrome.runtime.sendMessage( { action: 'checkIndexStatus', owner, repo }, (response) => { if (chrome.runtime.lastError) { console.error('Error checking index status:', chrome.runtime.lastError); updateUIState(false); if (statusEl) statusEl.textContent = 'Repository needs to be indexed before searching'; return; } if (response && response.success) { const isIndexed = response.isIndexed; updateUIState(isIndexed, response.indexInfo); if (isIndexed && response.indexInfo) { const indexedDate = new Date(response.indexInfo.indexedAt).toLocaleDateString(); const lastSearchText = response.indexInfo.lastSearchAt ? ` • Last searched: ${new Date(response.indexInfo.lastSearchAt).toLocaleDateString()}` : ''; if (statusEl) { statusEl.textContent = `✅ Repository indexed on ${indexedDate} (${response.indexInfo.totalFiles} files, ${response.indexInfo.totalChunks} chunks)${lastSearchText}`; statusEl.style.color = '#22c55e'; } } else { if (statusEl) { statusEl.textContent = '❌ Repository needs to be indexed before searching'; statusEl.style.color = '#ef4444'; } } } else { console.error('Check index status failed:', response?.error); updateUIState(false); if (statusEl) { statusEl.textContent = '❌ Repository needs to be indexed before searching'; statusEl.style.color = '#ef4444'; } } } ); } catch (error) { console.error('Error checking index status:', error); updateUIState(false); if (statusEl) { statusEl.textContent = '❌ Repository needs to be indexed before searching'; statusEl.style.color = '#ef4444'; } } } function updateUIState(isIndexed: boolean, indexInfo?: any) { const indexBtn = document.getElementById('index-repo-btn') as HTMLButtonElement; const clearBtn = document.getElementById('clear-index-btn') as HTMLButtonElement; const searchBtn = document.getElementById('search-btn') as HTMLButtonElement; const searchInput = document.getElementById('search-input') as HTMLInputElement; const statusEl = document.getElementById('indexing-status'); if (isIndexed) { if (indexBtn) { indexBtn.textContent = '🔄 Re-Index Repository'; indexBtn.title = 'Re-index the repository to update the search index'; indexBtn.disabled = false; indexBtn.style.backgroundColor = '#fbbf24'; indexBtn.style.color = '#1f2937'; } if (clearBtn) { clearBtn.disabled = false; clearBtn.style.backgroundColor = '#ef4444'; clearBtn.style.color = 'white'; } if (searchBtn) { searchBtn.disabled = false; searchBtn.style.backgroundColor = '#10b981'; searchBtn.style.color = 'white'; } if (searchInput) { searchInput.disabled = false; searchInput.style.borderColor = '#10b981'; } if (statusEl && !indexInfo) { statusEl.textContent = '✅ Repository is indexed and ready for search'; statusEl.style.color = '#22c55e'; } } else { if (indexBtn) { indexBtn.textContent = '📚 Index Repository'; indexBtn.title = 'Index the repository to enable code search'; indexBtn.disabled = false; indexBtn.style.backgroundColor = '#3b82f6'; indexBtn.style.color = 'white'; } if (clearBtn) { clearBtn.disabled = true; clearBtn.style.backgroundColor = '#9ca3af'; clearBtn.style.color = '#6b7280'; } if (searchBtn) { searchBtn.disabled = true; searchBtn.style.backgroundColor = '#9ca3af'; searchBtn.style.color = '#6b7280'; } if (searchInput) { searchInput.disabled = true; searchInput.style.borderColor = '#d1d5db'; searchInput.style.backgroundColor = '#f9fafb'; } if (statusEl && !indexInfo) { statusEl.textContent = '❌ Repository needs to be indexed before searching'; statusEl.style.color = '#ef4444'; } } } function handleSearch() { const inputElement = document.getElementById('search-input') as HTMLInputElement; const query = inputElement.value.trim(); const resultsContainer = document.getElementById('search-results'); const searchButton = document.getElementById('search-btn') as HTMLButtonElement; if (!query || query.length < 3) { if (resultsContainer) resultsContainer.style.display = 'none'; return; } if (searchButton) searchButton.disabled = true; const [owner, repo] = window.location.pathname.slice(1).split('/'); const statusEl = document.getElementById('indexing-status'); if (statusEl) { statusEl.textContent = '🔍 Searching with Milvus...'; statusEl.style.color = '#3b82f6'; } try { chrome.runtime.sendMessage({ action: 'searchCode', owner, repo, query }, (response) => { if (chrome.runtime.lastError) { console.error('Search error:', chrome.runtime.lastError); if (searchButton) searchButton.disabled = false; if (statusEl) { statusEl.textContent = '❌ Search failed: ' + chrome.runtime.lastError.message; statusEl.style.color = '#ef4444'; } return; } if (response && response.success) { displayResults(response.results || []); if (statusEl) { statusEl.textContent = `✅ Found ${response.results?.length || 0} results`; statusEl.style.color = '#22c55e'; } } else { console.error('Search failed:', response?.error); if (statusEl) { statusEl.textContent = '❌ Search failed: ' + (response?.error || 'Unknown error'); statusEl.style.color = '#ef4444'; } } if (searchButton) searchButton.disabled = false; }); } catch (error) { console.error('Error sending search message:', error); if (searchButton) searchButton.disabled = false; if (statusEl) { statusEl.textContent = '❌ Search failed: ' + error; statusEl.style.color = '#ef4444'; } } } function displayResults(results: any[]) { const resultsContainer = document.getElementById('search-results'); if (!resultsContainer) return; if (!results || results.length === 0) { resultsContainer.style.display = 'none'; return; } // Ensure results are sorted by score in descending order (highest similarity first) const sortedResults = [...results].sort((a, b) => { const scoreA = a.score !== undefined && a.score !== null ? a.score : 0; const scoreB = b.score !== undefined && b.score !== null ? b.score : 0; return scoreB - scoreA; }); resultsContainer.innerHTML = ''; resultsContainer.style.display = 'block'; const list = document.createElement('ul'); list.className = 'list-style-none'; sortedResults.forEach(result => { const item = document.createElement('li'); // Extract owner/repo from current URL const [owner, repo] = window.location.pathname.slice(1).split('/'); // Format the file path to show it nicely const filePath = result.relativePath; const fileExt = filePath.split('.').pop(); // Calculate match percentage and determine CSS class const matchPercentage = result.score !== undefined && result.score !== null ? (result.score * 100) : 0; let matchClass = 'low'; if (matchPercentage >= 80) { matchClass = 'high'; } else if (matchPercentage >= 60) { matchClass = 'medium'; } item.innerHTML = ` <div class="d-flex flex-items-center"> <svg class="octicon mr-2 color-fg-muted" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" width="16" height="16"> <path fill-rule="evenodd" d="M3.75 1.5a.25.25 0 00-.25.25v11.5c0 .138.112.25.25.25h8.5a.25.25 0 00.25-.25V6H9.75A1.75 1.75 0 018 4.25V1.5H3.75zm5.75.56v2.19c0 .138.112.25.25.25h2.19L9.5 2.06zM2 1.75C2 .784 2.784 0 3.75 0h5.086c.464 0 .909.184 1.237.513l3.414 3.414c.329.328.513.773.513 1.237v8.086A1.75 1.75 0 0112.25 15h-8.5A1.75 1.75 0 012 13.25V1.75z"></path> </svg> <a href="https://github.com/${owner}/${repo}/blob/main/${result.relativePath}#L${result.startLine}" class="Link--primary flex-auto" style="font-weight: 600;"> ${result.relativePath} </a> <span class="Label Label--secondary ml-1">${fileExt}</span> <span class="color-fg-muted text-small ml-2">Lines ${result.startLine}-${result.endLine}</span> <span class="match-score ${matchClass}">${matchPercentage.toFixed(1)}%</span> </div> <div class="color-fg-muted text-small mt-2"> <pre class="text-small p-3" style="background-color: #f6f8fa; border-radius: 6px; overflow-x: auto; white-space: pre-wrap; font-size: 13px; line-height: 1.5; border: 1px solid #e1e4e8;">${escapeHtml(result.content.substring(0, 300))}${result.content.length > 300 ? '...' : ''}</pre> </div> `; item.className = 'border-bottom py-2 search-result-item'; list.appendChild(item); }); resultsContainer.appendChild(list); } function escapeHtml(unsafe: string) { return unsafe .replace(/&/g, "&") .replace(/</g, "<") .replace(/>/g, ">") .replace(/"/g, """) .replace(/'/g, "'"); } function clearIndex() { const [owner, repo] = window.location.pathname.slice(1).split('/'); const repoId = `${owner}/${repo}`; const clearBtn = document.getElementById('clear-index-btn') as HTMLButtonElement; if (clearBtn) clearBtn.disabled = true; const statusEl = document.getElementById('indexing-status'); if (statusEl) { statusEl.textContent = '🗑️ Clearing Milvus index...'; statusEl.style.color = '#f59e0b'; } try { chrome.runtime.sendMessage({ action: 'clearIndex', owner, repo }, (response) => { if (chrome.runtime.lastError) { console.error('Error clearing index:', chrome.runtime.lastError); if (clearBtn) clearBtn.disabled = false; if (statusEl) { statusEl.textContent = '❌ Failed to clear index: ' + chrome.runtime.lastError.message; statusEl.style.color = '#ef4444'; } return; } if (response && response.success) { updateUIState(false); if (statusEl) { statusEl.textContent = '✅ Index cleared. Repository needs to be indexed before searching'; statusEl.style.color = '#22c55e'; } // Hide search results if visible const resultsContainer = document.getElementById('search-results'); if (resultsContainer) resultsContainer.style.display = 'none'; // Clear search input const searchInput = document.getElementById('search-input') as HTMLInputElement; if (searchInput) searchInput.value = ''; } else { if (clearBtn) clearBtn.disabled = false; if (statusEl) { statusEl.textContent = '❌ Failed to clear index: ' + (response?.error || 'Unknown error'); statusEl.style.color = '#ef4444'; } } }); } catch (error) { console.error('Error sending clear index message:', error); if (clearBtn) clearBtn.disabled = false; if (statusEl) { statusEl.textContent = '❌ Failed to clear index: ' + error; statusEl.style.color = '#ef4444'; } } } function showRecentRepos() { const recentReposContainer = document.getElementById('recent-repos'); const recentReposList = document.getElementById('recent-repos-list'); if (!recentReposContainer || !recentReposList) return; recentReposContainer.style.display = 'block'; recentReposList.innerHTML = 'Loading...'; chrome.runtime.sendMessage({ action: 'getIndexedRepos' }, (response) => { if (chrome.runtime.lastError) { console.error('Error getting indexed repos:', chrome.runtime.lastError); recentReposList.innerHTML = 'Error loading recent repositories'; return; } if (response && response.success) { displayRecentRepos(response.repos || []); } else { recentReposList.innerHTML = 'Error loading recent repositories: ' + (response?.error || 'Unknown error'); } }); } function hideRecentRepos() { const recentReposContainer = document.getElementById('recent-repos'); if (recentReposContainer) { recentReposContainer.style.display = 'none'; } } function displayRecentRepos(repos: any[]) { const recentReposList = document.getElementById('recent-repos-list'); if (!recentReposList) return; if (!repos || repos.length === 0) { recentReposList.innerHTML = '<div class="color-fg-muted text-center py-3">No recently indexed repositories</div>'; return; } const list = document.createElement('ul'); list.className = 'list-style-none'; repos.forEach(repo => { const item = document.createElement('li'); item.className = 'border-bottom py-2'; const indexedDate = new Date(repo.indexedAt).toLocaleDateString(); const lastSearchDate = repo.lastSearchAt ? new Date(repo.lastSearchAt).toLocaleDateString() : 'Never'; item.innerHTML = ` <div class="d-flex flex-items-center justify-content-between"> <div class="flex-auto"> <a href="https://github.com/${repo.id}" class="Link--primary font-weight-bold"> ${repo.id} </a> <div class="color-fg-muted text-small"> Indexed: ${indexedDate} • ${repo.totalFiles} files, ${repo.totalChunks} chunks </div> <div class="color-fg-muted text-small"> Last search: ${lastSearchDate} </div> </div> <button class="btn btn-sm go-to-repo-btn" data-repo-url="https://github.com/${repo.id}"> Visit </button> </div> `; list.appendChild(item); }); recentReposList.innerHTML = ''; recentReposList.appendChild(list); list.querySelectorAll('.go-to-repo-btn').forEach(button => { button.addEventListener('click', (e) => { const url = (e.target as HTMLElement).getAttribute('data-repo-url'); if (url) { window.location.href = url; } }); }); } // Listen for messages from background script chrome.runtime.onMessage.addListener((message, sender, sendResponse) => { const statusEl = document.getElementById('indexing-status'); if (message.action === 'indexProgress') { if (statusEl) { statusEl.textContent = `🔄 ${message.progress}`; statusEl.style.color = '#3b82f6'; } } else if (message.action === 'indexComplete') { if (statusEl) { statusEl.textContent = `✅ Indexing complete! ${message.stats.indexedFiles} files, ${message.stats.totalChunks} chunks`; statusEl.style.color = '#22c55e'; } updateUIState(true); // Auto-refresh index status after a short delay to get updated info setTimeout(() => { checkIndexStatus(); }, 1000); } else if (message.action === 'indexError') { if (statusEl) { statusEl.textContent = `❌ Indexing failed: ${message.error}`; statusEl.style.color = '#ef4444'; } updateUIState(false); } }); // Inject UI when the page is loaded if (document.readyState === 'loading') { document.addEventListener('DOMContentLoaded', injectUI); } else { injectUI(); } // Handle dynamic page loads in GitHub (SPA navigation) let lastUrl = window.location.href; new MutationObserver((mutations, observer) => { const currentUrl = window.location.href; if (currentUrl !== lastUrl) { lastUrl = currentUrl; // URL changed, re-inject UI and check index status setTimeout(() => { injectUI(); }, 100); // Small delay to ensure DOM is updated } else { // Just check if UI needs to be injected (for dynamic content) injectUI(); } }).observe(document.body, { childList: true, subtree: true }); ``` -------------------------------------------------------------------------------- /packages/core/src/vectordb/milvus-vectordb.ts: -------------------------------------------------------------------------------- ```typescript import { MilvusClient, DataType, MetricType, FunctionType, LoadState } from '@zilliz/milvus2-sdk-node'; import { VectorDocument, SearchOptions, VectorSearchResult, VectorDatabase, HybridSearchRequest, HybridSearchOptions, HybridSearchResult, } from './types'; import { ClusterManager } from './zilliz-utils'; export interface MilvusConfig { address?: string; token?: string; username?: string; password?: string; ssl?: boolean; } export class MilvusVectorDatabase implements VectorDatabase { protected config: MilvusConfig; private client: MilvusClient | null = null; protected initializationPromise: Promise<void>; constructor(config: MilvusConfig) { this.config = config; // Start initialization asynchronously without waiting this.initializationPromise = this.initialize(); } private async initialize(): Promise<void> { const resolvedAddress = await this.resolveAddress(); await this.initializeClient(resolvedAddress); } private async initializeClient(address: string): Promise<void> { const milvusConfig = this.config as MilvusConfig; console.log('🔌 Connecting to vector database at: ', address); this.client = new MilvusClient({ address: address, username: milvusConfig.username, password: milvusConfig.password, token: milvusConfig.token, ssl: milvusConfig.ssl || false, }); } /** * Resolve address from config or token * Common logic for both gRPC and REST implementations */ protected async resolveAddress(): Promise<string> { let finalConfig = { ...this.config }; // If address is not provided, get it using token if (!finalConfig.address && finalConfig.token) { finalConfig.address = await ClusterManager.getAddressFromToken(finalConfig.token); } if (!finalConfig.address) { throw new Error('Address is required and could not be resolved from token'); } return finalConfig.address; } /** * Ensure initialization is complete before method execution */ protected async ensureInitialized(): Promise<void> { await this.initializationPromise; if (!this.client) { throw new Error('Client not initialized'); } } /** * Ensure collection is loaded before search/query operations */ protected async ensureLoaded(collectionName: string): Promise<void> { if (!this.client) { throw new Error('MilvusClient is not initialized. Call ensureInitialized() first.'); } try { // Check if collection is loaded const result = await this.client.getLoadState({ collection_name: collectionName }); if (result.state !== LoadState.LoadStateLoaded) { console.log(`[MilvusDB] 🔄 Loading collection '${collectionName}' to memory...`); await this.client.loadCollection({ collection_name: collectionName, }); } } catch (error) { console.error(`[MilvusDB] ❌ Failed to ensure collection '${collectionName}' is loaded:`, error); throw error; } } /** * Wait for an index to be ready before proceeding * Polls index build progress with exponential backoff up to 60 seconds */ protected async waitForIndexReady( collectionName: string, fieldName: string, maxWaitTime: number = 60000, // 60 seconds initialInterval: number = 500, // 500ms maxInterval: number = 5000, // 5 seconds backoffMultiplier: number = 1.5 ): Promise<void> { if (!this.client) { throw new Error('MilvusClient is not initialized. Call ensureInitialized() first.'); } let interval = initialInterval; const startTime = Date.now(); console.log(`[MilvusDB] ⏳ Waiting for index on field '${fieldName}' in collection '${collectionName}' to be ready...`); while (Date.now() - startTime < maxWaitTime) { try { const indexBuildProgress = await this.client.getIndexBuildProgress({ collection_name: collectionName, field_name: fieldName }); // Debug logging to understand the progress console.log(`[MilvusDB] 📊 Index build progress for '${fieldName}': indexed_rows=${indexBuildProgress.indexed_rows}, total_rows=${indexBuildProgress.total_rows}`); console.log(`[MilvusDB] 📊 Full response:`, JSON.stringify(indexBuildProgress)); // Check if index building is complete if (indexBuildProgress.indexed_rows === indexBuildProgress.total_rows) { console.log(`[MilvusDB] ✅ Index on field '${fieldName}' is ready! (${indexBuildProgress.indexed_rows}/${indexBuildProgress.total_rows} rows indexed)`); return; } // Check for error status if (indexBuildProgress.status && indexBuildProgress.status.error_code !== 'Success') { // Handle known issue with older Milvus versions where sparse vector index progress returns incorrect error if (indexBuildProgress.status.reason && indexBuildProgress.status.reason.includes('index duplicates[indexName=]')) { console.log(`[MilvusDB] ⚠️ Index progress check returned known older Milvus issue: ${indexBuildProgress.status.reason}`); console.log(`[MilvusDB] ⚠️ This is a known issue with older Milvus versions - treating as index ready`); return; // Treat as ready since this is a false error } throw new Error(`Index creation failed for field '${fieldName}' in collection '${collectionName}': ${indexBuildProgress.status.reason}`); } console.log(`[MilvusDB] 📊 Index building in progress: ${indexBuildProgress.indexed_rows}/${indexBuildProgress.total_rows} rows indexed`); // Wait with exponential backoff await new Promise(resolve => setTimeout(resolve, interval)); interval = Math.min(interval * backoffMultiplier, maxInterval); } catch (error) { console.error(`[MilvusDB] ❌ Error checking index build progress for field '${fieldName}':`, error); throw error; } } throw new Error(`Timeout waiting for index on field '${fieldName}' in collection '${collectionName}' to be ready after ${maxWaitTime}ms`); } /** * Load collection with retry logic and exponential backoff * Retries up to 5 times with exponential backoff */ protected async loadCollectionWithRetry( collectionName: string, maxRetries: number = 5, initialInterval: number = 1000, // 1 second backoffMultiplier: number = 2 ): Promise<void> { if (!this.client) { throw new Error('MilvusClient is not initialized. Call ensureInitialized() first.'); } let attempt = 1; let interval = initialInterval; while (attempt <= maxRetries) { try { console.log(`[MilvusDB] 🔄 Loading collection '${collectionName}' to memory (attempt ${attempt}/${maxRetries})...`); await this.client.loadCollection({ collection_name: collectionName, }); console.log(`[MilvusDB] ✅ Collection '${collectionName}' loaded successfully!`); return; } catch (error) { console.error(`[MilvusDB] ❌ Failed to load collection '${collectionName}' on attempt ${attempt}:`, error); if (attempt === maxRetries) { throw new Error(`Failed to load collection '${collectionName}' after ${maxRetries} attempts: ${error}`); } // Wait with exponential backoff before retry console.log(`[MilvusDB] ⏳ Retrying collection load in ${interval}ms...`); await new Promise(resolve => setTimeout(resolve, interval)); interval *= backoffMultiplier; attempt++; } } } async createCollection(collectionName: string, dimension: number, description?: string): Promise<void> { await this.ensureInitialized(); console.log('Beginning collection creation:', collectionName); console.log('Collection dimension:', dimension); const schema = [ { name: 'id', description: 'Document ID', data_type: DataType.VarChar, max_length: 512, is_primary_key: true, }, { name: 'vector', description: 'Embedding vector', data_type: DataType.FloatVector, dim: dimension, }, { name: 'content', description: 'Document content', data_type: DataType.VarChar, max_length: 65535, }, { name: 'relativePath', description: 'Relative path to the codebase', data_type: DataType.VarChar, max_length: 1024, }, { name: 'startLine', description: 'Start line number of the chunk', data_type: DataType.Int64, }, { name: 'endLine', description: 'End line number of the chunk', data_type: DataType.Int64, }, { name: 'fileExtension', description: 'File extension', data_type: DataType.VarChar, max_length: 32, }, { name: 'metadata', description: 'Additional document metadata as JSON string', data_type: DataType.VarChar, max_length: 65535, }, ]; const createCollectionParams = { collection_name: collectionName, description: description || `Claude Context collection: ${collectionName}`, fields: schema, }; if (!this.client) { throw new Error('MilvusClient is not initialized. Call ensureInitialized() first.'); } await this.client.createCollection(createCollectionParams); // Create index const indexParams = { collection_name: collectionName, field_name: 'vector', index_name: 'vector_index', index_type: 'AUTOINDEX', metric_type: MetricType.COSINE, }; console.log(`[MilvusDB] 🔧 Creating index for field 'vector' in collection '${collectionName}'...`); await this.client.createIndex(indexParams); // Wait for index to be ready before loading collection await this.waitForIndexReady(collectionName, 'vector'); // Load collection to memory with retry logic await this.loadCollectionWithRetry(collectionName); // Verify collection is created correctly await this.client.describeCollection({ collection_name: collectionName, }); } async dropCollection(collectionName: string): Promise<void> { await this.ensureInitialized(); if (!this.client) { throw new Error('MilvusClient is not initialized after ensureInitialized().'); } await this.client.dropCollection({ collection_name: collectionName, }); } async hasCollection(collectionName: string): Promise<boolean> { await this.ensureInitialized(); if (!this.client) { throw new Error('MilvusClient is not initialized after ensureInitialized().'); } const result = await this.client.hasCollection({ collection_name: collectionName, }); return Boolean(result.value); } async listCollections(): Promise<string[]> { await this.ensureInitialized(); if (!this.client) { throw new Error('MilvusClient is not initialized after ensureInitialized().'); } const result = await this.client.showCollections(); // Handle the response format - cast to any to avoid type errors const collections = (result as any).collection_names || (result as any).collections || []; return Array.isArray(collections) ? collections : []; } async insert(collectionName: string, documents: VectorDocument[]): Promise<void> { await this.ensureInitialized(); await this.ensureLoaded(collectionName); if (!this.client) { throw new Error('MilvusClient is not initialized after ensureInitialized().'); } console.log('Inserting documents into collection:', collectionName); const data = documents.map(doc => ({ id: doc.id, vector: doc.vector, content: doc.content, relativePath: doc.relativePath, startLine: doc.startLine, endLine: doc.endLine, fileExtension: doc.fileExtension, metadata: JSON.stringify(doc.metadata), })); await this.client.insert({ collection_name: collectionName, data: data, }); } async search(collectionName: string, queryVector: number[], options?: SearchOptions): Promise<VectorSearchResult[]> { await this.ensureInitialized(); await this.ensureLoaded(collectionName); if (!this.client) { throw new Error('MilvusClient is not initialized after ensureInitialized().'); } const searchParams: any = { collection_name: collectionName, data: [queryVector], limit: options?.topK || 10, output_fields: ['id', 'content', 'relativePath', 'startLine', 'endLine', 'fileExtension', 'metadata'], }; // Apply boolean expression filter if provided (e.g., fileExtension in [".ts",".py"]) if (options?.filterExpr && options.filterExpr.trim().length > 0) { searchParams.expr = options.filterExpr; } const searchResult = await this.client.search(searchParams); if (!searchResult.results || searchResult.results.length === 0) { return []; } return searchResult.results.map((result: any) => ({ document: { id: result.id, vector: queryVector, content: result.content, relativePath: result.relativePath, startLine: result.startLine, endLine: result.endLine, fileExtension: result.fileExtension, metadata: JSON.parse(result.metadata || '{}'), }, score: result.score, })); } async delete(collectionName: string, ids: string[]): Promise<void> { await this.ensureInitialized(); await this.ensureLoaded(collectionName); if (!this.client) { throw new Error('MilvusClient is not initialized after ensureInitialized().'); } await this.client.delete({ collection_name: collectionName, filter: `id in [${ids.map(id => `"${id}"`).join(', ')}]`, }); } async query(collectionName: string, filter: string, outputFields: string[], limit?: number): Promise<Record<string, any>[]> { await this.ensureInitialized(); await this.ensureLoaded(collectionName); if (!this.client) { throw new Error('MilvusClient is not initialized after ensureInitialized().'); } try { const queryParams: any = { collection_name: collectionName, filter: filter, output_fields: outputFields, }; // Add limit if provided, or default for empty filter expressions if (limit !== undefined) { queryParams.limit = limit; } else if (filter === '' || filter.trim() === '') { // Milvus requires limit when using empty expressions queryParams.limit = 16384; // Default limit for empty filters } const result = await this.client.query(queryParams); if (result.status.error_code !== 'Success') { throw new Error(`Failed to query Milvus: ${result.status.reason}`); } return result.data || []; } catch (error) { console.error(`[MilvusDB] ❌ Failed to query collection '${collectionName}':`, error); throw error; } } async createHybridCollection(collectionName: string, dimension: number, description?: string): Promise<void> { await this.ensureInitialized(); console.log('Beginning hybrid collection creation:', collectionName); console.log('Collection dimension:', dimension); const schema = [ { name: 'id', description: 'Document ID', data_type: DataType.VarChar, max_length: 512, is_primary_key: true, }, { name: 'content', description: 'Full text content for BM25 and storage', data_type: DataType.VarChar, max_length: 65535, enable_analyzer: true, }, { name: 'vector', description: 'Dense vector embedding', data_type: DataType.FloatVector, dim: dimension, }, { name: 'sparse_vector', description: 'Sparse vector embedding from BM25', data_type: DataType.SparseFloatVector, }, { name: 'relativePath', description: 'Relative path to the codebase', data_type: DataType.VarChar, max_length: 1024, }, { name: 'startLine', description: 'Start line number of the chunk', data_type: DataType.Int64, }, { name: 'endLine', description: 'End line number of the chunk', data_type: DataType.Int64, }, { name: 'fileExtension', description: 'File extension', data_type: DataType.VarChar, max_length: 32, }, { name: 'metadata', description: 'Additional document metadata as JSON string', data_type: DataType.VarChar, max_length: 65535, }, ]; // Add BM25 function const functions = [ { name: "content_bm25_emb", description: "content bm25 function", type: FunctionType.BM25, input_field_names: ["content"], output_field_names: ["sparse_vector"], params: {}, }, ]; const createCollectionParams = { collection_name: collectionName, description: description || `Hybrid code context collection: ${collectionName}`, fields: schema, functions: functions, }; if (!this.client) { throw new Error('MilvusClient is not initialized. Call ensureInitialized() first.'); } await this.client.createCollection(createCollectionParams); // Create indexes for both vector fields // Index for dense vector const denseIndexParams = { collection_name: collectionName, field_name: 'vector', index_name: 'vector_index', index_type: 'AUTOINDEX', metric_type: MetricType.COSINE, }; console.log(`[MilvusDB] 🔧 Creating dense vector index for field 'vector' in collection '${collectionName}'...`); await this.client.createIndex(denseIndexParams); // Wait for dense vector index to be ready await this.waitForIndexReady(collectionName, 'vector'); // Index for sparse vector const sparseIndexParams = { collection_name: collectionName, field_name: 'sparse_vector', index_name: 'sparse_vector_index', index_type: 'SPARSE_INVERTED_INDEX', metric_type: MetricType.BM25, }; console.log(`[MilvusDB] 🔧 Creating sparse vector index for field 'sparse_vector' in collection '${collectionName}'...`); await this.client.createIndex(sparseIndexParams); // Wait for sparse vector index to be ready await this.waitForIndexReady(collectionName, 'sparse_vector'); // Load collection to memory with retry logic await this.loadCollectionWithRetry(collectionName); // Verify collection is created correctly await this.client.describeCollection({ collection_name: collectionName, }); } async insertHybrid(collectionName: string, documents: VectorDocument[]): Promise<void> { await this.ensureInitialized(); await this.ensureLoaded(collectionName); if (!this.client) { throw new Error('MilvusClient is not initialized after ensureInitialized().'); } const data = documents.map(doc => ({ id: doc.id, content: doc.content, vector: doc.vector, relativePath: doc.relativePath, startLine: doc.startLine, endLine: doc.endLine, fileExtension: doc.fileExtension, metadata: JSON.stringify(doc.metadata), })); await this.client.insert({ collection_name: collectionName, data: data, }); } async hybridSearch(collectionName: string, searchRequests: HybridSearchRequest[], options?: HybridSearchOptions): Promise<HybridSearchResult[]> { await this.ensureInitialized(); await this.ensureLoaded(collectionName); if (!this.client) { throw new Error('MilvusClient is not initialized after ensureInitialized().'); } try { // Generate OpenAI embedding for the first search request (dense) console.log(`[MilvusDB] 🔍 Preparing hybrid search for collection: ${collectionName}`); // Prepare search requests in the correct Milvus format const search_param_1 = { data: Array.isArray(searchRequests[0].data) ? searchRequests[0].data : [searchRequests[0].data], anns_field: searchRequests[0].anns_field, // "vector" param: searchRequests[0].param, // {"nprobe": 10} limit: searchRequests[0].limit }; const search_param_2 = { data: searchRequests[1].data, // query text for sparse search anns_field: searchRequests[1].anns_field, // "sparse_vector" param: searchRequests[1].param, // {"drop_ratio_search": 0.2} limit: searchRequests[1].limit }; // Set rerank strategy to RRF (100) by default const rerank_strategy = { strategy: "rrf", params: { k: 100 } }; console.log(`[MilvusDB] 🔍 Dense search params:`, JSON.stringify({ anns_field: search_param_1.anns_field, param: search_param_1.param, limit: search_param_1.limit, data_length: Array.isArray(search_param_1.data[0]) ? search_param_1.data[0].length : 'N/A' }, null, 2)); console.log(`[MilvusDB] 🔍 Sparse search params:`, JSON.stringify({ anns_field: search_param_2.anns_field, param: search_param_2.param, limit: search_param_2.limit, query_text: typeof search_param_2.data === 'string' ? search_param_2.data.substring(0, 50) + '...' : 'N/A' }, null, 2)); console.log(`[MilvusDB] 🔍 Rerank strategy:`, JSON.stringify(rerank_strategy, null, 2)); // Execute hybrid search using the correct client.search format const searchParams: any = { collection_name: collectionName, data: [search_param_1, search_param_2], limit: options?.limit || searchRequests[0]?.limit || 10, rerank: rerank_strategy, output_fields: ['id', 'content', 'relativePath', 'startLine', 'endLine', 'fileExtension', 'metadata'], }; if (options?.filterExpr && options.filterExpr.trim().length > 0) { searchParams.expr = options.filterExpr; } console.log(`[MilvusDB] 🔍 Complete search request:`, JSON.stringify({ collection_name: searchParams.collection_name, data_count: searchParams.data.length, limit: searchParams.limit, rerank: searchParams.rerank, output_fields: searchParams.output_fields, expr: searchParams.expr }, null, 2)); const searchResult = await this.client.search(searchParams); console.log(`[MilvusDB] 🔍 Search executed, processing results...`); if (!searchResult.results || searchResult.results.length === 0) { console.log(`[MilvusDB] ⚠️ No results returned from Milvus search`); return []; } console.log(`[MilvusDB] ✅ Found ${searchResult.results.length} results from hybrid search`); // Transform results to HybridSearchResult format return searchResult.results.map((result: any) => ({ document: { id: result.id, content: result.content, vector: [], sparse_vector: [], relativePath: result.relativePath, startLine: result.startLine, endLine: result.endLine, fileExtension: result.fileExtension, metadata: JSON.parse(result.metadata || '{}'), }, score: result.score, })); } catch (error) { console.error(`[MilvusDB] ❌ Failed to perform hybrid search on collection '${collectionName}':`, error); throw error; } } /** * Wrapper method to handle collection creation with limit detection for gRPC client * Returns true if collection can be created, false if limit exceeded */ async checkCollectionLimit(): Promise<boolean> { if (!this.client) { throw new Error('MilvusClient is not initialized. Call ensureInitialized() first.'); } const collectionName = `dummy_collection_${Date.now()}`; const createCollectionParams = { collection_name: collectionName, description: 'Test collection for limit check', fields: [ { name: 'id', data_type: DataType.VarChar, max_length: 512, is_primary_key: true, }, { name: 'vector', data_type: DataType.FloatVector, dim: 128, } ] }; try { await this.client.createCollection(createCollectionParams); // Immediately drop the collection after successful creation if (await this.client.hasCollection({ collection_name: collectionName })) { await this.client.dropCollection({ collection_name: collectionName, }); } return true; } catch (error: any) { // Check if the error message contains the collection limit exceeded pattern const errorMessage = error.message || error.toString() || ''; if (/exceeded the limit number of collections/i.test(errorMessage)) { // Return false for collection limit exceeded return false; } // Re-throw other errors as-is throw error; } } } ``` -------------------------------------------------------------------------------- /packages/vscode-extension/src/webview/scripts/semanticSearch.js: -------------------------------------------------------------------------------- ```javascript /** * Semantic Search Webview Controller * Handles all interactions between the webview and the VSCode extension */ class SemanticSearchController { constructor() { this.vscode = acquireVsCodeApi(); this.initializeElements(); this.bindEvents(); this.initializeDefaultProviders(); // Ensure providers are available this.checkIndexStatus(); // Request config immediately to get proper provider data setTimeout(() => { this.requestConfig(); }, 100); } /** * Initialize DOM elements */ initializeElements() { // Search view elements this.searchInput = document.getElementById('searchInput'); this.extFilterInput = document.getElementById('extFilterInput'); this.searchButton = document.getElementById('searchButton'); this.indexButton = document.getElementById('indexButton'); this.settingsButton = document.getElementById('settingsButton'); this.resultsContainer = document.getElementById('resultsContainer'); this.resultsHeader = document.getElementById('resultsHeader'); this.resultsList = document.getElementById('resultsList'); // View elements this.searchView = document.getElementById('searchView'); this.settingsView = document.getElementById('settingsView'); this.backButton = document.getElementById('backButton'); // Settings elements this.providerSelect = document.getElementById('provider'); this.dynamicFields = document.getElementById('dynamicFields'); this.splitterTypeSelect = document.getElementById('splitterType'); this.chunkSizeInput = document.getElementById('chunkSize'); this.chunkOverlapInput = document.getElementById('chunkOverlap'); this.milvusAddressInput = document.getElementById('milvusAddress'); this.milvusTokenInput = document.getElementById('milvusToken'); this.testBtn = document.getElementById('testBtn'); this.saveBtn = document.getElementById('saveBtn'); this.statusDiv = document.getElementById('status'); this.configForm = document.getElementById('configForm'); // Current config state this.currentConfig = null; this.supportedProviders = {}; this.dynamicFieldElements = new Map(); // Store dynamic field elements } /** * Bind event listeners */ bindEvents() { this.searchButton.addEventListener('click', () => this.performSearch()); this.indexButton.addEventListener('click', () => this.performIndex()); this.settingsButton.addEventListener('click', () => this.showSettingsView()); this.backButton.addEventListener('click', () => this.showSearchView()); this.searchInput.addEventListener('keypress', (e) => { if (e.key === 'Enter') { this.performSearch(); } }); // Settings event listeners this.providerSelect.addEventListener('change', () => this.handleProviderChange()); this.splitterTypeSelect.addEventListener('change', () => this.validateForm()); this.chunkSizeInput.addEventListener('input', () => this.validateForm()); this.chunkOverlapInput.addEventListener('input', () => this.validateForm()); this.milvusAddressInput.addEventListener('input', () => this.validateForm()); this.milvusTokenInput.addEventListener('input', () => this.validateForm()); this.testBtn.addEventListener('click', () => this.handleTestConnection()); this.configForm.addEventListener('submit', (e) => this.handleFormSubmit(e)); // Handle messages from extension window.addEventListener('message', (event) => this.handleMessage(event)); // Check index status on load window.addEventListener('load', () => this.checkIndexStatus()); } /** * Perform search operation */ performSearch() { const text = this.searchInput.value.trim(); const extFilterRaw = (this.extFilterInput?.value || '').trim(); const extensions = extFilterRaw ? extFilterRaw.split(',').map(e => e.trim()).filter(Boolean) : []; if (text && !this.searchButton.disabled) { this.vscode.postMessage({ command: 'search', text: text, fileExtensions: extensions }); } } /** * Perform index operation */ performIndex() { this.indexButton.textContent = 'Indexing...'; this.indexButton.disabled = true; this.vscode.postMessage({ command: 'index' }); } /** * Check index status */ checkIndexStatus() { this.vscode.postMessage({ command: 'checkIndex' }); } /** * Show settings view */ showSettingsView() { this.searchView.style.display = 'none'; this.settingsView.style.display = 'block'; // Add default providers if not already loaded this.initializeDefaultProviders(); this.requestConfig(); } /** * Show search view */ showSearchView() { this.settingsView.style.display = 'none'; this.searchView.style.display = 'block'; } /** * Request config from extension */ requestConfig() { this.vscode.postMessage({ command: 'getConfig' }); } /** * Initialize default providers to ensure they show up even if config loading fails */ initializeDefaultProviders() { // Only initialize if providers haven't been loaded yet if (this.providerSelect.children.length <= 1) { // Clear existing options and add placeholder this.providerSelect.innerHTML = '<option value="">Please select...</option>'; // Add basic provider options (models will be loaded from backend) const defaultProviders = [ { value: 'OpenAI', text: 'OpenAI' }, { value: 'VoyageAI', text: 'VoyageAI' }, { value: 'Ollama', text: 'Ollama' }, { value: 'Gemini', text: 'Gemini' } ]; defaultProviders.forEach(provider => { const option = document.createElement('option'); option.value = provider.value; option.textContent = provider.text; this.providerSelect.appendChild(option); }); } } /** * Update search button state based on index availability * @param {boolean} hasIndex - Whether index exists */ updateSearchButtonState(hasIndex) { this.searchButton.disabled = !hasIndex; if (hasIndex) { this.searchButton.title = 'Search the indexed codebase'; } else { this.searchButton.title = 'Please click "Index Current Codebase" first to create an index'; } } /** * Display search results * @param {Array} results - Search results * @param {string} query - Search query */ showResults(results, query) { if (results.length === 0) { this.resultsHeader.textContent = `No results found for "${query}"`; this.resultsList.innerHTML = '<div class="no-results">No matches found</div>'; } else { this.resultsHeader.textContent = `${results.length} result${results.length === 1 ? '' : 's'} for "${query}"`; this.resultsList.innerHTML = results.map((result, index) => this.createResultHTML(result, index + 1)).join(''); } this.resultsContainer.style.display = 'block'; } /** * Create HTML for a single result item * @param {Object} result - Result object * @param {number} rank - Result rank (1-indexed) * @returns {string} HTML string */ createResultHTML(result, rank) { return ` <div class="result-item" onclick="searchController.openFile('${result.relativePath}', ${result.line}, ${result.startLine}, ${result.endLine})"> <div class="result-file"> <span class="result-filename">${result.file}</span> <span class="result-line">Lines ${result.startLine || result.line}-${result.endLine || result.line}</span> </div> <div class="result-preview">${result.preview}</div> <div class="result-context">${result.context}</div> <div class="result-rank" style="margin-top: 8px; text-align: right;">Rank: ${rank}</div> </div> `; } /** * Open file in VSCode editor * @param {string} relativePath - File relative path * @param {number} line - Line number * @param {number} startLine - Start line * @param {number} endLine - End line */ openFile(relativePath, line, startLine, endLine) { this.vscode.postMessage({ command: 'openFile', relativePath: relativePath, line: line, startLine: startLine, endLine: endLine }); } /** * Handle messages from the extension * @param {MessageEvent} event - Message event */ handleMessage(event) { const message = event.data; switch (message.command) { case 'showResults': this.showResults(message.results, message.query); break; case 'indexComplete': this.indexButton.textContent = 'Index Current Codebase'; this.indexButton.disabled = false; break; case 'updateIndexStatus': this.updateSearchButtonState(message.hasIndex); break; case 'configData': this.loadConfig(message.config, message.supportedProviders, message.milvusConfig, message.splitterConfig); break; case 'saveResult': this.saveBtn.disabled = false; this.saveBtn.textContent = 'Save Configuration'; if (message.success) { this.showStatus(message.message, 'success'); // Auto return to search view after successful save setTimeout(() => this.showSearchView(), 1500); } else { this.showStatus(message.message, 'error'); } break; case 'testResult': this.testBtn.disabled = false; this.testBtn.textContent = 'Test Connection'; if (message.success) { this.showStatus(message.message, 'success'); } else { this.showStatus(message.message, 'error'); } break; default: console.warn('Unknown message command:', message.command); } } // Settings methods handleProviderChange() { const selectedProvider = this.providerSelect.value; // Clear existing dynamic fields this.clearDynamicFields(); if (selectedProvider && this.supportedProviders[selectedProvider]) { this.generateDynamicFields(selectedProvider); } else if (selectedProvider) { // If we have a selected provider but no supportedProviders data, request config this.requestConfig(); } this.validateForm(); } /** * Clear all dynamic form fields */ clearDynamicFields() { this.dynamicFields.innerHTML = ''; this.dynamicFieldElements.clear(); } /** * Generate dynamic form fields based on provider configuration */ generateDynamicFields(provider) { const providerInfo = this.supportedProviders[provider]; if (!providerInfo) { return; } const requiredFields = providerInfo.requiredFields || []; const optionalFields = providerInfo.optionalFields || []; const allFields = [...requiredFields, ...optionalFields]; if (allFields.length === 0) { return; } allFields.forEach((field) => { try { const fieldElement = this.createFormField(field, providerInfo); this.dynamicFields.appendChild(fieldElement.container); this.dynamicFieldElements.set(field.name, fieldElement); // Add event listeners if (fieldElement.input) { fieldElement.input.addEventListener('input', () => this.validateForm()); fieldElement.input.addEventListener('change', () => this.validateForm()); } // Add event listeners for select-with-custom model inputs if (fieldElement.selectElement) { fieldElement.selectElement.addEventListener('change', () => this.validateForm()); } if (fieldElement.customInput) { fieldElement.customInput.addEventListener('input', () => this.validateForm()); } } catch (error) { console.error(`Failed to create field ${field.name}:`, error); } }); // Load current values if available this.loadCurrentValues(provider); } /** * Create a form field element based on field definition */ createFormField(field, providerInfo) { const container = document.createElement('div'); container.className = 'form-group'; const label = document.createElement('label'); label.textContent = field.description; label.setAttribute('for', field.name); container.appendChild(label); let input; if (field.name === 'model' && field.inputType === 'select') { // Special handling for model field with select type - create dropdown input = document.createElement('select'); input.id = field.name; input.required = field.required || false; // Add default option const defaultOption = document.createElement('option'); defaultOption.value = ''; defaultOption.textContent = 'Please select...'; input.appendChild(defaultOption); // Populate with models const models = providerInfo.models || {}; Object.entries(models).forEach(([modelId, modelInfo]) => { const option = document.createElement('option'); option.value = modelId; option.textContent = modelId; // Keep description as tooltip if available if (modelInfo && modelInfo.description) { option.title = modelInfo.description; } input.appendChild(option); }); } else if (field.name === 'model' && field.inputType === 'select-with-custom') { // Create a container for both select and custom input const inputContainer = document.createElement('div'); inputContainer.className = 'model-input-container'; // Create select dropdown const selectElement = document.createElement('select'); selectElement.id = field.name + '_select'; selectElement.className = 'model-select'; // Add default option const defaultOption = document.createElement('option'); defaultOption.value = ''; defaultOption.textContent = 'Please select...'; selectElement.appendChild(defaultOption); // Add custom option const customOption = document.createElement('option'); customOption.value = 'custom'; customOption.textContent = 'Custom model...'; selectElement.appendChild(customOption); // Populate with predefined models const models = providerInfo.models || {}; Object.entries(models).forEach(([modelId, modelInfo]) => { const option = document.createElement('option'); option.value = modelId; option.textContent = modelId; if (modelInfo && modelInfo.description) { option.title = modelInfo.description; } selectElement.appendChild(option); }); // Create custom input field (initially hidden) const customInput = document.createElement('input'); customInput.type = 'text'; customInput.id = field.name + '_custom'; customInput.className = 'model-custom-input'; customInput.placeholder = 'Enter custom model name...'; customInput.style.display = 'none'; customInput.style.marginTop = '8px'; // Create the main input that will hold the final value input = document.createElement('input'); input.type = 'hidden'; input.id = field.name; input.required = field.required || false; // Add event listeners selectElement.addEventListener('change', (e) => { if (e.target.value === 'custom') { customInput.style.display = 'block'; customInput.required = field.required || false; customInput.focus(); input.value = customInput.value; } else { customInput.style.display = 'none'; customInput.required = false; input.value = e.target.value; } }); customInput.addEventListener('input', (e) => { input.value = e.target.value; }); inputContainer.appendChild(selectElement); inputContainer.appendChild(customInput); inputContainer.appendChild(input); container.appendChild(inputContainer); return { container, input, field, selectElement, customInput }; } else { // Create input based on inputType input = document.createElement('input'); input.id = field.name; input.required = field.required || false; switch (field.inputType) { case 'password': input.type = 'password'; break; case 'url': input.type = 'url'; break; case 'text': default: input.type = 'text'; break; } if (field.placeholder) { input.placeholder = field.placeholder; } } container.appendChild(input); return { container, input, field }; } /** * Load current values into dynamic fields */ loadCurrentValues(provider) { if (this.currentConfig && this.currentConfig.provider === provider && this.currentConfig.config) { this.dynamicFieldElements.forEach((fieldElement, fieldName) => { const value = this.currentConfig.config[fieldName]; if (value !== undefined && fieldElement.input) { // Handle select-with-custom model fields if (fieldElement.selectElement && fieldElement.customInput) { // Check if the value matches any predefined option const selectElement = fieldElement.selectElement; let foundMatch = false; for (let option of selectElement.options) { if (option.value === value) { selectElement.value = value; fieldElement.input.value = value; foundMatch = true; break; } } // If no match found, use custom input if (!foundMatch && value) { selectElement.value = 'custom'; fieldElement.customInput.value = value; fieldElement.customInput.style.display = 'block'; fieldElement.customInput.required = fieldElement.field.required || false; fieldElement.input.value = value; } } else { // Regular input field fieldElement.input.value = value; } } }); } } validateForm() { const hasProvider = !!this.providerSelect.value; const hasMilvusAddress = !!this.milvusAddressInput.value.trim(); // Check all required dynamic fields let hasAllRequiredFields = true; if (hasProvider && this.supportedProviders[this.providerSelect.value]) { const providerInfo = this.supportedProviders[this.providerSelect.value]; for (const field of providerInfo.requiredFields) { const fieldElement = this.dynamicFieldElements.get(field.name); if (!fieldElement || !fieldElement.input.value.trim()) { hasAllRequiredFields = false; break; } } } else { hasAllRequiredFields = false; } // Test button only needs embedding config const canTestEmbedding = hasProvider && hasAllRequiredFields; // Save button needs all config const canSave = hasProvider && hasAllRequiredFields && hasMilvusAddress; this.testBtn.disabled = !canTestEmbedding; this.saveBtn.disabled = !canSave; } handleTestConnection() { const provider = this.providerSelect.value; if (!provider) { this.showStatus('Please select a provider first', 'error'); return; } // Collect config from dynamic fields const config = this.collectDynamicFieldValues(); if (!config) { this.showStatus('Please complete all required fields', 'error'); return; } const embeddingConfig = { provider: provider, config: config }; this.showStatus('Testing Embedding connection...', 'info'); this.testBtn.disabled = true; this.testBtn.textContent = 'Testing...'; this.vscode.postMessage({ command: 'testEmbedding', config: embeddingConfig }); } /** * Collect values from all dynamic fields */ collectDynamicFieldValues() { const provider = this.providerSelect.value; if (!provider || !this.supportedProviders[provider]) { return null; } const config = {}; const providerInfo = this.supportedProviders[provider]; // Check required fields for (const field of providerInfo.requiredFields) { const fieldElement = this.dynamicFieldElements.get(field.name); if (!fieldElement || !fieldElement.input.value.trim()) { return null; // Missing required field } config[field.name] = fieldElement.input.value.trim(); } // Add optional fields if they have values for (const field of providerInfo.optionalFields) { const fieldElement = this.dynamicFieldElements.get(field.name); if (fieldElement && fieldElement.input.value.trim()) { config[field.name] = fieldElement.input.value.trim(); } } return config; } handleFormSubmit(event) { event.preventDefault(); if (!this.validateCurrentForm()) return; const config = this.getCurrentFormConfig(); this.showStatus('Saving configuration...', 'info'); this.saveBtn.disabled = true; this.saveBtn.textContent = 'Saving...'; this.vscode.postMessage({ command: 'saveConfig', config: config }); } getCurrentFormConfig() { const provider = this.providerSelect.value; const configData = this.collectDynamicFieldValues(); if (!configData) { return null; } const milvusConfig = { address: this.milvusAddressInput.value.trim() }; // Only add token if it's provided and not empty const milvusToken = this.milvusTokenInput.value.trim(); if (milvusToken) { milvusConfig.token = milvusToken; } const splitterConfig = { type: this.splitterTypeSelect.value, chunkSize: parseInt(this.chunkSizeInput.value, 10), chunkOverlap: parseInt(this.chunkOverlapInput.value, 10) }; return { provider: provider, config: configData, milvusConfig: milvusConfig, splitterConfig: splitterConfig }; } validateCurrentForm() { const config = this.getCurrentFormConfig(); if (!config) { this.showStatus('Please complete all required fields', 'error'); return false; } if (!config.provider) { this.showStatus('Please select Embedding Provider', 'error'); return false; } if (!config.milvusConfig || !config.milvusConfig.address) { this.showStatus('Please enter Milvus Address', 'error'); return false; } // Validate splitter configuration if (!config.splitterConfig.type) { this.showStatus('Please select a splitter type', 'error'); return false; } if (config.splitterConfig.chunkSize < 100 || config.splitterConfig.chunkSize > 5000) { this.showStatus('Chunk size must be between 100 and 5000', 'error'); return false; } if (config.splitterConfig.chunkOverlap < 0 || config.splitterConfig.chunkOverlap > 1000) { this.showStatus('Chunk overlap must be between 0 and 1000', 'error'); return false; } if (config.splitterConfig.chunkOverlap >= config.splitterConfig.chunkSize) { this.showStatus('Chunk overlap must be less than chunk size', 'error'); return false; } return true; } showStatus(message, type) { this.statusDiv.textContent = message; this.statusDiv.className = `status-message ${type}`; this.statusDiv.style.display = 'block'; if (type === 'success' || type === 'info') { setTimeout(() => { this.statusDiv.style.display = 'none'; }, 3000); } } loadConfig(config, providers, milvusConfig, splitterConfig) { this.currentConfig = config; // Only update providers if we actually received them from backend if (providers && Object.keys(providers).length > 0) { this.supportedProviders = providers; // Update provider select with backend data this.providerSelect.innerHTML = '<option value="">Please select...</option>'; Object.entries(providers).forEach(([providerId, providerInfo]) => { const option = document.createElement('option'); option.value = providerId; option.textContent = providerInfo.name; this.providerSelect.appendChild(option); }); } else { // Request config again if we don't have provider data setTimeout(() => this.requestConfig(), 100); } if (config) { this.providerSelect.value = config.provider; this.handleProviderChange(); } // Load Milvus config if (milvusConfig) { this.milvusAddressInput.value = milvusConfig.address || ''; this.milvusTokenInput.value = milvusConfig.token || ''; } // Load splitter config if (splitterConfig) { this.splitterTypeSelect.value = splitterConfig.type || 'langchain'; this.chunkSizeInput.value = splitterConfig.chunkSize || 1000; this.chunkOverlapInput.value = splitterConfig.chunkOverlap || 200; } else { // Set default values this.splitterTypeSelect.value = 'langchain'; this.chunkSizeInput.value = 1000; this.chunkOverlapInput.value = 200; } this.validateForm(); } } // Initialize the controller when the DOM is loaded let searchController; if (document.readyState === 'loading') { document.addEventListener('DOMContentLoaded', () => { searchController = new SemanticSearchController(); }); } else { searchController = new SemanticSearchController(); } ``` -------------------------------------------------------------------------------- /packages/core/src/vectordb/milvus-restful-vectordb.ts: -------------------------------------------------------------------------------- ```typescript /** * Milvus RESTful Vector Database Implementation * * This RESTful implementation of Milvus vector database is specifically designed for * environments with strict dependency constraints, e.g. VSCode Extensions, Chrome Extensions, etc. * * The standard Milvus gRPC implementation requires some dependencies and modules * that are not available or restricted in these constrained environments. This RESTful * implementation uses only HTTP requests, making it compatible with them. */ import { VectorDocument, SearchOptions, VectorSearchResult, VectorDatabase, HybridSearchRequest, HybridSearchOptions, HybridSearchResult, COLLECTION_LIMIT_MESSAGE } from './types'; import { ClusterManager } from './zilliz-utils'; export interface MilvusRestfulConfig { address?: string; token?: string; username?: string; password?: string; database?: string; } /** * TODO: Change this usage to checkCollectionLimit() * Wrapper function to handle collection creation with limit detection * This is the single point where collection limit errors are detected and handled */ async function createCollectionWithLimitCheck( makeRequestFn: (endpoint: string, method: 'GET' | 'POST', data?: any) => Promise<any>, collectionSchema: any ): Promise<void> { try { await makeRequestFn('/collections/create', 'POST', collectionSchema); } catch (error: any) { // Check if the error message contains the collection limit exceeded pattern const errorMessage = error.message || error.toString() || ''; if (/exceeded the limit number of collections/i.test(errorMessage)) { // Throw the exact message string, not an Error object throw COLLECTION_LIMIT_MESSAGE; } // Re-throw other errors as-is throw error; } } /** * Milvus Vector Database implementation using REST API * This implementation is designed for environments where gRPC is not available, * such as VSCode extensions or browser environments. */ export class MilvusRestfulVectorDatabase implements VectorDatabase { protected config: MilvusRestfulConfig; private baseUrl: string | null = null; protected initializationPromise: Promise<void>; constructor(config: MilvusRestfulConfig) { this.config = config; // Start initialization asynchronously without waiting this.initializationPromise = this.initialize(); } private async initialize(): Promise<void> { const resolvedAddress = await this.resolveAddress(); await this.initializeClient(resolvedAddress); } private async initializeClient(address: string): Promise<void> { // Ensure address has protocol prefix let processedAddress = address; if (!processedAddress.startsWith('http://') && !processedAddress.startsWith('https://')) { processedAddress = `http://${processedAddress}`; } this.baseUrl = processedAddress.replace(/\/$/, '') + '/v2/vectordb'; console.log(`🔌 Connecting to Milvus REST API at: ${processedAddress}`); } /** * Resolve address from config or token * Common logic for both gRPC and REST implementations */ protected async resolveAddress(): Promise<string> { let finalConfig = { ...this.config }; // If address is not provided, get it using token if (!finalConfig.address && finalConfig.token) { finalConfig.address = await ClusterManager.getAddressFromToken(finalConfig.token); } if (!finalConfig.address) { throw new Error('Address is required and could not be resolved from token'); } return finalConfig.address; } /** * Ensure initialization is complete before method execution */ protected async ensureInitialized(): Promise<void> { await this.initializationPromise; if (!this.baseUrl) { throw new Error('Base URL not initialized'); } } /** * Ensure collection is loaded before search/query operations */ protected async ensureLoaded(collectionName: string): Promise<void> { try { const restfulConfig = this.config as MilvusRestfulConfig; // Check if collection is loaded const response = await this.makeRequest('/collections/get_load_state', 'POST', { collectionName, dbName: restfulConfig.database }); const loadState = response.data?.loadState; if (loadState !== 'LoadStateLoaded') { console.log(`[MilvusRestfulDB] 🔄 Loading collection '${collectionName}' to memory...`); await this.loadCollection(collectionName); } } catch (error) { console.error(`[MilvusRestfulDB] ❌ Failed to ensure collection '${collectionName}' is loaded:`, error); throw error; } } /** * Make HTTP request to Milvus REST API */ private async makeRequest(endpoint: string, method: 'GET' | 'POST' = 'POST', data?: any): Promise<any> { const url = `${this.baseUrl}${endpoint}`; const headers: Record<string, string> = { 'Content-Type': 'application/json', 'Accept': 'application/json' }; // Handle authentication if (this.config.token) { headers['Authorization'] = `Bearer ${this.config.token}`; } else if (this.config.username && this.config.password) { headers['Authorization'] = `Bearer ${this.config.username}:${this.config.password}`; } const requestOptions: RequestInit = { method, headers, }; if (data && method === 'POST') { requestOptions.body = JSON.stringify(data); } try { const response = await fetch(url, requestOptions); if (!response.ok) { throw new Error(`HTTP ${response.status}: ${response.statusText}`); } const result: any = await response.json(); if (result.code !== 0 && result.code !== 200) { throw new Error(`Milvus API error: ${result.message || 'Unknown error'}`); } return result; } catch (error) { console.error(`[MilvusRestfulDB] Milvus REST API request failed:`, error); throw error; } } async createCollection(collectionName: string, dimension: number, description?: string): Promise<void> { await this.ensureInitialized(); try { const restfulConfig = this.config as MilvusRestfulConfig; // Build collection schema based on the original milvus-vectordb.ts implementation // Note: REST API doesn't support description parameter in collection creation // Unlike gRPC version, the description parameter is ignored in REST API const collectionSchema = { collectionName, dbName: restfulConfig.database, schema: { enableDynamicField: false, fields: [ { fieldName: "id", dataType: "VarChar", isPrimary: true, elementTypeParams: { max_length: 512 } }, { fieldName: "vector", dataType: "FloatVector", elementTypeParams: { dim: dimension } }, { fieldName: "content", dataType: "VarChar", elementTypeParams: { max_length: 65535 } }, { fieldName: "relativePath", dataType: "VarChar", elementTypeParams: { max_length: 1024 } }, { fieldName: "startLine", dataType: "Int64" }, { fieldName: "endLine", dataType: "Int64" }, { fieldName: "fileExtension", dataType: "VarChar", elementTypeParams: { max_length: 32 } }, { fieldName: "metadata", dataType: "VarChar", elementTypeParams: { max_length: 65535 } } ] } }; // Step 1: Create collection with schema await createCollectionWithLimitCheck(this.makeRequest.bind(this), collectionSchema); // Step 2: Create index for vector field (separate API call) await this.createIndex(collectionName); // Step 3: Load collection to memory for searching await this.loadCollection(collectionName); } catch (error) { console.error(`[MilvusRestfulDB] ❌ Failed to create collection '${collectionName}':`, error); throw error; } } /** * Create index for vector field using the Index Create API */ private async createIndex(collectionName: string): Promise<void> { try { const restfulConfig = this.config as MilvusRestfulConfig; const indexParams = { collectionName, dbName: restfulConfig.database, indexParams: [ { fieldName: "vector", indexName: "vector_index", metricType: "COSINE", index_type: "AUTOINDEX" } ] }; await this.makeRequest('/indexes/create', 'POST', indexParams); } catch (error) { console.error(`[MilvusRestfulDB] ❌ Failed to create index for collection '${collectionName}':`, error); throw error; } } /** * Load collection to memory for searching */ private async loadCollection(collectionName: string): Promise<void> { try { const restfulConfig = this.config as MilvusRestfulConfig; await this.makeRequest('/collections/load', 'POST', { collectionName, dbName: restfulConfig.database }); } catch (error) { console.error(`[MilvusRestfulDB] ❌ Failed to load collection '${collectionName}':`, error); throw error; } } async dropCollection(collectionName: string): Promise<void> { await this.ensureInitialized(); try { const restfulConfig = this.config as MilvusRestfulConfig; await this.makeRequest('/collections/drop', 'POST', { collectionName, dbName: restfulConfig.database }); } catch (error) { console.error(`[MilvusRestfulDB] ❌ Failed to drop collection '${collectionName}':`, error); throw error; } } async hasCollection(collectionName: string): Promise<boolean> { await this.ensureInitialized(); try { const restfulConfig = this.config as MilvusRestfulConfig; const response = await this.makeRequest('/collections/has', 'POST', { collectionName, dbName: restfulConfig.database }); const exists = response.data?.has || false; return exists; } catch (error) { console.error(`[MilvusRestfulDB] ❌ Failed to check collection '${collectionName}' existence:`, error); throw error; } } async listCollections(): Promise<string[]> { await this.ensureInitialized(); try { const restfulConfig = this.config as MilvusRestfulConfig; const response = await this.makeRequest('/collections/list', 'POST', { dbName: restfulConfig.database }); return response.data || []; } catch (error) { console.error(`[MilvusRestfulDB] ❌ Failed to list collections:`, error); throw error; } } async insert(collectionName: string, documents: VectorDocument[]): Promise<void> { await this.ensureInitialized(); await this.ensureLoaded(collectionName); try { const restfulConfig = this.config as MilvusRestfulConfig; // Transform VectorDocument array to Milvus entity format const data = documents.map(doc => ({ id: doc.id, vector: doc.vector, content: doc.content, relativePath: doc.relativePath, startLine: doc.startLine, endLine: doc.endLine, fileExtension: doc.fileExtension, metadata: JSON.stringify(doc.metadata) // Convert metadata object to JSON string })); const insertRequest = { collectionName, data, dbName: restfulConfig.database }; await this.makeRequest('/entities/insert', 'POST', insertRequest); } catch (error) { console.error(`[MilvusRestfulDB] ❌ Failed to insert documents into collection '${collectionName}':`, error); throw error; } } async search(collectionName: string, queryVector: number[], options?: SearchOptions): Promise<VectorSearchResult[]> { await this.ensureInitialized(); await this.ensureLoaded(collectionName); const topK = options?.topK || 10; try { const restfulConfig = this.config as MilvusRestfulConfig; // Build search request according to Milvus REST API specification const searchRequest: any = { collectionName, dbName: restfulConfig.database, data: [queryVector], // Array of query vectors annsField: "vector", // Vector field name limit: topK, outputFields: [ "content", "relativePath", "startLine", "endLine", "fileExtension", "metadata" ], searchParams: { metricType: "COSINE", // Match the index metric type params: {} } }; // Apply boolean expression filter if provided (e.g., fileExtension in ['.ts','.py']) if (options?.filterExpr && options.filterExpr.trim().length > 0) { searchRequest.filter = options.filterExpr; } const response = await this.makeRequest('/entities/search', 'POST', searchRequest); // Transform response to VectorSearchResult format const results: VectorSearchResult[] = (response.data || []).map((item: any) => { // Parse metadata from JSON string let metadata = {}; try { metadata = JSON.parse(item.metadata || '{}'); } catch (error) { console.warn(`[MilvusRestfulDB] Failed to parse metadata for item ${item.id}:`, error); metadata = {}; } return { document: { id: item.id?.toString() || '', vector: queryVector, // Vector not returned in search results content: item.content || '', relativePath: item.relativePath || '', startLine: item.startLine || 0, endLine: item.endLine || 0, fileExtension: item.fileExtension || '', metadata: metadata }, score: item.distance || 0 }; }); return results; } catch (error) { console.error(`[MilvusRestfulDB] ❌ Failed to search in collection '${collectionName}':`, error); throw error; } } async delete(collectionName: string, ids: string[]): Promise<void> { await this.ensureInitialized(); await this.ensureLoaded(collectionName); try { const restfulConfig = this.config as MilvusRestfulConfig; // Build filter expression for deleting by IDs // Format: id in ["id1", "id2", "id3"] const filter = `id in [${ids.map(id => `"${id}"`).join(', ')}]`; const deleteRequest = { collectionName, filter, dbName: restfulConfig.database }; await this.makeRequest('/entities/delete', 'POST', deleteRequest); } catch (error) { console.error(`[MilvusRestfulDB] ❌ Failed to delete documents from collection '${collectionName}':`, error); throw error; } } async query(collectionName: string, filter: string, outputFields: string[], limit?: number): Promise<Record<string, any>[]> { await this.ensureInitialized(); await this.ensureLoaded(collectionName); try { const restfulConfig = this.config as MilvusRestfulConfig; const queryRequest = { collectionName, dbName: restfulConfig.database, filter, outputFields, limit: limit || 16384, // Use provided limit or default offset: 0 }; const response = await this.makeRequest('/entities/query', 'POST', queryRequest); if (response.code !== 0) { throw new Error(`Failed to query Milvus: ${response.message || 'Unknown error'}`); } return response.data || []; } catch (error) { console.error(`[MilvusRestfulDB] ❌ Failed to query collection '${collectionName}':`, error); throw error; } } async createHybridCollection(collectionName: string, dimension: number, description?: string): Promise<void> { try { const restfulConfig = this.config as MilvusRestfulConfig; const collectionSchema = { collectionName, dbName: restfulConfig.database, schema: { enableDynamicField: false, functions: [ { name: "content_bm25_emb", description: "content bm25 function", type: "BM25", inputFieldNames: ["content"], outputFieldNames: ["sparse_vector"], params: {}, }, ], fields: [ { fieldName: "id", dataType: "VarChar", isPrimary: true, elementTypeParams: { max_length: 512 } }, { fieldName: "content", dataType: "VarChar", elementTypeParams: { max_length: 65535, enable_analyzer: true } }, { fieldName: "vector", dataType: "FloatVector", elementTypeParams: { dim: dimension } }, { fieldName: "sparse_vector", dataType: "SparseFloatVector" }, { fieldName: "relativePath", dataType: "VarChar", elementTypeParams: { max_length: 1024 } }, { fieldName: "startLine", dataType: "Int64" }, { fieldName: "endLine", dataType: "Int64" }, { fieldName: "fileExtension", dataType: "VarChar", elementTypeParams: { max_length: 32 } }, { fieldName: "metadata", dataType: "VarChar", elementTypeParams: { max_length: 65535 } } ] } }; // Step 1: Create collection with schema and functions await createCollectionWithLimitCheck(this.makeRequest.bind(this), collectionSchema); // Step 2: Create indexes for both vector fields await this.createHybridIndexes(collectionName); // Step 3: Load collection to memory for searching await this.loadCollection(collectionName); } catch (error) { console.error(`[MilvusRestfulDB] ❌ Failed to create hybrid collection '${collectionName}':`, error); throw error; } } private async createHybridIndexes(collectionName: string): Promise<void> { try { const restfulConfig = this.config as MilvusRestfulConfig; // Create index for dense vector const denseIndexParams = { collectionName, dbName: restfulConfig.database, indexParams: [ { fieldName: "vector", indexName: "vector_index", metricType: "COSINE", index_type: "AUTOINDEX" } ] }; await this.makeRequest('/indexes/create', 'POST', denseIndexParams); // Create index for sparse vector const sparseIndexParams = { collectionName, dbName: restfulConfig.database, indexParams: [ { fieldName: "sparse_vector", indexName: "sparse_vector_index", metricType: "BM25", index_type: "SPARSE_INVERTED_INDEX" } ] }; await this.makeRequest('/indexes/create', 'POST', sparseIndexParams); } catch (error) { console.error(`[MilvusRestfulDB] ❌ Failed to create hybrid indexes for collection '${collectionName}':`, error); throw error; } } async insertHybrid(collectionName: string, documents: VectorDocument[]): Promise<void> { await this.ensureInitialized(); await this.ensureLoaded(collectionName); try { const restfulConfig = this.config as MilvusRestfulConfig; const data = documents.map(doc => ({ id: doc.id, content: doc.content, vector: doc.vector, relativePath: doc.relativePath, startLine: doc.startLine, endLine: doc.endLine, fileExtension: doc.fileExtension, metadata: JSON.stringify(doc.metadata), })); const insertRequest = { collectionName, dbName: restfulConfig.database, data: data }; const response = await this.makeRequest('/entities/insert', 'POST', insertRequest); if (response.code !== 0) { throw new Error(`Insert failed: ${response.message || 'Unknown error'}`); } } catch (error) { console.error(`[MilvusRestfulDB] ❌ Failed to insert hybrid documents to collection '${collectionName}':`, error); throw error; } } async hybridSearch(collectionName: string, searchRequests: HybridSearchRequest[], options?: HybridSearchOptions): Promise<HybridSearchResult[]> { await this.ensureInitialized(); await this.ensureLoaded(collectionName); try { const restfulConfig = this.config as MilvusRestfulConfig; console.log(`[MilvusRestfulDB] 🔍 Preparing hybrid search for collection: ${collectionName}`); // Prepare search requests according to Milvus REST API hybrid search specification // For dense vector search - data must be array of vectors: [[0.1, 0.2, 0.3, ...]] const search_param_1: any = { data: Array.isArray(searchRequests[0].data) ? [searchRequests[0].data] : [[searchRequests[0].data]], annsField: searchRequests[0].anns_field, // "vector" limit: searchRequests[0].limit, outputFields: ["*"], searchParams: { metricType: "COSINE", params: searchRequests[0].param || { "nprobe": 10 } } }; // For sparse vector search - data must be array of queries: ["query text"] const search_param_2: any = { data: Array.isArray(searchRequests[1].data) ? searchRequests[1].data : [searchRequests[1].data], annsField: searchRequests[1].anns_field, // "sparse_vector" limit: searchRequests[1].limit, outputFields: ["*"], searchParams: { metricType: "BM25", params: searchRequests[1].param || { "drop_ratio_search": 0.2 } } }; // Apply filter to both search parameters if provided if (options?.filterExpr && options.filterExpr.trim().length > 0) { search_param_1.filter = options.filterExpr; search_param_2.filter = options.filterExpr; } const rerank_strategy = { strategy: "rrf", params: { k: 100 } }; console.log(`[MilvusRestfulDB] 🔍 Dense search params:`, JSON.stringify({ annsField: search_param_1.annsField, limit: search_param_1.limit, data_length: Array.isArray(search_param_1.data[0]) ? search_param_1.data[0].length : 'N/A', searchParams: search_param_1.searchParams }, null, 2)); console.log(`[MilvusRestfulDB] 🔍 Sparse search params:`, JSON.stringify({ annsField: search_param_2.annsField, limit: search_param_2.limit, query_text: typeof search_param_2.data[0] === 'string' ? search_param_2.data[0].substring(0, 50) + '...' : 'N/A', searchParams: search_param_2.searchParams }, null, 2)); const hybridSearchRequest: any = { collectionName, dbName: restfulConfig.database, search: [search_param_1, search_param_2], rerank: rerank_strategy, limit: options?.limit || searchRequests[0]?.limit || 10, outputFields: ['id', 'content', 'relativePath', 'startLine', 'endLine', 'fileExtension', 'metadata'], }; console.log(`[MilvusRestfulDB] 🔍 Executing REST API hybrid search...`); const response = await this.makeRequest('/entities/hybrid_search', 'POST', hybridSearchRequest); if (response.code !== 0) { throw new Error(`Hybrid search failed: ${response.message || 'Unknown error'}`); } const results = response.data || []; console.log(`[MilvusRestfulDB] ✅ Found ${results.length} results from hybrid search`); // Transform response to HybridSearchResult format return results.map((result: any) => ({ document: { id: result.id, content: result.content, vector: [], // Vector not returned in search results sparse_vector: [], // Vector not returned in search results relativePath: result.relativePath, startLine: result.startLine, endLine: result.endLine, fileExtension: result.fileExtension, metadata: JSON.parse(result.metadata || '{}'), }, score: result.score || result.distance || 0, })); } catch (error) { console.error(`[MilvusRestfulDB] ❌ Failed to perform hybrid search on collection '${collectionName}':`, error); throw error; } } /** * Check collection limit * Returns true if collection can be created, false if limit exceeded * TODO: Implement proper collection limit checking for REST API */ async checkCollectionLimit(): Promise<boolean> { // TODO: Implement REST API version of collection limit checking // For now, always return true to maintain compatibility console.warn('[MilvusRestfulDB] ⚠️ checkCollectionLimit not implemented for REST API - returning true'); return true; } } ``` -------------------------------------------------------------------------------- /packages/mcp/src/handlers.ts: -------------------------------------------------------------------------------- ```typescript import * as fs from "fs"; import * as path from "path"; import * as crypto from "crypto"; import { Context, COLLECTION_LIMIT_MESSAGE } from "@zilliz/claude-context-core"; import { SnapshotManager } from "./snapshot.js"; import { ensureAbsolutePath, truncateContent, trackCodebasePath } from "./utils.js"; export class ToolHandlers { private context: Context; private snapshotManager: SnapshotManager; private indexingStats: { indexedFiles: number; totalChunks: number } | null = null; private currentWorkspace: string; constructor(context: Context, snapshotManager: SnapshotManager) { this.context = context; this.snapshotManager = snapshotManager; this.currentWorkspace = process.cwd(); console.log(`[WORKSPACE] Current workspace: ${this.currentWorkspace}`); } /** * Sync indexed codebases from Zilliz Cloud collections * This method fetches all collections from the vector database, * gets the first document from each collection to extract codebasePath from metadata, * and updates the snapshot with discovered codebases. * * Logic: Compare mcp-codebase-snapshot.json with zilliz cloud collections * - If local snapshot has extra directories (not in cloud), remove them * - If local snapshot is missing directories (exist in cloud), ignore them */ private async syncIndexedCodebasesFromCloud(): Promise<void> { try { console.log(`[SYNC-CLOUD] 🔄 Syncing indexed codebases from Zilliz Cloud...`); // Get all collections using the interface method const vectorDb = this.context.getVectorDatabase(); // Use the new listCollections method from the interface const collections = await vectorDb.listCollections(); console.log(`[SYNC-CLOUD] 📋 Found ${collections.length} collections in Zilliz Cloud`); if (collections.length === 0) { console.log(`[SYNC-CLOUD] ✅ No collections found in cloud`); // If no collections in cloud, remove all local codebases const localCodebases = this.snapshotManager.getIndexedCodebases(); if (localCodebases.length > 0) { console.log(`[SYNC-CLOUD] 🧹 Removing ${localCodebases.length} local codebases as cloud has no collections`); for (const codebasePath of localCodebases) { this.snapshotManager.removeIndexedCodebase(codebasePath); console.log(`[SYNC-CLOUD] ➖ Removed local codebase: ${codebasePath}`); } this.snapshotManager.saveCodebaseSnapshot(); console.log(`[SYNC-CLOUD] 💾 Updated snapshot to match empty cloud state`); } return; } const cloudCodebases = new Set<string>(); // Check each collection for codebase path for (const collectionName of collections) { try { // Skip collections that don't match the code_chunks pattern (support both legacy and new collections) if (!collectionName.startsWith('code_chunks_') && !collectionName.startsWith('hybrid_code_chunks_')) { console.log(`[SYNC-CLOUD] ⏭️ Skipping non-code collection: ${collectionName}`); continue; } console.log(`[SYNC-CLOUD] 🔍 Checking collection: ${collectionName}`); // Query the first document to get metadata const results = await vectorDb.query( collectionName, '', // Empty filter to get all results ['metadata'], // Only fetch metadata field 1 // Only need one result to extract codebasePath ); if (results && results.length > 0) { const firstResult = results[0]; const metadataStr = firstResult.metadata; if (metadataStr) { try { const metadata = JSON.parse(metadataStr); const codebasePath = metadata.codebasePath; if (codebasePath && typeof codebasePath === 'string') { console.log(`[SYNC-CLOUD] 📍 Found codebase path: ${codebasePath} in collection: ${collectionName}`); cloudCodebases.add(codebasePath); } else { console.warn(`[SYNC-CLOUD] ⚠️ No codebasePath found in metadata for collection: ${collectionName}`); } } catch (parseError) { console.warn(`[SYNC-CLOUD] ⚠️ Failed to parse metadata JSON for collection ${collectionName}:`, parseError); } } else { console.warn(`[SYNC-CLOUD] ⚠️ No metadata found in collection: ${collectionName}`); } } else { console.log(`[SYNC-CLOUD] ℹ️ Collection ${collectionName} is empty`); } } catch (collectionError: any) { console.warn(`[SYNC-CLOUD] ⚠️ Error checking collection ${collectionName}:`, collectionError.message || collectionError); // Continue with next collection } } console.log(`[SYNC-CLOUD] 📊 Found ${cloudCodebases.size} valid codebases in cloud`); // Get current local codebases const localCodebases = new Set(this.snapshotManager.getIndexedCodebases()); console.log(`[SYNC-CLOUD] 📊 Found ${localCodebases.size} local codebases in snapshot`); let hasChanges = false; // Remove local codebases that don't exist in cloud for (const localCodebase of localCodebases) { if (!cloudCodebases.has(localCodebase)) { this.snapshotManager.removeIndexedCodebase(localCodebase); hasChanges = true; console.log(`[SYNC-CLOUD] ➖ Removed local codebase (not in cloud): ${localCodebase}`); } } // Note: We don't add cloud codebases that are missing locally (as per user requirement) console.log(`[SYNC-CLOUD] ℹ️ Skipping addition of cloud codebases not present locally (per sync policy)`); if (hasChanges) { this.snapshotManager.saveCodebaseSnapshot(); console.log(`[SYNC-CLOUD] 💾 Updated snapshot to match cloud state`); } else { console.log(`[SYNC-CLOUD] ✅ Local snapshot already matches cloud state`); } console.log(`[SYNC-CLOUD] ✅ Cloud sync completed successfully`); } catch (error: any) { console.error(`[SYNC-CLOUD] ❌ Error syncing codebases from cloud:`, error.message || error); // Don't throw - this is not critical for the main functionality } } public async handleIndexCodebase(args: any) { const { path: codebasePath, force, splitter, customExtensions, ignorePatterns } = args; const forceReindex = force || false; const splitterType = splitter || 'ast'; // Default to AST const customFileExtensions = customExtensions || []; const customIgnorePatterns = ignorePatterns || []; try { // Sync indexed codebases from cloud first await this.syncIndexedCodebasesFromCloud(); // Validate splitter parameter if (splitterType !== 'ast' && splitterType !== 'langchain') { return { content: [{ type: "text", text: `Error: Invalid splitter type '${splitterType}'. Must be 'ast' or 'langchain'.` }], isError: true }; } // Force absolute path resolution - warn if relative path provided const absolutePath = ensureAbsolutePath(codebasePath); // Validate path exists if (!fs.existsSync(absolutePath)) { return { content: [{ type: "text", text: `Error: Path '${absolutePath}' does not exist. Original input: '${codebasePath}'` }], isError: true }; } // Check if it's a directory const stat = fs.statSync(absolutePath); if (!stat.isDirectory()) { return { content: [{ type: "text", text: `Error: Path '${absolutePath}' is not a directory` }], isError: true }; } // Check if already indexing if (this.snapshotManager.getIndexingCodebases().includes(absolutePath)) { return { content: [{ type: "text", text: `Codebase '${absolutePath}' is already being indexed in the background. Please wait for completion.` }], isError: true }; } //Check if the snapshot and cloud index are in sync if (this.snapshotManager.getIndexedCodebases().includes(absolutePath) !== await this.context.hasIndex(absolutePath)) { console.warn(`[INDEX-VALIDATION] ❌ Snapshot and cloud index mismatch: ${absolutePath}`); } // Check if already indexed (unless force is true) if (!forceReindex && this.snapshotManager.getIndexedCodebases().includes(absolutePath)) { return { content: [{ type: "text", text: `Codebase '${absolutePath}' is already indexed. Use force=true to re-index.` }], isError: true }; } // If force reindex and codebase is already indexed, remove it if (forceReindex) { if (this.snapshotManager.getIndexedCodebases().includes(absolutePath)) { console.log(`[FORCE-REINDEX] 🔄 Removing '${absolutePath}' from indexed list for re-indexing`); this.snapshotManager.removeIndexedCodebase(absolutePath); } if (await this.context.hasIndex(absolutePath)) { console.log(`[FORCE-REINDEX] 🔄 Clearing index for '${absolutePath}'`); await this.context.clearIndex(absolutePath); } } // CRITICAL: Pre-index collection creation validation try { console.log(`[INDEX-VALIDATION] 🔍 Validating collection creation capability`); const canCreateCollection = await this.context.getVectorDatabase().checkCollectionLimit(); if (!canCreateCollection) { console.error(`[INDEX-VALIDATION] ❌ Collection limit validation failed: ${absolutePath}`); // CRITICAL: Immediately return the COLLECTION_LIMIT_MESSAGE to MCP client return { content: [{ type: "text", text: COLLECTION_LIMIT_MESSAGE }], isError: true }; } console.log(`[INDEX-VALIDATION] ✅ Collection creation validation completed`); } catch (validationError: any) { // Handle other collection creation errors console.error(`[INDEX-VALIDATION] ❌ Collection creation validation failed:`, validationError); return { content: [{ type: "text", text: `Error validating collection creation: ${validationError.message || validationError}` }], isError: true }; } // Add custom extensions if provided if (customFileExtensions.length > 0) { console.log(`[CUSTOM-EXTENSIONS] Adding ${customFileExtensions.length} custom extensions: ${customFileExtensions.join(', ')}`); this.context.addCustomExtensions(customFileExtensions); } // Add custom ignore patterns if provided (before loading file-based patterns) if (customIgnorePatterns.length > 0) { console.log(`[IGNORE-PATTERNS] Adding ${customIgnorePatterns.length} custom ignore patterns: ${customIgnorePatterns.join(', ')}`); this.context.addCustomIgnorePatterns(customIgnorePatterns); } // Check current status and log if retrying after failure const currentStatus = this.snapshotManager.getCodebaseStatus(absolutePath); if (currentStatus === 'indexfailed') { const failedInfo = this.snapshotManager.getCodebaseInfo(absolutePath) as any; console.log(`[BACKGROUND-INDEX] Retrying indexing for previously failed codebase. Previous error: ${failedInfo?.errorMessage || 'Unknown error'}`); } // Set to indexing status and save snapshot immediately this.snapshotManager.setCodebaseIndexing(absolutePath, 0); this.snapshotManager.saveCodebaseSnapshot(); // Track the codebase path for syncing trackCodebasePath(absolutePath); // Start background indexing - now safe to proceed this.startBackgroundIndexing(absolutePath, forceReindex, splitterType); const pathInfo = codebasePath !== absolutePath ? `\nNote: Input path '${codebasePath}' was resolved to absolute path '${absolutePath}'` : ''; const extensionInfo = customFileExtensions.length > 0 ? `\nUsing ${customFileExtensions.length} custom extensions: ${customFileExtensions.join(', ')}` : ''; const ignoreInfo = customIgnorePatterns.length > 0 ? `\nUsing ${customIgnorePatterns.length} custom ignore patterns: ${customIgnorePatterns.join(', ')}` : ''; return { content: [{ type: "text", text: `Started background indexing for codebase '${absolutePath}' using ${splitterType.toUpperCase()} splitter.${pathInfo}${extensionInfo}${ignoreInfo}\n\nIndexing is running in the background. You can search the codebase while indexing is in progress, but results may be incomplete until indexing completes.` }] }; } catch (error: any) { // Enhanced error handling to prevent MCP service crash console.error('Error in handleIndexCodebase:', error); // Ensure we always return a proper MCP response, never throw return { content: [{ type: "text", text: `Error starting indexing: ${error.message || error}` }], isError: true }; } } private async startBackgroundIndexing(codebasePath: string, forceReindex: boolean, splitterType: string) { const absolutePath = codebasePath; let lastSaveTime = 0; // Track last save timestamp try { console.log(`[BACKGROUND-INDEX] Starting background indexing for: ${absolutePath}`); // Note: If force reindex, collection was already cleared during validation phase if (forceReindex) { console.log(`[BACKGROUND-INDEX] ℹ️ Force reindex mode - collection was already cleared during validation`); } // Use the existing Context instance for indexing. let contextForThisTask = this.context; if (splitterType !== 'ast') { console.warn(`[BACKGROUND-INDEX] Non-AST splitter '${splitterType}' requested; falling back to AST splitter`); } // Load ignore patterns from files first (including .ignore, .gitignore, etc.) await this.context.getLoadedIgnorePatterns(absolutePath); // Initialize file synchronizer with proper ignore patterns (including project-specific patterns) const { FileSynchronizer } = await import("@zilliz/claude-context-core"); const ignorePatterns = this.context.getIgnorePatterns() || []; console.log(`[BACKGROUND-INDEX] Using ignore patterns: ${ignorePatterns.join(', ')}`); const synchronizer = new FileSynchronizer(absolutePath, ignorePatterns); await synchronizer.initialize(); // Store synchronizer in the context (let context manage collection names) await this.context.getPreparedCollection(absolutePath); const collectionName = this.context.getCollectionName(absolutePath); this.context.setSynchronizer(collectionName, synchronizer); if (contextForThisTask !== this.context) { contextForThisTask.setSynchronizer(collectionName, synchronizer); } console.log(`[BACKGROUND-INDEX] Starting indexing with ${splitterType} splitter for: ${absolutePath}`); // Log embedding provider information before indexing const embeddingProvider = this.context.getEmbedding(); console.log(`[BACKGROUND-INDEX] 🧠 Using embedding provider: ${embeddingProvider.getProvider()} with dimension: ${embeddingProvider.getDimension()}`); // Start indexing with the appropriate context and progress tracking console.log(`[BACKGROUND-INDEX] 🚀 Beginning codebase indexing process...`); const stats = await contextForThisTask.indexCodebase(absolutePath, (progress) => { // Update progress in snapshot manager using new method this.snapshotManager.setCodebaseIndexing(absolutePath, progress.percentage); // Save snapshot periodically (every 2 seconds to avoid too frequent saves) const currentTime = Date.now(); if (currentTime - lastSaveTime >= 2000) { // 2 seconds = 2000ms this.snapshotManager.saveCodebaseSnapshot(); lastSaveTime = currentTime; console.log(`[BACKGROUND-INDEX] 💾 Saved progress snapshot at ${progress.percentage.toFixed(1)}%`); } console.log(`[BACKGROUND-INDEX] Progress: ${progress.phase} - ${progress.percentage}% (${progress.current}/${progress.total})`); }); console.log(`[BACKGROUND-INDEX] ✅ Indexing completed successfully! Files: ${stats.indexedFiles}, Chunks: ${stats.totalChunks}`); // Set codebase to indexed status with complete statistics this.snapshotManager.setCodebaseIndexed(absolutePath, stats); this.indexingStats = { indexedFiles: stats.indexedFiles, totalChunks: stats.totalChunks }; // Save snapshot after updating codebase lists this.snapshotManager.saveCodebaseSnapshot(); let message = `Background indexing completed for '${absolutePath}' using ${splitterType.toUpperCase()} splitter.\nIndexed ${stats.indexedFiles} files, ${stats.totalChunks} chunks.`; if (stats.status === 'limit_reached') { message += `\n⚠️ Warning: Indexing stopped because the chunk limit (450,000) was reached. The index may be incomplete.`; } console.log(`[BACKGROUND-INDEX] ${message}`); } catch (error: any) { console.error(`[BACKGROUND-INDEX] Error during indexing for ${absolutePath}:`, error); // Get the last attempted progress const lastProgress = this.snapshotManager.getIndexingProgress(absolutePath); // Set codebase to failed status with error information const errorMessage = error.message || String(error); this.snapshotManager.setCodebaseIndexFailed(absolutePath, errorMessage, lastProgress); this.snapshotManager.saveCodebaseSnapshot(); // Log error but don't crash MCP service - indexing errors are handled gracefully console.error(`[BACKGROUND-INDEX] Indexing failed for ${absolutePath}: ${errorMessage}`); } } public async handleSearchCode(args: any) { const { path: codebasePath, query, limit = 10, extensionFilter } = args; const resultLimit = limit || 10; try { // Sync indexed codebases from cloud first await this.syncIndexedCodebasesFromCloud(); // Force absolute path resolution - warn if relative path provided const absolutePath = ensureAbsolutePath(codebasePath); // Validate path exists if (!fs.existsSync(absolutePath)) { return { content: [{ type: "text", text: `Error: Path '${absolutePath}' does not exist. Original input: '${codebasePath}'` }], isError: true }; } // Check if it's a directory const stat = fs.statSync(absolutePath); if (!stat.isDirectory()) { return { content: [{ type: "text", text: `Error: Path '${absolutePath}' is not a directory` }], isError: true }; } trackCodebasePath(absolutePath); // Check if this codebase is indexed or being indexed const isIndexed = this.snapshotManager.getIndexedCodebases().includes(absolutePath); const isIndexing = this.snapshotManager.getIndexingCodebases().includes(absolutePath); if (!isIndexed && !isIndexing) { return { content: [{ type: "text", text: `Error: Codebase '${absolutePath}' is not indexed. Please index it first using the index_codebase tool.` }], isError: true }; } // Show indexing status if codebase is being indexed let indexingStatusMessage = ''; if (isIndexing) { indexingStatusMessage = `\n⚠️ **Indexing in Progress**: This codebase is currently being indexed in the background. Search results may be incomplete until indexing completes.`; } console.log(`[SEARCH] Searching in codebase: ${absolutePath}`); console.log(`[SEARCH] Query: "${query}"`); console.log(`[SEARCH] Indexing status: ${isIndexing ? 'In Progress' : 'Completed'}`); // Log embedding provider information before search const embeddingProvider = this.context.getEmbedding(); console.log(`[SEARCH] 🧠 Using embedding provider: ${embeddingProvider.getProvider()} for search`); console.log(`[SEARCH] 🔍 Generating embeddings for query using ${embeddingProvider.getProvider()}...`); // Build filter expression from extensionFilter list let filterExpr: string | undefined = undefined; if (Array.isArray(extensionFilter) && extensionFilter.length > 0) { const cleaned = extensionFilter .filter((v: any) => typeof v === 'string') .map((v: string) => v.trim()) .filter((v: string) => v.length > 0); const invalid = cleaned.filter((e: string) => !(e.startsWith('.') && e.length > 1 && !/\s/.test(e))); if (invalid.length > 0) { return { content: [{ type: 'text', text: `Error: Invalid file extensions in extensionFilter: ${JSON.stringify(invalid)}. Use proper extensions like '.ts', '.py'.` }], isError: true }; } const quoted = cleaned.map((e: string) => `'${e}'`).join(', '); filterExpr = `fileExtension in [${quoted}]`; } // Search in the specified codebase const searchResults = await this.context.semanticSearch( absolutePath, query, Math.min(resultLimit, 50), 0.3, filterExpr ); console.log(`[SEARCH] ✅ Search completed! Found ${searchResults.length} results using ${embeddingProvider.getProvider()} embeddings`); if (searchResults.length === 0) { let noResultsMessage = `No results found for query: "${query}" in codebase '${absolutePath}'`; if (isIndexing) { noResultsMessage += `\n\nNote: This codebase is still being indexed. Try searching again after indexing completes, or the query may not match any indexed content.`; } return { content: [{ type: "text", text: noResultsMessage }] }; } // Format results const formattedResults = searchResults.map((result: any, index: number) => { const location = `${result.relativePath}:${result.startLine}-${result.endLine}`; const context = truncateContent(result.content, 5000); const codebaseInfo = path.basename(absolutePath); return `${index + 1}. Code snippet (${result.language}) [${codebaseInfo}]\n` + ` Location: ${location}\n` + ` Rank: ${index + 1}\n` + ` Context: \n\`\`\`${result.language}\n${context}\n\`\`\`\n`; }).join('\n'); let resultMessage = `Found ${searchResults.length} results for query: "${query}" in codebase '${absolutePath}'${indexingStatusMessage}\n\n${formattedResults}`; if (isIndexing) { resultMessage += `\n\n💡 **Tip**: This codebase is still being indexed. More results may become available as indexing progresses.`; } return { content: [{ type: "text", text: resultMessage }] }; } catch (error) { // Check if this is the collection limit error // Handle both direct string throws and Error objects containing the message const errorMessage = typeof error === 'string' ? error : (error instanceof Error ? error.message : String(error)); if (errorMessage === COLLECTION_LIMIT_MESSAGE || errorMessage.includes(COLLECTION_LIMIT_MESSAGE)) { // Return the collection limit message as a successful response // This ensures LLM treats it as final answer, not as retryable error return { content: [{ type: "text", text: COLLECTION_LIMIT_MESSAGE }] }; } return { content: [{ type: "text", text: `Error searching code: ${errorMessage} Please check if the codebase has been indexed first.` }], isError: true }; } } public async handleClearIndex(args: any) { const { path: codebasePath } = args; if (this.snapshotManager.getIndexedCodebases().length === 0 && this.snapshotManager.getIndexingCodebases().length === 0) { return { content: [{ type: "text", text: "No codebases are currently indexed or being indexed." }] }; } try { // Force absolute path resolution - warn if relative path provided const absolutePath = ensureAbsolutePath(codebasePath); // Validate path exists if (!fs.existsSync(absolutePath)) { return { content: [{ type: "text", text: `Error: Path '${absolutePath}' does not exist. Original input: '${codebasePath}'` }], isError: true }; } // Check if it's a directory const stat = fs.statSync(absolutePath); if (!stat.isDirectory()) { return { content: [{ type: "text", text: `Error: Path '${absolutePath}' is not a directory` }], isError: true }; } // Check if this codebase is indexed or being indexed const isIndexed = this.snapshotManager.getIndexedCodebases().includes(absolutePath); const isIndexing = this.snapshotManager.getIndexingCodebases().includes(absolutePath); if (!isIndexed && !isIndexing) { return { content: [{ type: "text", text: `Error: Codebase '${absolutePath}' is not indexed or being indexed.` }], isError: true }; } console.log(`[CLEAR] Clearing codebase: ${absolutePath}`); try { await this.context.clearIndex(absolutePath); console.log(`[CLEAR] Successfully cleared index for: ${absolutePath}`); } catch (error: any) { const errorMsg = `Failed to clear ${absolutePath}: ${error.message}`; console.error(`[CLEAR] ${errorMsg}`); return { content: [{ type: "text", text: errorMsg }], isError: true }; } // Completely remove the cleared codebase from snapshot this.snapshotManager.removeCodebaseCompletely(absolutePath); // Reset indexing stats if this was the active codebase this.indexingStats = null; // Save snapshot after clearing index this.snapshotManager.saveCodebaseSnapshot(); let resultText = `Successfully cleared codebase '${absolutePath}'`; const remainingIndexed = this.snapshotManager.getIndexedCodebases().length; const remainingIndexing = this.snapshotManager.getIndexingCodebases().length; if (remainingIndexed > 0 || remainingIndexing > 0) { resultText += `\n${remainingIndexed} other indexed codebase(s) and ${remainingIndexing} indexing codebase(s) remain`; } return { content: [{ type: "text", text: resultText }] }; } catch (error) { // Check if this is the collection limit error // Handle both direct string throws and Error objects containing the message const errorMessage = typeof error === 'string' ? error : (error instanceof Error ? error.message : String(error)); if (errorMessage === COLLECTION_LIMIT_MESSAGE || errorMessage.includes(COLLECTION_LIMIT_MESSAGE)) { // Return the collection limit message as a successful response // This ensures LLM treats it as final answer, not as retryable error return { content: [{ type: "text", text: COLLECTION_LIMIT_MESSAGE }] }; } return { content: [{ type: "text", text: `Error clearing index: ${errorMessage}` }], isError: true }; } } public async handleGetIndexingStatus(args: any) { const { path: codebasePath } = args; try { // Force absolute path resolution const absolutePath = ensureAbsolutePath(codebasePath); // Validate path exists if (!fs.existsSync(absolutePath)) { return { content: [{ type: "text", text: `Error: Path '${absolutePath}' does not exist. Original input: '${codebasePath}'` }], isError: true }; } // Check if it's a directory const stat = fs.statSync(absolutePath); if (!stat.isDirectory()) { return { content: [{ type: "text", text: `Error: Path '${absolutePath}' is not a directory` }], isError: true }; } // Check indexing status using new status system const status = this.snapshotManager.getCodebaseStatus(absolutePath); const info = this.snapshotManager.getCodebaseInfo(absolutePath); let statusMessage = ''; switch (status) { case 'indexed': if (info && 'indexedFiles' in info) { const indexedInfo = info as any; statusMessage = `✅ Codebase '${absolutePath}' is fully indexed and ready for search.`; statusMessage += `\n📊 Statistics: ${indexedInfo.indexedFiles} files, ${indexedInfo.totalChunks} chunks`; statusMessage += `\n📅 Status: ${indexedInfo.indexStatus}`; statusMessage += `\n🕐 Last updated: ${new Date(indexedInfo.lastUpdated).toLocaleString()}`; } else { statusMessage = `✅ Codebase '${absolutePath}' is fully indexed and ready for search.`; } break; case 'indexing': if (info && 'indexingPercentage' in info) { const indexingInfo = info as any; const progressPercentage = indexingInfo.indexingPercentage || 0; statusMessage = `🔄 Codebase '${absolutePath}' is currently being indexed. Progress: ${progressPercentage.toFixed(1)}%`; // Add more detailed status based on progress if (progressPercentage < 10) { statusMessage += ' (Preparing and scanning files...)'; } else if (progressPercentage < 100) { statusMessage += ' (Processing files and generating embeddings...)'; } statusMessage += `\n🕐 Last updated: ${new Date(indexingInfo.lastUpdated).toLocaleString()}`; } else { statusMessage = `🔄 Codebase '${absolutePath}' is currently being indexed.`; } break; case 'indexfailed': if (info && 'errorMessage' in info) { const failedInfo = info as any; statusMessage = `❌ Codebase '${absolutePath}' indexing failed.`; statusMessage += `\n🚨 Error: ${failedInfo.errorMessage}`; if (failedInfo.lastAttemptedPercentage !== undefined) { statusMessage += `\n📊 Failed at: ${failedInfo.lastAttemptedPercentage.toFixed(1)}% progress`; } statusMessage += `\n🕐 Failed at: ${new Date(failedInfo.lastUpdated).toLocaleString()}`; statusMessage += `\n💡 You can retry indexing by running the index_codebase command again.`; } else { statusMessage = `❌ Codebase '${absolutePath}' indexing failed. You can retry indexing.`; } break; case 'not_found': default: statusMessage = `❌ Codebase '${absolutePath}' is not indexed. Please use the index_codebase tool to index it first.`; break; } const pathInfo = codebasePath !== absolutePath ? `\nNote: Input path '${codebasePath}' was resolved to absolute path '${absolutePath}'` : ''; return { content: [{ type: "text", text: statusMessage + pathInfo }] }; } catch (error: any) { return { content: [{ type: "text", text: `Error getting indexing status: ${error.message || error}` }], isError: true }; } } } ``` -------------------------------------------------------------------------------- /packages/core/src/context.ts: -------------------------------------------------------------------------------- ```typescript import { Splitter, CodeChunk, AstCodeSplitter } from './splitter'; import { Embedding, EmbeddingVector, OpenAIEmbedding } from './embedding'; import { VectorDatabase, VectorDocument, VectorSearchResult, HybridSearchRequest, HybridSearchOptions, HybridSearchResult } from './vectordb'; import { SemanticSearchResult } from './types'; import { envManager } from './utils/env-manager'; import * as fs from 'fs'; import * as path from 'path'; import * as crypto from 'crypto'; import { FileSynchronizer } from './sync/synchronizer'; const DEFAULT_SUPPORTED_EXTENSIONS = [ // Programming languages '.ts', '.tsx', '.js', '.jsx', '.py', '.java', '.cpp', '.c', '.h', '.hpp', '.cs', '.go', '.rs', '.php', '.rb', '.swift', '.kt', '.scala', '.m', '.mm', // Text and markup files '.md', '.markdown', '.ipynb', // '.txt', '.json', '.yaml', '.yml', '.xml', '.html', '.htm', // '.css', '.scss', '.less', '.sql', '.sh', '.bash', '.env' ]; const DEFAULT_IGNORE_PATTERNS = [ // Common build output and dependency directories 'node_modules/**', 'dist/**', 'build/**', 'out/**', 'target/**', 'coverage/**', '.nyc_output/**', // IDE and editor files '.vscode/**', '.idea/**', '*.swp', '*.swo', // Version control '.git/**', '.svn/**', '.hg/**', // Cache directories '.cache/**', '__pycache__/**', '.pytest_cache/**', // Logs and temporary files 'logs/**', 'tmp/**', 'temp/**', '*.log', // Environment and config files '.env', '.env.*', '*.local', // Minified and bundled files '*.min.js', '*.min.css', '*.min.map', '*.bundle.js', '*.bundle.css', '*.chunk.js', '*.vendor.js', '*.polyfills.js', '*.runtime.js', '*.map', // source map files 'node_modules', '.git', '.svn', '.hg', 'build', 'dist', 'out', 'target', '.vscode', '.idea', '__pycache__', '.pytest_cache', 'coverage', '.nyc_output', 'logs', 'tmp', 'temp' ]; export interface ContextConfig { embedding?: Embedding; vectorDatabase?: VectorDatabase; codeSplitter?: Splitter; supportedExtensions?: string[]; ignorePatterns?: string[]; customExtensions?: string[]; // New: custom extensions from MCP customIgnorePatterns?: string[]; // New: custom ignore patterns from MCP } export class Context { private embedding: Embedding; private vectorDatabase: VectorDatabase; private codeSplitter: Splitter; private supportedExtensions: string[]; private ignorePatterns: string[]; private synchronizers = new Map<string, FileSynchronizer>(); constructor(config: ContextConfig = {}) { // Initialize services this.embedding = config.embedding || new OpenAIEmbedding({ apiKey: envManager.get('OPENAI_API_KEY') || 'your-openai-api-key', model: 'text-embedding-3-small', ...(envManager.get('OPENAI_BASE_URL') && { baseURL: envManager.get('OPENAI_BASE_URL') }) }); if (!config.vectorDatabase) { throw new Error('VectorDatabase is required. Please provide a vectorDatabase instance in the config.'); } this.vectorDatabase = config.vectorDatabase; this.codeSplitter = config.codeSplitter || new AstCodeSplitter(2500, 300); // Load custom extensions from environment variables const envCustomExtensions = this.getCustomExtensionsFromEnv(); // Combine default extensions with config extensions and env extensions const allSupportedExtensions = [ ...DEFAULT_SUPPORTED_EXTENSIONS, ...(config.supportedExtensions || []), ...(config.customExtensions || []), ...envCustomExtensions ]; // Remove duplicates this.supportedExtensions = [...new Set(allSupportedExtensions)]; // Load custom ignore patterns from environment variables const envCustomIgnorePatterns = this.getCustomIgnorePatternsFromEnv(); // Start with default ignore patterns const allIgnorePatterns = [ ...DEFAULT_IGNORE_PATTERNS, ...(config.ignorePatterns || []), ...(config.customIgnorePatterns || []), ...envCustomIgnorePatterns ]; // Remove duplicates this.ignorePatterns = [...new Set(allIgnorePatterns)]; console.log(`[Context] 🔧 Initialized with ${this.supportedExtensions.length} supported extensions and ${this.ignorePatterns.length} ignore patterns`); if (envCustomExtensions.length > 0) { console.log(`[Context] 📎 Loaded ${envCustomExtensions.length} custom extensions from environment: ${envCustomExtensions.join(', ')}`); } if (envCustomIgnorePatterns.length > 0) { console.log(`[Context] 🚫 Loaded ${envCustomIgnorePatterns.length} custom ignore patterns from environment: ${envCustomIgnorePatterns.join(', ')}`); } } /** * Get embedding instance */ getEmbedding(): Embedding { return this.embedding; } /** * Get vector database instance */ getVectorDatabase(): VectorDatabase { return this.vectorDatabase; } /** * Get code splitter instance */ getCodeSplitter(): Splitter { return this.codeSplitter; } /** * Get supported extensions */ getSupportedExtensions(): string[] { return [...this.supportedExtensions]; } /** * Get ignore patterns */ getIgnorePatterns(): string[] { return [...this.ignorePatterns]; } /** * Get synchronizers map */ getSynchronizers(): Map<string, FileSynchronizer> { return new Map(this.synchronizers); } /** * Set synchronizer for a collection */ setSynchronizer(collectionName: string, synchronizer: FileSynchronizer): void { this.synchronizers.set(collectionName, synchronizer); } /** * Public wrapper for loadIgnorePatterns private method */ async getLoadedIgnorePatterns(codebasePath: string): Promise<void> { return this.loadIgnorePatterns(codebasePath); } /** * Public wrapper for prepareCollection private method */ async getPreparedCollection(codebasePath: string): Promise<void> { return this.prepareCollection(codebasePath); } /** * Get isHybrid setting from environment variable with default true */ private getIsHybrid(): boolean { const isHybridEnv = envManager.get('HYBRID_MODE'); if (isHybridEnv === undefined || isHybridEnv === null) { return true; // Default to true } return isHybridEnv.toLowerCase() === 'true'; } /** * Generate collection name based on codebase path and hybrid mode */ public getCollectionName(codebasePath: string): string { const isHybrid = this.getIsHybrid(); const normalizedPath = path.resolve(codebasePath); const hash = crypto.createHash('md5').update(normalizedPath).digest('hex'); const prefix = isHybrid === true ? 'hybrid_code_chunks' : 'code_chunks'; return `${prefix}_${hash.substring(0, 8)}`; } /** * Index a codebase for semantic search * @param codebasePath Codebase root path * @param progressCallback Optional progress callback function * @param forceReindex Whether to recreate the collection even if it exists * @returns Indexing statistics */ async indexCodebase( codebasePath: string, progressCallback?: (progress: { phase: string; current: number; total: number; percentage: number }) => void, forceReindex: boolean = false ): Promise<{ indexedFiles: number; totalChunks: number; status: 'completed' | 'limit_reached' }> { const isHybrid = this.getIsHybrid(); const searchType = isHybrid === true ? 'hybrid search' : 'semantic search'; console.log(`[Context] 🚀 Starting to index codebase with ${searchType}: ${codebasePath}`); // 1. Load ignore patterns from various ignore files await this.loadIgnorePatterns(codebasePath); // 2. Check and prepare vector collection progressCallback?.({ phase: 'Preparing collection...', current: 0, total: 100, percentage: 0 }); console.log(`Debug2: Preparing vector collection for codebase${forceReindex ? ' (FORCE REINDEX)' : ''}`); await this.prepareCollection(codebasePath, forceReindex); // 3. Recursively traverse codebase to get all supported files progressCallback?.({ phase: 'Scanning files...', current: 5, total: 100, percentage: 5 }); const codeFiles = await this.getCodeFiles(codebasePath); console.log(`[Context] 📁 Found ${codeFiles.length} code files`); if (codeFiles.length === 0) { progressCallback?.({ phase: 'No files to index', current: 100, total: 100, percentage: 100 }); return { indexedFiles: 0, totalChunks: 0, status: 'completed' }; } // 3. Process each file with streaming chunk processing // Reserve 10% for preparation, 90% for actual indexing const indexingStartPercentage = 10; const indexingEndPercentage = 100; const indexingRange = indexingEndPercentage - indexingStartPercentage; const result = await this.processFileList( codeFiles, codebasePath, (filePath, fileIndex, totalFiles) => { // Calculate progress percentage const progressPercentage = indexingStartPercentage + (fileIndex / totalFiles) * indexingRange; console.log(`[Context] 📊 Processed ${fileIndex}/${totalFiles} files`); progressCallback?.({ phase: `Processing files (${fileIndex}/${totalFiles})...`, current: fileIndex, total: totalFiles, percentage: Math.round(progressPercentage) }); } ); console.log(`[Context] ✅ Codebase indexing completed! Processed ${result.processedFiles} files in total, generated ${result.totalChunks} code chunks`); progressCallback?.({ phase: 'Indexing complete!', current: result.processedFiles, total: codeFiles.length, percentage: 100 }); return { indexedFiles: result.processedFiles, totalChunks: result.totalChunks, status: result.status }; } async reindexByChange( codebasePath: string, progressCallback?: (progress: { phase: string; current: number; total: number; percentage: number }) => void ): Promise<{ added: number, removed: number, modified: number }> { const collectionName = this.getCollectionName(codebasePath); const synchronizer = this.synchronizers.get(collectionName); if (!synchronizer) { // Load project-specific ignore patterns before creating FileSynchronizer await this.loadIgnorePatterns(codebasePath); // To be safe, let's initialize if it's not there. const newSynchronizer = new FileSynchronizer(codebasePath, this.ignorePatterns); await newSynchronizer.initialize(); this.synchronizers.set(collectionName, newSynchronizer); } const currentSynchronizer = this.synchronizers.get(collectionName)!; progressCallback?.({ phase: 'Checking for file changes...', current: 0, total: 100, percentage: 0 }); const { added, removed, modified } = await currentSynchronizer.checkForChanges(); const totalChanges = added.length + removed.length + modified.length; if (totalChanges === 0) { progressCallback?.({ phase: 'No changes detected', current: 100, total: 100, percentage: 100 }); console.log('[Context] ✅ No file changes detected.'); return { added: 0, removed: 0, modified: 0 }; } console.log(`[Context] 🔄 Found changes: ${added.length} added, ${removed.length} removed, ${modified.length} modified.`); let processedChanges = 0; const updateProgress = (phase: string) => { processedChanges++; const percentage = Math.round((processedChanges / (removed.length + modified.length + added.length)) * 100); progressCallback?.({ phase, current: processedChanges, total: totalChanges, percentage }); }; // Handle removed files for (const file of removed) { await this.deleteFileChunks(collectionName, file); updateProgress(`Removed ${file}`); } // Handle modified files for (const file of modified) { await this.deleteFileChunks(collectionName, file); updateProgress(`Deleted old chunks for ${file}`); } // Handle added and modified files const filesToIndex = [...added, ...modified].map(f => path.join(codebasePath, f)); if (filesToIndex.length > 0) { await this.processFileList( filesToIndex, codebasePath, (filePath, fileIndex, totalFiles) => { updateProgress(`Indexed ${filePath} (${fileIndex}/${totalFiles})`); } ); } console.log(`[Context] ✅ Re-indexing complete. Added: ${added.length}, Removed: ${removed.length}, Modified: ${modified.length}`); progressCallback?.({ phase: 'Re-indexing complete!', current: totalChanges, total: totalChanges, percentage: 100 }); return { added: added.length, removed: removed.length, modified: modified.length }; } private async deleteFileChunks(collectionName: string, relativePath: string): Promise<void> { // Escape backslashes for Milvus query expression (Windows path compatibility) const escapedPath = relativePath.replace(/\\/g, '\\\\'); const results = await this.vectorDatabase.query( collectionName, `relativePath == "${escapedPath}"`, ['id'] ); if (results.length > 0) { const ids = results.map(r => r.id as string).filter(id => id); if (ids.length > 0) { await this.vectorDatabase.delete(collectionName, ids); console.log(`[Context] Deleted ${ids.length} chunks for file ${relativePath}`); } } } /** * Semantic search with unified implementation * @param codebasePath Codebase path to search in * @param query Search query * @param topK Number of results to return * @param threshold Similarity threshold */ async semanticSearch(codebasePath: string, query: string, topK: number = 5, threshold: number = 0.5, filterExpr?: string): Promise<SemanticSearchResult[]> { const isHybrid = this.getIsHybrid(); const searchType = isHybrid === true ? 'hybrid search' : 'semantic search'; console.log(`[Context] 🔍 Executing ${searchType}: "${query}" in ${codebasePath}`); const collectionName = this.getCollectionName(codebasePath); console.log(`[Context] 🔍 Using collection: ${collectionName}`); // Check if collection exists and has data const hasCollection = await this.vectorDatabase.hasCollection(collectionName); if (!hasCollection) { console.log(`[Context] ⚠️ Collection '${collectionName}' does not exist. Please index the codebase first.`); return []; } if (isHybrid === true) { try { // Check collection stats to see if it has data const stats = await this.vectorDatabase.query(collectionName, '', ['id'], 1); console.log(`[Context] 🔍 Collection '${collectionName}' exists and appears to have data`); } catch (error) { console.log(`[Context] ⚠️ Collection '${collectionName}' exists but may be empty or not properly indexed:`, error); } // 1. Generate query vector console.log(`[Context] 🔍 Generating embeddings for query: "${query}"`); const queryEmbedding: EmbeddingVector = await this.embedding.embed(query); console.log(`[Context] ✅ Generated embedding vector with dimension: ${queryEmbedding.vector.length}`); console.log(`[Context] 🔍 First 5 embedding values: [${queryEmbedding.vector.slice(0, 5).join(', ')}]`); // 2. Prepare hybrid search requests const searchRequests: HybridSearchRequest[] = [ { data: queryEmbedding.vector, anns_field: "vector", param: { "nprobe": 10 }, limit: topK }, { data: query, anns_field: "sparse_vector", param: { "drop_ratio_search": 0.2 }, limit: topK } ]; console.log(`[Context] 🔍 Search request 1 (dense): anns_field="${searchRequests[0].anns_field}", vector_dim=${queryEmbedding.vector.length}, limit=${searchRequests[0].limit}`); console.log(`[Context] 🔍 Search request 2 (sparse): anns_field="${searchRequests[1].anns_field}", query_text="${query}", limit=${searchRequests[1].limit}`); // 3. Execute hybrid search console.log(`[Context] 🔍 Executing hybrid search with RRF reranking...`); const searchResults: HybridSearchResult[] = await this.vectorDatabase.hybridSearch( collectionName, searchRequests, { rerank: { strategy: 'rrf', params: { k: 100 } }, limit: topK, filterExpr } ); console.log(`[Context] 🔍 Raw search results count: ${searchResults.length}`); // 4. Convert to semantic search result format const results: SemanticSearchResult[] = searchResults.map(result => ({ content: result.document.content, relativePath: result.document.relativePath, startLine: result.document.startLine, endLine: result.document.endLine, language: result.document.metadata.language || 'unknown', score: result.score })); console.log(`[Context] ✅ Found ${results.length} relevant hybrid results`); if (results.length > 0) { console.log(`[Context] 🔍 Top result score: ${results[0].score}, path: ${results[0].relativePath}`); } return results; } else { // Regular semantic search // 1. Generate query vector const queryEmbedding: EmbeddingVector = await this.embedding.embed(query); // 2. Search in vector database const searchResults: VectorSearchResult[] = await this.vectorDatabase.search( collectionName, queryEmbedding.vector, { topK, threshold, filterExpr } ); // 3. Convert to semantic search result format const results: SemanticSearchResult[] = searchResults.map(result => ({ content: result.document.content, relativePath: result.document.relativePath, startLine: result.document.startLine, endLine: result.document.endLine, language: result.document.metadata.language || 'unknown', score: result.score })); console.log(`[Context] ✅ Found ${results.length} relevant results`); return results; } } /** * Check if index exists for codebase * @param codebasePath Codebase path to check * @returns Whether index exists */ async hasIndex(codebasePath: string): Promise<boolean> { const collectionName = this.getCollectionName(codebasePath); return await this.vectorDatabase.hasCollection(collectionName); } /** * Clear index * @param codebasePath Codebase path to clear index for * @param progressCallback Optional progress callback function */ async clearIndex( codebasePath: string, progressCallback?: (progress: { phase: string; current: number; total: number; percentage: number }) => void ): Promise<void> { console.log(`[Context] 🧹 Cleaning index data for ${codebasePath}...`); progressCallback?.({ phase: 'Checking existing index...', current: 0, total: 100, percentage: 0 }); const collectionName = this.getCollectionName(codebasePath); const collectionExists = await this.vectorDatabase.hasCollection(collectionName); progressCallback?.({ phase: 'Removing index data...', current: 50, total: 100, percentage: 50 }); if (collectionExists) { await this.vectorDatabase.dropCollection(collectionName); } // Delete snapshot file await FileSynchronizer.deleteSnapshot(codebasePath); progressCallback?.({ phase: 'Index cleared', current: 100, total: 100, percentage: 100 }); console.log('[Context] ✅ Index data cleaned'); } /** * Update ignore patterns (merges with default patterns and existing patterns) * @param ignorePatterns Array of ignore patterns to add to defaults */ updateIgnorePatterns(ignorePatterns: string[]): void { // Merge with default patterns and any existing custom patterns, avoiding duplicates const mergedPatterns = [...DEFAULT_IGNORE_PATTERNS, ...ignorePatterns]; const uniquePatterns: string[] = []; const patternSet = new Set(mergedPatterns); patternSet.forEach(pattern => uniquePatterns.push(pattern)); this.ignorePatterns = uniquePatterns; console.log(`[Context] 🚫 Updated ignore patterns: ${ignorePatterns.length} new + ${DEFAULT_IGNORE_PATTERNS.length} default = ${this.ignorePatterns.length} total patterns`); } /** * Add custom ignore patterns (from MCP or other sources) without replacing existing ones * @param customPatterns Array of custom ignore patterns to add */ addCustomIgnorePatterns(customPatterns: string[]): void { if (customPatterns.length === 0) return; // Merge current patterns with new custom patterns, avoiding duplicates const mergedPatterns = [...this.ignorePatterns, ...customPatterns]; const uniquePatterns: string[] = []; const patternSet = new Set(mergedPatterns); patternSet.forEach(pattern => uniquePatterns.push(pattern)); this.ignorePatterns = uniquePatterns; console.log(`[Context] 🚫 Added ${customPatterns.length} custom ignore patterns. Total: ${this.ignorePatterns.length} patterns`); } /** * Reset ignore patterns to defaults only */ resetIgnorePatternsToDefaults(): void { this.ignorePatterns = [...DEFAULT_IGNORE_PATTERNS]; console.log(`[Context] 🔄 Reset ignore patterns to defaults: ${this.ignorePatterns.length} patterns`); } /** * Update embedding instance * @param embedding New embedding instance */ updateEmbedding(embedding: Embedding): void { this.embedding = embedding; console.log(`[Context] 🔄 Updated embedding provider: ${embedding.getProvider()}`); } /** * Update vector database instance * @param vectorDatabase New vector database instance */ updateVectorDatabase(vectorDatabase: VectorDatabase): void { this.vectorDatabase = vectorDatabase; console.log(`[Context] 🔄 Updated vector database`); } /** * Update splitter instance * @param splitter New splitter instance */ updateSplitter(splitter: Splitter): void { this.codeSplitter = splitter; console.log(`[Context] 🔄 Updated splitter instance`); } /** * Prepare vector collection */ private async prepareCollection(codebasePath: string, forceReindex: boolean = false): Promise<void> { const isHybrid = this.getIsHybrid(); const collectionType = isHybrid === true ? 'hybrid vector' : 'vector'; console.log(`[Context] 🔧 Preparing ${collectionType} collection for codebase: ${codebasePath}${forceReindex ? ' (FORCE REINDEX)' : ''}`); const collectionName = this.getCollectionName(codebasePath); // Check if collection already exists const collectionExists = await this.vectorDatabase.hasCollection(collectionName); if (collectionExists && !forceReindex) { console.log(`📋 Collection ${collectionName} already exists, skipping creation`); return; } if (collectionExists && forceReindex) { console.log(`[Context] 🗑️ Dropping existing collection ${collectionName} for force reindex...`); await this.vectorDatabase.dropCollection(collectionName); console.log(`[Context] ✅ Collection ${collectionName} dropped successfully`); } console.log(`[Context] 🔍 Detecting embedding dimension for ${this.embedding.getProvider()} provider...`); const dimension = await this.embedding.detectDimension(); console.log(`[Context] 📏 Detected dimension: ${dimension} for ${this.embedding.getProvider()}`); const dirName = path.basename(codebasePath); if (isHybrid === true) { await this.vectorDatabase.createHybridCollection(collectionName, dimension, `Hybrid Index for ${dirName}`); } else { await this.vectorDatabase.createCollection(collectionName, dimension, `Index for ${dirName}`); } console.log(`[Context] ✅ Collection ${collectionName} created successfully (dimension: ${dimension})`); } /** * Recursively get all code files in the codebase */ private async getCodeFiles(codebasePath: string): Promise<string[]> { const files: string[] = []; const traverseDirectory = async (currentPath: string) => { const entries = await fs.promises.readdir(currentPath, { withFileTypes: true }); for (const entry of entries) { const fullPath = path.join(currentPath, entry.name); // Check if path matches ignore patterns if (this.matchesIgnorePattern(fullPath, codebasePath)) { continue; } if (entry.isDirectory()) { await traverseDirectory(fullPath); } else if (entry.isFile()) { const ext = path.extname(entry.name); if (this.supportedExtensions.includes(ext)) { files.push(fullPath); } } } }; await traverseDirectory(codebasePath); return files; } /** * Process a list of files with streaming chunk processing * @param filePaths Array of file paths to process * @param codebasePath Base path for the codebase * @param onFileProcessed Callback called when each file is processed * @returns Object with processed file count and total chunk count */ private async processFileList( filePaths: string[], codebasePath: string, onFileProcessed?: (filePath: string, fileIndex: number, totalFiles: number) => void ): Promise<{ processedFiles: number; totalChunks: number; status: 'completed' | 'limit_reached' }> { const isHybrid = this.getIsHybrid(); const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10)); const CHUNK_LIMIT = 450000; console.log(`[Context] 🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`); let chunkBuffer: Array<{ chunk: CodeChunk; codebasePath: string }> = []; let processedFiles = 0; let totalChunks = 0; let limitReached = false; for (let i = 0; i < filePaths.length; i++) { const filePath = filePaths[i]; try { const content = await fs.promises.readFile(filePath, 'utf-8'); const language = this.getLanguageFromExtension(path.extname(filePath)); const chunks = await this.codeSplitter.split(content, language, filePath); // Log files with many chunks or large content if (chunks.length > 50) { console.warn(`[Context] ⚠️ File ${filePath} generated ${chunks.length} chunks (${Math.round(content.length / 1024)}KB)`); } else if (content.length > 100000) { console.log(`📄 Large file ${filePath}: ${Math.round(content.length / 1024)}KB -> ${chunks.length} chunks`); } // Add chunks to buffer for (const chunk of chunks) { chunkBuffer.push({ chunk, codebasePath }); totalChunks++; // Process batch when buffer reaches EMBEDDING_BATCH_SIZE if (chunkBuffer.length >= EMBEDDING_BATCH_SIZE) { try { await this.processChunkBuffer(chunkBuffer); } catch (error) { const searchType = isHybrid === true ? 'hybrid' : 'regular'; console.error(`[Context] ❌ Failed to process chunk batch for ${searchType}:`, error); if (error instanceof Error) { console.error('[Context] Stack trace:', error.stack); } } finally { chunkBuffer = []; // Always clear buffer, even on failure } } // Check if chunk limit is reached if (totalChunks >= CHUNK_LIMIT) { console.warn(`[Context] ⚠️ Chunk limit of ${CHUNK_LIMIT} reached. Stopping indexing.`); limitReached = true; break; // Exit the inner loop (over chunks) } } processedFiles++; onFileProcessed?.(filePath, i + 1, filePaths.length); if (limitReached) { break; // Exit the outer loop (over files) } } catch (error) { console.warn(`[Context] ⚠️ Skipping file ${filePath}: ${error}`); } } // Process any remaining chunks in the buffer if (chunkBuffer.length > 0) { const searchType = isHybrid === true ? 'hybrid' : 'regular'; console.log(`📝 Processing final batch of ${chunkBuffer.length} chunks for ${searchType}`); try { await this.processChunkBuffer(chunkBuffer); } catch (error) { console.error(`[Context] ❌ Failed to process final chunk batch for ${searchType}:`, error); if (error instanceof Error) { console.error('[Context] Stack trace:', error.stack); } } } return { processedFiles, totalChunks, status: limitReached ? 'limit_reached' : 'completed' }; } /** * Process accumulated chunk buffer */ private async processChunkBuffer(chunkBuffer: Array<{ chunk: CodeChunk; codebasePath: string }>): Promise<void> { if (chunkBuffer.length === 0) return; // Extract chunks and ensure they all have the same codebasePath const chunks = chunkBuffer.map(item => item.chunk); const codebasePath = chunkBuffer[0].codebasePath; // Estimate tokens (rough estimation: 1 token ≈ 4 characters) const estimatedTokens = chunks.reduce((sum, chunk) => sum + Math.ceil(chunk.content.length / 4), 0); const isHybrid = this.getIsHybrid(); const searchType = isHybrid === true ? 'hybrid' : 'regular'; console.log(`[Context] 🔄 Processing batch of ${chunks.length} chunks (~${estimatedTokens} tokens) for ${searchType}`); await this.processChunkBatch(chunks, codebasePath); } /** * Process a batch of chunks */ private async processChunkBatch(chunks: CodeChunk[], codebasePath: string): Promise<void> { const isHybrid = this.getIsHybrid(); // Generate embedding vectors const chunkContents = chunks.map(chunk => chunk.content); const embeddings = await this.embedding.embedBatch(chunkContents); if (isHybrid === true) { // Create hybrid vector documents const documents: VectorDocument[] = chunks.map((chunk, index) => { if (!chunk.metadata.filePath) { throw new Error(`Missing filePath in chunk metadata at index ${index}`); } const relativePath = path.relative(codebasePath, chunk.metadata.filePath); const fileExtension = path.extname(chunk.metadata.filePath); const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata; return { id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content), content: chunk.content, // Full text content for BM25 and storage vector: embeddings[index].vector, // Dense vector relativePath, startLine: chunk.metadata.startLine || 0, endLine: chunk.metadata.endLine || 0, fileExtension, metadata: { ...restMetadata, codebasePath, language: chunk.metadata.language || 'unknown', chunkIndex: index } }; }); // Store to vector database await this.vectorDatabase.insertHybrid(this.getCollectionName(codebasePath), documents); } else { // Create regular vector documents const documents: VectorDocument[] = chunks.map((chunk, index) => { if (!chunk.metadata.filePath) { throw new Error(`Missing filePath in chunk metadata at index ${index}`); } const relativePath = path.relative(codebasePath, chunk.metadata.filePath); const fileExtension = path.extname(chunk.metadata.filePath); const { filePath, startLine, endLine, ...restMetadata } = chunk.metadata; return { id: this.generateId(relativePath, chunk.metadata.startLine || 0, chunk.metadata.endLine || 0, chunk.content), vector: embeddings[index].vector, content: chunk.content, relativePath, startLine: chunk.metadata.startLine || 0, endLine: chunk.metadata.endLine || 0, fileExtension, metadata: { ...restMetadata, codebasePath, language: chunk.metadata.language || 'unknown', chunkIndex: index } }; }); // Store to vector database await this.vectorDatabase.insert(this.getCollectionName(codebasePath), documents); } } /** * Get programming language based on file extension */ private getLanguageFromExtension(ext: string): string { const languageMap: Record<string, string> = { '.ts': 'typescript', '.tsx': 'typescript', '.js': 'javascript', '.jsx': 'javascript', '.py': 'python', '.java': 'java', '.cpp': 'cpp', '.c': 'c', '.h': 'c', '.hpp': 'cpp', '.cs': 'csharp', '.go': 'go', '.rs': 'rust', '.php': 'php', '.rb': 'ruby', '.swift': 'swift', '.kt': 'kotlin', '.scala': 'scala', '.m': 'objective-c', '.mm': 'objective-c', '.ipynb': 'jupyter' }; return languageMap[ext] || 'text'; } /** * Generate unique ID based on chunk content and location * @param relativePath Relative path to the file * @param startLine Start line number * @param endLine End line number * @param content Chunk content * @returns Hash-based unique ID */ private generateId(relativePath: string, startLine: number, endLine: number, content: string): string { const combinedString = `${relativePath}:${startLine}:${endLine}:${content}`; const hash = crypto.createHash('sha256').update(combinedString, 'utf-8').digest('hex'); return `chunk_${hash.substring(0, 16)}`; } /** * Read ignore patterns from file (e.g., .gitignore) * @param filePath Path to the ignore file * @returns Array of ignore patterns */ static async getIgnorePatternsFromFile(filePath: string): Promise<string[]> { try { const content = await fs.promises.readFile(filePath, 'utf-8'); return content .split('\n') .map(line => line.trim()) .filter(line => line && !line.startsWith('#')); // Filter out empty lines and comments } catch (error) { console.warn(`[Context] ⚠️ Could not read ignore file ${filePath}: ${error}`); return []; } } /** * Load ignore patterns from various ignore files in the codebase * This method preserves any existing custom patterns that were added before * @param codebasePath Path to the codebase */ private async loadIgnorePatterns(codebasePath: string): Promise<void> { try { let fileBasedPatterns: string[] = []; // Load all .xxxignore files in codebase directory const ignoreFiles = await this.findIgnoreFiles(codebasePath); for (const ignoreFile of ignoreFiles) { const patterns = await this.loadIgnoreFile(ignoreFile, path.basename(ignoreFile)); fileBasedPatterns.push(...patterns); } // Load global ~/.context/.contextignore const globalIgnorePatterns = await this.loadGlobalIgnoreFile(); fileBasedPatterns.push(...globalIgnorePatterns); // Merge file-based patterns with existing patterns (which may include custom MCP patterns) if (fileBasedPatterns.length > 0) { this.addCustomIgnorePatterns(fileBasedPatterns); console.log(`[Context] 🚫 Loaded total ${fileBasedPatterns.length} ignore patterns from all ignore files`); } else { console.log('📄 No ignore files found, keeping existing patterns'); } } catch (error) { console.warn(`[Context] ⚠️ Failed to load ignore patterns: ${error}`); // Continue with existing patterns on error - don't reset them } } /** * Find all .xxxignore files in the codebase directory * @param codebasePath Path to the codebase * @returns Array of ignore file paths */ private async findIgnoreFiles(codebasePath: string): Promise<string[]> { try { const entries = await fs.promises.readdir(codebasePath, { withFileTypes: true }); const ignoreFiles: string[] = []; for (const entry of entries) { if (entry.isFile() && entry.name.startsWith('.') && entry.name.endsWith('ignore')) { ignoreFiles.push(path.join(codebasePath, entry.name)); } } if (ignoreFiles.length > 0) { console.log(`📄 Found ignore files: ${ignoreFiles.map(f => path.basename(f)).join(', ')}`); } return ignoreFiles; } catch (error) { console.warn(`[Context] ⚠️ Failed to scan for ignore files: ${error}`); return []; } } /** * Load global ignore file from ~/.context/.contextignore * @returns Array of ignore patterns */ private async loadGlobalIgnoreFile(): Promise<string[]> { try { const homeDir = require('os').homedir(); const globalIgnorePath = path.join(homeDir, '.context', '.contextignore'); return await this.loadIgnoreFile(globalIgnorePath, 'global .contextignore'); } catch (error) { // Global ignore file is optional, don't log warnings return []; } } /** * Load ignore patterns from a specific ignore file * @param filePath Path to the ignore file * @param fileName Display name for logging * @returns Array of ignore patterns */ private async loadIgnoreFile(filePath: string, fileName: string): Promise<string[]> { try { await fs.promises.access(filePath); console.log(`📄 Found ${fileName} file at: ${filePath}`); const ignorePatterns = await Context.getIgnorePatternsFromFile(filePath); if (ignorePatterns.length > 0) { console.log(`[Context] 🚫 Loaded ${ignorePatterns.length} ignore patterns from ${fileName}`); return ignorePatterns; } else { console.log(`📄 ${fileName} file found but no valid patterns detected`); return []; } } catch (error) { if (fileName.includes('global')) { console.log(`📄 No ${fileName} file found`); } return []; } } /** * Check if a path matches any ignore pattern * @param filePath Path to check * @param basePath Base path for relative pattern matching * @returns True if path should be ignored */ private matchesIgnorePattern(filePath: string, basePath: string): boolean { if (this.ignorePatterns.length === 0) { return false; } const relativePath = path.relative(basePath, filePath); const normalizedPath = relativePath.replace(/\\/g, '/'); // Normalize path separators for (const pattern of this.ignorePatterns) { if (this.isPatternMatch(normalizedPath, pattern)) { return true; } } return false; } /** * Simple glob pattern matching * @param filePath File path to test * @param pattern Glob pattern * @returns True if pattern matches */ private isPatternMatch(filePath: string, pattern: string): boolean { // Handle directory patterns (ending with /) if (pattern.endsWith('/')) { const dirPattern = pattern.slice(0, -1); const pathParts = filePath.split('/'); return pathParts.some(part => this.simpleGlobMatch(part, dirPattern)); } // Handle file patterns if (pattern.includes('/')) { // Pattern with path separator - match exact path return this.simpleGlobMatch(filePath, pattern); } else { // Pattern without path separator - match filename in any directory const fileName = path.basename(filePath); return this.simpleGlobMatch(fileName, pattern); } } /** * Simple glob matching supporting * wildcard * @param text Text to test * @param pattern Pattern with * wildcards * @returns True if pattern matches */ private simpleGlobMatch(text: string, pattern: string): boolean { // Convert glob pattern to regex const regexPattern = pattern .replace(/[.+^${}()|[\]\\]/g, '\\$&') // Escape regex special chars except * .replace(/\*/g, '.*'); // Convert * to .* const regex = new RegExp(`^${regexPattern}$`); return regex.test(text); } /** * Get custom extensions from environment variables * Supports CUSTOM_EXTENSIONS as comma-separated list * @returns Array of custom extensions */ private getCustomExtensionsFromEnv(): string[] { const envExtensions = envManager.get('CUSTOM_EXTENSIONS'); if (!envExtensions) { return []; } try { const extensions = envExtensions .split(',') .map(ext => ext.trim()) .filter(ext => ext.length > 0) .map(ext => ext.startsWith('.') ? ext : `.${ext}`); // Ensure extensions start with dot return extensions; } catch (error) { console.warn(`[Context] ⚠️ Failed to parse CUSTOM_EXTENSIONS: ${error}`); return []; } } /** * Get custom ignore patterns from environment variables * Supports CUSTOM_IGNORE_PATTERNS as comma-separated list * @returns Array of custom ignore patterns */ private getCustomIgnorePatternsFromEnv(): string[] { const envIgnorePatterns = envManager.get('CUSTOM_IGNORE_PATTERNS'); if (!envIgnorePatterns) { return []; } try { const patterns = envIgnorePatterns .split(',') .map(pattern => pattern.trim()) .filter(pattern => pattern.length > 0); return patterns; } catch (error) { console.warn(`[Context] ⚠️ Failed to parse CUSTOM_IGNORE_PATTERNS: ${error}`); return []; } } /** * Add custom extensions (from MCP or other sources) without replacing existing ones * @param customExtensions Array of custom extensions to add */ addCustomExtensions(customExtensions: string[]): void { if (customExtensions.length === 0) return; // Ensure extensions start with dot const normalizedExtensions = customExtensions.map(ext => ext.startsWith('.') ? ext : `.${ext}` ); // Merge current extensions with new custom extensions, avoiding duplicates const mergedExtensions = [...this.supportedExtensions, ...normalizedExtensions]; const uniqueExtensions: string[] = [...new Set(mergedExtensions)]; this.supportedExtensions = uniqueExtensions; console.log(`[Context] 📎 Added ${customExtensions.length} custom extensions. Total: ${this.supportedExtensions.length} extensions`); } /** * Get current splitter information */ getSplitterInfo(): { type: string; hasBuiltinFallback: boolean; supportedLanguages?: string[] } { const splitterName = this.codeSplitter.constructor.name; if (splitterName === 'AstCodeSplitter') { const { AstCodeSplitter } = require('./splitter/ast-splitter'); return { type: 'ast', hasBuiltinFallback: true, supportedLanguages: AstCodeSplitter.getSupportedLanguages() }; } else { return { type: 'langchain', hasBuiltinFallback: false }; } } /** * Check if current splitter supports a specific language * @param language Programming language */ isLanguageSupported(language: string): boolean { const splitterName = this.codeSplitter.constructor.name; if (splitterName === 'AstCodeSplitter') { const { AstCodeSplitter } = require('./splitter/ast-splitter'); return AstCodeSplitter.isLanguageSupported(language); } // LangChain splitter supports most languages return true; } /** * Get which strategy would be used for a specific language * @param language Programming language */ getSplitterStrategyForLanguage(language: string): { strategy: 'ast' | 'langchain'; reason: string } { const splitterName = this.codeSplitter.constructor.name; if (splitterName === 'AstCodeSplitter') { const { AstCodeSplitter } = require('./splitter/ast-splitter'); const isSupported = AstCodeSplitter.isLanguageSupported(language); return { strategy: isSupported ? 'ast' : 'langchain', reason: isSupported ? 'Language supported by AST parser' : 'Language not supported by AST, will fallback to LangChain' }; } else { return { strategy: 'langchain', reason: 'Using LangChain splitter directly' }; } } } ```