This is page 2 of 8. Use http://codebase.md/tosin2013/mcp-codebase-insight?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .bumpversion.cfg
├── .codecov.yml
├── .compile-venv-py3.11
│   ├── bin
│   │   ├── activate
│   │   ├── activate.csh
│   │   ├── activate.fish
│   │   ├── Activate.ps1
│   │   ├── coverage
│   │   ├── coverage-3.11
│   │   ├── coverage3
│   │   ├── pip
│   │   ├── pip-compile
│   │   ├── pip-sync
│   │   ├── pip3
│   │   ├── pip3.11
│   │   ├── py.test
│   │   ├── pyproject-build
│   │   ├── pytest
│   │   ├── python
│   │   ├── python3
│   │   ├── python3.11
│   │   └── wheel
│   └── pyvenv.cfg
├── .env.example
├── .github
│   └── workflows
│       ├── build-verification.yml
│       ├── publish.yml
│       └── tdd-verification.yml
├── .gitignore
├── async_fixture_wrapper.py
├── CHANGELOG.md
├── CLAUDE.md
├── codebase_structure.txt
├── component_test_runner.py
├── CONTRIBUTING.md
├── core_workflows.txt
├── debug_tests.md
├── Dockerfile
├── docs
│   ├── adrs
│   │   └── 001_use_docker_for_qdrant.md
│   ├── api.md
│   ├── components
│   │   └── README.md
│   ├── cookbook.md
│   ├── development
│   │   ├── CODE_OF_CONDUCT.md
│   │   ├── CONTRIBUTING.md
│   │   └── README.md
│   ├── documentation_map.md
│   ├── documentation_summary.md
│   ├── features
│   │   ├── adr-management.md
│   │   ├── code-analysis.md
│   │   └── documentation.md
│   ├── getting-started
│   │   ├── configuration.md
│   │   ├── docker-setup.md
│   │   ├── installation.md
│   │   ├── qdrant_setup.md
│   │   └── quickstart.md
│   ├── qdrant_setup.md
│   ├── README.md
│   ├── SSE_INTEGRATION.md
│   ├── system_architecture
│   │   └── README.md
│   ├── templates
│   │   └── adr.md
│   ├── testing_guide.md
│   ├── troubleshooting
│   │   ├── common-issues.md
│   │   └── faq.md
│   ├── vector_store_best_practices.md
│   └── workflows
│       └── README.md
├── error_logs.txt
├── examples
│   └── use_with_claude.py
├── github-actions-documentation.md
├── Makefile
├── module_summaries
│   ├── backend_summary.txt
│   ├── database_summary.txt
│   └── frontend_summary.txt
├── output.txt
├── package-lock.json
├── package.json
├── PLAN.md
├── prepare_codebase.sh
├── PULL_REQUEST.md
├── pyproject.toml
├── pytest.ini
├── README.md
├── requirements-3.11.txt
├── requirements-3.11.txt.backup
├── requirements-dev.txt
├── requirements.in
├── requirements.txt
├── run_build_verification.sh
├── run_fixed_tests.sh
├── run_test_with_path_fix.sh
├── run_tests.py
├── scripts
│   ├── check_qdrant_health.sh
│   ├── compile_requirements.sh
│   ├── load_example_patterns.py
│   ├── macos_install.sh
│   ├── README.md
│   ├── setup_qdrant.sh
│   ├── start_mcp_server.sh
│   ├── store_code_relationships.py
│   ├── store_report_in_mcp.py
│   ├── validate_knowledge_base.py
│   ├── validate_poc.py
│   ├── validate_vector_store.py
│   └── verify_build.py
├── server.py
├── setup_qdrant_collection.py
├── setup.py
├── src
│   └── mcp_codebase_insight
│       ├── __init__.py
│       ├── __main__.py
│       ├── asgi.py
│       ├── core
│       │   ├── __init__.py
│       │   ├── adr.py
│       │   ├── cache.py
│       │   ├── component_status.py
│       │   ├── config.py
│       │   ├── debug.py
│       │   ├── di.py
│       │   ├── documentation.py
│       │   ├── embeddings.py
│       │   ├── errors.py
│       │   ├── health.py
│       │   ├── knowledge.py
│       │   ├── metrics.py
│       │   ├── prompts.py
│       │   ├── sse.py
│       │   ├── state.py
│       │   ├── task_tracker.py
│       │   ├── tasks.py
│       │   └── vector_store.py
│       ├── models.py
│       ├── server_test_isolation.py
│       ├── server.py
│       ├── utils
│       │   ├── __init__.py
│       │   └── logger.py
│       └── version.py
├── start-mcpserver.sh
├── summary_document.txt
├── system-architecture.md
├── system-card.yml
├── test_fix_helper.py
├── test_fixes.md
├── test_function.txt
├── test_imports.py
├── tests
│   ├── components
│   │   ├── conftest.py
│   │   ├── test_core_components.py
│   │   ├── test_embeddings.py
│   │   ├── test_knowledge_base.py
│   │   ├── test_sse_components.py
│   │   ├── test_stdio_components.py
│   │   ├── test_task_manager.py
│   │   └── test_vector_store.py
│   ├── config
│   │   └── test_config_and_env.py
│   ├── conftest.py
│   ├── integration
│   │   ├── fixed_test2.py
│   │   ├── test_api_endpoints.py
│   │   ├── test_api_endpoints.py-e
│   │   ├── test_communication_integration.py
│   │   └── test_server.py
│   ├── README.md
│   ├── README.test.md
│   ├── test_build_verifier.py
│   └── test_file_relationships.py
└── trajectories
    └── tosinakinosho
        ├── anthropic_filemap__claude-3-sonnet-20240229__t-0.00__p-1.00__c-3.00___db62b9
        │   └── db62b9
        │       └── config.yaml
        ├── default__claude-3-5-sonnet-20240620__t-0.00__p-1.00__c-3.00___03565e
        │   └── 03565e
        │       ├── 03565e.traj
        │       └── config.yaml
        └── default__openrouter
            └── anthropic
                └── claude-3.5-sonnet-20240620:beta__t-0.00__p-1.00__c-3.00___03565e
                    └── 03565e
                        ├── 03565e.pred
                        ├── 03565e.traj
                        └── config.yaml
```

# Files

--------------------------------------------------------------------------------
/docs/adrs/001_use_docker_for_qdrant.md:
--------------------------------------------------------------------------------

```markdown
# Use Docker for Qdrant Vector Database

## Status

Accepted

## Context

We need a vector database to store and search through code patterns and documentation embeddings. Qdrant is chosen as our vector database solution, and we need to determine the best way to deploy and manage it.

## Decision Drivers

* Ease of deployment and setup
* Development environment consistency
* Production readiness
* Resource management
* Scalability
* Maintainability

## Considered Options

### Option 1: Docker Container

* Use official Qdrant Docker image
* Run as containerized service
* Manage with Docker Compose for local development
* Use Kubernetes for production deployment

### Option 2: Native Installation

* Install Qdrant directly on host system
* Manage as system service
* Configure through system files
* Handle updates through package manager

### Option 3: Cloud-Hosted Solution

* Use managed Qdrant Cloud service
* Pay per usage
* Managed infrastructure
* Automatic updates and maintenance

## Decision

We will use Docker for running Qdrant. This decision is based on several factors:

1. **Development Environment**: Docker provides consistent environment across all developer machines
2. **Easy Setup**: Simple `docker run` command to get started
3. **Resource Isolation**: Container ensures clean resource management
4. **Version Control**: Easy version management through Docker tags
5. **Production Ready**: Same container can be used in production
6. **Scaling**: Can be deployed to Kubernetes when needed

## Expected Consequences

### Positive Consequences

* Consistent environment across development and production
* Easy setup process for new developers
* Clean isolation from other system components
* Simple version management
* Clear resource boundaries
* Easy backup and restore procedures
* Portable across different platforms

### Negative Consequences

* Additional Docker knowledge required
* Small performance overhead from containerization
* Need to manage container resources carefully
* Additional complexity in monitoring setup

## Pros and Cons of the Options

### Docker Container

* ✅ Consistent environment
* ✅ Easy setup and teardown
* ✅ Good isolation
* ✅ Version control
* ✅ Production ready
* ❌ Container overhead
* ❌ Requires Docker knowledge

### Native Installation

* ✅ Direct system access
* ✅ No containerization overhead
* ✅ Full control over configuration
* ❌ System-dependent setup
* ❌ Potential conflicts with system packages
* ❌ More complex version management

### Cloud-Hosted Solution

* ✅ No infrastructure management
* ✅ Automatic scaling
* ✅ Managed backups
* ❌ Higher cost
* ❌ Less control
* ❌ Internet dependency
* ❌ Potential latency issues

## Implementation

### Docker Run Command

```bash
docker run -d -p 6333:6333 -p 6334:6334 \
    -v $(pwd)/qdrant_storage:/qdrant/storage \
    qdrant/qdrant
```

### Docker Compose Configuration

```yaml
version: '3.8'
services:
  qdrant:
    image: qdrant/qdrant
    ports:
      - "6333:6333"
      - "6334:6334"
    volumes:
      - qdrant_storage:/qdrant/storage
    environment:
      - RUST_LOG=info

volumes:
  qdrant_storage:
```
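
The notes below call for health checks; a minimal Compose healthcheck along these lines could be added to the service above. The probe command, interval, and retry values are illustrative assumptions rather than part of the accepted configuration, and the probe assumes a shell and `curl` are available in the image (newer Qdrant images may need a TCP-based check instead):

```yaml
services:
  qdrant:
    healthcheck:
      # Poll Qdrant's REST port; swap in a TCP-based check if curl is unavailable
      test: ["CMD-SHELL", "curl -sf http://localhost:6333/ || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
```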

## Notes

* Monitor container resource usage in production
* Set up proper backup procedures for the storage volume
* Consider implementing health checks
* Document recovery procedures

## Metadata

* Created: 2025-03-19
* Last Modified: 2025-03-19
* Author: Development Team
* Approvers: Technical Lead, Infrastructure Team
* Status: Accepted
* Tags: infrastructure, database, docker, vector-search
* References:
  * [Qdrant Docker Documentation](https://qdrant.tech/documentation/guides/installation/#docker)
  * [Docker Best Practices](https://docs.docker.com/develop/develop-images/dockerfile_best-practices/)
```

--------------------------------------------------------------------------------
/tests/components/test_vector_store.py:
--------------------------------------------------------------------------------

```python
import pytest
import pytest_asyncio
import uuid
import sys
import os
from pathlib import Path
from typing import AsyncGenerator, Dict
from fastapi.testclient import TestClient
# Ensure the src directory is in the Python path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
from src.mcp_codebase_insight.core.vector_store import VectorStore
from src.mcp_codebase_insight.core.config import ServerConfig
from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding
import logging

logger = logging.getLogger(__name__)

@pytest_asyncio.fixture
async def test_metadata() -> Dict:
    """Standard test metadata for consistency across tests."""
    return {
        "type": "code",
        "language": "python",
        "title": "Test Code",
        "description": "Test code snippet for vector store testing",
        "tags": ["test", "vector"]
    }

@pytest_asyncio.fixture
async def embedder():
    return SentenceTransformerEmbedding()

@pytest_asyncio.fixture
async def vector_store(test_config: ServerConfig, embedder):
    store = VectorStore(test_config.qdrant_url, embedder)
    await store.initialize()
    yield store
    await store.cleanup()

@pytest.mark.asyncio
async def test_vector_store_initialization(vector_store: VectorStore):
    """Test that vector store initializes correctly."""
    assert vector_store is not None
    assert vector_store.embedder is not None
    assert vector_store.client is not None
    assert vector_store.initialized is True
    logger.info("Vector store initialization test passed")

@pytest.mark.asyncio
async def test_vector_store_add_and_search(vector_store: VectorStore, test_metadata: Dict):
    """Test adding and searching vectors."""
    # Test data
    test_text = "Test code snippet with unique identifier"

    # Add vector
    logger.info("Adding vector to store")
    vector_id = await vector_store.add_vector(test_text, test_metadata)
    assert vector_id is not None

    # Search for similar vectors
    logger.info("Searching for similar vectors")
    results = await vector_store.search_similar(test_text, limit=1)
    assert len(results) > 0

    # Use get() with default value for safety
    assert results[0].metadata.get("type", "unknown") == "code"

    # Log metadata for debugging
    logger.info(f"Original metadata: {test_metadata}")
    logger.info(f"Retrieved metadata: {results[0].metadata}")

    # Verify all expected metadata fields are present
    missing_keys = []
    for key in test_metadata:
        if key not in results[0].metadata:
            missing_keys.append(key)

    assert not missing_keys, f"Metadata is missing expected keys: {missing_keys}"

    logger.info("Vector store add and search test passed")

@pytest.mark.asyncio
async def test_vector_store_cleanup(test_config: ServerConfig, embedder: SentenceTransformerEmbedding):
    """Test that cleanup works correctly."""
    # Use the configured collection name for this test
    # This ensures we're using the properly initialized collection
    collection_name = os.environ.get("MCP_COLLECTION_NAME", test_config.collection_name)

    store = VectorStore(
        test_config.qdrant_url,
        embedder,
        collection_name=collection_name
    )

    logger.info(f"Initializing vector store with collection {collection_name}")
    await store.initialize()
    assert store.initialized is True

    # Add a vector to verify there's something to clean up
    await store.add_vector("Test cleanup text", {"type": "test"})

    # Now clean up
    logger.info(f"Cleaning up vector store with collection {collection_name}")
    await store.cleanup()

    # Verify the store is no longer initialized
    assert store.initialized is False

    # Clean up remaining resources
    await store.close()

    logger.info("Vector store cleanup test passed")
```
--------------------------------------------------------------------------------
/system-card.yml:
--------------------------------------------------------------------------------

```yaml
name: MCP Codebase Insight
version: 0.1.0
description: A system for analyzing and understanding codebases through semantic analysis, pattern detection, and documentation management.

poc_scope:
  - Vector-based code analysis and similarity search
  - Pattern detection and knowledge base operations
  - Dual-transport architecture (SSE and stdio)
  - Task management and tracking
  - Memory operations and persistence

environment:
  requirements:
    python: ">=3.11"
    docker: ">=20.10.0"
    ram_gb: 4
    cpu_cores: 2
    disk_space_gb: 20

dependencies:
  core:
    - mcp-firecrawl
    - httpx-sse
    - python-frontmatter
    - qdrant-client>=1.13.3
    - fastapi>=0.115.12
    - numpy>=2.2.4

  transport:
    - mcp-transport
    - mcp-stdio
    - mcp-sse

  development:
    - pytest
    - black
    - isort
    - mypy
    - pip-tools
    - bump2version

configuration:
  env_vars:
    required:
      - QDRANT_HOST
      - QDRANT_PORT
      - API_KEY
      - TRANSPORT_MODE
    optional:
      - DEBUG_MODE
      - LOG_LEVEL

  files:
    required:
      - .env
      - docker-compose.yml
    optional:
      - .env.local

setup:
  steps:
    1_environment:
      - Create and activate Python virtual environment
      - Install dependencies from requirements.txt
      - Copy .env.example to .env and configure

    2_services:
      - Start Docker
      - Run docker-compose up for Qdrant
      - Wait for services to be ready

    3_validation:
      - Run main PoC validation script
      - Check individual component validations if needed
      - Verify transport configurations

validation:
  scripts:
    main:
      path: scripts/validate_poc.py
      description: "Main validation script that orchestrates all component checks"

    components:
      vector_store:
        path: scripts/validate_vector_store.py
        description: "Validates vector store operations and search functionality"

      knowledge_base:
        path: scripts/validate_knowledge_base.py
        description: "Tests knowledge base operations and entity relations"

      transport:
        description: "Transport validation is included in the main PoC script"

health_checks:
  services:
    qdrant:
      endpoint: http://localhost:6333/health
      method: GET

    api:
      endpoint: http://localhost:8000/health
      method: GET
      headers:
        Authorization: "Bearer ${API_KEY}"

  functional_checks:
    vector_store:
      - Test vector operations with sample code
      - Validate embedding dimensions
      - Verify search functionality

    knowledge_base:
      - Create and verify test entities
      - Test entity relations
      - Validate query operations

    transport:
      sse:
        - Verify event stream connection
        - Test bidirectional communication
        - Check error handling

      stdio:
        - Verify process communication
        - Test command execution
        - Validate response format

troubleshooting:
  environment:
    - Check Python and Docker versions
    - Verify system resources
    - Validate dependency installation

  services:
    - Check Docker container status
    - View service logs
    - Verify port availability

  transport:
    - Test SSE endpoint connectivity
    - Verify stdio binary functionality
    - Check authentication configuration

  data:
    - Verify Qdrant collection status
    - Check knowledge base connectivity
    - Test data persistence

metrics:
  collection:
    - System resource usage
    - Request latency
    - Transport performance
    - Operation success rates

  monitoring:
    - Component health status
    - Error rates and types
    - Resource utilization
    - Transport switching events

documentation:
  references:
    - docs/system_architecture/README.md
    - docs/api/README.md
    - docs/adrs/006_transport_protocols.md
    - docs/development/README.md
```
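
The `health_checks` entries in the system card above can be exercised by hand; a quick sketch with curl, assuming the services are on their default ports and `API_KEY` is exported in the shell:

```bash
# Qdrant liveness, as listed under health_checks.services.qdrant
curl -sf http://localhost:6333/health

# API health endpoint with bearer auth, as listed under health_checks.services.api
curl -sf -H "Authorization: Bearer ${API_KEY}" http://localhost:8000/health
```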

--------------------------------------------------------------------------------
/examples/use_with_claude.py:
--------------------------------------------------------------------------------

```python
"""Example of using MCP Codebase Insight with Claude."""

import json
import httpx
import os
from typing import Dict, Any
import asyncio

# Configure server URL
SERVER_URL = os.getenv("MCP_SERVER_URL", "http://localhost:3000")

async def call_tool(name: str, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Call a tool endpoint on the server."""
    async with httpx.AsyncClient() as client:
        response = await client.post(
            f"{SERVER_URL}/tools/{name}",
            json={
                "name": name,
                "arguments": arguments
            }
        )
        response.raise_for_status()
        return response.json()

async def analyze_code(code: str, context: Dict[str, Any] = None) -> Dict[str, Any]:
    """Analyze code using the server."""
    return await call_tool("analyze-code", {
        "code": code,
        "context": context or {}
    })

async def search_knowledge(query: str, pattern_type: str = None) -> Dict[str, Any]:
    """Search knowledge base."""
    return await call_tool("search-knowledge", {
        "query": query,
        "type": pattern_type,
        "limit": 5
    })

async def create_adr(
    title: str,
    context: Dict[str, Any],
    options: list,
    decision: str
) -> Dict[str, Any]:
    """Create an ADR."""
    return await call_tool("create-adr", {
        "title": title,
        "context": context,
        "options": options,
        "decision": decision
    })

async def debug_issue(
    description: str,
    issue_type: str = None,
    context: Dict[str, Any] = None
) -> Dict[str, Any]:
    """Debug an issue."""
    return await call_tool("debug-issue", {
        "description": description,
        "type": issue_type,
        "context": context or {}
    })

async def get_task_status(task_id: str) -> Dict[str, Any]:
    """Get task status and results."""
    return await call_tool("get-task", {
        "task_id": task_id
    })

async def main():
    """Example usage."""
    try:
        # Example code analysis
        code = """
def calculate_fibonacci(n: int) -> int:
    if n <= 1:
        return n
    return calculate_fibonacci(n-1) + calculate_fibonacci(n-2)
"""
        print("\nAnalyzing code...")
        result = await analyze_code(code)
        print(json.dumps(result, indent=2))

        # Example knowledge search
        print("\nSearching knowledge base...")
        result = await search_knowledge(
            query="What are the best practices for error handling in Python?",
            pattern_type="code"
        )
        print(json.dumps(result, indent=2))

        # Example ADR creation
        print("\nCreating ADR...")
        result = await create_adr(
            title="Use FastAPI for REST API",
            context={
                "problem": "Need a modern Python web framework",
                "constraints": ["Must be async", "Must have good documentation"]
            },
            options=[
                {
                    "title": "FastAPI",
                    "pros": ["Async by default", "Great docs", "Type hints"],
                    "cons": ["Newer framework"]
                },
                {
                    "title": "Flask",
                    "pros": ["Mature", "Simple"],
                    "cons": ["Not async by default"]
                }
            ],
            decision="We will use FastAPI for its async support and type hints"
        )
        print(json.dumps(result, indent=2))

        # Example debugging
        print("\nDebugging issue...")
        result = await debug_issue(
            description="Application crashes when processing large files",
            issue_type="performance",
            context={
                "file_size": "2GB",
                "memory_usage": "8GB",
                "error": "MemoryError"
            }
        )
        print(json.dumps(result, indent=2))

    except Exception as e:
        print(f"Error: {e}")

if __name__ == "__main__":
    asyncio.run(main())
```

--------------------------------------------------------------------------------
/system-architecture.md:
--------------------------------------------------------------------------------

```markdown
# System Architecture - MCP Codebase Insight

This document outlines the system architecture of the MCP Codebase Insight project using various diagrams to illustrate different aspects of the system.

## High-Level System Architecture

```mermaid
graph TB
    Client[Client Applications] --> API[FastAPI Server]
    API --> Core[Core Services]

    subgraph Core Services
        CodeAnalysis[Code Analysis Service]
        ADR[ADR Management]
        Doc[Documentation Service]
        Knowledge[Knowledge Base]
        Debug[Debug System]
        Metrics[Metrics & Health]
        Cache[Caching System]
    end

    Core --> VectorDB[(Qdrant Vector DB)]
    Core --> FileSystem[(File System)]

    CodeAnalysis --> VectorDB
    Knowledge --> VectorDB
    ADR --> FileSystem
    Doc --> FileSystem
```

## Component Relationships

```mermaid
graph LR
    subgraph Core Components
        Embeddings[Embeddings Service]
        VectorStore[Vector Store Service]
        Knowledge[Knowledge Service]
        Tasks[Tasks Service]
        Prompts[Prompts Service]
        Debug[Debug Service]
        Health[Health Service]
        Config[Config Service]
        Cache[Cache Service]
    end

    Embeddings --> VectorStore
    Knowledge --> VectorStore
    Knowledge --> Embeddings
    Tasks --> Knowledge
    Debug --> Knowledge
    Prompts --> Tasks
    Health --> Cache

    %% Instead of linking to "Core Components", link to each node individually
    Config --> Embeddings
    Config --> VectorStore
    Config --> Knowledge
    Config --> Tasks
    Config --> Prompts
    Config --> Debug
    Config --> Health
    Config --> Cache
```

## Data Flow Architecture

```mermaid
sequenceDiagram
    participant Client
    participant API
    participant Knowledge
    participant Embeddings
    participant VectorStore
    participant Cache

    Client->>API: Request Analysis
    API->>Cache: Check Cache
    alt Cache Hit
        Cache-->>API: Return Cached Result
    else Cache Miss
        API->>Knowledge: Process Request
        Knowledge->>Embeddings: Generate Embeddings
        Embeddings->>VectorStore: Store/Query Vectors
        VectorStore-->>Knowledge: Vector Results
        Knowledge-->>API: Analysis Results
        API->>Cache: Store Results
        API-->>Client: Return Results
    end
```
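
The cache-aside flow in this diagram can be summarized in a few lines of Python; a minimal sketch, assuming an async cache with `get`/`put` methods and a knowledge service exposing an `analyze` coroutine (these names are illustrative, not the actual service API):

```python
async def handle_analysis(request_key: str, payload: dict, cache, knowledge) -> dict:
    # Check the cache first, as in the sequence diagram
    cached = await cache.get(request_key)
    if cached is not None:
        return cached  # cache hit: skip embedding and vector search entirely

    # Cache miss: run the full pipeline (embeddings -> vector store -> analysis)
    result = await knowledge.analyze(payload)

    # Store the result so identical requests are served from cache next time
    await cache.put(request_key, result)
    return result
```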

## Directory Structure

```mermaid
graph TD
    Root[mcp-codebase-insight] --> Src[src/]
    Root --> Tests[tests/]
    Root --> Docs[docs/]
    Root --> Scripts[scripts/]
    Root --> Knowledge[knowledge/]

    Src --> Core[core/]
    Src --> Utils[utils/]

    Core --> Components{Core Components}
    Components --> ADR[adr.py]
    Components --> Cache[cache.py]
    Components --> Config[config.py]
    Components --> Debug[debug.py]
    Components --> Doc[documentation.py]
    Components --> Embed[embeddings.py]
    Components --> Know[knowledge.py]
    Components --> Vector[vector_store.py]

    Knowledge --> Patterns[patterns/]
    Knowledge --> Tasks[tasks/]
    Knowledge --> Prompts[prompts/]
```

## Security and Authentication Flow

```mermaid
graph TD
    Request[Client Request] --> Auth[Authentication Layer]
    Auth --> Validation[Request Validation]
    Validation --> RateLimit[Rate Limiting]
    RateLimit --> Processing[Request Processing]

    subgraph Security Measures
        Auth
        Validation
        RateLimit
        Logging[Audit Logging]
    end

    Processing --> Logging
    Processing --> Response[API Response]
```

This architecture documentation illustrates the main components and their interactions within the MCP Codebase Insight system. The system is designed to be modular, scalable, and maintainable, with clear separation of concerns between different components.

Key architectural decisions:
1. Use of FastAPI for high-performance API endpoints
2. Vector database (Qdrant) for efficient similarity search
3. Modular core services for different functionalities
4. Caching layer for improved performance
5. Clear separation between data storage and business logic
6. Comprehensive security measures
7. Structured knowledge management system
```

--------------------------------------------------------------------------------
/docs/documentation_summary.md:
--------------------------------------------------------------------------------

```markdown
# MCP Codebase Insight Documentation Structure

## Architecture Decision Records (ADRs)

### Testing Strategy (ADR-0001)
Core decisions about testing infrastructure, focusing on:
- Server management and startup
- Test client configuration
- SSE testing approach

Implemented by:
- `tests.integration.test_sse.test_server_instance`
- `tests.integration.test_sse.test_client`
- `src.mcp_codebase_insight.server.lifespan`

### SSE Testing Strategy (ADR-0002)
Detailed approach to testing Server-Sent Events, covering:
- Connection management
- Event handling
- Test patterns

Implemented by:
- `tests.framework.sse.SSETestManager`
- `tests.integration.test_sse.test_sse_message_flow`

### Comprehensive Testing Strategy (ADR-0003)
Framework for testing all components:
- Server testing framework
- SSE test management
- Test client configuration
- Integration patterns

Implemented by:
- `tests.framework.server.ServerTestFramework`
- `tests.framework.sse.SSETestManager`
- `tests.conftest.configured_test_client`

### Documentation Linking Strategy (ADR-0004)
System for maintaining documentation-code relationships:
- Documentation node management
- Code element tracking
- Link validation

Implemented by:
- `src.mcp_codebase_insight.documentation.models.DocNode`
- `src.mcp_codebase_insight.documentation.models.DocumentationMap`
- `src.mcp_codebase_insight.documentation.loader.DocLoader`

## Feature Documentation

### Code Analysis
Overview of code analysis capabilities:
- Pattern detection
- Quality analysis
- Dependency tracking

Implemented by:
- `src.mcp_codebase_insight.analysis`

### ADR Management
Tools for managing Architecture Decision Records:
- ADR creation
- Status tracking
- Implementation linking

Implemented by:
- `src.mcp_codebase_insight.adr`

### Documentation Management
Documentation tooling and processes:
- Documentation-code linking
- Validation tools
- Generation utilities

Implemented by:
- `src.mcp_codebase_insight.documentation`
- `src.mcp_codebase_insight.documentation.annotations`

## Testing Documentation

### Server Testing
Framework and patterns for server testing:
- Server lifecycle management
- Health checking
- Configuration testing

Implemented by:
- `tests.framework.server.ServerTestFramework`
- `tests.conftest.configured_test_client`

### SSE Testing
Patterns and tools for SSE testing:
- Connection management
- Event verification
- Integration testing

Implemented by:
- `tests.framework.sse.SSETestManager`
- `tests.integration.test_sse.test_sse_connection`
- `tests.integration.test_sse.test_sse_message_flow`

## Key Components

### Server Framework
- Server configuration and lifecycle management
- Health check endpoints
- SSE infrastructure

Key files:
- `src.mcp_codebase_insight.server.ServerConfig`
- `src.mcp_codebase_insight.server.lifespan`

### Testing Framework
- Test client configuration
- Server test fixtures
- SSE test utilities

Key files:
- `tests.framework.server.ServerTestFramework`
- `tests.framework.sse.SSETestManager`
- `tests.conftest.configured_test_client`

### Documentation Tools
- Documentation-code linking
- Validation utilities
- Generation tools

Key files:
- `src.mcp_codebase_insight.documentation.models`
- `src.mcp_codebase_insight.documentation.loader`
- `src.mcp_codebase_insight.documentation.annotations`

## Documentation Coverage

### Well-Documented Areas
1. Testing infrastructure
   - Server testing framework
   - SSE testing components
   - Test client configuration

2. Documentation management
   - Documentation models
   - Loading and validation
   - Code annotations

### Areas Needing More Documentation
1. Code analysis features
   - Implementation details
   - Usage patterns
   - Configuration options

2. ADR management tools
   - CLI interface
   - Template system
   - Integration features

## Next Steps

1. **Documentation Improvements**
   - Add more code examples
   - Create API reference docs
   - Expand configuration guides

2. **Testing Enhancements**
   - Add performance test docs
   - Document error scenarios
   - Create debugging guides

3. **Feature Documentation**
   - Complete code analysis docs
   - Expand ADR management docs
   - Add integration guides
```

--------------------------------------------------------------------------------
/docs/troubleshooting/faq.md:
--------------------------------------------------------------------------------

```markdown
# Frequently Asked Questions

> 🚧 **Documentation In Progress**
>
> This documentation is being actively developed. More details will be added soon.

## General Questions

### What is MCP Codebase Insight?
MCP Codebase Insight is a tool for analyzing and understanding codebases through semantic analysis, pattern detection, and documentation management.

### What are the system requirements?
- Python 3.11 or higher
- 4GB RAM minimum (8GB recommended)
- 2GB free disk space
- Docker (optional, for containerized deployment)

### Which operating systems are supported?
- Linux (Ubuntu 20.04+, CentOS 8+)
- macOS (10.15+)
- Windows 10/11 with WSL2

## Installation

### Do I need to install Qdrant separately?
Yes, Qdrant is required for vector storage. You can install it via Docker (recommended) or from source. See the [Qdrant Setup Guide](../getting-started/qdrant_setup.md).

### Can I use a different vector database?
Currently, only Qdrant is supported. Support for other vector databases may be added in future releases.

### Why am I getting permission errors during installation?
This usually happens when trying to install in system directories. Try:
1. Using a virtual environment
2. Installing with `--user` flag
3. Using proper permissions for directories

## Usage

### How do I start analyzing my codebase?
1. Install MCP Codebase Insight
2. Set up Qdrant
3. Configure your environment
4. Run the server
5. Use the API or CLI to analyze your code (see the sketch below)
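
For step 5, a minimal sketch of calling the analysis endpoint over HTTP, following the pattern in `examples/use_with_claude.py` (the server URL and payload fields mirror that example; the code snippet being analyzed is arbitrary):

```python
import asyncio
import httpx

async def main() -> None:
    async with httpx.AsyncClient() as client:
        response = await client.post(
            "http://localhost:3000/tools/analyze-code",
            json={"name": "analyze-code", "arguments": {"code": "print('hello')", "context": {}}},
        )
        response.raise_for_status()
        print(response.json())

asyncio.run(main())
```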

### Can I analyze multiple repositories at once?
Yes, you can analyze multiple repositories by:
1. Using batch analysis
2. Creating separate collections
3. Merging results afterward

### How do I customize the analysis?
You can customize:
- Analysis patterns
- Vector search parameters
- Documentation generation
- Output formats

See the [Configuration Guide](../getting-started/configuration.md).

## Performance

### Why is vector search slow?
Common reasons:
1. Large vector collection
2. Limited memory
3. Network latency
4. Insufficient CPU resources

Solutions:
1. Enable disk storage
2. Adjust batch size
3. Optimize search parameters
4. Scale hardware resources

### How much memory do I need?
Memory requirements depend on:
- Codebase size
- Vector collection size
- Batch processing size
- Concurrent operations

Minimum: 4GB RAM
Recommended: 8GB+ RAM

### Can I run it in production?
Yes, but consider:
1. Setting up authentication
2. Configuring CORS
3. Using SSL/TLS
4. Implementing monitoring
5. Setting up backups

## Features

### Does it support my programming language?
Currently supported:
- Python
- JavaScript/TypeScript
- Java
- Go
- Ruby

More languages planned for future releases.

### Can it generate documentation?
Yes, it can:
1. Generate API documentation
2. Create architecture diagrams
3. Maintain ADRs
4. Build knowledge bases

### How does pattern detection work?
Pattern detection uses:
1. Vector embeddings
2. AST analysis
3. Semantic search
4. Machine learning models

## Integration

### Can I integrate with my IDE?
Yes, through:
1. REST API
2. Language Server Protocol
3. Custom extensions

### Does it work with CI/CD pipelines?
Yes, you can:
1. Run analysis in CI
2. Generate reports
3. Enforce patterns
4. Update documentation

### Can I use it with existing tools?
Integrates with:
1. Git
2. Documentation generators
3. Code quality tools
4. Issue trackers

## Troubleshooting

### Where are the log files?
Default locations:
- Server logs: `./logs/server.log`
- Access logs: `./logs/access.log`
- Debug logs: `./logs/debug.log`

### How do I report bugs?
1. Check [existing issues](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues)
2. Create new issue with:
   - Clear description
   - Steps to reproduce
   - System information
   - Log files

### How do I get support?
Support options:
1. [Documentation](../README.md)
2. [GitHub Issues](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues)
3. [Discussion Forum](https://github.com/modelcontextprotocol/mcp-codebase-insight/discussions)
4. [Discord Community](https://discord.gg/mcp-codebase-insight)

## Next Steps

- [Common Issues](common-issues.md)
- [Installation Guide](../getting-started/installation.md)
- [Configuration Guide](../getting-started/configuration.md)
```

--------------------------------------------------------------------------------
/docs/getting-started/docker-setup.md:
--------------------------------------------------------------------------------

```markdown
# Docker Setup Guide

This guide covers how to set up and run MCP Codebase Insight using Docker.

## Prerequisites

- Docker installed on your system
- Basic knowledge of Docker commands
- Qdrant vector database accessible from your Docker network (required)

## Running Qdrant Container

MCP Codebase Insight requires a running Qdrant instance. Make sure to start Qdrant before running the MCP container:

```bash
# Create a directory for Qdrant data
mkdir -p qdrant_data

# Pull and run Qdrant container
docker pull qdrant/qdrant
docker run -d --name qdrant \
    -p 6333:6333 -p 6334:6334 \
    -v $(pwd)/qdrant_data:/qdrant/storage \
    qdrant/qdrant
```

You can access the Qdrant web UI at http://localhost:6333/dashboard to verify it's running correctly.

## Quick Start with Docker

```bash
# Pull the image
docker pull tosin2013/mcp-codebase-insight

# Run the container
docker run -p 3000:3000 \
    --env-file .env \
    -v $(pwd)/docs:/app/docs \
    -v $(pwd)/knowledge:/app/knowledge \
    tosin2013/mcp-codebase-insight
```

## Creating a .env File for Docker

Create a `.env` file in your project directory with the following content:

```
MCP_HOST=0.0.0.0
MCP_PORT=3000
MCP_LOG_LEVEL=INFO
QDRANT_URL=http://host.docker.internal:6333
MCP_DOCS_CACHE_DIR=/app/docs
MCP_ADR_DIR=/app/docs/adrs
MCP_KB_STORAGE_DIR=/app/knowledge
MCP_DISK_CACHE_DIR=/app/cache
```

> **Note:** When using Docker, the host is set to `0.0.0.0` to allow connections from outside the container. If your Qdrant instance is running on the host machine, use `host.docker.internal` instead of `localhost`.

## Volume Mounts

The Docker command mounts several directories from your host system into the container:

- `$(pwd)/docs:/app/docs`: Maps your local docs directory to the container's docs directory
- `$(pwd)/knowledge:/app/knowledge`: Maps your local knowledge directory to the container's knowledge directory

Make sure these directories exist on your host system before running the container:

```bash
mkdir -p docs/adrs knowledge
```

## Using Docker Compose

For a more manageable setup, you can use Docker Compose. Create a `docker-compose.yml` file in your project directory:

```yaml
version: '3'

services:
  mcp-codebase-insight:
    image: tosin2013/mcp-codebase-insight
    ports:
      - "3000:3000"
    volumes:
      - ./docs:/app/docs
      - ./knowledge:/app/knowledge
      - ./cache:/app/cache
    env_file:
      - .env
    networks:
      - mcp-network

  qdrant:
    image: qdrant/qdrant
    ports:
      - "6333:6333"
    volumes:
      - ./qdrant_data:/qdrant/storage
    networks:
      - mcp-network

networks:
  mcp-network:
    driver: bridge
```

Then start the services:

```bash
docker-compose up -d
```

## Advanced Docker Configuration

### Using Custom Embedding Models

To use a custom embedding model, add the model path to your volume mounts and update the environment configuration:

```bash
docker run -p 3000:3000 \
    --env-file .env \
    -v $(pwd)/docs:/app/docs \
    -v $(pwd)/knowledge:/app/knowledge \
    -v $(pwd)/models:/app/models \
    -e MCP_EMBEDDING_MODEL=/app/models/custom-model \
    tosin2013/mcp-codebase-insight
```

### Securing Your Docker Deployment

For production environments:

1. Use Docker networks to isolate the MCP and Qdrant services
2. Don't expose the Qdrant port to the public internet
3. Set up proper authentication for both services
4. Use Docker secrets for sensitive information
5. Consider using a reverse proxy with HTTPS for the API

## Troubleshooting Docker Issues

### Connection Refused to Qdrant

If you're getting connection errors to Qdrant, check:

- Is Qdrant running? (`docker ps | grep qdrant`)
- Is the URL correct in the `.env` file?
- Are both services on the same Docker network?
- Try using the service name instead of `host.docker.internal` if using Docker Compose (see the probe below)
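
A quick way to test this from inside the MCP container itself, assuming the Compose service names used above (`mcp-codebase-insight` and `qdrant`); the probe uses Python's standard library since the image may not ship curl:

```bash
# Exec into the running MCP container and probe Qdrant over the shared network
docker compose exec mcp-codebase-insight \
  python -c "import urllib.request; print(urllib.request.urlopen('http://qdrant:6333/collections').status)"
```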

### Container Exits Immediately

If the container exits immediately:

- Check the Docker logs: `docker logs <container_id>`
- Ensure all required environment variables are set
- Verify that the mounted directories have correct permissions

### Out of Memory Errors

If you encounter out of memory errors:

- Increase the memory limit for the container
- Reduce the vector dimension or batch size in your configuration
- Consider using a more efficient embedding model
```

--------------------------------------------------------------------------------
/docs/troubleshooting/common-issues.md:
--------------------------------------------------------------------------------

```markdown
# Troubleshooting Guide

> 🚧 **Documentation In Progress**
>
> This documentation is being actively developed. More details will be added soon.

## Common Issues

### Installation Issues

#### 1. Dependencies Installation Fails
```bash
Error: Failed building wheel for sentence-transformers
```

**Solution:**
```bash
# Update pip and install wheel
pip install --upgrade pip
pip install wheel

# Try installing with specific version
pip install sentence-transformers==2.2.2

# If still failing, install system dependencies
# Ubuntu/Debian:
sudo apt-get install python3-dev build-essential
# CentOS/RHEL:
sudo yum install python3-devel gcc
```

#### 2. Permission Denied
```bash
PermissionError: [Errno 13] Permission denied: '/usr/local/lib/python3.11/site-packages'
```

**Solution:**
```bash
# Install in user space
pip install --user mcp-codebase-insight

# Or fix directory permissions
sudo chown -R $USER:$USER venv/
```

### Server Issues

#### 1. Port Already in Use
```bash
[Errno 48] Address already in use
```

**Solution:**
```bash
# Find process using the port
lsof -i :3000  # On Linux/macOS
netstat -ano | findstr :3000  # On Windows

# Kill the process
kill -9 <PID>

# Or use a different port
mcp-codebase-insight --port 3001
```

#### 2. Server Won't Start
```bash
ERROR: [Errno 2] No such file or directory: './docs'
```

**Solution:**
```bash
# Create required directories
mkdir -p docs/adrs knowledge cache

# Fix permissions
chmod -R 755 docs knowledge cache
```

### Vector Store Issues

#### 1. Qdrant Connection Failed
```bash
ConnectionError: Failed to connect to Qdrant server
```

**Solution:**
```bash
# Check if Qdrant is running
curl http://localhost:6333/health

# Start Qdrant if not running
docker start qdrant

# Verify environment variable
echo $QDRANT_URL
# Should be: http://localhost:6333
```

#### 2. Collection Creation Failed
```bash
Error: Collection 'code_vectors' already exists
```

**Solution:**
```bash
# List existing collections
curl http://localhost:6333/collections

# Delete existing collection if needed
curl -X DELETE http://localhost:6333/collections/code_vectors

# Create new collection with correct parameters
python -c "
from qdrant_client import QdrantClient
client = QdrantClient('localhost', port=6333)
client.recreate_collection(
    collection_name='code_vectors',
    vectors_config={'size': 384, 'distance': 'Cosine'}
)
"
```

### Memory Issues

#### 1. Out of Memory
```bash
MemoryError: Unable to allocate array with shape (1000000, 384)
```

**Solution:**
```yaml
# Adjust batch size in config.yaml
vector_store:
  batch_size: 100  # Reduce from default

# Or set environment variable
export MCP_BATCH_SIZE=100
```

#### 2. Slow Performance
```bash
WARNING: Vector search taking longer than expected
```

**Solution:**
```yaml
# Enable disk storage in config.yaml
vector_store:
  on_disk: true

# Adjust cache size
performance:
  cache_size: 1000
```

### Documentation Issues

#### 1. Documentation Map Failed
```bash
Error: Unable to create documentation map: Invalid directory structure
```

**Solution:**
```bash
# Verify directory structure
tree docs/

# Create required structure
mkdir -p docs/{adrs,api,components}
touch docs/index.md
```

#### 2. Search Not Working
```bash
Error: Search index not found
```

**Solution:**
```bash
# Rebuild search index
curl -X POST http://localhost:3000/api/docs/rebuild-index

# Verify index exists
ls -l docs/.search_index
```

## Debugging Tips

### 1. Enable Debug Logging
```bash
# Set environment variable
export MCP_LOG_LEVEL=DEBUG

# Or use command line flag
mcp-codebase-insight --debug
```

### 2. Check System Resources
```bash
# Check memory usage
free -h

# Check disk space
df -h

# Check CPU usage
top
```

### 3. Verify Configuration
```bash
# Print current config
mcp-codebase-insight show-config

# Validate config file
mcp-codebase-insight validate-config --config config.yaml
```

## Getting Help

If you're still experiencing issues:

1. Check the [GitHub Issues](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues)
2. Join our [Discussion Forum](https://github.com/modelcontextprotocol/mcp-codebase-insight/discussions)
3. Review the [FAQ](faq.md)
4. Contact Support:
   - Discord: [Join Server](https://discord.gg/mcp-codebase-insight)
   - Email: [email protected]

## Next Steps

- [Installation Guide](../getting-started/installation.md)
- [Configuration Guide](../getting-started/configuration.md)
- [Development Guide](../development/README.md)
```

--------------------------------------------------------------------------------
/docs/getting-started/configuration.md:
--------------------------------------------------------------------------------

```markdown
# Configuration Guide

> 🚧 **Documentation In Progress**
>
> This documentation is being actively developed. More details will be added soon.

## Configuration Methods

MCP Codebase Insight can be configured through:
1. Environment variables
2. Configuration file
3. Command-line arguments

Priority order (highest to lowest):
1. Command-line arguments
2. Environment variables
3. Configuration file
4. Default values

## Environment Variables

### Required Variables
```bash
# Server Configuration
MCP_HOST=127.0.0.1
MCP_PORT=3000

# Vector Store
QDRANT_URL=http://localhost:6333

# Storage Paths
MCP_DOCS_CACHE_DIR=./docs
MCP_ADR_DIR=./docs/adrs
MCP_KB_STORAGE_DIR=./knowledge
MCP_DISK_CACHE_DIR=./cache
```

### Optional Variables
```bash
# Logging
MCP_LOG_LEVEL=INFO
MCP_LOG_FORMAT=json

# Performance
MCP_CACHE_SIZE=1000
MCP_WORKER_COUNT=4
```

## Configuration File

Create `config.yaml` in your project root:

```yaml
server:
  host: 127.0.0.1
  port: 3000
  workers: 4

vector_store:
  url: http://localhost:6333
  collection: code_vectors

storage:
  docs_cache: ./docs
  adr_dir: ./docs/adrs
  kb_storage: ./knowledge
  disk_cache: ./cache

logging:
  level: INFO
  format: json
```
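
These nested settings ultimately populate the flat `ServerConfig` dataclass in `src/mcp_codebase_insight/core/config.py` (included later in this dump); a sketch of the assumed mapping, constructed directly in Python — the field names come from `ServerConfig`, but the YAML-to-dataclass correspondence shown here is an assumption for illustration:

```python
from src.mcp_codebase_insight.core.config import ServerConfig

# Hypothetical equivalent of the YAML above; __post_init__ converts the
# string paths to pathlib.Path objects automatically.
config = ServerConfig(
    host="127.0.0.1",
    port=3000,
    qdrant_url="http://localhost:6333",
    collection_name="code_vectors",
    docs_cache_dir="./docs",
    adr_dir="./docs/adrs",
    kb_storage_dir="./knowledge",
    disk_cache_dir="./cache",
    log_level="INFO",
)
```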

## Command-line Arguments

```bash
mcp-codebase-insight --help

Options:
  --host TEXT           Server host address
  --port INTEGER        Server port
  --log-level TEXT      Logging level
  --debug               Enable debug mode
  --config PATH         Path to config file
  --qdrant-url TEXT     Qdrant server URL
  --docs-dir PATH       Documentation directory
  --adr-dir PATH        ADR directory
  --kb-dir PATH         Knowledge base directory
  --cache-dir PATH      Cache directory
  --workers INTEGER     Number of workers
  --batch-size INTEGER  Batch size
  --timeout INTEGER     Request timeout
  --auth                Enable authentication
  --metrics             Enable metrics
  --help                Show this message and exit
```

## Feature-specific Configuration

### 1. Vector Store Configuration

```yaml
vector_store:
  # Embedding model settings
  model:
    name: all-MiniLM-L6-v2
    dimension: 384
    normalize: true

  # Collection settings
  collection:
    name: mcp_vectors
    distance: Cosine
    on_disk: false

  # Search settings
  search:
    limit: 10
    threshold: 0.75
```

### 2. Documentation Management

```yaml
documentation:
  # Auto-generation settings
  auto_generate: true
  min_confidence: 0.8

  # Crawling settings
  crawl:
    max_depth: 3
    timeout: 30
    exclude_patterns: ["*.git*", "node_modules"]

  # Storage settings
  storage:
    format: markdown
    index_file: _index.md
```

### 3. ADR Management

```yaml
adr:
  # Template settings
  template_dir: templates/adr
  default_template: default.md

  # Workflow settings
  require_approval: true
  auto_number: true

  # Storage settings
  storage:
    format: markdown
    naming: date-title
```

## Environment-specific Configurations

### Development

```yaml
debug: true
log_level: DEBUG
metrics:
  enabled: false
vector_store:
  on_disk: false
```

### Production

```yaml
debug: false
log_level: INFO
security:
  auth_enabled: true
  allowed_origins: ["https://your-domain.com"]
metrics:
  enabled: true
vector_store:
  on_disk: true
```

### Testing

```yaml
debug: true
log_level: DEBUG
vector_store:
  collection_name: test_vectors
storage:
  docs_cache_dir: ./test/docs
```

## Best Practices

1. **Security**
   - Always enable authentication in production
   - Use environment variables for sensitive values
   - Restrict CORS origins in production

2. **Performance**
   - Adjust worker count based on CPU cores
   - Enable disk storage for large vector collections
   - Configure appropriate batch sizes

3. **Monitoring**
   - Enable metrics in production
   - Set appropriate log levels
   - Configure health check endpoints

4. **Storage**
   - Use absolute paths in production
   - Implement backup strategies
   - Monitor disk usage

## Validation

To validate your configuration:

```bash
mcp-codebase-insight validate-config --config config.yaml
```

## Troubleshooting

Common configuration issues and solutions:

1. **Permission Denied**
   ```bash
   # Fix directory permissions
   chmod -R 755 docs knowledge cache
   ```

2. **Port Already in Use**
   ```bash
   # Use different port
   export MCP_PORT=3001
   ```
3. **Memory Issues**
   ```yaml
   # Adjust batch size
   performance:
     batch_size: 50
   ```

## Next Steps

- [Quick Start Guide](quickstart.md)
- [API Reference](../api/rest-api.md)
- [Development Guide](../development/README.md)
```

--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/embeddings.py:
--------------------------------------------------------------------------------

```python
"""Text embedding using sentence-transformers."""

from typing import List, Union
import asyncio
import logging

from sentence_transformers import SentenceTransformer

logger = logging.getLogger(__name__)

class SentenceTransformerEmbedding:
    """Text embedding using sentence-transformers."""

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """Initialize embedding model."""
        self.model_name = model_name
        self.model = None
        self.vector_size = None
        self.initialized = False

    async def initialize(self):
        """Initialize the embedding model."""
        if self.initialized:
            return

        max_retries = 3
        retry_delay = 2.0

        for attempt in range(max_retries):
            try:
                # Define the model loading function
                def load_model():
                    logger.debug(f"Loading model {self.model_name}")
                    model = SentenceTransformer(self.model_name)
                    vector_size = model.get_sentence_embedding_dimension()
                    return model, vector_size

                # Load the model in a worker thread, bounded by a timeout so the
                # asyncio.TimeoutError retry path below can actually fire
                # (the 60-second bound is an assumed value)
                logger.debug(f"Starting model loading attempt {attempt + 1}/{max_retries}")
                model, vector_size = await asyncio.wait_for(
                    asyncio.to_thread(load_model), timeout=60.0
                )

                self.model = model
                self.vector_size = vector_size
                self.initialized = True
                logger.debug(f"Model loaded successfully with vector size {self.vector_size}")
                return

            except asyncio.TimeoutError:
                if attempt < max_retries - 1:
                    logger.warning(f"Timeout loading model on attempt {attempt + 1}, retrying in {retry_delay}s")
                    await asyncio.sleep(retry_delay)
                    retry_delay *= 2
                else:
                    logger.error(f"Failed to load model after {max_retries} attempts")
                    raise RuntimeError(f"Failed to load embedding model {self.model_name}: Timeout after {max_retries} attempts")
            except Exception as e:
                logger.error(f"Failed to load embedding model {self.model_name}: {str(e)}")
                raise RuntimeError(f"Failed to load embedding model {self.model_name}: {str(e)}")

    async def embed(self, text: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
        """Generate embeddings for text."""
        if not self.initialized:
            await self.initialize()

        try:
            # Convert single string to list for consistent handling
            texts = [text] if isinstance(text, str) else text

            # Generate embeddings
            embeddings = self.model.encode(
                texts,
                convert_to_tensor=False,  # Return numpy array
                normalize_embeddings=True  # L2 normalize embeddings
            )

            # Convert numpy arrays to lists for JSON serialization
            if isinstance(text, str):
                return embeddings[0].tolist()
            return [embedding.tolist() for embedding in embeddings]

        except Exception as e:
            logger.error(f"Failed to generate embeddings: {str(e)}")
            raise RuntimeError(f"Failed to generate embeddings: {str(e)}")

    async def embed_batch(self, texts: List[str], batch_size: int = 32) -> List[List[float]]:
        """Generate embeddings for a batch of texts."""
        if not self.initialized:
            await self.initialize()

        try:
            # Generate embeddings in batches
            all_embeddings = []
            for i in range(0, len(texts), batch_size):
                batch = texts[i:i + batch_size]
                embeddings = self.model.encode(
                    batch,
                    convert_to_tensor=False,
                    normalize_embeddings=True,
                    batch_size=batch_size
                )
                all_embeddings.extend(embeddings.tolist())
            return all_embeddings

        except Exception as e:
            logger.error(f"Failed to generate batch embeddings: {str(e)}")
            raise RuntimeError(f"Failed to generate batch embeddings: {str(e)}")

    async def embed_with_cache(
        self,
        text: str,
        cache_manager = None
    ) -> List[float]:
        """Generate embeddings with caching."""
        if not cache_manager:
            return await self.embed(text)

        # Try to get from cache
        cache_key = f"embedding:{hash(text)}"
        cached = cache_manager.get_from_memory(cache_key)
        if cached:
            return cached

        # Generate new embedding
        embedding = await self.embed(text)

        # Cache the result
        cache_manager.put_in_memory(cache_key, embedding)
        return embedding

    def get_vector_size(self) -> int:
        """Get the size of embedding vectors."""
        return self.vector_size
```
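
A minimal usage sketch for the embedding class above (the model is downloaded on first initialization; the input text is an arbitrary example):

```python
import asyncio
from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding

async def main() -> None:
    embedder = SentenceTransformerEmbedding()
    await embedder.initialize()

    # A single string yields one vector; a list of strings yields one per text
    vector = await embedder.embed("def add(a, b): return a + b")
    print(len(vector), embedder.get_vector_size())  # both 384 for all-MiniLM-L6-v2

asyncio.run(main())
```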
"""Generate embeddings for a batch of texts.""" 87 | if not self.initialized: 88 | await self.initialize() 89 | 90 | try: 91 | # Generate embeddings in batches 92 | all_embeddings = [] 93 | for i in range(0, len(texts), batch_size): 94 | batch = texts[i:i + batch_size] 95 | embeddings = self.model.encode( 96 | batch, 97 | convert_to_tensor=False, 98 | normalize_embeddings=True, 99 | batch_size=batch_size 100 | ) 101 | all_embeddings.extend(embeddings.tolist()) 102 | return all_embeddings 103 | 104 | except Exception as e: 105 | logger.error(f"Failed to generate batch embeddings: {str(e)}") 106 | raise RuntimeError(f"Failed to generate batch embeddings: {str(e)}") 107 | 108 | async def embed_with_cache( 109 | self, 110 | text: str, 111 | cache_manager = None 112 | ) -> List[float]: 113 | """Generate embeddings with caching.""" 114 | if not cache_manager: 115 | return await self.embed(text) 116 | 117 | # Try to get from cache 118 | cache_key = f"embedding:{hash(text)}" 119 | cached = cache_manager.get_from_memory(cache_key) 120 | if cached: 121 | return cached 122 | 123 | # Generate new embedding 124 | embedding = await self.embed(text) 125 | 126 | # Cache the result 127 | cache_manager.put_in_memory(cache_key, embedding) 128 | return embedding 129 | 130 | def get_vector_size(self) -> int: 131 | """Get the size of embedding vectors.""" 132 | return self.vector_size 133 | ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/core/config.py: -------------------------------------------------------------------------------- ```python 1 | """Server configuration module.""" 2 | 3 | from dataclasses import dataclass, field 4 | from pathlib import Path 5 | from typing import Optional, Dict, Any 6 | import os 7 | import logging 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | @dataclass 12 | class ServerConfig: 13 | """Server configuration.""" 14 | 15 | host: str = "127.0.0.1" 16 | port: int = 3000 17 | log_level: str = "INFO" 18 | qdrant_url: str = "http://localhost:6333" 19 | qdrant_api_key: Optional[str] = None 20 | docs_cache_dir: Path = Path("docs") 21 | adr_dir: Path = Path("docs/adrs") 22 | kb_storage_dir: Path = Path("knowledge") 23 | embedding_model: str = "all-MiniLM-L6-v2" 24 | collection_name: str = "codebase_patterns" 25 | debug_mode: bool = False 26 | metrics_enabled: bool = True 27 | cache_enabled: bool = True 28 | memory_cache_size: int = 1000 29 | disk_cache_dir: Optional[Path] = Path("cache") # Default to "cache" instead of None 30 | _state: Dict[str, Any] = field(default_factory=dict) 31 | 32 | def __post_init__(self): 33 | """Convert string paths to Path objects and process defaults.""" 34 | # Convert string paths to Path objects 35 | for attr_name in ["docs_cache_dir", "adr_dir", "kb_storage_dir"]: 36 | attr_value = getattr(self, attr_name) 37 | if attr_value is not None and not isinstance(attr_value, Path): 38 | setattr(self, attr_name, Path(attr_value)) 39 | 40 | # Handle disk_cache_dir specifically 41 | if self.cache_enabled: 42 | if self.disk_cache_dir is None: 43 | # Default to "cache" directory when None but cache is enabled 44 | self.disk_cache_dir = Path("cache") 45 | logger.debug(f"Setting default disk_cache_dir to {self.disk_cache_dir}") 46 | elif not isinstance(self.disk_cache_dir, Path): 47 | self.disk_cache_dir = Path(self.disk_cache_dir) 48 | else: 49 | # If cache is disabled, set disk_cache_dir to None regardless of previous value 50 | self.disk_cache_dir = None 51 | logger.debug("Cache disabled, setting 
disk_cache_dir to None") 52 | 53 | # Initialize state 54 | self._state = { 55 | "initialized": False, 56 | "components": {}, 57 | "metrics": {}, 58 | "errors": [] 59 | } 60 | 61 | @classmethod 62 | def from_env(cls) -> 'ServerConfig': 63 | """Create configuration from environment variables.""" 64 | cache_enabled = os.getenv("MCP_CACHE_ENABLED", "true").lower() == "true" 65 | disk_cache_path = os.getenv("MCP_DISK_CACHE_DIR", "cache") 66 | 67 | return cls( 68 | host=os.getenv("MCP_HOST", "127.0.0.1"), 69 | port=int(os.getenv("MCP_PORT", "3000")), 70 | log_level=os.getenv("MCP_LOG_LEVEL", "INFO"), 71 | qdrant_url=os.getenv("QDRANT_URL", "http://localhost:6333"), 72 | qdrant_api_key=os.getenv("QDRANT_API_KEY"), 73 | embedding_model=os.getenv("MCP_EMBEDDING_MODEL", "all-MiniLM-L6-v2"), 74 | collection_name=os.getenv("MCP_COLLECTION_NAME", "codebase_patterns"), 75 | docs_cache_dir=Path(os.getenv("MCP_DOCS_CACHE_DIR", "docs")), 76 | adr_dir=Path(os.getenv("MCP_ADR_DIR", "docs/adrs")), 77 | kb_storage_dir=Path(os.getenv("MCP_KB_STORAGE_DIR", "knowledge")), 78 | disk_cache_dir=Path(disk_cache_path) if cache_enabled else None, 79 | debug_mode=os.getenv("MCP_DEBUG", "false").lower() == "true", 80 | metrics_enabled=os.getenv("MCP_METRICS_ENABLED", "true").lower() == "true", 81 | cache_enabled=cache_enabled, 82 | memory_cache_size=int(os.getenv("MCP_MEMORY_CACHE_SIZE", "1000")) 83 | ) 84 | 85 | def create_directories(self) -> None: 86 | """Create all required directories for the server. 87 | 88 | This method should be called during server initialization to ensure 89 | all necessary directories exist before components are initialized. 90 | """ 91 | logger.debug("Creating required directories") 92 | 93 | # Create standard directories 94 | self.docs_cache_dir.mkdir(parents=True, exist_ok=True) 95 | self.adr_dir.mkdir(parents=True, exist_ok=True) 96 | self.kb_storage_dir.mkdir(parents=True, exist_ok=True) 97 | 98 | # Create cache directory if enabled and configured 99 | if self.cache_enabled and self.disk_cache_dir is not None: 100 | logger.debug(f"Creating disk cache directory: {self.disk_cache_dir}") 101 | self.disk_cache_dir.mkdir(parents=True, exist_ok=True) 102 | elif not self.cache_enabled: 103 | logger.debug("Cache is disabled, skipping disk cache directory creation") 104 | 105 | logger.debug("All required directories created") 106 | 107 | def get_state(self, key: str, default: Any = None) -> Any: 108 | """Get state value.""" 109 | return self._state.get(key, default) 110 | 111 | def set_state(self, key: str, value: Any): 112 | """Set state value.""" 113 | self._state[key] = value 114 | 115 | def update_state(self, updates: Dict[str, Any]): 116 | """Update multiple state values.""" 117 | self._state.update(updates) 118 | 119 | def clear_state(self): 120 | """Clear all state.""" 121 | self._state.clear() 122 | self._state = { 123 | "initialized": False, 124 | "components": {}, 125 | "metrics": {}, 126 | "errors": [] 127 | } 128 | ``` -------------------------------------------------------------------------------- /requirements-3.11.txt: -------------------------------------------------------------------------------- ``` 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.11 3 | # by the following command: 4 | # 5 | # pip-compile --allow-unsafe --output-file=requirements-3.11.minimal.txt requirements.in.minimal 6 | # 7 | aiohappyeyeballs==2.6.1 8 | # via aiohttp 9 | aiohttp==3.11.14 10 | # via -r requirements.in.minimal 11 | aiosignal==1.3.2 12 | # via aiohttp 13 | 
annotated-types==0.7.0 14 | # via pydantic 15 | anyio==4.9.0 16 | # via 17 | # httpx 18 | # mcp 19 | # sse-starlette 20 | # starlette 21 | attrs==25.3.0 22 | # via aiohttp 23 | beautifulsoup4==4.13.3 24 | # via -r requirements.in.minimal 25 | black==25.1.0 26 | # via -r requirements.in.minimal 27 | build==1.2.2.post1 28 | # via pip-tools 29 | bump2version==1.0.1 30 | # via -r requirements.in.minimal 31 | certifi==2025.1.31 32 | # via 33 | # httpcore 34 | # httpx 35 | # requests 36 | charset-normalizer==3.4.1 37 | # via requests 38 | click==8.1.8 39 | # via 40 | # black 41 | # pip-tools 42 | # uvicorn 43 | coverage[toml]==7.8.0 44 | # via pytest-cov 45 | fastapi==0.115.12 46 | # via -r requirements.in.minimal 47 | filelock==3.18.0 48 | # via 49 | # huggingface-hub 50 | # torch 51 | # transformers 52 | flake8==7.2.0 53 | # via -r requirements.in.minimal 54 | frozenlist==1.5.0 55 | # via 56 | # aiohttp 57 | # aiosignal 58 | fsspec==2025.3.1 59 | # via 60 | # huggingface-hub 61 | # torch 62 | grpcio==1.71.0 63 | # via 64 | # grpcio-tools 65 | # qdrant-client 66 | grpcio-tools==1.71.0 67 | # via qdrant-client 68 | h11==0.14.0 69 | # via 70 | # httpcore 71 | # uvicorn 72 | h2==4.2.0 73 | # via httpx 74 | hpack==4.1.0 75 | # via h2 76 | httpcore==1.0.7 77 | # via httpx 78 | httpx[http2]==0.28.1 79 | # via 80 | # -r requirements.in.minimal 81 | # mcp 82 | # qdrant-client 83 | httpx-sse==0.4.0 84 | # via mcp 85 | huggingface-hub==0.29.3 86 | # via 87 | # tokenizers 88 | # transformers 89 | hyperframe==6.1.0 90 | # via h2 91 | idna==3.10 92 | # via 93 | # anyio 94 | # httpx 95 | # requests 96 | # yarl 97 | iniconfig==2.1.0 98 | # via pytest 99 | isort==6.0.1 100 | # via -r requirements.in.minimal 101 | jinja2==3.1.6 102 | # via torch 103 | markdown==3.7 104 | # via -r requirements.in.minimal 105 | markupsafe==3.0.2 106 | # via jinja2 107 | mccabe==0.7.0 108 | # via flake8 109 | mcp==1.6.0 110 | # via -r requirements.in.minimal 111 | mpmath==1.3.0 112 | # via sympy 113 | multidict==6.2.0 114 | # via 115 | # aiohttp 116 | # yarl 117 | mypy==1.15.0 118 | # via -r requirements.in.minimal 119 | mypy-extensions==1.0.0 120 | # via 121 | # black 122 | # mypy 123 | networkx==3.4.2 124 | # via 125 | # -r requirements.in.minimal 126 | # torch 127 | numpy==2.2.4 128 | # via 129 | # -r requirements.in.minimal 130 | # qdrant-client 131 | # scipy 132 | # transformers 133 | packaging==24.2 134 | # via 135 | # black 136 | # build 137 | # huggingface-hub 138 | # pytest 139 | # transformers 140 | pathspec==0.12.1 141 | # via black 142 | pip-tools==7.4.1 143 | # via -r requirements.in.minimal 144 | platformdirs==4.3.7 145 | # via black 146 | pluggy==1.5.0 147 | # via pytest 148 | portalocker==2.10.1 149 | # via qdrant-client 150 | propcache==0.3.1 151 | # via 152 | # aiohttp 153 | # yarl 154 | protobuf==5.29.4 155 | # via grpcio-tools 156 | psutil==7.0.0 157 | # via -r requirements.in.minimal 158 | pycodestyle==2.13.0 159 | # via flake8 160 | pydantic==2.11.1 161 | # via 162 | # -r requirements.in.minimal 163 | # fastapi 164 | # mcp 165 | # pydantic-settings 166 | # qdrant-client 167 | pydantic-core==2.33.0 168 | # via pydantic 169 | pydantic-settings==2.8.1 170 | # via mcp 171 | pyflakes==3.3.1 172 | # via flake8 173 | pyproject-hooks==1.2.0 174 | # via 175 | # build 176 | # pip-tools 177 | pytest==8.3.5 178 | # via 179 | # -r requirements.in.minimal 180 | # pytest-asyncio 181 | # pytest-cov 182 | pytest-asyncio==0.26.0 183 | # via -r requirements.in.minimal 184 | pytest-cov==6.0.0 185 | # via -r 
requirements.in.minimal 186 | python-dotenv==1.1.0 187 | # via 188 | # -r requirements.in.minimal 189 | # pydantic-settings 190 | python-frontmatter==1.1.0 191 | # via -r requirements.in.minimal 192 | python-slugify==8.0.4 193 | # via -r requirements.in.minimal 194 | pyyaml==6.0.2 195 | # via 196 | # -r requirements.in.minimal 197 | # huggingface-hub 198 | # python-frontmatter 199 | # transformers 200 | qdrant-client==1.13.3 201 | # via -r requirements.in.minimal 202 | regex==2024.11.6 203 | # via transformers 204 | requests==2.32.3 205 | # via 206 | # -r requirements.in.minimal 207 | # huggingface-hub 208 | # transformers 209 | safetensors==0.5.3 210 | # via transformers 211 | scipy==1.15.2 212 | # via -r requirements.in.minimal 213 | slugify==0.0.1 214 | # via -r requirements.in.minimal 215 | sniffio==1.3.1 216 | # via anyio 217 | soupsieve==2.6 218 | # via beautifulsoup4 219 | sse-starlette==2.2.1 220 | # via mcp 221 | starlette==0.46.1 222 | # via 223 | # fastapi 224 | # mcp 225 | # sse-starlette 226 | structlog==25.2.0 227 | # via -r requirements.in.minimal 228 | sympy==1.13.1 229 | # via torch 230 | text-unidecode==1.3 231 | # via python-slugify 232 | tokenizers==0.21.1 233 | # via transformers 234 | torch==2.6.0 235 | # via -r requirements.in.minimal 236 | tqdm==4.67.1 237 | # via 238 | # huggingface-hub 239 | # transformers 240 | transformers==4.50.3 241 | # via -r requirements.in.minimal 242 | typing-extensions==4.13.0 243 | # via 244 | # anyio 245 | # beautifulsoup4 246 | # fastapi 247 | # huggingface-hub 248 | # mypy 249 | # pydantic 250 | # pydantic-core 251 | # torch 252 | # typing-inspection 253 | typing-inspection==0.4.0 254 | # via pydantic 255 | urllib3==2.3.0 256 | # via 257 | # qdrant-client 258 | # requests 259 | uvicorn==0.34.0 260 | # via 261 | # -r requirements.in.minimal 262 | # mcp 263 | wheel==0.45.1 264 | # via pip-tools 265 | yarl==1.18.3 266 | # via aiohttp 267 | 268 | # The following packages are considered to be unsafe in a requirements file: 269 | pip==25.0.1 270 | # via pip-tools 271 | setuptools==78.1.0 272 | # via 273 | # grpcio-tools 274 | # pip-tools 275 | # WARNING: starlette constraint was removed to resolve conflicts 276 | # You will need to manually install a compatible starlette version 277 | ``` -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- ``` 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.11 3 | # by the following command: 4 | # 5 | # pip-compile --allow-unsafe --output-file=requirements-3.11.minimal.txt requirements.in.minimal 6 | # 7 | aiohappyeyeballs==2.6.1 8 | # via aiohttp 9 | aiohttp==3.11.14 10 | # via -r requirements.in.minimal 11 | aiosignal==1.3.2 12 | # via aiohttp 13 | annotated-types==0.7.0 14 | # via pydantic 15 | anyio==4.9.0 16 | # via 17 | # httpx 18 | # mcp 19 | # sse-starlette 20 | # starlette 21 | attrs==25.3.0 22 | # via aiohttp 23 | beautifulsoup4==4.13.3 24 | # via -r requirements.in.minimal 25 | black==25.1.0 26 | # via -r requirements.in.minimal 27 | build==1.2.2.post1 28 | # via pip-tools 29 | bump2version==1.0.1 30 | # via -r requirements.in.minimal 31 | certifi==2025.1.31 32 | # via 33 | # httpcore 34 | # httpx 35 | # requests 36 | charset-normalizer==3.4.1 37 | # via requests 38 | click==8.1.8 39 | # via 40 | # black 41 | # pip-tools 42 | # uvicorn 43 | coverage[toml]==7.8.0 44 | # via pytest-cov 45 | fastapi==0.115.12 46 | # via -r requirements.in.minimal 47 | 
filelock==3.18.0 48 | # via 49 | # huggingface-hub 50 | # torch 51 | # transformers 52 | flake8==7.2.0 53 | # via -r requirements.in.minimal 54 | frozenlist==1.5.0 55 | # via 56 | # aiohttp 57 | # aiosignal 58 | fsspec==2025.3.1 59 | # via 60 | # huggingface-hub 61 | # torch 62 | grpcio==1.71.0 63 | # via 64 | # grpcio-tools 65 | # qdrant-client 66 | grpcio-tools==1.71.0 67 | # via qdrant-client 68 | h11==0.14.0 69 | # via 70 | # httpcore 71 | # uvicorn 72 | h2==4.2.0 73 | # via httpx 74 | hpack==4.1.0 75 | # via h2 76 | httpcore==1.0.7 77 | # via httpx 78 | httpx[http2]==0.28.1 79 | # via 80 | # -r requirements.in.minimal 81 | # mcp 82 | # qdrant-client 83 | httpx-sse==0.4.0 84 | # via mcp 85 | huggingface-hub==0.29.3 86 | # via 87 | # tokenizers 88 | # transformers 89 | hyperframe==6.1.0 90 | # via h2 91 | idna==3.10 92 | # via 93 | # anyio 94 | # httpx 95 | # requests 96 | # yarl 97 | iniconfig==2.1.0 98 | # via pytest 99 | isort==6.0.1 100 | # via -r requirements.in.minimal 101 | jinja2==3.1.6 102 | # via torch 103 | markdown==3.7 104 | # via -r requirements.in.minimal 105 | markupsafe==3.0.2 106 | # via jinja2 107 | mccabe==0.7.0 108 | # via flake8 109 | mcp==1.6.0 110 | # via -r requirements.in.minimal 111 | mpmath==1.3.0 112 | # via sympy 113 | multidict==6.2.0 114 | # via 115 | # aiohttp 116 | # yarl 117 | mypy==1.15.0 118 | # via -r requirements.in.minimal 119 | mypy-extensions==1.0.0 120 | # via 121 | # black 122 | # mypy 123 | networkx==3.4.2 124 | # via 125 | # -r requirements.in.minimal 126 | # torch 127 | numpy==2.2.4 128 | # via 129 | # -r requirements.in.minimal 130 | # qdrant-client 131 | # scipy 132 | # transformers 133 | packaging==24.2 134 | # via 135 | # black 136 | # build 137 | # huggingface-hub 138 | # pytest 139 | # transformers 140 | pathspec==0.12.1 141 | # via black 142 | pip-tools==7.4.1 143 | # via -r requirements.in.minimal 144 | platformdirs==4.3.7 145 | # via black 146 | pluggy==1.5.0 147 | # via pytest 148 | portalocker==2.10.1 149 | # via qdrant-client 150 | propcache==0.3.1 151 | # via 152 | # aiohttp 153 | # yarl 154 | protobuf==5.29.4 155 | # via grpcio-tools 156 | psutil==7.0.0 157 | # via -r requirements.in.minimal 158 | pycodestyle==2.13.0 159 | # via flake8 160 | pydantic==2.11.1 161 | # via 162 | # -r requirements.in.minimal 163 | # fastapi 164 | # mcp 165 | # pydantic-settings 166 | # qdrant-client 167 | pydantic-core==2.33.0 168 | # via pydantic 169 | pydantic-settings==2.8.1 170 | # via mcp 171 | pyflakes==3.3.1 172 | # via flake8 173 | pyproject-hooks==1.2.0 174 | # via 175 | # build 176 | # pip-tools 177 | pytest==8.3.5 178 | # via 179 | # -r requirements.in.minimal 180 | # pytest-asyncio 181 | # pytest-cov 182 | pytest-asyncio==0.26.0 183 | # via -r requirements.in.minimal 184 | pytest-cov==6.0.0 185 | # via -r requirements.in.minimal 186 | python-dotenv==1.1.0 187 | # via 188 | # -r requirements.in.minimal 189 | # pydantic-settings 190 | python-frontmatter==1.1.0 191 | # via -r requirements.in.minimal 192 | python-slugify==8.0.4 193 | # via -r requirements.in.minimal 194 | pyyaml==6.0.2 195 | # via 196 | # -r requirements.in.minimal 197 | # huggingface-hub 198 | # python-frontmatter 199 | # transformers 200 | qdrant-client==1.13.3 201 | # via -r requirements.in.minimal 202 | regex==2024.11.6 203 | # via transformers 204 | requests==2.32.3 205 | # via 206 | # -r requirements.in.minimal 207 | # huggingface-hub 208 | # transformers 209 | safetensors==0.5.3 210 | # via transformers 211 | scipy==1.15.2 212 | # via -r requirements.in.minimal 213 
| slugify==0.0.1 214 | # via -r requirements.in.minimal 215 | sniffio==1.3.1 216 | # via anyio 217 | soupsieve==2.6 218 | # via beautifulsoup4 219 | sse-starlette==2.2.1 220 | # via mcp 221 | starlette==0.46.1 222 | # via 223 | # fastapi 224 | # mcp 225 | # sse-starlette 226 | structlog==25.2.0 227 | # via -r requirements.in.minimal 228 | sympy==1.13.1 229 | # via torch 230 | text-unidecode==1.3 231 | # via python-slugify 232 | tokenizers==0.21.1 233 | # via transformers 234 | torch==2.6.0 235 | # via -r requirements.in.minimal 236 | tqdm==4.67.1 237 | # via 238 | # huggingface-hub 239 | # transformers 240 | transformers==4.50.3 241 | # via -r requirements.in.minimal 242 | typing-extensions==4.13.0 243 | # via 244 | # anyio 245 | # beautifulsoup4 246 | # fastapi 247 | # huggingface-hub 248 | # mypy 249 | # pydantic 250 | # pydantic-core 251 | # torch 252 | # typing-inspection 253 | typing-inspection==0.4.0 254 | # via pydantic 255 | urllib3==2.3.0 256 | # via 257 | # qdrant-client 258 | # requests 259 | uvicorn==0.34.0 260 | # via 261 | # -r requirements.in.minimal 262 | # mcp 263 | wheel==0.45.1 264 | # via pip-tools 265 | yarl==1.18.3 266 | # via aiohttp 267 | 268 | # The following packages are considered to be unsafe in a requirements file: 269 | pip==25.0.1 270 | # via pip-tools 271 | setuptools==78.1.0 272 | # via 273 | # grpcio-tools 274 | # pip-tools 275 | # WARNING: starlette constraint was removed to resolve conflicts 276 | # You will need to manually install a compatible starlette version 277 | ``` -------------------------------------------------------------------------------- /docs/getting-started/quickstart.md: -------------------------------------------------------------------------------- ```markdown 1 | # Quick Start Guide 2 | 3 | > 🚧 **Documentation In Progress** 4 | > 5 | > This documentation is being actively developed. More details will be added soon. 6 | 7 | ## Overview 8 | 9 | This guide will help you get started with MCP Codebase Insight quickly. 10 | 11 | ## Prerequisites 12 | 13 | Ensure you have: 14 | - Completed the [Installation](installation.md) 15 | - Set up [Qdrant](qdrant_setup.md) 16 | - Configured your [environment](configuration.md) 17 | 18 | ## Basic Usage 19 | 20 | 1. **Start the Server** 21 | ```bash 22 | mcp-codebase-insight --host localhost --port 3000 23 | ``` 24 | 25 | 2. **Analyze Code** 26 | ```python 27 | from mcp_codebase_insight import CodebaseAnalyzer 28 | 29 | analyzer = CodebaseAnalyzer() 30 | results = analyzer.analyze_code("path/to/code") 31 | ``` 32 | 33 | 3. **View Results** 34 | ```python 35 | print(results.patterns) 36 | print(results.suggestions) 37 | ``` 38 | 39 | ## Next Steps 40 | 41 | - [API Reference](../api/rest-api.md) 42 | - [Feature Documentation](../features/code-analysis.md) 43 | - [Development Guide](../development/README.md) 44 | 45 | ## 5-Minute Setup 46 | 47 | 1. **Install MCP Codebase Insight** 48 | ```bash 49 | # Create and activate virtual environment 50 | python -m venv venv 51 | source venv/bin/activate # On Windows: venv\Scripts\activate 52 | 53 | # Install the package 54 | pip install mcp-codebase-insight 55 | ``` 56 | 57 | 2. **Start Qdrant Vector Database** 58 | ```bash 59 | # Using Docker (recommended) 60 | docker pull qdrant/qdrant 61 | docker run -p 6333:6333 qdrant/qdrant 62 | ``` 63 | 64 | 3. 
**Configure Environment** 65 | ```bash 66 | # Create .env file 67 | cat > .env << EOL 68 | MCP_HOST=127.0.0.1 69 | MCP_PORT=3000 70 | QDRANT_URL=http://localhost:6333 71 | MCP_DOCS_CACHE_DIR=./docs 72 | MCP_ADR_DIR=./docs/adrs 73 | MCP_KB_STORAGE_DIR=./knowledge 74 | EOL 75 | 76 | # Create required directories 77 | mkdir -p docs/adrs knowledge 78 | ``` 79 | 80 | 4. **Verify Installation** 81 | ```bash 82 | # In another terminal 83 | curl http://localhost:3000/health 84 | ``` 85 | 86 | ## Basic Usage Examples 87 | 88 | ### 1. Analyze Code Patterns 89 | 90 | ```python 91 | import httpx 92 | 93 | async with httpx.AsyncClient() as client: 94 | # Analyze code patterns 95 | response = await client.post( 96 | "http://localhost:3000/api/analyze", 97 | json={ 98 | "code": """ 99 | def calculate_fibonacci(n): 100 | if n <= 1: 101 | return n 102 | return calculate_fibonacci(n-1) + calculate_fibonacci(n-2) 103 | """, 104 | "language": "python" 105 | } 106 | ) 107 | 108 | results = response.json() 109 | print("Detected patterns:", results["patterns"]) 110 | ``` 111 | 112 | ### 2. Create an ADR 113 | 114 | ```python 115 | # Create an Architecture Decision Record 116 | response = await client.post( 117 | "http://localhost:3000/api/adrs", 118 | json={ 119 | "title": "Use FastAPI for REST API", 120 | "context": { 121 | "problem": "Need a modern Python web framework", 122 | "constraints": ["Performance", "Easy to maintain"] 123 | }, 124 | "options": [ 125 | { 126 | "title": "FastAPI", 127 | "pros": ["Fast", "Modern", "Great docs"], 128 | "cons": ["Newer framework"] 129 | }, 130 | { 131 | "title": "Flask", 132 | "pros": ["Mature", "Simple"], 133 | "cons": ["Slower", "Less modern"] 134 | } 135 | ], 136 | "decision": "We will use FastAPI", 137 | "consequences": ["Need to learn async/await", "Better performance"] 138 | } 139 | ) 140 | 141 | adr = response.json() 142 | print(f"Created ADR: {adr['id']}") 143 | ``` 144 | 145 | ### 3. Search Documentation 146 | 147 | ```python 148 | # Search for relevant documentation 149 | response = await client.get( 150 | "http://localhost:3000/api/docs/search", 151 | params={ 152 | "query": "how to handle authentication", 153 | "limit": 5 154 | } 155 | ) 156 | 157 | docs = response.json() 158 | for doc in docs["results"]: 159 | print(f"- {doc['title']}: {doc['relevance_score']}") 160 | ``` 161 | 162 | ### 4. Monitor System Health 163 | 164 | ```python 165 | # Get system health status 166 | response = await client.get("http://localhost:3000/health") 167 | health = response.json() 168 | 169 | print("System Status:", health["status"]) 170 | for component, status in health["components"].items(): 171 | print(f"- {component}: {status['status']}") 172 | ``` 173 | 174 | ## Using the Web Interface 175 | 176 | 1. Open your browser to `http://localhost:3000/docs` 177 | 2. Explore the interactive API documentation 178 | 3. 
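Optionally fetch the raw OpenAPI schema for generating API clients. The interactive docs are served by FastAPI, so the schema should be available at its default route (worth verifying for your deployment):
   ```bash
   # Machine-readable schema behind the interactive docs (FastAPI default path)
   curl http://localhost:3000/openapi.json
   ```
4. 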
Try out different endpoints directly from the browser 179 | 180 | ## Common Operations 181 | 182 | ### Managing ADRs 183 | 184 | ```bash 185 | # List all ADRs 186 | curl http://localhost:3000/api/adrs 187 | 188 | # Get specific ADR 189 | curl http://localhost:3000/api/adrs/{adr_id} 190 | 191 | # Update ADR status 192 | curl -X PATCH http://localhost:3000/api/adrs/{adr_id} \ 193 | -H "Content-Type: application/json" \ 194 | -d '{"status": "ACCEPTED"}' 195 | ``` 196 | 197 | ### Working with Documentation 198 | 199 | ```bash 200 | # Crawl documentation 201 | curl -X POST http://localhost:3000/api/docs/crawl \ 202 | -H "Content-Type: application/json" \ 203 | -d '{ 204 | "urls": ["https://your-docs-site.com"], 205 | "source_type": "documentation" 206 | }' 207 | 208 | # Search documentation 209 | curl "http://localhost:3000/api/docs/search?query=authentication&limit=5" 210 | ``` 211 | 212 | ### Analyzing Code 213 | 214 | ```bash 215 | # Analyze code patterns 216 | curl -X POST http://localhost:3000/api/analyze \ 217 | -H "Content-Type: application/json" \ 218 | -d '{ 219 | "code": "your code here", 220 | "language": "python" 221 | }' 222 | 223 | # Get analysis results 224 | curl http://localhost:3000/api/analysis/{analysis_id} 225 | ``` 226 | 227 | ## Troubleshooting 228 | 229 | 1. **Server Won't Start** 230 | ```bash 231 | # Check if ports are in use 232 | lsof -i :3000 233 | lsof -i :6333 234 | ``` 235 | 236 | 2. **Connection Issues** 237 | ```bash 238 | # Verify Qdrant is running 239 | curl http://localhost:6333/health 240 | 241 | # Check MCP server health 242 | curl http://localhost:3000/health 243 | ``` 244 | 245 | 3. **Permission Problems** 246 | ```bash 247 | # Fix directory permissions 248 | chmod -R 755 docs knowledge 249 | ``` 250 | 251 | ## Getting Help 252 | 253 | - Check the [Troubleshooting Guide](../troubleshooting/common-issues.md) 254 | - Join our [Discussion Forum](https://github.com/modelcontextprotocol/mcp-codebase-insight/discussions) 255 | - Open an [Issue](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues) ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/core/metrics.py: -------------------------------------------------------------------------------- ```python 1 | """Metrics collection and monitoring module.""" 2 | 3 | import json 4 | from datetime import datetime 5 | from enum import Enum 6 | from pathlib import Path 7 | from typing import Dict, List, Optional, Union 8 | 9 | from pydantic import BaseModel 10 | 11 | class MetricType(str, Enum): 12 | """Metric type enumeration.""" 13 | 14 | COUNTER = "counter" 15 | GAUGE = "gauge" 16 | HISTOGRAM = "histogram" 17 | SUMMARY = "summary" 18 | 19 | class Metric(BaseModel): 20 | """Metric model.""" 21 | 22 | name: str 23 | type: MetricType 24 | value: Union[int, float] 25 | labels: Optional[Dict[str, str]] = None 26 | timestamp: datetime 27 | 28 | class MetricsManager: 29 | """Manager for system metrics.""" 30 | 31 | def __init__(self, config): 32 | """Initialize metrics manager.""" 33 | self.config = config 34 | self.enabled = config.metrics_enabled 35 | self.metrics_dir = config.docs_cache_dir / "metrics" 36 | self.metrics_dir.mkdir(parents=True, exist_ok=True) 37 | self.metrics: Dict[str, List[Metric]] = {} 38 | self.initialized = False 39 | 40 | async def initialize(self): 41 | """Initialize metrics collection.""" 42 | if self.initialized: 43 | return 44 | 45 | try: 46 | if not self.enabled: 47 | return 48 | 49 | # Load existing metrics 50 | for 
path in self.metrics_dir.glob("*.json"): 51 | try: 52 | metric_name = path.stem 53 | with open(path) as f: 54 | data = json.load(f) 55 | self.metrics[metric_name] = [ 56 | Metric(**metric) for metric in data 57 | ] 58 | except Exception as e: 59 | print(f"Error loading metric file {path}: {e}") 60 | 61 | self.initialized = True 62 | except Exception as e: 63 | print(f"Error initializing metrics manager: {e}") 64 | await self.cleanup() 65 | raise RuntimeError(f"Failed to initialize metrics manager: {str(e)}") 66 | 67 | async def cleanup(self): 68 | """Clean up metrics.""" 69 | if not self.initialized: 70 | return 71 | 72 | try: 73 | if not self.enabled: 74 | return 75 | 76 | # Save all metrics 77 | for name, metrics in self.metrics.items(): 78 | try: 79 | await self._save_metrics(name, metrics) 80 | except Exception as e: 81 | print(f"Error saving metrics for {name}: {e}") 82 | except Exception as e: 83 | print(f"Error cleaning up metrics manager: {e}") 84 | finally: 85 | self.initialized = False 86 | 87 | async def reset(self): 88 | """Reset all metrics.""" 89 | if not self.enabled: 90 | return 91 | 92 | # Clear in-memory metrics 93 | self.metrics = {} 94 | 95 | # Remove all metric files 96 | for path in self.metrics_dir.glob("*.json"): 97 | try: 98 | path.unlink() 99 | except Exception as e: 100 | print(f"Error removing metric file {path}: {e}") 101 | 102 | async def record_metric( 103 | self, 104 | name: str, 105 | type: MetricType, 106 | value: Union[int, float], 107 | labels: Optional[Dict[str, str]] = None 108 | ) -> None: 109 | """Record a new metric value.""" 110 | if not self.enabled: 111 | return 112 | 113 | metric = Metric( 114 | name=name, 115 | type=type, 116 | value=value, 117 | labels=labels, 118 | timestamp=datetime.utcnow() 119 | ) 120 | 121 | if name not in self.metrics: 122 | self.metrics[name] = [] 123 | self.metrics[name].append(metric) 124 | 125 | # Save metrics periodically 126 | if len(self.metrics[name]) >= 100: 127 | await self._save_metrics(name, self.metrics[name]) 128 | self.metrics[name] = [] 129 | 130 | async def get_metrics( 131 | self, 132 | names: Optional[List[str]] = None, 133 | start_time: Optional[datetime] = None, 134 | end_time: Optional[datetime] = None 135 | ) -> Dict[str, List[Dict]]: 136 | """Get metrics, optionally filtered by name and time range.""" 137 | if not self.enabled: 138 | return {} 139 | 140 | result = {} 141 | metric_names = names or list(self.metrics.keys()) 142 | 143 | for name in metric_names: 144 | if name not in self.metrics: 145 | continue 146 | 147 | metrics = self.metrics[name] 148 | 149 | # Apply time filters 150 | if start_time: 151 | metrics = [m for m in metrics if m.timestamp >= start_time] 152 | if end_time: 153 | metrics = [m for m in metrics if m.timestamp <= end_time] 154 | 155 | result[name] = [metric.model_dump() for metric in metrics] 156 | 157 | return result 158 | 159 | async def get_metric_summary( 160 | self, 161 | name: str, 162 | window_minutes: int = 60 163 | ) -> Optional[Dict]: 164 | """Get summary statistics for a metric.""" 165 | if not self.enabled or name not in self.metrics: 166 | return None 167 | 168 | metrics = self.metrics[name] 169 | if not metrics: 170 | return None 171 | 172 | # Filter metrics within time window 173 | cutoff = datetime.utcnow().timestamp() - (window_minutes * 60) 174 | recent_metrics = [ 175 | m for m in metrics 176 | if m.timestamp.timestamp() >= cutoff 177 | ] 178 | 179 | if not recent_metrics: 180 | return None 181 | 182 | values = [m.value for m in recent_metrics] 183 | 
return { 184 | "count": len(values), 185 | "min": min(values), 186 | "max": max(values), 187 | "avg": sum(values) / len(values), 188 | "last": values[-1] 189 | } 190 | 191 | async def _save_metrics(self, name: str, metrics: List[Metric]) -> None: 192 | """Save metrics to file.""" 193 | metric_path = self.metrics_dir / f"{name}.json" 194 | with open(metric_path, "w") as f: 195 | json.dump( 196 | [metric.model_dump() for metric in metrics], 197 | f, 198 | indent=2, 199 | default=str 200 | ) 201 | ``` -------------------------------------------------------------------------------- /docs/features/code-analysis.md: -------------------------------------------------------------------------------- ```markdown 1 | # Code Analysis 2 | 3 | MCP Codebase Insight provides powerful code analysis capabilities to help you understand patterns, identify issues, and improve code quality. 4 | 5 | ## Overview 6 | 7 | The code analysis feature: 8 | - Identifies common design patterns 9 | - Detects potential issues and anti-patterns 10 | - Suggests improvements and optimizations 11 | - Analyzes code relationships and dependencies 12 | - Provides semantic understanding of code 13 | 14 | ## Features 15 | 16 | ### 1. Pattern Detection 17 | 18 | The system can identify common software design patterns: 19 | 20 | ```python 21 | # Example: Factory Pattern Detection 22 | class Creator: 23 | def factory_method(self): 24 | pass 25 | 26 | def some_operation(self): 27 | product = self.factory_method() 28 | result = product.operation() 29 | return result 30 | 31 | class ConcreteCreator(Creator): 32 | def factory_method(self): 33 | return ConcreteProduct() 34 | ``` 35 | 36 | Analysis will identify this as a Factory Pattern implementation. 37 | 38 | ### 2. Code Quality Analysis 39 | 40 | Identifies potential issues and suggests improvements: 41 | 42 | - Code complexity metrics 43 | - Duplicate code detection 44 | - Dead code identification 45 | - Resource management issues 46 | - Error handling patterns 47 | 48 | ### 3. Dependency Analysis 49 | 50 | Maps relationships between code components: 51 | 52 | ```python 53 | # Example: Analyzing imports and dependencies 54 | response = await client.post( 55 | "http://localhost:3000/api/analyze/dependencies", 56 | json={ 57 | "file_path": "src/main.py", 58 | "depth": 2 # How deep to analyze dependencies 59 | } 60 | ) 61 | 62 | dependencies = response.json() 63 | ``` 64 | 65 | ### 4. 
Semantic Analysis 66 | 67 | Understands code meaning and context: 68 | 69 | ```python 70 | # Example: Semantic code search 71 | response = await client.post( 72 | "http://localhost:3000/api/analyze/semantic", 73 | json={ 74 | "query": "find all functions that handle user authentication", 75 | "scope": ["src/auth/", "src/users/"] 76 | } 77 | ) 78 | 79 | matches = response.json() 80 | ``` 81 | 82 | ## Usage 83 | 84 | ### Basic Analysis 85 | 86 | ```python 87 | import httpx 88 | 89 | async with httpx.AsyncClient() as client: 90 | response = await client.post( 91 | "http://localhost:3000/api/analyze", 92 | json={ 93 | "code": your_code, 94 | "language": "python", 95 | "analysis_type": ["patterns", "quality", "dependencies"] 96 | } 97 | ) 98 | 99 | results = response.json() 100 | print(results["patterns"]) 101 | print(results["quality_issues"]) 102 | print(results["dependencies"]) 103 | ``` 104 | 105 | ### Continuous Analysis 106 | 107 | Set up continuous analysis in your CI/CD pipeline: 108 | 109 | ```bash 110 | # Example: GitHub Actions workflow 111 | curl -X POST http://localhost:3000/api/analyze/ci \ 112 | -H "Content-Type: application/json" \ 113 | -d '{ 114 | "repository": "owner/repo", 115 | "branch": "main", 116 | "commit": "sha", 117 | "diff_only": true 118 | }' 119 | ``` 120 | 121 | ### Batch Analysis 122 | 123 | Analyze multiple files or entire directories: 124 | 125 | ```python 126 | # Analyze entire directory 127 | response = await client.post( 128 | "http://localhost:3000/api/analyze/batch", 129 | json={ 130 | "path": "src/", 131 | "include": ["*.py", "*.js"], 132 | "exclude": ["*_test.py", "node_modules"], 133 | "analysis_type": ["patterns", "quality"] 134 | } 135 | ) 136 | ``` 137 | 138 | ## Configuration 139 | 140 | ### Analysis Settings 141 | 142 | ```yaml 143 | analysis: 144 | # Pattern detection settings 145 | patterns: 146 | confidence_threshold: 0.8 147 | min_pattern_size: 5 148 | 149 | # Quality analysis settings 150 | quality: 151 | max_complexity: 15 152 | max_line_length: 100 153 | enable_type_checking: true 154 | 155 | # Dependency analysis settings 156 | dependencies: 157 | max_depth: 3 158 | include_external: true 159 | 160 | # Semantic analysis settings 161 | semantic: 162 | model: "code-bert-base" 163 | similarity_threshold: 0.7 164 | ``` 165 | 166 | ### Custom Rules 167 | 168 | Create custom analysis rules: 169 | 170 | ```python 171 | # Example: Custom pattern rule 172 | { 173 | "name": "custom_singleton", 174 | "pattern": { 175 | "type": "class", 176 | "properties": { 177 | "has_private_constructor": true, 178 | "has_static_instance": true 179 | } 180 | }, 181 | "message": "Possible Singleton pattern detected" 182 | } 183 | ``` 184 | 185 | ## Integration 186 | 187 | ### IDE Integration 188 | 189 | The analysis features can be integrated with popular IDEs: 190 | 191 | - VS Code Extension 192 | - JetBrains Plugin 193 | - Vim/Neovim Plugin 194 | 195 | ### CI/CD Integration 196 | 197 | Example GitHub Actions workflow: 198 | 199 | ```yaml 200 | name: Code Analysis 201 | 202 | on: [push, pull_request] 203 | 204 | jobs: 205 | analyze: 206 | runs-on: ubuntu-latest 207 | steps: 208 | - uses: actions/checkout@v2 209 | - name: Run MCP Analysis 210 | run: | 211 | curl -X POST http://localhost:3000/api/analyze/ci \ 212 | -H "Content-Type: application/json" \ 213 | -d '{ 214 | "repository": "${{ github.repository }}", 215 | "commit": "${{ github.sha }}", 216 | "diff_only": true 217 | }' 218 | ``` 219 | 220 | ## Best Practices 221 | 222 | 1. 
**Regular Analysis** 223 | - Run analysis on every commit 224 | - Set up automated analysis in CI/CD 225 | - Review analysis results in code reviews 226 | 227 | 2. **Custom Rules** 228 | - Create project-specific rules 229 | - Maintain a rule catalog 230 | - Version control your rules 231 | 232 | 3. **Performance** 233 | - Use selective analysis for large codebases 234 | - Enable caching for repeated analysis 235 | - Configure appropriate thresholds 236 | 237 | 4. **Integration** 238 | - Integrate with your IDE 239 | - Add to your CI/CD pipeline 240 | - Connect with code review tools 241 | 242 | ## Troubleshooting 243 | 244 | ### Common Issues 245 | 246 | 1. **Analysis Timeout** 247 | ```yaml 248 | # Increase timeout in config 249 | analysis: 250 | timeout: 300 # seconds 251 | ``` 252 | 253 | 2. **High Memory Usage** 254 | ```yaml 255 | # Adjust batch size 256 | analysis: 257 | batch_size: 50 258 | ``` 259 | 260 | 3. **False Positives** 261 | ```yaml 262 | # Adjust confidence thresholds 263 | analysis: 264 | patterns: 265 | confidence_threshold: 0.9 266 | ``` 267 | 268 | ## API Reference 269 | 270 | ### Analysis Endpoints 271 | 272 | | Endpoint | Description | 273 | |----------|-------------| 274 | | `/api/analyze` | Basic code analysis | 275 | | `/api/analyze/batch` | Batch analysis | 276 | | `/api/analyze/ci` | CI/CD integration | 277 | | `/api/analyze/dependencies` | Dependency analysis | 278 | | `/api/analyze/semantic` | Semantic analysis | 279 | 280 | ### Response Format 281 | 282 | ```json 283 | { 284 | "analysis_id": "uuid", 285 | "status": "completed", 286 | "results": { 287 | "patterns": [...], 288 | "quality": {...}, 289 | "dependencies": [...], 290 | "semantic": {...} 291 | }, 292 | "metrics": { 293 | "time_taken": "2.5s", 294 | "files_analyzed": 10 295 | } 296 | } 297 | ``` 298 | 299 | ## Next Steps 300 | 301 | - [Pattern Catalog](patterns/index.md) 302 | - [Quality Rules](quality/index.md) 303 | - [CI/CD Setup](../integration/ci-cd.md) 304 | - [Custom Rules Guide](rules/custom.md) ``` -------------------------------------------------------------------------------- /github-actions-documentation.md: -------------------------------------------------------------------------------- ```markdown 1 | # GitHub Actions Workflows Documentation 2 | 3 | @coderabbit 4 | 5 | This document provides a detailed review and documentation of the GitHub Actions workflows in the MCP Codebase Insight project. It aims to explain each workflow, its purpose, and identify potential areas for improvement. 6 | 7 | ## Overview of Workflows 8 | 9 | The repository contains three GitHub Actions workflows: 10 | 11 | 1. **build-verification.yml**: Verifies the build across multiple Python versions 12 | 2. **publish.yml**: Publishes the package to PyPI when a new tag is pushed 13 | 3. **tdd-verification.yml**: Verifies that the project follows Test-Driven Development principles 14 | 15 | ## 1. Build Verification Workflow 16 | 17 | **File**: `.github/workflows/build-verification.yml` 18 | **Purpose**: Ensures the project builds and tests pass across multiple Python versions. 19 | 20 | ### Trigger Events 21 | - Push to `main` branch 22 | - Pull requests to `main` branch 23 | - Manual workflow dispatch with configurable parameters 24 | 25 | ### Job Configuration 26 | - Runs on `ubuntu-latest` 27 | - Tests across Python versions: 3.10, 3.11, 3.12, 3.13 28 | - Uses Qdrant as a service container for vector storage 29 | 30 | ### Key Steps 31 | 1. **Checkout code** - Fetches the repository code 32 | 2. 
**Set up Python** - Configures the specified Python version
33 | 3. **Wait for Qdrant** - Ensures the Qdrant service is available
34 | 4. **Setup private packages** - Configures any private dependencies
35 | 5. **Install dependencies** - Installs project requirements
36 | 6. **Set up environment** - Configures environment variables and directories
37 | 7. **Initialize Qdrant collection** - Creates a vector database collection for testing
38 | 8. **Run build verification** - Executes a subset of tests that are known to pass
39 | 9. **Upload and parse verification report** - Generates and publishes test results
40 | 
41 | ### Areas for Improvement
42 | 1. **Test Pattern Issue** - Fixed the wildcard pattern issue (`test_*`) in test paths
43 | 2. **Installation Resilience** - The approach to dependency installation could be improved with better error handling
44 | 3. **Service Health Check** - Extracted the Qdrant health check into a reusable script
45 | 4. **Test Selection** - The selective test running approach might miss regressions in other tests
46 | 
47 | ## 2. Publish Workflow
48 | 
49 | **File**: `.github/workflows/publish.yml`
50 | **Purpose**: Automates the publication of the package to PyPI when a new tag is created.
51 | 
52 | ### Trigger Events
53 | - Push of tags matching the pattern `v*` (e.g., v1.0.0)
54 | 
55 | ### Job Configuration
56 | - Runs on `ubuntu-latest`
57 | - Uses the PyPI environment for deployment
58 | - Requires write permissions for id-token and read for contents
59 | 
60 | ### Key Steps
61 | 1. **Checkout code** - Fetches the repository with full history
62 | 2. **Set up Python** - Configures Python (latest 3.x version)
63 | 3. **Install dependencies** - Installs build and publishing tools
64 | 4. **Build package** - Creates distribution packages
65 | 5. **Check distribution** - Verifies the package integrity
66 | 6. **Publish to PyPI** - Uploads the package to PyPI
67 | 
68 | ### Areas for Improvement
69 | 1. **Version Verification** - Could add a step to verify the version in the code matches the tag
70 | 2. **Changelog Validation** - Could verify that the changelog is updated for the new version
71 | 3. **Pre-publish Testing** - Could run tests before publishing to ensure quality
72 | 4. **Release Notes** - Could automatically generate GitHub release notes
73 | 
74 | ## 3. TDD Verification Workflow
75 | 
76 | **File**: `.github/workflows/tdd-verification.yml`
77 | **Purpose**: Enforces Test-Driven Development principles by checking test coverage and patterns.
78 | 
79 | ### Trigger Events
80 | - Push to `dev` or `main` branches
81 | - Pull requests to `dev` or `main` branches
82 | - Manual workflow dispatch with configurable Python version
83 | 
84 | ### Job Configuration
85 | - Runs on `ubuntu-latest`
86 | - Currently only tests with Python 3.11
87 | - Uses Qdrant as a service container
88 | 
89 | ### Key Steps
90 | 1. **Checkout code** - Fetches the repository code
91 | 2. **Set up Python** - Configures Python 3.11
92 | 3. **Wait for Qdrant** - Ensures the Qdrant service is available
93 | 4. **Install dependencies** - Installs project and testing requirements
94 | 5. **Set up environment** - Configures environment variables and directories
95 | 6. **Initialize Qdrant collection** - Creates a vector database collection for testing
96 | 7. **Run unit tests** - Executes unit tests with coverage reporting
97 | 8. **Run integration tests** - Executes integration tests with coverage reporting
98 | 9. 
**Generate coverage report** - Combines and reports test coverage 99 | 10. **TDD Verification** - Checks that all modules have corresponding tests and enforces minimum coverage 100 | 11. **Upload coverage** - Uploads coverage data to Codecov 101 | 12. **Check test structure** - Validates that tests follow the Arrange-Act-Assert pattern 102 | 13. **TDD Workflow Summary** - Generates a summary of test coverage and counts 103 | 104 | ### Areas for Improvement 105 | 1. **Python Version Matrix** - Could test across multiple Python versions like the build workflow 106 | 2. **Inline Python Scripts** - Several inline Python scripts could be moved to dedicated files for better maintainability 107 | 3. **Test Pattern Detection** - The Arrange-Act-Assert pattern detection is simplistic and could be more sophisticated 108 | 4. **Coverage Enforcement** - Coverage threshold (60%) could be extracted to a variable or configuration file 109 | 5. **Naming Consistency** - Some naming inconsistencies exist between the workflows 110 | 111 | ## General Recommendations 112 | 113 | 1. **Workflow Consolidation** - Consider consolidating build-verification and tdd-verification workflows as they have overlapping functionality 114 | 2. **Shared Actions** - Extract common steps (like waiting for Qdrant) into reusable composite actions 115 | 3. **Workflow Dependencies** - Establish workflow dependencies to avoid redundant work (e.g., don't publish unless tests pass) 116 | 4. **Environment Standardization** - Standardize environment variables across workflows 117 | 5. **Documentation** - Add workflow-specific documentation in code comments 118 | 6. **Secret Management** - Audit and document the required secrets 119 | 7. **Caching Strategy** - Optimize dependency and build caching to speed up workflows 120 | 8. **Notification Integration** - Add notification channels (Slack, Discord) for workflow statuses 121 | 122 | ## Summary 123 | 124 | The GitHub Actions workflows provide a solid foundation for CI/CD in this project, with comprehensive build verification, TDD enforcement, and automated publishing. The identified areas for improvement focus on maintainability, consistency, and efficiency. Implementing these suggestions would enhance the reliability and performance of the CI/CD pipeline. ``` -------------------------------------------------------------------------------- /scripts/store_report_in_mcp.py: -------------------------------------------------------------------------------- ```python 1 | #!/usr/bin/env python 2 | """ 3 | Store Build Verification Report in MCP Codebase Insight 4 | 5 | This script reads the build verification report and stores it in the MCP server 6 | using the vector database for later retrieval and analysis. 
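
Typical usage, from the repository root:

    python scripts/store_report_in_mcp.py --report logs/build_verification_report.json

Connection settings default to the QDRANT_URL, QDRANT_API_KEY, COLLECTION_NAME and
EMBEDDING_MODEL environment variables; pass --config to point at a JSON file that
overrides them.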
7 | """ 8 | 9 | import os 10 | import sys 11 | import json 12 | import asyncio 13 | import argparse 14 | import logging 15 | from datetime import datetime 16 | from pathlib import Path 17 | import uuid 18 | 19 | # Add the project root to the Python path 20 | sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) 21 | 22 | from src.mcp_codebase_insight.core.vector_store import VectorStore 23 | from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding 24 | 25 | # Configure logging 26 | logging.basicConfig( 27 | level=logging.INFO, 28 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', 29 | handlers=[ 30 | logging.StreamHandler(), 31 | logging.FileHandler('logs/store_report.log') 32 | ] 33 | ) 34 | logger = logging.getLogger('store_report') 35 | 36 | async def store_report(report_file: str, config_path: str = None): 37 | """Store the build verification report in the MCP server. 38 | 39 | Args: 40 | report_file: Path to the report file 41 | config_path: Path to configuration file (optional) 42 | """ 43 | # Load configuration 44 | config = { 45 | 'qdrant_url': os.environ.get('QDRANT_URL', 'http://localhost:6333'), 46 | 'qdrant_api_key': os.environ.get('QDRANT_API_KEY', ''), 47 | 'collection_name': os.environ.get('COLLECTION_NAME', 'mcp-codebase-insight'), 48 | 'embedding_model': os.environ.get('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2') 49 | } 50 | 51 | # Override with config file if provided 52 | if config_path: 53 | try: 54 | with open(config_path, 'r') as f: 55 | file_config = json.load(f) 56 | config.update(file_config) 57 | except Exception as e: 58 | logger.error(f"Failed to load config from {config_path}: {e}") 59 | 60 | try: 61 | # Load report 62 | logger.info(f"Loading report from {report_file}") 63 | with open(report_file, 'r') as f: 64 | report = json.load(f) 65 | 66 | # Initialize embedder 67 | logger.info("Initializing embedder...") 68 | embedder = SentenceTransformerEmbedding(model_name=config['embedding_model']) 69 | await embedder.initialize() 70 | 71 | # Initialize vector store 72 | logger.info(f"Connecting to vector store at {config['qdrant_url']}...") 73 | vector_store = VectorStore( 74 | url=config['qdrant_url'], 75 | embedder=embedder, 76 | collection_name=config['collection_name'], 77 | api_key=config.get('qdrant_api_key'), 78 | vector_name="default" 79 | ) 80 | await vector_store.initialize() 81 | 82 | # Prepare report for storage 83 | report_text = json.dumps(report, indent=2) 84 | 85 | # Extract summary information for metadata 86 | timestamp = report["build_verification_report"]["timestamp"] 87 | summary = report["build_verification_report"]["summary"] 88 | overall_status = report["build_verification_report"]["verification_results"]["overall_status"] 89 | 90 | # Create more user-friendly metadata 91 | metadata = { 92 | "type": "build_verification_report", 93 | "timestamp": timestamp, 94 | "overall_status": overall_status, 95 | "summary": summary, 96 | "tests_passed": report["build_verification_report"]["test_summary"]["passed"], 97 | "tests_total": report["build_verification_report"]["test_summary"]["total"], 98 | "criteria_passed": sum(1 for c in report["build_verification_report"]["verification_results"]["criteria_results"].values() if c["passed"]), 99 | "criteria_total": len(report["build_verification_report"]["verification_results"]["criteria_results"]), 100 | "build_date": datetime.now().strftime("%Y-%m-%d"), 101 | "project": "mcp-codebase-insight", 102 | "stored_by": "automated-build-verification" 103 
| } 104 | 105 | # Store in vector database 106 | report_id = str(uuid.uuid4()) 107 | logger.info(f"Storing report with ID: {report_id}") 108 | 109 | # Generate embedding 110 | vector = await embedder.embed(report_text) 111 | 112 | # Store directly using the client to work around compatibility issues 113 | from qdrant_client.http import models as rest 114 | vector_store.client.upsert( 115 | collection_name=vector_store.collection_name, 116 | points=[ 117 | rest.PointStruct( 118 | id=report_id, 119 | vector=vector, # Use vector instead of vectors 120 | payload=metadata 121 | ) 122 | ] 123 | ) 124 | 125 | logger.info(f"Successfully stored report in MCP server with ID: {report_id}") 126 | 127 | # Create a record of stored reports 128 | try: 129 | history_file = Path("logs/report_history.json") 130 | history = [] 131 | 132 | if history_file.exists(): 133 | with open(history_file, 'r') as f: 134 | history = json.load(f) 135 | 136 | history.append({ 137 | "id": report_id, 138 | "timestamp": timestamp, 139 | "status": overall_status, 140 | "summary": summary 141 | }) 142 | 143 | with open(history_file, 'w') as f: 144 | json.dump(history, f, indent=2) 145 | 146 | logger.info(f"Updated report history in {history_file}") 147 | except Exception as e: 148 | logger.warning(f"Could not update report history: {e}") 149 | 150 | return report_id 151 | 152 | except Exception as e: 153 | logger.error(f"Failed to store report: {e}") 154 | raise 155 | finally: 156 | if 'vector_store' in locals(): 157 | await vector_store.close() 158 | 159 | async def main(): 160 | """Main function.""" 161 | parser = argparse.ArgumentParser(description="Store Build Verification Report in MCP") 162 | parser.add_argument("--report", default="logs/build_verification_report.json", help="Path to report file") 163 | parser.add_argument("--config", help="Path to configuration file") 164 | args = parser.parse_args() 165 | 166 | # Create logs directory if it doesn't exist 167 | os.makedirs("logs", exist_ok=True) 168 | 169 | try: 170 | report_id = await store_report(args.report, args.config) 171 | print(f"Report stored successfully with ID: {report_id}") 172 | return 0 173 | except Exception as e: 174 | print(f"Error storing report: {e}") 175 | return 1 176 | 177 | if __name__ == "__main__": 178 | sys.exit(asyncio.run(main())) ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/core/di.py: -------------------------------------------------------------------------------- ```python 1 | """Dependency Injection Container for MCP Server.""" 2 | 3 | from dataclasses import dataclass, field 4 | from typing import Optional, Dict, Any 5 | import asyncio 6 | from pathlib import Path 7 | 8 | from sentence_transformers import SentenceTransformer 9 | from qdrant_client import QdrantClient 10 | 11 | from .config import ServerConfig 12 | from .vector_store import VectorStore 13 | from .cache import CacheManager 14 | from .metrics import MetricsManager 15 | from .health import HealthManager 16 | from .documentation import DocumentationManager 17 | from .knowledge import KnowledgeBase 18 | from .tasks import TaskManager 19 | from ..utils.logger import get_logger 20 | 21 | logger = get_logger(__name__) 22 | 23 | @dataclass 24 | class ComponentContext: 25 | """Context for managing component lifecycle.""" 26 | initialized: bool = False 27 | cleanup_tasks: list = field(default_factory=list) 28 | error: Optional[Exception] = None 29 | 30 | @dataclass 31 | class DIContainer: 32 | """Dependency Injection 
Container for managing server components.""" 33 | 34 | config: ServerConfig 35 | _components: Dict[str, Any] = field(default_factory=dict) 36 | _contexts: Dict[str, ComponentContext] = field(default_factory=dict) 37 | 38 | def __post_init__(self): 39 | """Initialize component contexts.""" 40 | self._contexts = { 41 | "embedding_model": ComponentContext(), 42 | "vector_store": ComponentContext(), 43 | "cache_manager": ComponentContext(), 44 | "metrics_manager": ComponentContext(), 45 | "health_manager": ComponentContext(), 46 | "docs_manager": ComponentContext(), 47 | "knowledge_base": ComponentContext(), 48 | "task_manager": ComponentContext() 49 | } 50 | 51 | async def initialize_component(self, name: str, factory_func) -> Any: 52 | """Initialize a component with proper error handling and lifecycle management.""" 53 | context = self._contexts[name] 54 | if context.initialized: 55 | return self._components[name] 56 | 57 | try: 58 | component = await factory_func() 59 | if hasattr(component, 'initialize'): 60 | await component.initialize() 61 | 62 | self._components[name] = component 63 | context.initialized = True 64 | 65 | # Register cleanup if available 66 | if hasattr(component, 'cleanup'): 67 | context.cleanup_tasks.append(component.cleanup) 68 | elif hasattr(component, 'close'): 69 | context.cleanup_tasks.append(component.close) 70 | 71 | return component 72 | 73 | except Exception as e: 74 | context.error = e 75 | logger.error(f"Failed to initialize {name}: {str(e)}") 76 | raise 77 | 78 | async def get_embedding_model(self) -> SentenceTransformer: 79 | """Get or create the embedding model.""" 80 | async def factory(): 81 | return SentenceTransformer(self.config.embedding_model) 82 | return await self.initialize_component("embedding_model", factory) 83 | 84 | async def get_vector_store(self) -> VectorStore: 85 | """Get or create the vector store.""" 86 | async def factory(): 87 | embedding_model = await self.get_embedding_model() 88 | client = QdrantClient( 89 | url=self.config.qdrant_url, 90 | timeout=60.0 # Configurable timeout 91 | ) 92 | return VectorStore( 93 | client=client, 94 | embedder=embedding_model, 95 | collection_name=self.config.collection_name 96 | ) 97 | return await self.initialize_component("vector_store", factory) 98 | 99 | async def get_cache_manager(self) -> CacheManager: 100 | """Get or create the cache manager.""" 101 | async def factory(): 102 | return CacheManager( 103 | memory_size=self.config.memory_cache_size, 104 | disk_path=self.config.disk_cache_dir 105 | ) 106 | return await self.initialize_component("cache_manager", factory) 107 | 108 | async def get_metrics_manager(self) -> MetricsManager: 109 | """Get or create the metrics manager.""" 110 | async def factory(): 111 | return MetricsManager(enabled=self.config.metrics_enabled) 112 | return await self.initialize_component("metrics_manager", factory) 113 | 114 | async def get_health_manager(self) -> HealthManager: 115 | """Get or create the health manager.""" 116 | async def factory(): 117 | metrics = await self.get_metrics_manager() 118 | cache = await self.get_cache_manager() 119 | return HealthManager(metrics=metrics, cache=cache) 120 | return await self.initialize_component("health_manager", factory) 121 | 122 | async def get_docs_manager(self) -> DocumentationManager: 123 | """Get or create the documentation manager.""" 124 | async def factory(): 125 | vector_store = await self.get_vector_store() 126 | cache = await self.get_cache_manager() 127 | return DocumentationManager( 128 | 
vector_store=vector_store,
129 |                 cache=cache,
130 |                 docs_dir=self.config.docs_cache_dir
131 |             )
132 |         return await self.initialize_component("docs_manager", factory)
133 | 
134 |     async def get_knowledge_base(self) -> KnowledgeBase:
135 |         """Get or create the knowledge base."""
136 |         async def factory():
137 |             vector_store = await self.get_vector_store()
138 |             cache = await self.get_cache_manager()
139 |             return KnowledgeBase(
140 |                 vector_store=vector_store,
141 |                 cache=cache,
142 |                 storage_dir=self.config.kb_storage_dir
143 |             )
144 |         return await self.initialize_component("knowledge_base", factory)
145 | 
146 |     async def get_task_manager(self) -> TaskManager:
147 |         """Get or create the task manager."""
148 |         async def factory():
149 |             kb = await self.get_knowledge_base()
150 |             docs = await self.get_docs_manager()
151 |             return TaskManager(
152 |                 knowledge_base=kb,
153 |                 docs_manager=docs,
154 |                 max_tasks=100  # Configurable
155 |             )
156 |         return await self.initialize_component("task_manager", factory)
157 | 
158 |     async def cleanup(self):
159 |         """Clean up all components in reverse initialization order."""
160 |         for name, context in reversed(list(self._contexts.items())):
161 |             if context.initialized:
162 |                 try:
163 |                     for cleanup_task in reversed(context.cleanup_tasks):
164 |                         await cleanup_task()
165 |                     context.initialized = False
166 |                 except Exception as e:
167 |                     logger.error(f"Error cleaning up {name}: {str(e)}")
168 | 
169 |         self._components.clear()
```
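The `DIContainer` defined above is not exercised anywhere on this page, so here is a minimal usage sketch. It assumes `ServerConfig` can be constructed with defaults (its definition is not shown here); the point is the lazy, dependency-ordered initialization and the matching `cleanup()` call.

```python
# Hypothetical usage sketch; assumes ServerConfig() is constructible with defaults.
import asyncio

from mcp_codebase_insight.core.config import ServerConfig
from mcp_codebase_insight.core.di import DIContainer

async def run():
    container = DIContainer(config=ServerConfig())
    try:
        # Requesting the task manager lazily initializes its whole dependency
        # chain: knowledge base, docs manager, vector store, embedding model.
        task_manager = await container.get_task_manager()
        print(type(task_manager).__name__)
    finally:
        # Releases every initialized component in reverse order.
        await container.cleanup()

asyncio.run(run())
```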
--------------------------------------------------------------------------------
/run_build_verification.sh:
--------------------------------------------------------------------------------

```bash
1 | #!/bin/bash
2 | # End-to-End Build Verification Script
3 | #
4 | # This script automates the process of verifying an end-to-end build by:
5 | # 1. First analyzing the codebase to store component relationships
6 | # 2. Triggering the build verification process
7 | # 3. Reporting the results
8 | 
9 | set -e
10 | 
11 | # Set up environment
12 | source .venv/bin/activate || source test_env/bin/activate || echo "No virtual environment found, using system Python"
13 | 
14 | # Create required directories
15 | mkdir -p logs knowledge cache
16 | 
17 | # Set environment variables for testing
18 | export MCP_EMBEDDING_TIMEOUT=120  # Increase timeout for embedder initialization
19 | export QDRANT_URL="${QDRANT_URL:-http://localhost:6333}"
20 | export QDRANT_API_KEY="${QDRANT_API_KEY:-}"
21 | export MCP_COLLECTION_NAME="${MCP_COLLECTION_NAME:-mcp-codebase-insight}"
22 | 
23 | # Default values
24 | CONFIG_FILE="verification-config.json"
25 | OUTPUT_FILE="logs/build_verification_report.json"
26 | ANALYZE_FIRST=true
27 | VERBOSE=false
28 | 
29 | # Check if Qdrant is running locally
30 | check_qdrant() {
31 |     if curl -s "http://localhost:6333/collections" > /dev/null; then
32 |         echo "Local Qdrant instance detected"
33 |         return 0
34 |     else
35 |         echo "Warning: No local Qdrant instance found at http://localhost:6333"
36 |         echo "You may need to start Qdrant using Docker:"
37 |         echo "docker run -p 6333:6333 qdrant/qdrant"
38 |         return 1
39 |     fi
40 | }
41 | 
42 | # Parse command line arguments
43 | while [[ $# -gt 0 ]]; do
44 |     case $1 in
45 |         --config)
46 |             CONFIG_FILE="$2"
47 |             shift 2
48 |             ;;
49 |         --output)
50 |             OUTPUT_FILE="$2"
51 |             shift 2
52 |             ;;
53 |         --skip-analysis)
54 |             ANALYZE_FIRST=false
55 |             shift
56 |             ;;
57 |         --verbose)
58 |             VERBOSE=true
59 |             shift
60 |             ;;
61 |         *)
62 |             echo "Unknown option: $1"
63 |             echo "Usage: $0 [--config CONFIG_FILE] [--output OUTPUT_FILE] [--skip-analysis] [--verbose]"
64 |             exit 1
65 |             ;;
66 |     esac
67 | done
68 | 
69 | # Set up logging
70 | LOG_FILE="logs/build_verification.log"
71 | if $VERBOSE; then
72 |     # Log to both console and file
73 |     exec > >(tee -a "$LOG_FILE") 2>&1
74 | else
75 |     # Log only to file
76 |     exec >> "$LOG_FILE" 2>&1
77 | fi
78 | 
79 | echo "==================================================================="
80 | echo "Starting End-to-End Build Verification at $(date)"
81 | echo "==================================================================="
82 | echo "Config file: $CONFIG_FILE"
83 | echo "Output file: $OUTPUT_FILE"
84 | echo "Analyze first: $ANALYZE_FIRST"
85 | echo "Verbose mode: $VERBOSE"
86 | echo "-------------------------------------------------------------------"
87 | 
88 | # Check if config file exists
89 | if [[ ! -f "$CONFIG_FILE" ]]; then
90 |     echo "Error: Config file $CONFIG_FILE not found!"
91 |     exit 1
92 | fi
93 | 
94 | # Function to check if a command is available
95 | check_command() {
96 |     if ! command -v "$1" &> /dev/null; then
97 |         echo "Error: $1 is required but not installed."
98 |         exit 1
99 |     fi
100 | }
101 | 
102 | # Check required commands
103 | check_command python
104 | check_command pip
105 | 
106 | # Ensure all dependencies are installed
107 | echo "Checking dependencies..."
108 | pip install -q -r requirements.txt
109 | pip install -q -e .
110 | 
111 | # Step 1: Check Qdrant availability
112 | echo "Checking Qdrant availability..."
113 | if ! check_qdrant; then
114 |     if [[ "$QDRANT_URL" == "http://localhost:6333" ]]; then
115 |         echo "Error: Local Qdrant instance is not running and no alternative QDRANT_URL is set"
116 |         echo "Please either:"
117 |         echo "1. Start a local Qdrant instance using Docker:"
118 |         echo "   docker run -p 6333:6333 qdrant/qdrant"
119 |         echo "2. Or set QDRANT_URL to point to your Qdrant instance"
120 |         exit 1
121 |     else
122 |         echo "Using alternative Qdrant instance at $QDRANT_URL"
123 |     fi
124 | fi
125 | 
126 | # Step 2: Analyze codebase and store component relationships (if enabled)
127 | if $ANALYZE_FIRST; then
128 |     echo "Analyzing codebase and storing component relationships..."
129 |     # Under `set -e` a failing bare command exits the script before a separate
130 |     # `$?` check can ever run, so test the command directly in the `if` condition.
131 |     if ! python -m scripts.store_code_relationships --config "$CONFIG_FILE"; then
132 |         echo "Error: Failed to analyze codebase and store component relationships!"
133 |         exit 1
134 |     fi
135 | 
136 |     echo "Component relationships analysis completed successfully."
137 | else
138 |     echo "Skipping codebase analysis as requested."
139 | fi
140 | 
141 | # Step 3: Run build verification
142 | echo "Running tests with standardized test runner..."
143 | chmod +x run_tests.py
144 | # The `&& ... || ...` form captures the exit code without tripping `set -e`
145 | ./run_tests.py --all --clean --isolated --coverage --html --verbose && TEST_EXIT_CODE=0 || TEST_EXIT_CODE=$?
146 | 
147 | echo "Running build verification..."
148 | # Capture this exit status the same way, without aborting under `set -e`
149 | python -m scripts.verify_build --config "$CONFIG_FILE" --output "$OUTPUT_FILE" && BUILD_STATUS=0 || BUILD_STATUS=$?
150 | 
151 | # Use the worst exit code between tests and build verification
152 | if [ $TEST_EXIT_CODE -ne 0 ]; then
153 |     BUILD_STATUS=$TEST_EXIT_CODE
154 | fi
155 | 
156 | if [[ $BUILD_STATUS -ne 0 ]]; then
157 |     echo "Build verification failed with exit code $BUILD_STATUS!"
158 | else
159 |     echo "Build verification completed successfully."
160 | fi
161 | 
162 | # Step 4: Report results
163 | echo "Build verification report saved to $OUTPUT_FILE"
164 | 
165 | if [[ -f "$OUTPUT_FILE" ]]; then
166 |     # Extract summary from report if jq is available
167 |     if command -v jq &> /dev/null; then
168 |         SUMMARY=$(jq -r '.build_verification_report.summary' "$OUTPUT_FILE")
169 |         STATUS=$(jq -r '.build_verification_report.verification_results.overall_status' "$OUTPUT_FILE")
170 |         echo "-------------------------------------------------------------------"
171 |         echo "Build Verification Status: $STATUS"
172 |         echo "Summary: $SUMMARY"
173 |         echo "-------------------------------------------------------------------"
174 | 
175 |         # Print test results
176 |         TOTAL=$(jq -r '.build_verification_report.test_summary.total' "$OUTPUT_FILE")
177 |         PASSED=$(jq -r '.build_verification_report.test_summary.passed' "$OUTPUT_FILE")
178 |         FAILED=$(jq -r '.build_verification_report.test_summary.failed' "$OUTPUT_FILE")
179 |         COVERAGE=$(jq -r '.build_verification_report.test_summary.coverage' "$OUTPUT_FILE")
180 | 
181 |         echo "Test Results:"
182 |         echo "- Total Tests: $TOTAL"
183 |         echo "- Passed: $PASSED"
184 |         echo "- Failed: $FAILED"
185 |         echo "- Coverage: $COVERAGE%"
186 | 
187 |         # Print failure info if any
188 |         if [[ "$STATUS" != "PASS" ]]; then
189 |             echo "-------------------------------------------------------------------"
190 |             echo "Failures detected. See $OUTPUT_FILE for details."
191 | 
192 |             # Print failure analysis if available
193 |             if jq -e '.build_verification_report.failure_analysis' "$OUTPUT_FILE" > /dev/null; then
194 |                 echo "Failure Analysis:"
195 |                 jq -r '.build_verification_report.failure_analysis[] | "- " + .description' "$OUTPUT_FILE"
196 |             fi
197 |         fi
198 |     else
199 |         echo "-------------------------------------------------------------------"
200 |         echo "Install jq for better report formatting."
201 |         echo "Report saved to $OUTPUT_FILE"
202 |     fi
203 | else
204 |     echo "Error: Build verification report not found at $OUTPUT_FILE!"
205 | fi 206 | 207 | echo "===================================================================" 208 | echo "End-to-End Build Verification completed at $(date)" 209 | echo "Exit status: $BUILD_STATUS" 210 | echo "===================================================================" 211 | 212 | exit $BUILD_STATUS ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/core/prompts.py: -------------------------------------------------------------------------------- ```python 1 | """Prompt management and generation module.""" 2 | 3 | from datetime import datetime 4 | from enum import Enum 5 | from typing import Dict, List, Optional 6 | from uuid import UUID, uuid4 7 | 8 | from pydantic import BaseModel 9 | 10 | class PromptType(str, Enum): 11 | """Prompt type enumeration.""" 12 | 13 | CODE_ANALYSIS = "code_analysis" 14 | PATTERN_EXTRACTION = "pattern_extraction" 15 | DOCUMENTATION = "documentation" 16 | DEBUG = "debug" 17 | ADR = "adr" 18 | 19 | class PromptTemplate(BaseModel): 20 | """Prompt template model.""" 21 | 22 | id: UUID 23 | name: str 24 | type: PromptType 25 | template: str 26 | description: Optional[str] = None 27 | variables: List[str] 28 | examples: Optional[List[Dict]] = None 29 | created_at: datetime 30 | updated_at: datetime 31 | version: Optional[str] = None 32 | 33 | class PromptManager: 34 | """Manager for prompt templates and generation.""" 35 | 36 | def __init__(self, config): 37 | """Initialize prompt manager.""" 38 | self.config = config 39 | self.templates: Dict[str, PromptTemplate] = {} 40 | self._load_default_templates() 41 | 42 | def _load_default_templates(self): 43 | """Load default prompt templates.""" 44 | # Code Analysis Templates 45 | self.add_template( 46 | name="code_pattern_analysis", 47 | type=PromptType.CODE_ANALYSIS, 48 | template="""Analyze the following code for patterns and best practices: 49 | 50 | Code: 51 | {code} 52 | 53 | Consider: 54 | - Design patterns used 55 | - Architecture patterns 56 | - Code organization 57 | - Error handling 58 | - Performance considerations 59 | - Security implications 60 | 61 | Provide detailed analysis focusing on: 62 | {focus_areas}""", 63 | variables=["code", "focus_areas"], 64 | description="Template for analyzing code patterns" 65 | ) 66 | 67 | # Pattern Extraction Templates 68 | self.add_template( 69 | name="extract_design_patterns", 70 | type=PromptType.PATTERN_EXTRACTION, 71 | template="""Extract design patterns from the following code: 72 | 73 | Code: 74 | {code} 75 | 76 | Look for instances of: 77 | - Creational patterns 78 | - Structural patterns 79 | - Behavioral patterns 80 | - Architectural patterns 81 | 82 | For each pattern found, explain: 83 | - Pattern name and category 84 | - How it's implemented 85 | - Benefits and tradeoffs 86 | - Potential improvements""", 87 | variables=["code"], 88 | description="Template for extracting design patterns" 89 | ) 90 | 91 | # Documentation Templates 92 | self.add_template( 93 | name="generate_documentation", 94 | type=PromptType.DOCUMENTATION, 95 | template="""Generate documentation for the following code: 96 | 97 | Code: 98 | {code} 99 | 100 | Documentation type: {doc_type} 101 | Include: 102 | - Overview 103 | - Usage examples 104 | - API reference 105 | - Dependencies 106 | - Configuration 107 | - Error handling 108 | - Best practices""", 109 | variables=["code", "doc_type"], 110 | description="Template for generating documentation" 111 | ) 112 | 113 | # Debug Templates 114 | self.add_template( 115 | 
name="debug_analysis", 116 | type=PromptType.DEBUG, 117 | template="""Analyze the following issue: 118 | 119 | Description: 120 | {description} 121 | 122 | Error: 123 | {error} 124 | 125 | Context: 126 | {context} 127 | 128 | Provide: 129 | - Root cause analysis 130 | - Potential solutions 131 | - Prevention strategies 132 | - Testing recommendations""", 133 | variables=["description", "error", "context"], 134 | description="Template for debug analysis" 135 | ) 136 | 137 | # ADR Templates 138 | self.add_template( 139 | name="adr_template", 140 | type=PromptType.ADR, 141 | template="""# Architecture Decision Record 142 | 143 | ## Title: {title} 144 | 145 | ## Status: {status} 146 | 147 | ## Context 148 | {context} 149 | 150 | ## Decision Drivers 151 | {decision_drivers} 152 | 153 | ## Considered Options 154 | {options} 155 | 156 | ## Decision 157 | {decision} 158 | 159 | ## Consequences 160 | {consequences} 161 | 162 | ## Implementation 163 | {implementation} 164 | 165 | ## Related Decisions 166 | {related_decisions}""", 167 | variables=[ 168 | "title", "status", "context", "decision_drivers", 169 | "options", "decision", "consequences", "implementation", 170 | "related_decisions" 171 | ], 172 | description="Template for architecture decision records" 173 | ) 174 | 175 | def add_template( 176 | self, 177 | name: str, 178 | type: PromptType, 179 | template: str, 180 | variables: List[str], 181 | description: Optional[str] = None, 182 | examples: Optional[List[Dict]] = None, 183 | version: Optional[str] = None 184 | ) -> PromptTemplate: 185 | """Add a new prompt template.""" 186 | now = datetime.utcnow() 187 | template = PromptTemplate( 188 | id=uuid4(), 189 | name=name, 190 | type=type, 191 | template=template, 192 | description=description, 193 | variables=variables, 194 | examples=examples, 195 | version=version, 196 | created_at=now, 197 | updated_at=now 198 | ) 199 | 200 | self.templates[name] = template 201 | return template 202 | 203 | def get_template(self, name: str) -> Optional[PromptTemplate]: 204 | """Get prompt template by name.""" 205 | return self.templates.get(name) 206 | 207 | def list_templates( 208 | self, 209 | type: Optional[PromptType] = None 210 | ) -> List[PromptTemplate]: 211 | """List all templates, optionally filtered by type.""" 212 | templates = list(self.templates.values()) 213 | if type: 214 | templates = [t for t in templates if t.type == type] 215 | return sorted(templates, key=lambda x: x.name) 216 | 217 | def generate_prompt( 218 | self, 219 | template_name: str, 220 | variables: Dict[str, str] 221 | ) -> Optional[str]: 222 | """Generate prompt from template and variables.""" 223 | template = self.get_template(template_name) 224 | if not template: 225 | return None 226 | 227 | # Validate variables 228 | missing = [v for v in template.variables if v not in variables] 229 | if missing: 230 | raise ValueError(f"Missing required variables: {', '.join(missing)}") 231 | 232 | try: 233 | return template.template.format(**variables) 234 | except KeyError as e: 235 | raise ValueError(f"Invalid variable: {e}") 236 | except Exception as e: 237 | raise ValueError(f"Error generating prompt: {e}") 238 | 239 | def update_template( 240 | self, 241 | name: str, 242 | template: Optional[str] = None, 243 | description: Optional[str] = None, 244 | examples: Optional[List[Dict]] = None, 245 | version: Optional[str] = None 246 | ) -> Optional[PromptTemplate]: 247 | """Update prompt template.""" 248 | tmpl = self.get_template(name) 249 | if not tmpl: 250 | return None 251 | 252 | 
if template: 253 | tmpl.template = template 254 | if description: 255 | tmpl.description = description 256 | if examples: 257 | tmpl.examples = examples 258 | if version: 259 | tmpl.version = version 260 | 261 | tmpl.updated_at = datetime.utcnow() 262 | return tmpl 263 | ``` -------------------------------------------------------------------------------- /scripts/validate_poc.py: -------------------------------------------------------------------------------- ```python 1 | #!/usr/bin/env python3 2 | """ 3 | MCP Codebase Insight - PoC Validation Script 4 | This script orchestrates the validation of all PoC components using Firecrawl MCP. 5 | """ 6 | 7 | import asyncio 8 | import argparse 9 | import logging 10 | from pathlib import Path 11 | from typing import Dict, Any 12 | 13 | from mcp_firecrawl import ( 14 | verify_environment, 15 | setup_repository, 16 | configure_environment, 17 | initialize_services, 18 | verify_transport_config, 19 | verify_sse_endpoints, 20 | verify_stdio_transport, 21 | test_transport_switch, 22 | validate_transport_features, 23 | test_cross_transport 24 | ) 25 | 26 | # Configure logging 27 | logging.basicConfig(level=logging.INFO) 28 | logger = logging.getLogger(__name__) 29 | 30 | class PoCValidator: 31 | """Orchestrates PoC validation steps.""" 32 | 33 | def __init__(self, config_path: str = ".env"): 34 | self.config_path = config_path 35 | self.results = {} 36 | self.config = self._load_config() 37 | 38 | def _load_config(self) -> dict: 39 | """Load config from .env or other config file.""" 40 | from dotenv import dotenv_values 41 | config = dotenv_values(self.config_path) 42 | return config 43 | 44 | async def setup_environment(self) -> bool: 45 | """Validate and setup the environment.""" 46 | logger.info("Validating environment...") 47 | 48 | # Check system requirements 49 | env_check = verify_environment({ 50 | "python_version": "3.11", 51 | "docker_version": "20.10.0", 52 | "ram_gb": 4, 53 | "cpu_cores": 2, 54 | "disk_space_gb": 20 55 | }) 56 | 57 | if not env_check.success: 58 | logger.error("Environment validation failed:") 59 | for issue in env_check.issues: 60 | logger.error(f"- {issue}") 61 | return False 62 | 63 | logger.info("Environment validation successful") 64 | return True 65 | 66 | async def setup_services(self) -> bool: 67 | """Initialize and verify required services.""" 68 | logger.info("Initializing services...") 69 | 70 | try: 71 | services = await initialize_services({ 72 | "qdrant": { 73 | "docker_compose": True, 74 | "wait_for_ready": True 75 | }, 76 | "vector_store": { 77 | "init_collection": True, 78 | "verify_connection": True 79 | } 80 | }) 81 | 82 | logger.info("Services initialized successfully") 83 | return True 84 | 85 | except Exception as e: 86 | logger.error(f"Service initialization failed: {e}") 87 | return False 88 | 89 | async def validate_transports(self) -> bool: 90 | """Validate both SSE and stdio transports.""" 91 | logger.info("Validating transport protocols...") 92 | 93 | # Verify SSE endpoints 94 | sse_result = await verify_sse_endpoints( 95 | "http://localhost:8000", 96 | {"Authorization": f"Bearer {self.config.get('API_KEY')}"} 97 | ) 98 | 99 | # Verify stdio transport 100 | stdio_result = await verify_stdio_transport( 101 | "mcp-codebase-insight", 102 | {"auth_token": self.config.get('API_KEY')} 103 | ) 104 | 105 | # Test transport switching 106 | switch_result = await test_transport_switch( 107 | server_url="http://localhost:8000", 108 | stdio_binary="mcp-codebase-insight", 109 | config={ 110 | "auth_token": 
self.config.get('API_KEY'),
111 |                 "verify_endpoints": True,
112 |                 "check_data_consistency": True
113 |             }
114 |         )
115 | 
116 |         # Validate transport features
117 |         sse_features = await validate_transport_features(
118 |             "sse",
119 |             {
120 |                 "server_url": "http://localhost:8000",
121 |                 "auth_token": self.config.get('API_KEY'),
122 |                 "features": [
123 |                     "event_streaming",
124 |                     "bidirectional_communication",
125 |                     "error_handling",
126 |                     "reconnection"
127 |                 ]
128 |             }
129 |         )
130 | 
131 |         stdio_features = await validate_transport_features(
132 |             "stdio",
133 |             {
134 |                 "binary": "mcp-codebase-insight",
135 |                 "auth_token": self.config.get('API_KEY'),
136 |                 "features": [
137 |                     "synchronous_communication",
138 |                     "process_isolation",
139 |                     "error_propagation",
140 |                     "signal_handling"
141 |                 ]
142 |             }
143 |         )
144 | 
145 |         # Test cross-transport compatibility
146 |         cross_transport = await test_cross_transport({
147 |             "sse_config": {
148 |                 "url": "http://localhost:8000",
149 |                 "auth_token": self.config.get('API_KEY')
150 |             },
151 |             "stdio_config": {
152 |                 "binary": "mcp-codebase-insight",
153 |                 "auth_token": self.config.get('API_KEY')
154 |             },
155 |             "test_operations": [
156 |                 "vector_search",
157 |                 "pattern_store",
158 |                 "task_management",
159 |                 "adr_queries"
160 |             ]
161 |         })
162 | 
163 |         all_passed = all([
164 |             sse_result.success,
165 |             stdio_result.success,
166 |             switch_result.success,
167 |             sse_features.success,
168 |             stdio_features.success,
169 |             cross_transport.success
170 |         ])
171 | 
172 |         if all_passed:
173 |             logger.info("Transport validation successful")
174 |         else:
175 |             logger.error("Transport validation failed")
176 | 
177 |         return all_passed
178 | 
179 |     async def run_validation(self) -> Dict[str, Any]:
180 |         """Run all validation steps."""
181 |         validation_steps = [  # coroutine *functions*, awaited one at a time below
182 |             ("environment", self.setup_environment),
183 |             ("services", self.setup_services),
184 |             ("transports", self.validate_transports),
185 |             # Add more validation steps here
186 |         ]
187 | 
188 |         results = {}
189 |         for step_name, step in validation_steps:
190 |             try:
191 |                 results[step_name] = await step()
192 |                 if not results[step_name]:
193 |                     logger.error(f"Validation step '{step_name}' failed")
194 |                     break
195 |             except Exception as e:
196 |                 logger.error(f"Error in validation step '{step_name}': {e}")
197 |                 results[step_name] = False
198 |                 break
199 | 
200 |         return results
201 | 
202 | def main():
203 |     """Main entry point for PoC validation."""
204 |     parser = argparse.ArgumentParser(description="Validate MCP Codebase Insight PoC")
205 |     parser.add_argument("--config", default=".env", help="Path to configuration file")
206 |     args = parser.parse_args()
207 | 
208 |     validator = PoCValidator(args.config)
209 |     results = asyncio.run(validator.run_validation())
210 | 
211 |     # Print summary
212 |     print("\nValidation Results:")
213 |     print("-" * 50)
214 |     for step, success in results.items():
215 |         status = "✅ Passed" if success else "❌ Failed"
216 |         print(f"{step:20} {status}")
217 |     print("-" * 50)
218 | 
219 |     # Exit with appropriate status
220 |     exit(0 if all(results.values()) else 1)
221 | 
222 | if __name__ == "__main__":
223 |     main()
```
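Because `run_validation` iterates over zero-argument coroutine factories, adding a custom check means appending one more factory. The sketch below is hypothetical (the `ExtendedPoCValidator` class and its `check_docs` step are illustrative, not part of the script) and assumes it is appended to the script so that `PoCValidator` and `logger` are in scope.

```python
# Hypothetical extension sketch; `check_docs` is not part of the script above.
class ExtendedPoCValidator(PoCValidator):
    async def check_docs(self) -> bool:
        """Example extra step: verify the docs directory is present."""
        from pathlib import Path
        ok = Path("docs").is_dir()
        if not ok:
            logger.error("docs/ directory missing")
        return ok

    async def run_validation(self):
        results = await super().run_validation()
        # Only run the extra step if the built-in steps all passed.
        if all(results.values()):
            results["docs"] = await self.check_docs()
        return results
```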
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/adr.py:
--------------------------------------------------------------------------------

```python
1 | """ADR (Architecture Decision Record) management module."""
2 | 
3 | import json
4 | from datetime import datetime
5 | from enum import Enum
6 | from pathlib import Path
7 | from typing import Dict, List, Optional
8 | from uuid import UUID, uuid4
9 | from slugify import slugify
10 | import os
11 | 
12 | from pydantic import BaseModel
13 | 
14 | class ADRError(Exception):
15 |     """Base class for ADR-related errors."""
16 |     pass
17 | 
18 | class ADRStatus(str, Enum):
19 |     """ADR status enumeration."""
20 | 
21 |     PROPOSED = "proposed"
22 |     ACCEPTED = "accepted"
23 |     REJECTED = "rejected"
24 |     SUPERSEDED = "superseded"
25 |     DEPRECATED = "deprecated"
26 | 
27 | class ADROption(BaseModel):
28 |     """ADR option model."""
29 | 
30 |     title: str
31 |     pros: List[str]
32 |     cons: List[str]
33 |     description: Optional[str] = None
34 | 
35 | class ADRContext(BaseModel):
36 |     """ADR context model."""
37 | 
38 |     problem: str
39 |     constraints: List[str]
40 |     assumptions: Optional[List[str]] = None
41 |     background: Optional[str] = None
42 | 
43 | class ADR(BaseModel):
44 |     """ADR model."""
45 | 
46 |     id: UUID
47 |     title: str
48 |     status: ADRStatus
49 |     context: ADRContext
50 |     options: List[ADROption]
51 |     decision: str
52 |     consequences: Optional[Dict[str, List[str]]] = None
53 |     metadata: Optional[Dict[str, str]] = None
54 |     created_at: datetime
55 |     updated_at: datetime
56 |     superseded_by: Optional[UUID] = None
57 | 
58 | class ADRManager:
59 |     """ADR manager for handling architecture decision records."""
60 | 
61 |     def __init__(self, config):
62 |         """Initialize ADR manager."""
63 |         self.config = config
64 |         self.adr_dir = config.adr_dir
65 |         self.adr_dir.mkdir(parents=True, exist_ok=True)
66 |         self.next_adr_number = 1  # Default to 1, will be updated in initialize()
67 |         self.initialized = False
68 |         self.adrs: Dict[UUID, ADR] = {}
69 | 
70 |     async def initialize(self):
71 |         """Initialize the ADR manager.
72 | 
73 |         This method ensures the ADR directory exists and loads any existing ADRs.
74 |         """
75 |         if self.initialized:
76 |             return
77 | 
78 |         try:
79 |             # Ensure ADR directory exists
80 |             self.adr_dir.mkdir(parents=True, exist_ok=True)
81 | 
82 |             # Calculate next ADR number from existing files
83 |             max_number = 0
84 |             for adr_file in self.adr_dir.glob("*.md"):
85 |                 try:
86 |                     # Extract number from filename (e.g., "0001-title.md")
87 |                     number = int(adr_file.name.split("-")[0])
88 |                     max_number = max(max_number, number)
89 |                 except (ValueError, IndexError):
90 |                     continue
91 | 
92 |             self.next_adr_number = max_number + 1
93 | 
94 |             # Load any existing ADRs
95 |             for adr_file in self.adr_dir.glob("*.json"):
96 |                 if adr_file.is_file():
97 |                     try:
98 |                         with open(adr_file, "r") as f:
99 |                             adr_data = json.load(f)
100 |                             # Convert the loaded data into an ADR object
101 |                             adr = ADR(**adr_data)
102 |                             self.adrs[adr.id] = adr
103 |                     except (json.JSONDecodeError, ValueError) as e:
104 |                         # Log error but continue processing other files
105 |                         print(f"Error loading ADR {adr_file}: {e}")
106 | 
107 |             self.initialized = True
108 |         except Exception as e:
109 |             print(f"Error initializing ADR manager: {e}")
110 |             await self.cleanup()
111 |             raise RuntimeError(f"Failed to initialize ADR manager: {str(e)}")
112 | 
113 |     async def cleanup(self):
114 |         """Clean up resources used by the ADR manager.
115 | 
116 |         This method ensures all ADRs are saved and resources are released.
117 | """ 118 | if not self.initialized: 119 | return 120 | 121 | try: 122 | # Save any modified ADRs 123 | for adr in self.adrs.values(): 124 | try: 125 | await self._save_adr(adr) 126 | except Exception as e: 127 | print(f"Error saving ADR {adr.id}: {e}") 128 | 129 | # Clear in-memory ADRs 130 | self.adrs.clear() 131 | except Exception as e: 132 | print(f"Error cleaning up ADR manager: {e}") 133 | finally: 134 | self.initialized = False 135 | 136 | async def create_adr( 137 | self, 138 | title: str, 139 | context: dict, 140 | options: List[dict], 141 | decision: str, 142 | consequences: Optional[Dict[str, List[str]]] = None 143 | ) -> ADR: 144 | """Create a new ADR.""" 145 | adr_id = uuid4() 146 | now = datetime.utcnow() 147 | 148 | # Convert context dict to ADRContext 149 | adr_context = ADRContext( 150 | problem=context["problem"], 151 | constraints=context["constraints"], 152 | assumptions=context.get("assumptions"), 153 | background=context.get("background") 154 | ) 155 | 156 | # Convert options list to ADROption objects 157 | adr_options = [ 158 | ADROption( 159 | title=opt["title"], 160 | pros=opt["pros"], 161 | cons=opt["cons"], 162 | description=opt.get("description") 163 | ) 164 | for opt in options 165 | ] 166 | 167 | adr = ADR( 168 | id=adr_id, 169 | title=title, 170 | status=ADRStatus.PROPOSED, 171 | context=adr_context, 172 | options=adr_options, 173 | decision=decision, 174 | consequences=consequences, 175 | created_at=now, 176 | updated_at=now 177 | ) 178 | 179 | await self._save_adr(adr) 180 | return adr 181 | 182 | async def get_adr(self, adr_id: UUID) -> Optional[ADR]: 183 | """Get ADR by ID.""" 184 | adr_path = self.adr_dir / f"{adr_id}.json" 185 | if not adr_path.exists(): 186 | return None 187 | 188 | with open(adr_path) as f: 189 | data = json.load(f) 190 | return ADR(**data) 191 | 192 | async def update_adr( 193 | self, 194 | adr_id: UUID, 195 | status: Optional[ADRStatus] = None, 196 | superseded_by: Optional[UUID] = None, 197 | metadata: Optional[Dict[str, str]] = None 198 | ) -> Optional[ADR]: 199 | """Update ADR status and metadata.""" 200 | adr = await self.get_adr(adr_id) 201 | if not adr: 202 | return None 203 | 204 | if status: 205 | adr.status = status 206 | if superseded_by: 207 | adr.superseded_by = superseded_by 208 | if metadata: 209 | adr.metadata = {**(adr.metadata or {}), **metadata} 210 | 211 | adr.updated_at = datetime.utcnow() 212 | await self._save_adr(adr) 213 | return adr 214 | 215 | async def list_adrs( 216 | self, 217 | status: Optional[ADRStatus] = None 218 | ) -> List[ADR]: 219 | """List all ADRs, optionally filtered by status.""" 220 | adrs = [] 221 | for path in self.adr_dir.glob("*.json"): 222 | with open(path) as f: 223 | data = json.load(f) 224 | adr = ADR(**data) 225 | if not status or adr.status == status: 226 | adrs.append(adr) 227 | return sorted(adrs, key=lambda x: x.created_at) 228 | 229 | async def _save_adr(self, adr: ADR) -> None: 230 | """Save ADR to file.""" 231 | adr_path = self.adr_dir / f"{adr.id}.json" 232 | with open(adr_path, "w") as f: 233 | json.dump(adr.model_dump(), f, indent=2, default=str) 234 | ``` -------------------------------------------------------------------------------- /tests/components/test_core_components.py: -------------------------------------------------------------------------------- ```python 1 | """Test core server components.""" 2 | 3 | import sys 4 | import os 5 | 6 | # Ensure the src directory is in the Python path 7 | sys.path.insert(0, 
os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))) 8 | 9 | import pytest 10 | from datetime import datetime 11 | from uuid import uuid4 12 | 13 | from src.mcp_codebase_insight.core.config import ServerConfig 14 | from src.mcp_codebase_insight.core.adr import ADRManager, ADRStatus 15 | from src.mcp_codebase_insight.core.debug import DebugSystem, IssueType, IssueStatus 16 | from src.mcp_codebase_insight.core.documentation import DocumentationManager, DocumentationType 17 | from src.mcp_codebase_insight.core.knowledge import KnowledgeBase, Pattern, PatternType, PatternConfidence 18 | from src.mcp_codebase_insight.core.tasks import TaskManager, TaskType, TaskStatus, TaskPriority 19 | from src.mcp_codebase_insight.core.metrics import MetricsManager, MetricType 20 | from src.mcp_codebase_insight.core.health import HealthManager, HealthStatus 21 | from src.mcp_codebase_insight.core.cache import CacheManager 22 | from src.mcp_codebase_insight.core.vector_store import VectorStore 23 | from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding 24 | 25 | @pytest.mark.asyncio 26 | async def test_adr_manager(test_config: ServerConfig, test_adr: dict): 27 | """Test ADR manager functions.""" 28 | manager = ADRManager(test_config) 29 | 30 | # Test creation 31 | adr = await manager.create_adr( 32 | title=test_adr["title"], 33 | context=test_adr["context"], 34 | options=test_adr["options"], 35 | decision=test_adr["decision"] 36 | ) 37 | 38 | assert adr.title == test_adr["title"] 39 | assert adr.status == ADRStatus.PROPOSED 40 | 41 | # Test retrieval 42 | retrieved = await manager.get_adr(adr.id) 43 | assert retrieved is not None 44 | assert retrieved.id == adr.id 45 | 46 | # Test update 47 | updated = await manager.update_adr( 48 | adr.id, 49 | status=ADRStatus.ACCEPTED 50 | ) 51 | assert updated.status == ADRStatus.ACCEPTED 52 | 53 | @pytest.mark.asyncio 54 | async def test_knowledge_base(test_config: ServerConfig, qdrant_client): 55 | """Test knowledge base functions.""" 56 | # Initialize vector store with embedder 57 | embedder = SentenceTransformerEmbedding() 58 | vector_store = VectorStore( 59 | url=test_config.qdrant_url, 60 | embedder=embedder 61 | ) 62 | kb = KnowledgeBase(test_config, vector_store=vector_store) 63 | 64 | # Test pattern creation 65 | now = datetime.utcnow() 66 | pattern = Pattern( 67 | id=uuid4(), 68 | name="Test Pattern", 69 | description="A test pattern", 70 | type=PatternType.CODE, 71 | content="def test(): pass", 72 | confidence=PatternConfidence.HIGH, 73 | created_at=now, 74 | updated_at=now 75 | ) 76 | 77 | # Test pattern storage 78 | stored_pattern = await kb.add_pattern( 79 | name=pattern.name, 80 | type=pattern.type, 81 | description=pattern.description, 82 | content=pattern.content, 83 | confidence=pattern.confidence 84 | ) 85 | 86 | # Verify stored pattern 87 | assert stored_pattern.name == pattern.name 88 | assert stored_pattern.type == pattern.type 89 | assert stored_pattern.description == pattern.description 90 | assert stored_pattern.content == pattern.content 91 | assert stored_pattern.confidence == pattern.confidence 92 | 93 | @pytest.mark.asyncio 94 | async def test_task_manager(test_config: ServerConfig, test_code: str): 95 | """Test task manager functions.""" 96 | manager = TaskManager( 97 | config=test_config, 98 | adr_manager=ADRManager(test_config), 99 | debug_system=DebugSystem(test_config), 100 | doc_manager=DocumentationManager(test_config), 101 | knowledge_base=KnowledgeBase(test_config, None), 102 | 
prompt_manager=None 103 | ) 104 | 105 | # Test task creation 106 | task = await manager.create_task( 107 | type=TaskType.CODE_ANALYSIS, 108 | title="Test Task", 109 | description="Analyze test code", 110 | priority=TaskPriority.MEDIUM, 111 | context={"code": test_code} 112 | ) 113 | 114 | assert task.title == "Test Task" 115 | assert task.status == TaskStatus.PENDING 116 | 117 | # Test task retrieval 118 | retrieved = await manager.get_task(task.id) 119 | assert retrieved is not None 120 | assert retrieved.id == task.id 121 | 122 | @pytest.mark.asyncio 123 | async def test_metrics_manager(test_config: ServerConfig): 124 | """Test metrics manager functions.""" 125 | # Override the metrics_enabled setting for this test 126 | test_config.metrics_enabled = True 127 | 128 | manager = MetricsManager(test_config) 129 | await manager.initialize() 130 | 131 | try: 132 | # Test metric recording 133 | await manager.record_metric( 134 | "test_metric", 135 | MetricType.COUNTER, 136 | 1.0, 137 | {"label": "test"} 138 | ) 139 | 140 | # Test metric retrieval 141 | metrics = await manager.get_metrics(["test_metric"]) 142 | assert len(metrics) == 1 143 | assert "test_metric" in metrics 144 | finally: 145 | # Cleanup 146 | await manager.cleanup() 147 | 148 | @pytest.mark.asyncio 149 | async def test_health_manager(test_config: ServerConfig): 150 | """Test health manager functions.""" 151 | manager = HealthManager(test_config) 152 | 153 | # Test health check 154 | health = await manager.check_health() 155 | assert health.status is not None 156 | assert isinstance(health.components, dict) 157 | assert isinstance(health.timestamp, datetime) 158 | 159 | @pytest.mark.asyncio 160 | async def test_cache_manager(test_config: ServerConfig): 161 | """Test cache manager functions.""" 162 | manager = CacheManager(test_config) 163 | await manager.initialize() # Initialize the manager 164 | 165 | try: 166 | # Test memory cache 167 | manager.put_in_memory("test_key", "test_value") 168 | result = manager.get_from_memory("test_key") 169 | assert result == "test_value" 170 | 171 | # Test persistent cache 172 | manager.put_in_disk("test_key", "test_value") 173 | result = manager.get_from_disk("test_key") 174 | assert result == "test_value" 175 | 176 | # Test combined operations 177 | manager.put("combined_key", "combined_value") 178 | result = manager.get("combined_key") 179 | assert result == "combined_value" 180 | 181 | # Test removal 182 | manager.remove("test_key") 183 | assert manager.get("test_key") is None 184 | finally: 185 | await manager.cleanup() # Clean up after tests 186 | 187 | @pytest.mark.asyncio 188 | async def test_documentation_manager(test_config: ServerConfig): 189 | """Test documentation manager functions.""" 190 | manager = DocumentationManager(test_config) 191 | 192 | # Test document creation 193 | doc = await manager.add_document( 194 | title="Test Doc", 195 | content="Test content", 196 | type=DocumentationType.REFERENCE 197 | ) 198 | 199 | assert doc.title == "Test Doc" 200 | 201 | # Test document retrieval 202 | retrieved = await manager.get_document(doc.id) 203 | assert retrieved is not None 204 | assert retrieved.id == doc.id 205 | 206 | @pytest.mark.asyncio 207 | async def test_debug_system(test_config: ServerConfig): 208 | """Test debug system functions.""" 209 | system = DebugSystem(test_config) 210 | 211 | # Test issue creation 212 | issue = await system.create_issue( 213 | title="Test issue", 214 | type=IssueType.BUG, 215 | description={"message": "Test description", "steps": ["Step 1", "Step 
2"]} 216 | ) 217 | 218 | assert issue.title == "Test issue" 219 | assert issue.type == IssueType.BUG 220 | assert issue.status == IssueStatus.OPEN 221 | assert "message" in issue.description 222 | assert "steps" in issue.description 223 | ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/core/debug.py: -------------------------------------------------------------------------------- ```python 1 | """Debug system for issue tracking and analysis.""" 2 | 3 | import json 4 | from datetime import datetime 5 | from enum import Enum 6 | from pathlib import Path 7 | from typing import Dict, List, Optional 8 | from uuid import UUID, uuid4 9 | 10 | from pydantic import BaseModel 11 | 12 | class IssueType(str, Enum): 13 | """Issue type enumeration.""" 14 | 15 | BUG = "bug" 16 | PERFORMANCE = "performance" 17 | SECURITY = "security" 18 | DESIGN = "design" 19 | DOCUMENTATION = "documentation" 20 | OTHER = "other" 21 | 22 | class IssueStatus(str, Enum): 23 | """Issue status enumeration.""" 24 | 25 | OPEN = "open" 26 | IN_PROGRESS = "in_progress" 27 | RESOLVED = "resolved" 28 | CLOSED = "closed" 29 | WONT_FIX = "wont_fix" 30 | 31 | class Issue(BaseModel): 32 | """Issue model.""" 33 | 34 | id: UUID 35 | title: str 36 | type: IssueType 37 | status: IssueStatus 38 | description: Dict 39 | steps: Optional[List[Dict]] = None 40 | created_at: datetime 41 | updated_at: datetime 42 | resolved_at: Optional[datetime] = None 43 | metadata: Optional[Dict[str, str]] = None 44 | 45 | class DebugSystem: 46 | """System for debugging and issue management.""" 47 | 48 | def __init__(self, config): 49 | """Initialize debug system.""" 50 | self.config = config 51 | self.debug_dir = Path(config.docs_cache_dir) / "debug" 52 | self.debug_dir.mkdir(parents=True, exist_ok=True) 53 | self.issues: Dict[UUID, Issue] = {} 54 | self.initialized = False 55 | 56 | async def initialize(self) -> None: 57 | """Initialize debug system.""" 58 | if self.initialized: 59 | return 60 | 61 | try: 62 | # Load existing issues 63 | if self.debug_dir.exists(): 64 | for issue_file in self.debug_dir.glob("*.json"): 65 | try: 66 | with open(issue_file) as f: 67 | data = json.load(f) 68 | issue = Issue(**data) 69 | self.issues[issue.id] = issue 70 | except Exception as e: 71 | # Log error but continue loading other issues 72 | print(f"Error loading issue {issue_file}: {e}") 73 | 74 | self.initialized = True 75 | except Exception as e: 76 | print(f"Error initializing debug system: {e}") 77 | await self.cleanup() 78 | raise RuntimeError(f"Failed to initialize debug system: {str(e)}") 79 | 80 | async def cleanup(self) -> None: 81 | """Clean up debug system resources.""" 82 | if not self.initialized: 83 | return 84 | 85 | try: 86 | # Save any pending issues 87 | for issue in self.issues.values(): 88 | try: 89 | await self._save_issue(issue) 90 | except Exception as e: 91 | print(f"Error saving issue {issue.id}: {e}") 92 | # Clear in-memory issues 93 | self.issues.clear() 94 | except Exception as e: 95 | print(f"Error cleaning up debug system: {e}") 96 | finally: 97 | self.initialized = False 98 | 99 | async def create_issue( 100 | self, 101 | title: str, 102 | type: str, 103 | description: Dict 104 | ) -> Issue: 105 | """Create a new issue.""" 106 | now = datetime.utcnow() 107 | issue = Issue( 108 | id=uuid4(), 109 | title=title, 110 | type=IssueType(type), 111 | status=IssueStatus.OPEN, 112 | description=description, 113 | created_at=now, 114 | updated_at=now 115 | ) 116 | 117 | await 
self._save_issue(issue) 118 | return issue 119 | 120 | async def get_issue(self, issue_id: UUID) -> Optional[Issue]: 121 | """Get issue by ID.""" 122 | issue_path = self.debug_dir / f"{issue_id}.json" 123 | if not issue_path.exists(): 124 | return None 125 | 126 | with open(issue_path) as f: 127 | data = json.load(f) 128 | return Issue(**data) 129 | 130 | async def update_issue( 131 | self, 132 | issue_id: UUID, 133 | status: Optional[IssueStatus] = None, 134 | steps: Optional[List[Dict]] = None, 135 | metadata: Optional[Dict[str, str]] = None 136 | ) -> Optional[Issue]: 137 | """Update issue status and details.""" 138 | issue = await self.get_issue(issue_id) 139 | if not issue: 140 | return None 141 | 142 | if status: 143 | issue.status = status 144 | if status == IssueStatus.RESOLVED: 145 | issue.resolved_at = datetime.utcnow() 146 | if steps: 147 | issue.steps = steps 148 | if metadata: 149 | issue.metadata = {**(issue.metadata or {}), **metadata} 150 | 151 | issue.updated_at = datetime.utcnow() 152 | await self._save_issue(issue) 153 | return issue 154 | 155 | async def list_issues( 156 | self, 157 | type: Optional[IssueType] = None, 158 | status: Optional[IssueStatus] = None 159 | ) -> List[Issue]: 160 | """List all issues, optionally filtered by type and status.""" 161 | issues = [] 162 | for path in self.debug_dir.glob("*.json"): 163 | with open(path) as f: 164 | data = json.load(f) 165 | issue = Issue(**data) 166 | if (not type or issue.type == type) and (not status or issue.status == status): 167 | issues.append(issue) 168 | return sorted(issues, key=lambda x: x.created_at) 169 | 170 | async def analyze_issue(self, issue_id: UUID) -> List[Dict]: 171 | """Analyze issue and generate debug steps.""" 172 | issue = await self.get_issue(issue_id) 173 | if not issue: 174 | return [] 175 | 176 | # Generate analysis steps based on issue type 177 | steps = [] 178 | 179 | if issue.type == IssueType.BUG: 180 | steps.extend([ 181 | {"type": "check", "name": "Reproduce Issue", "description": "Steps to reproduce the issue"}, 182 | {"type": "check", "name": "Error Logs", "description": "Check relevant error logs"}, 183 | {"type": "check", "name": "Stack Trace", "description": "Analyze stack trace if available"}, 184 | {"type": "check", "name": "Code Review", "description": "Review related code sections"} 185 | ]) 186 | 187 | elif issue.type == IssueType.PERFORMANCE: 188 | steps.extend([ 189 | {"type": "check", "name": "Profiling", "description": "Run performance profiling"}, 190 | {"type": "check", "name": "Resource Usage", "description": "Monitor CPU, memory, I/O"}, 191 | {"type": "check", "name": "Query Analysis", "description": "Review database queries"}, 192 | {"type": "check", "name": "Bottlenecks", "description": "Identify performance bottlenecks"} 193 | ]) 194 | 195 | elif issue.type == IssueType.SECURITY: 196 | steps.extend([ 197 | {"type": "check", "name": "Vulnerability Scan", "description": "Run security scanners"}, 198 | {"type": "check", "name": "Access Control", "description": "Review permissions"}, 199 | {"type": "check", "name": "Input Validation", "description": "Check input handling"}, 200 | {"type": "check", "name": "Dependencies", "description": "Audit dependencies"} 201 | ]) 202 | 203 | # Update issue with analysis steps 204 | await self.update_issue(issue_id, steps=steps) 205 | return steps 206 | 207 | async def _save_issue(self, issue: Issue) -> None: 208 | """Save issue to file.""" 209 | issue_path = self.debug_dir / f"{issue.id}.json" 210 | with open(issue_path, "w") as 
f: 211 | json.dump(issue.model_dump(), f, indent=2, default=str) 212 | ```
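To round off this page, here is a small hypothetical walkthrough of the `DebugSystem` issue lifecycle shown above. The stub config object is an assumption for illustration; in the code above, `DebugSystem` only reads `docs_cache_dir` from its config.

```python
# Hypothetical walkthrough of the DebugSystem lifecycle; paths are illustrative.
import asyncio
from types import SimpleNamespace

from mcp_codebase_insight.core.debug import DebugSystem, IssueStatus

async def demo():
    # A stub config suffices here because only `docs_cache_dir` is consumed.
    config = SimpleNamespace(docs_cache_dir="docs_cache")
    debug = DebugSystem(config)
    await debug.initialize()
    try:
        issue = await debug.create_issue(
            title="Slow vector search",
            type="performance",
            description={"observed": "p95 latency above 2s", "expected": "under 500ms"},
        )
        # analyze_issue generates a type-specific checklist (profiling,
        # resource usage, query analysis, bottlenecks for performance issues).
        steps = await debug.analyze_issue(issue.id)
        print([s["name"] for s in steps])
        await debug.update_issue(issue.id, status=IssueStatus.RESOLVED)
    finally:
        await debug.cleanup()

asyncio.run(demo())
```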