This is page 2 of 6. Use http://codebase.md/tosin2013/mcp-codebase-insight?page={x} to view the full context. # Directory Structure ``` ├── .bumpversion.cfg ├── .codecov.yml ├── .compile-venv-py3.11 │ ├── bin │ │ ├── activate │ │ ├── activate.csh │ │ ├── activate.fish │ │ ├── Activate.ps1 │ │ ├── coverage │ │ ├── coverage-3.11 │ │ ├── coverage3 │ │ ├── pip │ │ ├── pip-compile │ │ ├── pip-sync │ │ ├── pip3 │ │ ├── pip3.11 │ │ ├── py.test │ │ ├── pyproject-build │ │ ├── pytest │ │ ├── python │ │ ├── python3 │ │ ├── python3.11 │ │ └── wheel │ └── pyvenv.cfg ├── .env.example ├── .github │ └── workflows │ ├── build-verification.yml │ ├── publish.yml │ └── tdd-verification.yml ├── .gitignore ├── async_fixture_wrapper.py ├── CHANGELOG.md ├── CLAUDE.md ├── codebase_structure.txt ├── component_test_runner.py ├── CONTRIBUTING.md ├── core_workflows.txt ├── debug_tests.md ├── Dockerfile ├── docs │ ├── adrs │ │ └── 001_use_docker_for_qdrant.md │ ├── api.md │ ├── components │ │ └── README.md │ ├── cookbook.md │ ├── development │ │ ├── CODE_OF_CONDUCT.md │ │ ├── CONTRIBUTING.md │ │ └── README.md │ ├── documentation_map.md │ ├── documentation_summary.md │ ├── features │ │ ├── adr-management.md │ │ ├── code-analysis.md │ │ └── documentation.md │ ├── getting-started │ │ ├── configuration.md │ │ ├── docker-setup.md │ │ ├── installation.md │ │ ├── qdrant_setup.md │ │ └── quickstart.md │ ├── qdrant_setup.md │ ├── README.md │ ├── SSE_INTEGRATION.md │ ├── system_architecture │ │ └── README.md │ ├── templates │ │ └── adr.md │ ├── testing_guide.md │ ├── troubleshooting │ │ ├── common-issues.md │ │ └── faq.md │ ├── vector_store_best_practices.md │ └── workflows │ └── README.md ├── error_logs.txt ├── examples │ └── use_with_claude.py ├── github-actions-documentation.md ├── Makefile ├── module_summaries │ ├── backend_summary.txt │ ├── database_summary.txt │ └── frontend_summary.txt ├── output.txt ├── package-lock.json ├── package.json ├── PLAN.md ├── prepare_codebase.sh ├── PULL_REQUEST.md ├── pyproject.toml ├── pytest.ini ├── README.md ├── requirements-3.11.txt ├── requirements-3.11.txt.backup ├── requirements-dev.txt ├── requirements.in ├── requirements.txt ├── run_build_verification.sh ├── run_fixed_tests.sh ├── run_test_with_path_fix.sh ├── run_tests.py ├── scripts │ ├── check_qdrant_health.sh │ ├── compile_requirements.sh │ ├── load_example_patterns.py │ ├── macos_install.sh │ ├── README.md │ ├── setup_qdrant.sh │ ├── start_mcp_server.sh │ ├── store_code_relationships.py │ ├── store_report_in_mcp.py │ ├── validate_knowledge_base.py │ ├── validate_poc.py │ ├── validate_vector_store.py │ └── verify_build.py ├── server.py ├── setup_qdrant_collection.py ├── setup.py ├── src │ └── mcp_codebase_insight │ ├── __init__.py │ ├── __main__.py │ ├── asgi.py │ ├── core │ │ ├── __init__.py │ │ ├── adr.py │ │ ├── cache.py │ │ ├── component_status.py │ │ ├── config.py │ │ ├── debug.py │ │ ├── di.py │ │ ├── documentation.py │ │ ├── embeddings.py │ │ ├── errors.py │ │ ├── health.py │ │ ├── knowledge.py │ │ ├── metrics.py │ │ ├── prompts.py │ │ ├── sse.py │ │ ├── state.py │ │ ├── task_tracker.py │ │ ├── tasks.py │ │ └── vector_store.py │ ├── models.py │ ├── server_test_isolation.py │ ├── server.py │ ├── utils │ │ ├── __init__.py │ │ └── logger.py │ └── version.py ├── start-mcpserver.sh ├── summary_document.txt ├── system-architecture.md ├── system-card.yml ├── test_fix_helper.py ├── test_fixes.md ├── test_function.txt ├── test_imports.py ├── tests │ ├── components │ │ ├── conftest.py │ │ ├── test_core_components.py │ │ ├── 
test_embeddings.py │ │ ├── test_knowledge_base.py │ │ ├── test_sse_components.py │ │ ├── test_stdio_components.py │ │ ├── test_task_manager.py │ │ └── test_vector_store.py │ ├── config │ │ └── test_config_and_env.py │ ├── conftest.py │ ├── integration │ │ ├── fixed_test2.py │ │ ├── test_api_endpoints.py │ │ ├── test_api_endpoints.py-e │ │ ├── test_communication_integration.py │ │ └── test_server.py │ ├── README.md │ ├── README.test.md │ ├── test_build_verifier.py │ └── test_file_relationships.py └── trajectories └── tosinakinosho ├── anthropic_filemap__claude-3-sonnet-20240229__t-0.00__p-1.00__c-3.00___db62b9 │ └── db62b9 │ └── config.yaml ├── default__claude-3-5-sonnet-20240620__t-0.00__p-1.00__c-3.00___03565e │ └── 03565e │ ├── 03565e.traj │ └── config.yaml └── default__openrouter └── anthropic └── claude-3.5-sonnet-20240620:beta__t-0.00__p-1.00__c-3.00___03565e └── 03565e ├── 03565e.pred ├── 03565e.traj └── config.yaml ``` # Files -------------------------------------------------------------------------------- /requirements-3.11.txt: -------------------------------------------------------------------------------- ``` # # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile --allow-unsafe --output-file=requirements-3.11.minimal.txt requirements.in.minimal # aiohappyeyeballs==2.6.1 # via aiohttp aiohttp==3.11.14 # via -r requirements.in.minimal aiosignal==1.3.2 # via aiohttp annotated-types==0.7.0 # via pydantic anyio==4.9.0 # via # httpx # mcp # sse-starlette # starlette attrs==25.3.0 # via aiohttp beautifulsoup4==4.13.3 # via -r requirements.in.minimal black==25.1.0 # via -r requirements.in.minimal build==1.2.2.post1 # via pip-tools bump2version==1.0.1 # via -r requirements.in.minimal certifi==2025.1.31 # via # httpcore # httpx # requests charset-normalizer==3.4.1 # via requests click==8.1.8 # via # black # pip-tools # uvicorn coverage[toml]==7.8.0 # via pytest-cov fastapi==0.115.12 # via -r requirements.in.minimal filelock==3.18.0 # via # huggingface-hub # torch # transformers flake8==7.2.0 # via -r requirements.in.minimal frozenlist==1.5.0 # via # aiohttp # aiosignal fsspec==2025.3.1 # via # huggingface-hub # torch grpcio==1.71.0 # via # grpcio-tools # qdrant-client grpcio-tools==1.71.0 # via qdrant-client h11==0.14.0 # via # httpcore # uvicorn h2==4.2.0 # via httpx hpack==4.1.0 # via h2 httpcore==1.0.7 # via httpx httpx[http2]==0.28.1 # via # -r requirements.in.minimal # mcp # qdrant-client httpx-sse==0.4.0 # via mcp huggingface-hub==0.29.3 # via # tokenizers # transformers hyperframe==6.1.0 # via h2 idna==3.10 # via # anyio # httpx # requests # yarl iniconfig==2.1.0 # via pytest isort==6.0.1 # via -r requirements.in.minimal jinja2==3.1.6 # via torch markdown==3.7 # via -r requirements.in.minimal markupsafe==3.0.2 # via jinja2 mccabe==0.7.0 # via flake8 mcp==1.6.0 # via -r requirements.in.minimal mpmath==1.3.0 # via sympy multidict==6.2.0 # via # aiohttp # yarl mypy==1.15.0 # via -r requirements.in.minimal mypy-extensions==1.0.0 # via # black # mypy networkx==3.4.2 # via # -r requirements.in.minimal # torch numpy==2.2.4 # via # -r requirements.in.minimal # qdrant-client # scipy # transformers packaging==24.2 # via # black # build # huggingface-hub # pytest # transformers pathspec==0.12.1 # via black pip-tools==7.4.1 # via -r requirements.in.minimal platformdirs==4.3.7 # via black pluggy==1.5.0 # via pytest portalocker==2.10.1 # via qdrant-client propcache==0.3.1 # via # aiohttp # yarl protobuf==5.29.4 # via grpcio-tools psutil==7.0.0 
# via -r requirements.in.minimal pycodestyle==2.13.0 # via flake8 pydantic==2.11.1 # via # -r requirements.in.minimal # fastapi # mcp # pydantic-settings # qdrant-client pydantic-core==2.33.0 # via pydantic pydantic-settings==2.8.1 # via mcp pyflakes==3.3.1 # via flake8 pyproject-hooks==1.2.0 # via # build # pip-tools pytest==8.3.5 # via # -r requirements.in.minimal # pytest-asyncio # pytest-cov pytest-asyncio==0.26.0 # via -r requirements.in.minimal pytest-cov==6.0.0 # via -r requirements.in.minimal python-dotenv==1.1.0 # via # -r requirements.in.minimal # pydantic-settings python-frontmatter==1.1.0 # via -r requirements.in.minimal python-slugify==8.0.4 # via -r requirements.in.minimal pyyaml==6.0.2 # via # -r requirements.in.minimal # huggingface-hub # python-frontmatter # transformers qdrant-client==1.13.3 # via -r requirements.in.minimal regex==2024.11.6 # via transformers requests==2.32.3 # via # -r requirements.in.minimal # huggingface-hub # transformers safetensors==0.5.3 # via transformers scipy==1.15.2 # via -r requirements.in.minimal slugify==0.0.1 # via -r requirements.in.minimal sniffio==1.3.1 # via anyio soupsieve==2.6 # via beautifulsoup4 sse-starlette==2.2.1 # via mcp starlette==0.46.1 # via # fastapi # mcp # sse-starlette structlog==25.2.0 # via -r requirements.in.minimal sympy==1.13.1 # via torch text-unidecode==1.3 # via python-slugify tokenizers==0.21.1 # via transformers torch==2.6.0 # via -r requirements.in.minimal tqdm==4.67.1 # via # huggingface-hub # transformers transformers==4.50.3 # via -r requirements.in.minimal typing-extensions==4.13.0 # via # anyio # beautifulsoup4 # fastapi # huggingface-hub # mypy # pydantic # pydantic-core # torch # typing-inspection typing-inspection==0.4.0 # via pydantic urllib3==2.3.0 # via # qdrant-client # requests uvicorn==0.34.0 # via # -r requirements.in.minimal # mcp wheel==0.45.1 # via pip-tools yarl==1.18.3 # via aiohttp # The following packages are considered to be unsafe in a requirements file: pip==25.0.1 # via pip-tools setuptools==78.1.0 # via # grpcio-tools # pip-tools # WARNING: starlette constraint was removed to resolve conflicts # You will need to manually install a compatible starlette version ``` -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- ``` # # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # pip-compile --allow-unsafe --output-file=requirements-3.11.minimal.txt requirements.in.minimal # aiohappyeyeballs==2.6.1 # via aiohttp aiohttp==3.11.14 # via -r requirements.in.minimal aiosignal==1.3.2 # via aiohttp annotated-types==0.7.0 # via pydantic anyio==4.9.0 # via # httpx # mcp # sse-starlette # starlette attrs==25.3.0 # via aiohttp beautifulsoup4==4.13.3 # via -r requirements.in.minimal black==25.1.0 # via -r requirements.in.minimal build==1.2.2.post1 # via pip-tools bump2version==1.0.1 # via -r requirements.in.minimal certifi==2025.1.31 # via # httpcore # httpx # requests charset-normalizer==3.4.1 # via requests click==8.1.8 # via # black # pip-tools # uvicorn coverage[toml]==7.8.0 # via pytest-cov fastapi==0.115.12 # via -r requirements.in.minimal filelock==3.18.0 # via # huggingface-hub # torch # transformers flake8==7.2.0 # via -r requirements.in.minimal frozenlist==1.5.0 # via # aiohttp # aiosignal fsspec==2025.3.1 # via # huggingface-hub # torch grpcio==1.71.0 # via # grpcio-tools # qdrant-client grpcio-tools==1.71.0 # via 
qdrant-client h11==0.14.0 # via # httpcore # uvicorn h2==4.2.0 # via httpx hpack==4.1.0 # via h2 httpcore==1.0.7 # via httpx httpx[http2]==0.28.1 # via # -r requirements.in.minimal # mcp # qdrant-client httpx-sse==0.4.0 # via mcp huggingface-hub==0.29.3 # via # tokenizers # transformers hyperframe==6.1.0 # via h2 idna==3.10 # via # anyio # httpx # requests # yarl iniconfig==2.1.0 # via pytest isort==6.0.1 # via -r requirements.in.minimal jinja2==3.1.6 # via torch markdown==3.7 # via -r requirements.in.minimal markupsafe==3.0.2 # via jinja2 mccabe==0.7.0 # via flake8 mcp==1.6.0 # via -r requirements.in.minimal mpmath==1.3.0 # via sympy multidict==6.2.0 # via # aiohttp # yarl mypy==1.15.0 # via -r requirements.in.minimal mypy-extensions==1.0.0 # via # black # mypy networkx==3.4.2 # via # -r requirements.in.minimal # torch numpy==2.2.4 # via # -r requirements.in.minimal # qdrant-client # scipy # transformers packaging==24.2 # via # black # build # huggingface-hub # pytest # transformers pathspec==0.12.1 # via black pip-tools==7.4.1 # via -r requirements.in.minimal platformdirs==4.3.7 # via black pluggy==1.5.0 # via pytest portalocker==2.10.1 # via qdrant-client propcache==0.3.1 # via # aiohttp # yarl protobuf==5.29.4 # via grpcio-tools psutil==7.0.0 # via -r requirements.in.minimal pycodestyle==2.13.0 # via flake8 pydantic==2.11.1 # via # -r requirements.in.minimal # fastapi # mcp # pydantic-settings # qdrant-client pydantic-core==2.33.0 # via pydantic pydantic-settings==2.8.1 # via mcp pyflakes==3.3.1 # via flake8 pyproject-hooks==1.2.0 # via # build # pip-tools pytest==8.3.5 # via # -r requirements.in.minimal # pytest-asyncio # pytest-cov pytest-asyncio==0.26.0 # via -r requirements.in.minimal pytest-cov==6.0.0 # via -r requirements.in.minimal python-dotenv==1.1.0 # via # -r requirements.in.minimal # pydantic-settings python-frontmatter==1.1.0 # via -r requirements.in.minimal python-slugify==8.0.4 # via -r requirements.in.minimal pyyaml==6.0.2 # via # -r requirements.in.minimal # huggingface-hub # python-frontmatter # transformers qdrant-client==1.13.3 # via -r requirements.in.minimal regex==2024.11.6 # via transformers requests==2.32.3 # via # -r requirements.in.minimal # huggingface-hub # transformers safetensors==0.5.3 # via transformers scipy==1.15.2 # via -r requirements.in.minimal slugify==0.0.1 # via -r requirements.in.minimal sniffio==1.3.1 # via anyio soupsieve==2.6 # via beautifulsoup4 sse-starlette==2.2.1 # via mcp starlette==0.46.1 # via # fastapi # mcp # sse-starlette structlog==25.2.0 # via -r requirements.in.minimal sympy==1.13.1 # via torch text-unidecode==1.3 # via python-slugify tokenizers==0.21.1 # via transformers torch==2.6.0 # via -r requirements.in.minimal tqdm==4.67.1 # via # huggingface-hub # transformers transformers==4.50.3 # via -r requirements.in.minimal typing-extensions==4.13.0 # via # anyio # beautifulsoup4 # fastapi # huggingface-hub # mypy # pydantic # pydantic-core # torch # typing-inspection typing-inspection==0.4.0 # via pydantic urllib3==2.3.0 # via # qdrant-client # requests uvicorn==0.34.0 # via # -r requirements.in.minimal # mcp wheel==0.45.1 # via pip-tools yarl==1.18.3 # via aiohttp # The following packages are considered to be unsafe in a requirements file: pip==25.0.1 # via pip-tools setuptools==78.1.0 # via # grpcio-tools # pip-tools # WARNING: starlette constraint was removed to resolve conflicts # You will need to manually install a compatible starlette version ``` 
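Both compiled requirements files above end with a warning that the starlette constraint was removed during resolution and must be installed manually. A minimal sketch of that manual step is shown below; pinning to `starlette==0.46.1` is an assumption based on the version that already appears in the resolved pins, so verify compatibility against your installed `fastapi` and `mcp` releases.

```bash
# Manually install a starlette version compatible with the resolved pins.
# 0.46.1 matches the "starlette==0.46.1" entry in the lock files above;
# adjust if your fastapi/mcp versions require something different.
pip install "starlette==0.46.1"
```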
-------------------------------------------------------------------------------- /docs/getting-started/quickstart.md: -------------------------------------------------------------------------------- ```markdown # Quick Start Guide > 🚧 **Documentation In Progress** > > This documentation is being actively developed. More details will be added soon. ## Overview This guide will help you get started with MCP Codebase Insight quickly. ## Prerequisites Ensure you have: - Completed the [Installation](installation.md) - Set up [Qdrant](qdrant_setup.md) - Configured your [environment](configuration.md) ## Basic Usage 1. **Start the Server** ```bash mcp-codebase-insight --host localhost --port 3000 ``` 2. **Analyze Code** ```python from mcp_codebase_insight import CodebaseAnalyzer analyzer = CodebaseAnalyzer() results = analyzer.analyze_code("path/to/code") ``` 3. **View Results** ```python print(results.patterns) print(results.suggestions) ``` ## Next Steps - [API Reference](../api/rest-api.md) - [Feature Documentation](../features/code-analysis.md) - [Development Guide](../development/README.md) ## 5-Minute Setup 1. **Install MCP Codebase Insight** ```bash # Create and activate virtual environment python -m venv venv source venv/bin/activate # On Windows: venv\Scripts\activate # Install the package pip install mcp-codebase-insight ``` 2. **Start Qdrant Vector Database** ```bash # Using Docker (recommended) docker pull qdrant/qdrant docker run -p 6333:6333 qdrant/qdrant ``` 3. **Configure Environment** ```bash # Create .env file cat > .env << EOL MCP_HOST=127.0.0.1 MCP_PORT=3000 QDRANT_URL=http://localhost:6333 MCP_DOCS_CACHE_DIR=./docs MCP_ADR_DIR=./docs/adrs MCP_KB_STORAGE_DIR=./knowledge EOL # Create required directories mkdir -p docs/adrs knowledge ``` 4. **Verify Installation** ```bash # In another terminal curl http://localhost:3000/health ``` ## Basic Usage Examples ### 1. Analyze Code Patterns ```python import httpx async with httpx.AsyncClient() as client: # Analyze code patterns response = await client.post( "http://localhost:3000/api/analyze", json={ "code": """ def calculate_fibonacci(n): if n <= 1: return n return calculate_fibonacci(n-1) + calculate_fibonacci(n-2) """, "language": "python" } ) results = response.json() print("Detected patterns:", results["patterns"]) ``` ### 2. Create an ADR ```python # Create an Architecture Decision Record response = await client.post( "http://localhost:3000/api/adrs", json={ "title": "Use FastAPI for REST API", "context": { "problem": "Need a modern Python web framework", "constraints": ["Performance", "Easy to maintain"] }, "options": [ { "title": "FastAPI", "pros": ["Fast", "Modern", "Great docs"], "cons": ["Newer framework"] }, { "title": "Flask", "pros": ["Mature", "Simple"], "cons": ["Slower", "Less modern"] } ], "decision": "We will use FastAPI", "consequences": ["Need to learn async/await", "Better performance"] } ) adr = response.json() print(f"Created ADR: {adr['id']}") ``` ### 3. Search Documentation ```python # Search for relevant documentation response = await client.get( "http://localhost:3000/api/docs/search", params={ "query": "how to handle authentication", "limit": 5 } ) docs = response.json() for doc in docs["results"]: print(f"- {doc['title']}: {doc['relevance_score']}") ``` ### 4. 
Monitor System Health ```python # Get system health status response = await client.get("http://localhost:3000/health") health = response.json() print("System Status:", health["status"]) for component, status in health["components"].items(): print(f"- {component}: {status['status']}") ``` ## Using the Web Interface 1. Open your browser to `http://localhost:3000/docs` 2. Explore the interactive API documentation 3. Try out different endpoints directly from the browser ## Common Operations ### Managing ADRs ```bash # List all ADRs curl http://localhost:3000/api/adrs # Get specific ADR curl http://localhost:3000/api/adrs/{adr_id} # Update ADR status curl -X PATCH http://localhost:3000/api/adrs/{adr_id} \ -H "Content-Type: application/json" \ -d '{"status": "ACCEPTED"}' ``` ### Working with Documentation ```bash # Crawl documentation curl -X POST http://localhost:3000/api/docs/crawl \ -H "Content-Type: application/json" \ -d '{ "urls": ["https://your-docs-site.com"], "source_type": "documentation" }' # Search documentation curl "http://localhost:3000/api/docs/search?query=authentication&limit=5" ``` ### Analyzing Code ```bash # Analyze code patterns curl -X POST http://localhost:3000/api/analyze \ -H "Content-Type: application/json" \ -d '{ "code": "your code here", "language": "python" }' # Get analysis results curl http://localhost:3000/api/analysis/{analysis_id} ``` ## Troubleshooting 1. **Server Won't Start** ```bash # Check if ports are in use lsof -i :3000 lsof -i :6333 ``` 2. **Connection Issues** ```bash # Verify Qdrant is running curl http://localhost:6333/health # Check MCP server health curl http://localhost:3000/health ``` 3. **Permission Problems** ```bash # Fix directory permissions chmod -R 755 docs knowledge ``` ## Getting Help - Check the [Troubleshooting Guide](../troubleshooting/common-issues.md) - Join our [Discussion Forum](https://github.com/modelcontextprotocol/mcp-codebase-insight/discussions) - Open an [Issue](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues) ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/core/metrics.py: -------------------------------------------------------------------------------- ```python """Metrics collection and monitoring module.""" import json from datetime import datetime from enum import Enum from pathlib import Path from typing import Dict, List, Optional, Union from pydantic import BaseModel class MetricType(str, Enum): """Metric type enumeration.""" COUNTER = "counter" GAUGE = "gauge" HISTOGRAM = "histogram" SUMMARY = "summary" class Metric(BaseModel): """Metric model.""" name: str type: MetricType value: Union[int, float] labels: Optional[Dict[str, str]] = None timestamp: datetime class MetricsManager: """Manager for system metrics.""" def __init__(self, config): """Initialize metrics manager.""" self.config = config self.enabled = config.metrics_enabled self.metrics_dir = config.docs_cache_dir / "metrics" self.metrics_dir.mkdir(parents=True, exist_ok=True) self.metrics: Dict[str, List[Metric]] = {} self.initialized = False async def initialize(self): """Initialize metrics collection.""" if self.initialized: return try: if not self.enabled: return # Load existing metrics for path in self.metrics_dir.glob("*.json"): try: metric_name = path.stem with open(path) as f: data = json.load(f) self.metrics[metric_name] = [ Metric(**metric) for metric in data ] except Exception as e: print(f"Error loading metric file {path}: {e}") self.initialized = True except 
Exception as e: print(f"Error initializing metrics manager: {e}") await self.cleanup() raise RuntimeError(f"Failed to initialize metrics manager: {str(e)}") async def cleanup(self): """Clean up metrics.""" if not self.initialized: return try: if not self.enabled: return # Save all metrics for name, metrics in self.metrics.items(): try: await self._save_metrics(name, metrics) except Exception as e: print(f"Error saving metrics for {name}: {e}") except Exception as e: print(f"Error cleaning up metrics manager: {e}") finally: self.initialized = False async def reset(self): """Reset all metrics.""" if not self.enabled: return # Clear in-memory metrics self.metrics = {} # Remove all metric files for path in self.metrics_dir.glob("*.json"): try: path.unlink() except Exception as e: print(f"Error removing metric file {path}: {e}") async def record_metric( self, name: str, type: MetricType, value: Union[int, float], labels: Optional[Dict[str, str]] = None ) -> None: """Record a new metric value.""" if not self.enabled: return metric = Metric( name=name, type=type, value=value, labels=labels, timestamp=datetime.utcnow() ) if name not in self.metrics: self.metrics[name] = [] self.metrics[name].append(metric) # Save metrics periodically if len(self.metrics[name]) >= 100: await self._save_metrics(name, self.metrics[name]) self.metrics[name] = [] async def get_metrics( self, names: Optional[List[str]] = None, start_time: Optional[datetime] = None, end_time: Optional[datetime] = None ) -> Dict[str, List[Dict]]: """Get metrics, optionally filtered by name and time range.""" if not self.enabled: return {} result = {} metric_names = names or list(self.metrics.keys()) for name in metric_names: if name not in self.metrics: continue metrics = self.metrics[name] # Apply time filters if start_time: metrics = [m for m in metrics if m.timestamp >= start_time] if end_time: metrics = [m for m in metrics if m.timestamp <= end_time] result[name] = [metric.model_dump() for metric in metrics] return result async def get_metric_summary( self, name: str, window_minutes: int = 60 ) -> Optional[Dict]: """Get summary statistics for a metric.""" if not self.enabled or name not in self.metrics: return None metrics = self.metrics[name] if not metrics: return None # Filter metrics within time window cutoff = datetime.utcnow().timestamp() - (window_minutes * 60) recent_metrics = [ m for m in metrics if m.timestamp.timestamp() >= cutoff ] if not recent_metrics: return None values = [m.value for m in recent_metrics] return { "count": len(values), "min": min(values), "max": max(values), "avg": sum(values) / len(values), "last": values[-1] } async def _save_metrics(self, name: str, metrics: List[Metric]) -> None: """Save metrics to file.""" metric_path = self.metrics_dir / f"{name}.json" with open(metric_path, "w") as f: json.dump( [metric.model_dump() for metric in metrics], f, indent=2, default=str ) ``` -------------------------------------------------------------------------------- /docs/features/code-analysis.md: -------------------------------------------------------------------------------- ```markdown # Code Analysis MCP Codebase Insight provides powerful code analysis capabilities to help you understand patterns, identify issues, and improve code quality. ## Overview The code analysis feature: - Identifies common design patterns - Detects potential issues and anti-patterns - Suggests improvements and optimizations - Analyzes code relationships and dependencies - Provides semantic understanding of code ## Features ### 1. 
Pattern Detection The system can identify common software design patterns: ```python # Example: Factory Pattern Detection class Creator: def factory_method(self): pass def some_operation(self): product = self.factory_method() result = product.operation() return result class ConcreteCreator(Creator): def factory_method(self): return ConcreteProduct() ``` Analysis will identify this as a Factory Pattern implementation. ### 2. Code Quality Analysis Identifies potential issues and suggests improvements: - Code complexity metrics - Duplicate code detection - Dead code identification - Resource management issues - Error handling patterns ### 3. Dependency Analysis Maps relationships between code components: ```python # Example: Analyzing imports and dependencies response = await client.post( "http://localhost:3000/api/analyze/dependencies", json={ "file_path": "src/main.py", "depth": 2 # How deep to analyze dependencies } ) dependencies = response.json() ``` ### 4. Semantic Analysis Understands code meaning and context: ```python # Example: Semantic code search response = await client.post( "http://localhost:3000/api/analyze/semantic", json={ "query": "find all functions that handle user authentication", "scope": ["src/auth/", "src/users/"] } ) matches = response.json() ``` ## Usage ### Basic Analysis ```python import httpx async with httpx.AsyncClient() as client: response = await client.post( "http://localhost:3000/api/analyze", json={ "code": your_code, "language": "python", "analysis_type": ["patterns", "quality", "dependencies"] } ) results = response.json() print(results["patterns"]) print(results["quality_issues"]) print(results["dependencies"]) ``` ### Continuous Analysis Set up continuous analysis in your CI/CD pipeline: ```bash # Example: GitHub Actions workflow curl -X POST http://localhost:3000/api/analyze/ci \ -H "Content-Type: application/json" \ -d '{ "repository": "owner/repo", "branch": "main", "commit": "sha", "diff_only": true }' ``` ### Batch Analysis Analyze multiple files or entire directories: ```python # Analyze entire directory response = await client.post( "http://localhost:3000/api/analyze/batch", json={ "path": "src/", "include": ["*.py", "*.js"], "exclude": ["*_test.py", "node_modules"], "analysis_type": ["patterns", "quality"] } ) ``` ## Configuration ### Analysis Settings ```yaml analysis: # Pattern detection settings patterns: confidence_threshold: 0.8 min_pattern_size: 5 # Quality analysis settings quality: max_complexity: 15 max_line_length: 100 enable_type_checking: true # Dependency analysis settings dependencies: max_depth: 3 include_external: true # Semantic analysis settings semantic: model: "code-bert-base" similarity_threshold: 0.7 ``` ### Custom Rules Create custom analysis rules: ```python # Example: Custom pattern rule { "name": "custom_singleton", "pattern": { "type": "class", "properties": { "has_private_constructor": true, "has_static_instance": true } }, "message": "Possible Singleton pattern detected" } ``` ## Integration ### IDE Integration The analysis features can be integrated with popular IDEs: - VS Code Extension - JetBrains Plugin - Vim/Neovim Plugin ### CI/CD Integration Example GitHub Actions workflow: ```yaml name: Code Analysis on: [push, pull_request] jobs: analyze: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Run MCP Analysis run: | curl -X POST http://localhost:3000/api/analyze/ci \ -H "Content-Type: application/json" \ -d '{ "repository": "${{ github.repository }}", "commit": "${{ github.sha }}", "diff_only": 
true
            }'
```

## Best Practices

1. **Regular Analysis**
   - Run analysis on every commit
   - Set up automated analysis in CI/CD
   - Review analysis results in code reviews

2. **Custom Rules**
   - Create project-specific rules
   - Maintain a rule catalog
   - Version control your rules

3. **Performance**
   - Use selective analysis for large codebases
   - Enable caching for repeated analysis
   - Configure appropriate thresholds

4. **Integration**
   - Integrate with your IDE
   - Add to your CI/CD pipeline
   - Connect with code review tools

## Troubleshooting

### Common Issues

1. **Analysis Timeout**
   ```yaml
   # Increase timeout in config
   analysis:
     timeout: 300  # seconds
   ```

2. **High Memory Usage**
   ```yaml
   # Adjust batch size
   analysis:
     batch_size: 50
   ```

3. **False Positives**
   ```yaml
   # Adjust confidence thresholds
   analysis:
     patterns:
       confidence_threshold: 0.9
   ```

## API Reference

### Analysis Endpoints

| Endpoint | Description |
|----------|-------------|
| `/api/analyze` | Basic code analysis |
| `/api/analyze/batch` | Batch analysis |
| `/api/analyze/ci` | CI/CD integration |
| `/api/analyze/dependencies` | Dependency analysis |
| `/api/analyze/semantic` | Semantic analysis |

### Response Format

```json
{
  "analysis_id": "uuid",
  "status": "completed",
  "results": {
    "patterns": [...],
    "quality": {...},
    "dependencies": [...],
    "semantic": {...}
  },
  "metrics": {
    "time_taken": "2.5s",
    "files_analyzed": 10
  }
}
```

## Next Steps

- [Pattern Catalog](patterns/index.md)
- [Quality Rules](quality/index.md)
- [CI/CD Setup](../integration/ci-cd.md)
- [Custom Rules Guide](rules/custom.md)
```

--------------------------------------------------------------------------------
/github-actions-documentation.md
--------------------------------------------------------------------------------

```markdown
# GitHub Actions Workflows Documentation

@coderabbit This document provides a detailed review of the GitHub Actions workflows in the MCP Codebase Insight project. It explains each workflow and its purpose, and identifies potential areas for improvement.

## Overview of Workflows

The repository contains three GitHub Actions workflows:

1. **build-verification.yml**: Verifies the build across multiple Python versions
2. **publish.yml**: Publishes the package to PyPI when a new tag is pushed
3. **tdd-verification.yml**: Verifies that the project follows Test-Driven Development principles

## 1. Build Verification Workflow

**File**: `.github/workflows/build-verification.yml`

**Purpose**: Ensures the project builds and tests pass across multiple Python versions.

### Trigger Events

- Push to `main` branch
- Pull requests to `main` branch
- Manual workflow dispatch with configurable parameters

### Job Configuration

- Runs on `ubuntu-latest`
- Tests across Python versions: 3.10, 3.11, 3.12, 3.13
- Uses Qdrant as a service container for vector storage

### Key Steps

1. **Checkout code** - Fetches the repository code
2. **Set up Python** - Configures the specified Python version
3. **Wait for Qdrant** - Ensures the Qdrant service is available (see the sketch after this list)
4. **Setup private packages** - Configures any private dependencies
5. **Install dependencies** - Installs project requirements
6. **Set up environment** - Configures environment variables and directories
7. **Initialize Qdrant collection** - Creates a vector database collection for testing
8. **Run build verification** - Executes a subset of tests that are known to pass
9. **Upload and parse verification report** - Generates and publishes test results
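Step 3 relies on the Qdrant health check that, per the notes below, has been extracted into a reusable script (`scripts/check_qdrant_health.sh` in the repository tree). That script's contents are not reproduced in this document, so the following is only a minimal sketch of such a readiness probe, reusing the `/collections` check that `run_build_verification.sh` performs elsewhere in this repository; the retry count and interval are assumptions.

```bash
#!/usr/bin/env bash
# Minimal readiness probe for the Qdrant service container.
# QDRANT_URL, MAX_ATTEMPTS, and the sleep interval are assumptions.
QDRANT_URL="${QDRANT_URL:-http://localhost:6333}"
MAX_ATTEMPTS="${MAX_ATTEMPTS:-30}"

for attempt in $(seq 1 "$MAX_ATTEMPTS"); do
    # The /collections endpoint responds once Qdrant is ready to serve requests.
    if curl -sf "$QDRANT_URL/collections" > /dev/null; then
        echo "Qdrant is ready (attempt $attempt)"
        exit 0
    fi
    echo "Waiting for Qdrant... (attempt $attempt/$MAX_ATTEMPTS)"
    sleep 2
done

echo "Qdrant did not become ready in time" >&2
exit 1
```

A workflow step could call a script like this before installing dependencies, failing fast if the service container never becomes healthy.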
### Areas for Improvement

1. **Test Pattern Issue** - Fixed the wildcard pattern issue (`test_*`) in test paths
2. **Installation Resilience** - The approach to dependency installation could be improved with better error handling
3. **Service Health Check** - Extracted the Qdrant health check into a reusable script
4. **Test Selection** - The selective test running approach might miss regressions in other tests

## 2. Publish Workflow

**File**: `.github/workflows/publish.yml`

**Purpose**: Automates the publication of the package to PyPI when a new tag is created.

### Trigger Events

- Push of tags matching the pattern `v*` (e.g., v1.0.0)

### Job Configuration

- Runs on `ubuntu-latest`
- Uses the PyPI environment for deployment
- Requires write permissions for id-token and read for contents

### Key Steps

1. **Checkout code** - Fetches the repository with full history
2. **Set up Python** - Configures Python (latest 3.x version)
3. **Install dependencies** - Installs build and publishing tools
4. **Build package** - Creates distribution packages
5. **Check distribution** - Verifies the package integrity
6. **Publish to PyPI** - Uploads the package to PyPI

### Areas for Improvement

1. **Version Verification** - Could add a step to verify that the version in the code matches the tag
2. **Changelog Validation** - Could verify that the changelog is updated for the new version
3. **Pre-publish Testing** - Could run tests before publishing to ensure quality
4. **Release Notes** - Could automatically generate GitHub release notes

## 3. TDD Verification Workflow

**File**: `.github/workflows/tdd-verification.yml`

**Purpose**: Enforces Test-Driven Development principles by checking test coverage and patterns.

### Trigger Events

- Push to `dev` or `main` branches
- Pull requests to `dev` or `main` branches
- Manual workflow dispatch with configurable Python version

### Job Configuration

- Runs on `ubuntu-latest`
- Currently only tests with Python 3.11
- Uses Qdrant as a service container

### Key Steps

1. **Checkout code** - Fetches the repository code
2. **Set up Python** - Configures Python 3.11
3. **Wait for Qdrant** - Ensures the Qdrant service is available
4. **Install dependencies** - Installs project and testing requirements
5. **Set up environment** - Configures environment variables and directories
6. **Initialize Qdrant collection** - Creates a vector database collection for testing
7. **Run unit tests** - Executes unit tests with coverage reporting
8. **Run integration tests** - Executes integration tests with coverage reporting
9. **Generate coverage report** - Combines and reports test coverage
10. **TDD Verification** - Checks that all modules have corresponding tests and enforces minimum coverage
11. **Upload coverage** - Uploads coverage data to Codecov
12. **Check test structure** - Validates that tests follow the Arrange-Act-Assert pattern
13. **TDD Workflow Summary** - Generates a summary of test coverage and counts

### Areas for Improvement

1. **Python Version Matrix** - Could test across multiple Python versions like the build workflow
2. **Inline Python Scripts** - Several inline Python scripts could be moved to dedicated files for better maintainability (a standalone sketch follows this list)
3. **Test Pattern Detection** - The Arrange-Act-Assert pattern detection is simplistic and could be more sophisticated
4. **Coverage Enforcement** - The coverage threshold (60%) could be extracted to a variable or configuration file
5. **Naming Consistency** - Some naming inconsistencies exist between the workflows
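The inline "TDD Verification" step (step 10 above) is one of the scripts this list suggests moving into a dedicated file. Below is only a minimal sketch of what such a standalone check might look like; the directory layout, the test naming convention, and the 60% threshold are assumptions taken from the workflow description, not from the actual workflow file.

```bash
#!/usr/bin/env bash
# Hypothetical standalone version of the inline "TDD Verification" step.
# Directory layout, naming convention, and threshold are assumptions.
set -euo pipefail

SRC_DIR="src/mcp_codebase_insight/core"
TEST_DIRS="tests/components tests/integration"
MIN_COVERAGE="${MIN_COVERAGE:-60}"

missing=0
for module in "$SRC_DIR"/*.py; do
    name="$(basename "$module" .py)"
    [ "$name" = "__init__" ] && continue
    # A module counts as covered if any test file mentions it by name.
    if ! grep -rl "$name" $TEST_DIRS --include="test_*.py" > /dev/null 2>&1; then
        echo "No test references module: $name"
        missing=1
    fi
done

# Enforce the minimum overall coverage collected by the earlier test steps.
coverage report --fail-under="$MIN_COVERAGE"

exit "$missing"
```

Extracting the threshold into an environment variable, as item 4 suggests, also makes it easy to tighten over time without editing the workflow itself.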
## General Recommendations

1. **Workflow Consolidation** - Consider consolidating build-verification and tdd-verification workflows as they have overlapping functionality
2. **Shared Actions** - Extract common steps (like waiting for Qdrant) into reusable composite actions
3. **Workflow Dependencies** - Establish workflow dependencies to avoid redundant work (e.g., don't publish unless tests pass)
4. **Environment Standardization** - Standardize environment variables across workflows
5. **Documentation** - Add workflow-specific documentation in code comments
6. **Secret Management** - Audit and document the required secrets
7. **Caching Strategy** - Optimize dependency and build caching to speed up workflows
8. **Notification Integration** - Add notification channels (Slack, Discord) for workflow statuses

## Summary

The GitHub Actions workflows provide a solid foundation for CI/CD in this project, with comprehensive build verification, TDD enforcement, and automated publishing. The identified areas for improvement focus on maintainability, consistency, and efficiency. Implementing these suggestions would enhance the reliability and performance of the CI/CD pipeline.
```

--------------------------------------------------------------------------------
/scripts/store_report_in_mcp.py
--------------------------------------------------------------------------------

```python
#!/usr/bin/env python
"""
Store Build Verification Report in MCP Codebase Insight

This script reads the build verification report and stores it in the MCP server
using the vector database for later retrieval and analysis.
"""

import os
import sys
import json
import asyncio
import argparse
import logging
from datetime import datetime
from pathlib import Path
import uuid

# Add the project root to the Python path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from src.mcp_codebase_insight.core.vector_store import VectorStore
from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('logs/store_report.log')
    ]
)
logger = logging.getLogger('store_report')

async def store_report(report_file: str, config_path: str = None):
    """Store the build verification report in the MCP server.
Args: report_file: Path to the report file config_path: Path to configuration file (optional) """ # Load configuration config = { 'qdrant_url': os.environ.get('QDRANT_URL', 'http://localhost:6333'), 'qdrant_api_key': os.environ.get('QDRANT_API_KEY', ''), 'collection_name': os.environ.get('COLLECTION_NAME', 'mcp-codebase-insight'), 'embedding_model': os.environ.get('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2') } # Override with config file if provided if config_path: try: with open(config_path, 'r') as f: file_config = json.load(f) config.update(file_config) except Exception as e: logger.error(f"Failed to load config from {config_path}: {e}") try: # Load report logger.info(f"Loading report from {report_file}") with open(report_file, 'r') as f: report = json.load(f) # Initialize embedder logger.info("Initializing embedder...") embedder = SentenceTransformerEmbedding(model_name=config['embedding_model']) await embedder.initialize() # Initialize vector store logger.info(f"Connecting to vector store at {config['qdrant_url']}...") vector_store = VectorStore( url=config['qdrant_url'], embedder=embedder, collection_name=config['collection_name'], api_key=config.get('qdrant_api_key'), vector_name="default" ) await vector_store.initialize() # Prepare report for storage report_text = json.dumps(report, indent=2) # Extract summary information for metadata timestamp = report["build_verification_report"]["timestamp"] summary = report["build_verification_report"]["summary"] overall_status = report["build_verification_report"]["verification_results"]["overall_status"] # Create more user-friendly metadata metadata = { "type": "build_verification_report", "timestamp": timestamp, "overall_status": overall_status, "summary": summary, "tests_passed": report["build_verification_report"]["test_summary"]["passed"], "tests_total": report["build_verification_report"]["test_summary"]["total"], "criteria_passed": sum(1 for c in report["build_verification_report"]["verification_results"]["criteria_results"].values() if c["passed"]), "criteria_total": len(report["build_verification_report"]["verification_results"]["criteria_results"]), "build_date": datetime.now().strftime("%Y-%m-%d"), "project": "mcp-codebase-insight", "stored_by": "automated-build-verification" } # Store in vector database report_id = str(uuid.uuid4()) logger.info(f"Storing report with ID: {report_id}") # Generate embedding vector = await embedder.embed(report_text) # Store directly using the client to work around compatibility issues from qdrant_client.http import models as rest vector_store.client.upsert( collection_name=vector_store.collection_name, points=[ rest.PointStruct( id=report_id, vector=vector, # Use vector instead of vectors payload=metadata ) ] ) logger.info(f"Successfully stored report in MCP server with ID: {report_id}") # Create a record of stored reports try: history_file = Path("logs/report_history.json") history = [] if history_file.exists(): with open(history_file, 'r') as f: history = json.load(f) history.append({ "id": report_id, "timestamp": timestamp, "status": overall_status, "summary": summary }) with open(history_file, 'w') as f: json.dump(history, f, indent=2) logger.info(f"Updated report history in {history_file}") except Exception as e: logger.warning(f"Could not update report history: {e}") return report_id except Exception as e: logger.error(f"Failed to store report: {e}") raise finally: if 'vector_store' in locals(): await vector_store.close() async def main(): """Main function.""" parser = 
argparse.ArgumentParser(description="Store Build Verification Report in MCP") parser.add_argument("--report", default="logs/build_verification_report.json", help="Path to report file") parser.add_argument("--config", help="Path to configuration file") args = parser.parse_args() # Create logs directory if it doesn't exist os.makedirs("logs", exist_ok=True) try: report_id = await store_report(args.report, args.config) print(f"Report stored successfully with ID: {report_id}") return 0 except Exception as e: print(f"Error storing report: {e}") return 1 if __name__ == "__main__": sys.exit(asyncio.run(main())) ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/core/di.py: -------------------------------------------------------------------------------- ```python """Dependency Injection Container for MCP Server.""" from dataclasses import dataclass, field from typing import Optional, Dict, Any import asyncio from pathlib import Path from sentence_transformers import SentenceTransformer from qdrant_client import QdrantClient from .config import ServerConfig from .vector_store import VectorStore from .cache import CacheManager from .metrics import MetricsManager from .health import HealthManager from .documentation import DocumentationManager from .knowledge import KnowledgeBase from .tasks import TaskManager from ..utils.logger import get_logger logger = get_logger(__name__) @dataclass class ComponentContext: """Context for managing component lifecycle.""" initialized: bool = False cleanup_tasks: list = field(default_factory=list) error: Optional[Exception] = None @dataclass class DIContainer: """Dependency Injection Container for managing server components.""" config: ServerConfig _components: Dict[str, Any] = field(default_factory=dict) _contexts: Dict[str, ComponentContext] = field(default_factory=dict) def __post_init__(self): """Initialize component contexts.""" self._contexts = { "embedding_model": ComponentContext(), "vector_store": ComponentContext(), "cache_manager": ComponentContext(), "metrics_manager": ComponentContext(), "health_manager": ComponentContext(), "docs_manager": ComponentContext(), "knowledge_base": ComponentContext(), "task_manager": ComponentContext() } async def initialize_component(self, name: str, factory_func) -> Any: """Initialize a component with proper error handling and lifecycle management.""" context = self._contexts[name] if context.initialized: return self._components[name] try: component = await factory_func() if hasattr(component, 'initialize'): await component.initialize() self._components[name] = component context.initialized = True # Register cleanup if available if hasattr(component, 'cleanup'): context.cleanup_tasks.append(component.cleanup) elif hasattr(component, 'close'): context.cleanup_tasks.append(component.close) return component except Exception as e: context.error = e logger.error(f"Failed to initialize {name}: {str(e)}") raise async def get_embedding_model(self) -> SentenceTransformer: """Get or create the embedding model.""" async def factory(): return SentenceTransformer(self.config.embedding_model) return await self.initialize_component("embedding_model", factory) async def get_vector_store(self) -> VectorStore: """Get or create the vector store.""" async def factory(): embedding_model = await self.get_embedding_model() client = QdrantClient( url=self.config.qdrant_url, timeout=60.0 # Configurable timeout ) return VectorStore( client=client, embedder=embedding_model, 
collection_name=self.config.collection_name ) return await self.initialize_component("vector_store", factory) async def get_cache_manager(self) -> CacheManager: """Get or create the cache manager.""" async def factory(): return CacheManager( memory_size=self.config.memory_cache_size, disk_path=self.config.disk_cache_dir ) return await self.initialize_component("cache_manager", factory) async def get_metrics_manager(self) -> MetricsManager: """Get or create the metrics manager.""" async def factory(): return MetricsManager(enabled=self.config.metrics_enabled) return await self.initialize_component("metrics_manager", factory) async def get_health_manager(self) -> HealthManager: """Get or create the health manager.""" async def factory(): metrics = await self.get_metrics_manager() cache = await self.get_cache_manager() return HealthManager(metrics=metrics, cache=cache) return await self.initialize_component("health_manager", factory) async def get_docs_manager(self) -> DocumentationManager: """Get or create the documentation manager.""" async def factory(): vector_store = await self.get_vector_store() cache = await self.get_cache_manager() return DocumentationManager( vector_store=vector_store, cache=cache, docs_dir=self.config.docs_cache_dir ) return await self.initialize_component("docs_manager", factory) async def get_knowledge_base(self) -> KnowledgeBase: """Get or create the knowledge base.""" async def factory(): vector_store = await self.get_vector_store() cache = await self.get_cache_manager() return KnowledgeBase( vector_store=vector_store, cache=cache, storage_dir=self.config.kb_storage_dir ) return await self.initialize_component("knowledge_base", factory) async def get_task_manager(self) -> TaskManager: """Get or create the task manager.""" async def factory(): kb = await self.get_knowledge_base() docs = await self.get_docs_manager() return TaskManager( knowledge_base=kb, docs_manager=docs, max_tasks=100 # Configurable ) return await self.initialize_component("task_manager", factory) async def cleanup(self): """Clean up all components in reverse initialization order.""" for name, context in reversed(list(self._contexts.items())): if context.initialized: try: for cleanup_task in reversed(context.cleanup_tasks): await cleanup_task() context.initialized = False except Exception as e: logger.error(f"Error cleaning up {name}: {str(e)}") self._components.clear() ``` -------------------------------------------------------------------------------- /run_build_verification.sh: -------------------------------------------------------------------------------- ```bash #!/bin/bash # End-to-End Build Verification Script # # This script automates the process of verifying an end-to-end build by: # 1. First analyzing the codebase to store component relationships # 2. Triggering the build verification process # 3. 
Reporting the results set -e # Set up environment source .venv/bin/activate || source test_env/bin/activate || echo "No virtual environment found, using system Python" # Create required directories mkdir -p logs knowledge cache # Set environment variables for testing export MCP_EMBEDDING_TIMEOUT=120 # Increase timeout for embedder initialization export QDRANT_URL="${QDRANT_URL:-http://localhost:6333}" export QDRANT_API_KEY="${QDRANT_API_KEY:-}" export MCP_COLLECTION_NAME="${MCP_COLLECTION_NAME:-mcp-codebase-insight}" # Default values CONFIG_FILE="verification-config.json" OUTPUT_FILE="logs/build_verification_report.json" ANALYZE_FIRST=true VERBOSE=false # Check if Qdrant is running locally check_qdrant() { if curl -s "http://localhost:6333/collections" > /dev/null; then echo "Local Qdrant instance detected" return 0 else echo "Warning: No local Qdrant instance found at http://localhost:6333" echo "You may need to start Qdrant using Docker:" echo "docker run -p 6333:6333 qdrant/qdrant" return 1 fi } # Parse command line arguments while [[ $# -gt 0 ]]; do case $1 in --config) CONFIG_FILE="$2" shift 2 ;; --output) OUTPUT_FILE="$2" shift 2 ;; --skip-analysis) ANALYZE_FIRST=false shift ;; --verbose) VERBOSE=true shift ;; *) echo "Unknown option: $1" echo "Usage: $0 [--config CONFIG_FILE] [--output OUTPUT_FILE] [--skip-analysis] [--verbose]" exit 1 ;; esac done # Set up logging LOG_FILE="logs/build_verification.log" if $VERBOSE; then # Log to both console and file exec > >(tee -a "$LOG_FILE") 2>&1 else # Log only to file exec >> "$LOG_FILE" 2>&1 fi echo "===================================================================" echo "Starting End-to-End Build Verification at $(date)" echo "===================================================================" echo "Config file: $CONFIG_FILE" echo "Output file: $OUTPUT_FILE" echo "Analyze first: $ANALYZE_FIRST" echo "Verbose mode: $VERBOSE" echo "-------------------------------------------------------------------" # Check if config file exists if [[ ! -f "$CONFIG_FILE" ]]; then echo "Error: Config file $CONFIG_FILE not found!" exit 1 fi # Function to check if a command is available check_command() { if ! command -v "$1" &> /dev/null; then echo "Error: $1 is required but not installed." exit 1 fi } # Check required commands check_command python check_command pip # Ensure all dependencies are installed echo "Checking dependencies..." pip install -q -r requirements.txt pip install -q -e . # Step 1: Check Qdrant availability echo "Checking Qdrant availability..." if ! check_qdrant; then if [[ "$QDRANT_URL" == "http://localhost:6333" ]]; then echo "Error: Local Qdrant instance is not running and no alternative QDRANT_URL is set" echo "Please either:" echo "1. Start a local Qdrant instance using Docker:" echo " docker run -p 6333:6333 qdrant/qdrant" echo "2. Or set QDRANT_URL to point to your Qdrant instance" exit 1 else echo "Using alternative Qdrant instance at $QDRANT_URL" fi fi # Step 2: Analyze codebase and store component relationships (if enabled) if $ANALYZE_FIRST; then echo "Analyzing codebase and storing component relationships..." python -m scripts.store_code_relationships --config "$CONFIG_FILE" if [[ $? -ne 0 ]]; then echo "Error: Failed to analyze codebase and store component relationships!" exit 1 fi echo "Component relationships analysis completed successfully." else echo "Skipping codebase analysis as requested." fi # Step 3: Run build verification echo "Running tests with standardized test runner..." 
chmod +x run_tests.py ./run_tests.py --all --clean --isolated --coverage --html --verbose TEST_EXIT_CODE=$? echo "Running build verification..." python -m scripts.verify_build --config "$CONFIG_FILE" --output "$OUTPUT_FILE" BUILD_STATUS=$? # Use the worst exit code between tests and build verification if [ $TEST_EXIT_CODE -ne 0 ]; then BUILD_STATUS=$TEST_EXIT_CODE fi if [[ $BUILD_STATUS -ne 0 ]]; then echo "Build verification failed with exit code $BUILD_STATUS!" else echo "Build verification completed successfully." fi # Step 4: Report results echo "Build verification report saved to $OUTPUT_FILE" if [[ -f "$OUTPUT_FILE" ]]; then # Extract summary from report if jq is available if command -v jq &> /dev/null; then SUMMARY=$(jq -r '.build_verification_report.summary' "$OUTPUT_FILE") STATUS=$(jq -r '.build_verification_report.verification_results.overall_status' "$OUTPUT_FILE") echo "-------------------------------------------------------------------" echo "Build Verification Status: $STATUS" echo "Summary: $SUMMARY" echo "-------------------------------------------------------------------" # Print test results TOTAL=$(jq -r '.build_verification_report.test_summary.total' "$OUTPUT_FILE") PASSED=$(jq -r '.build_verification_report.test_summary.passed' "$OUTPUT_FILE") FAILED=$(jq -r '.build_verification_report.test_summary.failed' "$OUTPUT_FILE") COVERAGE=$(jq -r '.build_verification_report.test_summary.coverage' "$OUTPUT_FILE") echo "Test Results:" echo "- Total Tests: $TOTAL" echo "- Passed: $PASSED" echo "- Failed: $FAILED" echo "- Coverage: $COVERAGE%" # Print failure info if any if [[ "$STATUS" != "PASS" ]]; then echo "-------------------------------------------------------------------" echo "Failures detected. See $OUTPUT_FILE for details." # Print failure analysis if available if jq -e '.build_verification_report.failure_analysis' "$OUTPUT_FILE" > /dev/null; then echo "Failure Analysis:" jq -r '.build_verification_report.failure_analysis[] | "- " + .description' "$OUTPUT_FILE" fi fi else echo "-------------------------------------------------------------------" echo "Install jq for better report formatting." echo "Report saved to $OUTPUT_FILE" fi else echo "Error: Build verification report not found at $OUTPUT_FILE!" 
fi echo "===================================================================" echo "End-to-End Build Verification completed at $(date)" echo "Exit status: $BUILD_STATUS" echo "===================================================================" exit $BUILD_STATUS ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/core/prompts.py: -------------------------------------------------------------------------------- ```python """Prompt management and generation module.""" from datetime import datetime from enum import Enum from typing import Dict, List, Optional from uuid import UUID, uuid4 from pydantic import BaseModel class PromptType(str, Enum): """Prompt type enumeration.""" CODE_ANALYSIS = "code_analysis" PATTERN_EXTRACTION = "pattern_extraction" DOCUMENTATION = "documentation" DEBUG = "debug" ADR = "adr" class PromptTemplate(BaseModel): """Prompt template model.""" id: UUID name: str type: PromptType template: str description: Optional[str] = None variables: List[str] examples: Optional[List[Dict]] = None created_at: datetime updated_at: datetime version: Optional[str] = None class PromptManager: """Manager for prompt templates and generation.""" def __init__(self, config): """Initialize prompt manager.""" self.config = config self.templates: Dict[str, PromptTemplate] = {} self._load_default_templates() def _load_default_templates(self): """Load default prompt templates.""" # Code Analysis Templates self.add_template( name="code_pattern_analysis", type=PromptType.CODE_ANALYSIS, template="""Analyze the following code for patterns and best practices: Code: {code} Consider: - Design patterns used - Architecture patterns - Code organization - Error handling - Performance considerations - Security implications Provide detailed analysis focusing on: {focus_areas}""", variables=["code", "focus_areas"], description="Template for analyzing code patterns" ) # Pattern Extraction Templates self.add_template( name="extract_design_patterns", type=PromptType.PATTERN_EXTRACTION, template="""Extract design patterns from the following code: Code: {code} Look for instances of: - Creational patterns - Structural patterns - Behavioral patterns - Architectural patterns For each pattern found, explain: - Pattern name and category - How it's implemented - Benefits and tradeoffs - Potential improvements""", variables=["code"], description="Template for extracting design patterns" ) # Documentation Templates self.add_template( name="generate_documentation", type=PromptType.DOCUMENTATION, template="""Generate documentation for the following code: Code: {code} Documentation type: {doc_type} Include: - Overview - Usage examples - API reference - Dependencies - Configuration - Error handling - Best practices""", variables=["code", "doc_type"], description="Template for generating documentation" ) # Debug Templates self.add_template( name="debug_analysis", type=PromptType.DEBUG, template="""Analyze the following issue: Description: {description} Error: {error} Context: {context} Provide: - Root cause analysis - Potential solutions - Prevention strategies - Testing recommendations""", variables=["description", "error", "context"], description="Template for debug analysis" ) # ADR Templates self.add_template( name="adr_template", type=PromptType.ADR, template="""# Architecture Decision Record ## Title: {title} ## Status: {status} ## Context {context} ## Decision Drivers {decision_drivers} ## Considered Options {options} ## Decision {decision} ## Consequences 
{consequences} ## Implementation {implementation} ## Related Decisions {related_decisions}""", variables=[ "title", "status", "context", "decision_drivers", "options", "decision", "consequences", "implementation", "related_decisions" ], description="Template for architecture decision records" ) def add_template( self, name: str, type: PromptType, template: str, variables: List[str], description: Optional[str] = None, examples: Optional[List[Dict]] = None, version: Optional[str] = None ) -> PromptTemplate: """Add a new prompt template.""" now = datetime.utcnow() template = PromptTemplate( id=uuid4(), name=name, type=type, template=template, description=description, variables=variables, examples=examples, version=version, created_at=now, updated_at=now ) self.templates[name] = template return template def get_template(self, name: str) -> Optional[PromptTemplate]: """Get prompt template by name.""" return self.templates.get(name) def list_templates( self, type: Optional[PromptType] = None ) -> List[PromptTemplate]: """List all templates, optionally filtered by type.""" templates = list(self.templates.values()) if type: templates = [t for t in templates if t.type == type] return sorted(templates, key=lambda x: x.name) def generate_prompt( self, template_name: str, variables: Dict[str, str] ) -> Optional[str]: """Generate prompt from template and variables.""" template = self.get_template(template_name) if not template: return None # Validate variables missing = [v for v in template.variables if v not in variables] if missing: raise ValueError(f"Missing required variables: {', '.join(missing)}") try: return template.template.format(**variables) except KeyError as e: raise ValueError(f"Invalid variable: {e}") except Exception as e: raise ValueError(f"Error generating prompt: {e}") def update_template( self, name: str, template: Optional[str] = None, description: Optional[str] = None, examples: Optional[List[Dict]] = None, version: Optional[str] = None ) -> Optional[PromptTemplate]: """Update prompt template.""" tmpl = self.get_template(name) if not tmpl: return None if template: tmpl.template = template if description: tmpl.description = description if examples: tmpl.examples = examples if version: tmpl.version = version tmpl.updated_at = datetime.utcnow() return tmpl ``` -------------------------------------------------------------------------------- /scripts/validate_poc.py: -------------------------------------------------------------------------------- ```python #!/usr/bin/env python3 """ MCP Codebase Insight - PoC Validation Script This script orchestrates the validation of all PoC components using Firecrawl MCP. 
""" import asyncio import argparse import logging from pathlib import Path from typing import Dict, Any from mcp_firecrawl import ( verify_environment, setup_repository, configure_environment, initialize_services, verify_transport_config, verify_sse_endpoints, verify_stdio_transport, test_transport_switch, validate_transport_features, test_cross_transport ) # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) class PoCValidator: """Orchestrates PoC validation steps.""" def __init__(self, config_path: str = ".env"): self.config_path = config_path self.results = {} self.config = self._load_config() def _load_config(self) -> dict: """Load config from .env or other config file.""" from dotenv import dotenv_values config = dotenv_values(self.config_path) return config async def setup_environment(self) -> bool: """Validate and setup the environment.""" logger.info("Validating environment...") # Check system requirements env_check = verify_environment({ "python_version": "3.11", "docker_version": "20.10.0", "ram_gb": 4, "cpu_cores": 2, "disk_space_gb": 20 }) if not env_check.success: logger.error("Environment validation failed:") for issue in env_check.issues: logger.error(f"- {issue}") return False logger.info("Environment validation successful") return True async def setup_services(self) -> bool: """Initialize and verify required services.""" logger.info("Initializing services...") try: services = await initialize_services({ "qdrant": { "docker_compose": True, "wait_for_ready": True }, "vector_store": { "init_collection": True, "verify_connection": True } }) logger.info("Services initialized successfully") return True except Exception as e: logger.error(f"Service initialization failed: {e}") return False async def validate_transports(self) -> bool: """Validate both SSE and stdio transports.""" logger.info("Validating transport protocols...") # Verify SSE endpoints sse_result = await verify_sse_endpoints( "http://localhost:8000", {"Authorization": f"Bearer {self.config.get('API_KEY')}"} ) # Verify stdio transport stdio_result = await verify_stdio_transport( "mcp-codebase-insight", {"auth_token": self.config.get('API_KEY')} ) # Test transport switching switch_result = await test_transport_switch( server_url="http://localhost:8000", stdio_binary="mcp-codebase-insight", config={ "auth_token": self.config.get('API_KEY'), "verify_endpoints": True, "check_data_consistency": True } ) # Validate transport features sse_features = await validate_transport_features( "sse", { "server_url": "http://localhost:8000", "auth_token": self.config.get('API_KEY'), "features": [ "event_streaming", "bidirectional_communication", "error_handling", "reconnection" ] } ) stdio_features = await validate_transport_features( "stdio", { "binary": "mcp-codebase-insight", "auth_token": self.config.get('API_KEY'), "features": [ "synchronous_communication", "process_isolation", "error_propagation", "signal_handling" ] } ) # Test cross-transport compatibility cross_transport = await test_cross_transport({ "sse_config": { "url": "http://localhost:8000", "auth_token": self.config.get('API_KEY') }, "stdio_config": { "binary": "mcp-codebase-insight", "auth_token": self.config.get('API_KEY') }, "test_operations": [ "vector_search", "pattern_store", "task_management", "adr_queries" ] }) all_passed = all([ sse_result.success, stdio_result.success, switch_result.success, sse_features.success, stdio_features.success, cross_transport.success ]) if all_passed: logger.info("Transport validation 
successful") else: logger.error("Transport validation failed") return all_passed async def run_validation(self) -> Dict[str, Any]: """Run all validation steps.""" validation_steps = [ ("environment", self.setup_environment()), ("services", self.setup_services()), ("transports", self.validate_transports()), # Add more validation steps here ] results = {} for step_name, coro in validation_steps: try: results[step_name] = await coro if not results[step_name]: logger.error(f"Validation step '{step_name}' failed") break except Exception as e: logger.error(f"Error in validation step '{step_name}': {e}") results[step_name] = False break return results def main(): """Main entry point for PoC validation.""" parser = argparse.ArgumentParser(description="Validate MCP Codebase Insight PoC") parser.add_argument("--config", default=".env", help="Path to configuration file") args = parser.parse_args() validator = PoCValidator(args.config) results = asyncio.run(validator.run_validation()) # Print summary print("\nValidation Results:") print("-" * 50) for step, success in results.items(): status = "✅ Passed" if success else "❌ Failed" print(f"{step:20} {status}") print("-" * 50) # Exit with appropriate status exit(0 if all(results.values()) else 1) if __name__ == "__main__": main() ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/core/adr.py: -------------------------------------------------------------------------------- ```python """ADR (Architecture Decision Record) management module.""" import json from datetime import datetime from enum import Enum from pathlib import Path from typing import Dict, List, Optional from uuid import UUID, uuid4 from slugify import slugify import os from pydantic import BaseModel class ADRError(Exception): """Base class for ADR-related errors.""" pass class ADRStatus(str, Enum): """ADR status enumeration.""" PROPOSED = "proposed" ACCEPTED = "accepted" REJECTED = "rejected" SUPERSEDED = "superseded" DEPRECATED = "deprecated" class ADROption(BaseModel): """ADR option model.""" title: str pros: List[str] cons: List[str] description: Optional[str] = None class ADRContext(BaseModel): """ADR context model.""" problem: str constraints: List[str] assumptions: Optional[List[str]] = None background: Optional[str] = None class ADR(BaseModel): """ADR model.""" id: UUID title: str status: ADRStatus context: ADRContext options: List[ADROption] decision: str consequences: Optional[Dict[str, List[str]]] = None metadata: Optional[Dict[str, str]] = None created_at: datetime updated_at: datetime superseded_by: Optional[UUID] = None class ADRManager: """ADR manager for handling architecture decision records.""" def __init__(self, config): """Initialize ADR manager.""" self.config = config self.adr_dir = config.adr_dir self.adr_dir.mkdir(parents=True, exist_ok=True) self.next_adr_number = 1 # Default to 1, will be updated in initialize() self.initialized = False self.adrs: Dict[UUID, ADR] = {} async def initialize(self): """Initialize the ADR manager. This method ensures the ADR directory exists and loads any existing ADRs. 
""" if self.initialized: return try: # Ensure ADR directory exists self.adr_dir.mkdir(parents=True, exist_ok=True) # Calculate next ADR number from existing files max_number = 0 for adr_file in self.adr_dir.glob("*.md"): try: # Extract number from filename (e.g., "0001-title.md") number = int(adr_file.name.split("-")[0]) max_number = max(max_number, number) except (ValueError, IndexError): continue self.next_adr_number = max_number + 1 # Load any existing ADRs for adr_file in self.adr_dir.glob("*.json"): if adr_file.is_file(): try: with open(adr_file, "r") as f: adr_data = json.load(f) # Convert the loaded data into an ADR object adr = ADR(**adr_data) self.adrs[adr.id] = adr except (json.JSONDecodeError, ValueError) as e: # Log error but continue processing other files print(f"Error loading ADR {adr_file}: {e}") self.initialized = True except Exception as e: print(f"Error initializing ADR manager: {e}") await self.cleanup() raise RuntimeError(f"Failed to initialize ADR manager: {str(e)}") async def cleanup(self): """Clean up resources used by the ADR manager. This method ensures all ADRs are saved and resources are released. """ if not self.initialized: return try: # Save any modified ADRs for adr in self.adrs.values(): try: await self._save_adr(adr) except Exception as e: print(f"Error saving ADR {adr.id}: {e}") # Clear in-memory ADRs self.adrs.clear() except Exception as e: print(f"Error cleaning up ADR manager: {e}") finally: self.initialized = False async def create_adr( self, title: str, context: dict, options: List[dict], decision: str, consequences: Optional[Dict[str, List[str]]] = None ) -> ADR: """Create a new ADR.""" adr_id = uuid4() now = datetime.utcnow() # Convert context dict to ADRContext adr_context = ADRContext( problem=context["problem"], constraints=context["constraints"], assumptions=context.get("assumptions"), background=context.get("background") ) # Convert options list to ADROption objects adr_options = [ ADROption( title=opt["title"], pros=opt["pros"], cons=opt["cons"], description=opt.get("description") ) for opt in options ] adr = ADR( id=adr_id, title=title, status=ADRStatus.PROPOSED, context=adr_context, options=adr_options, decision=decision, consequences=consequences, created_at=now, updated_at=now ) await self._save_adr(adr) return adr async def get_adr(self, adr_id: UUID) -> Optional[ADR]: """Get ADR by ID.""" adr_path = self.adr_dir / f"{adr_id}.json" if not adr_path.exists(): return None with open(adr_path) as f: data = json.load(f) return ADR(**data) async def update_adr( self, adr_id: UUID, status: Optional[ADRStatus] = None, superseded_by: Optional[UUID] = None, metadata: Optional[Dict[str, str]] = None ) -> Optional[ADR]: """Update ADR status and metadata.""" adr = await self.get_adr(adr_id) if not adr: return None if status: adr.status = status if superseded_by: adr.superseded_by = superseded_by if metadata: adr.metadata = {**(adr.metadata or {}), **metadata} adr.updated_at = datetime.utcnow() await self._save_adr(adr) return adr async def list_adrs( self, status: Optional[ADRStatus] = None ) -> List[ADR]: """List all ADRs, optionally filtered by status.""" adrs = [] for path in self.adr_dir.glob("*.json"): with open(path) as f: data = json.load(f) adr = ADR(**data) if not status or adr.status == status: adrs.append(adr) return sorted(adrs, key=lambda x: x.created_at) async def _save_adr(self, adr: ADR) -> None: """Save ADR to file.""" adr_path = self.adr_dir / f"{adr.id}.json" with open(adr_path, "w") as f: json.dump(adr.model_dump(), f, indent=2, 
default=str) ``` -------------------------------------------------------------------------------- /tests/components/test_core_components.py: -------------------------------------------------------------------------------- ```python """Test core server components.""" import sys import os # Ensure the src directory is in the Python path sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))) import pytest from datetime import datetime from uuid import uuid4 from src.mcp_codebase_insight.core.config import ServerConfig from src.mcp_codebase_insight.core.adr import ADRManager, ADRStatus from src.mcp_codebase_insight.core.debug import DebugSystem, IssueType, IssueStatus from src.mcp_codebase_insight.core.documentation import DocumentationManager, DocumentationType from src.mcp_codebase_insight.core.knowledge import KnowledgeBase, Pattern, PatternType, PatternConfidence from src.mcp_codebase_insight.core.tasks import TaskManager, TaskType, TaskStatus, TaskPriority from src.mcp_codebase_insight.core.metrics import MetricsManager, MetricType from src.mcp_codebase_insight.core.health import HealthManager, HealthStatus from src.mcp_codebase_insight.core.cache import CacheManager from src.mcp_codebase_insight.core.vector_store import VectorStore from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding @pytest.mark.asyncio async def test_adr_manager(test_config: ServerConfig, test_adr: dict): """Test ADR manager functions.""" manager = ADRManager(test_config) # Test creation adr = await manager.create_adr( title=test_adr["title"], context=test_adr["context"], options=test_adr["options"], decision=test_adr["decision"] ) assert adr.title == test_adr["title"] assert adr.status == ADRStatus.PROPOSED # Test retrieval retrieved = await manager.get_adr(adr.id) assert retrieved is not None assert retrieved.id == adr.id # Test update updated = await manager.update_adr( adr.id, status=ADRStatus.ACCEPTED ) assert updated.status == ADRStatus.ACCEPTED @pytest.mark.asyncio async def test_knowledge_base(test_config: ServerConfig, qdrant_client): """Test knowledge base functions.""" # Initialize vector store with embedder embedder = SentenceTransformerEmbedding() vector_store = VectorStore( url=test_config.qdrant_url, embedder=embedder ) kb = KnowledgeBase(test_config, vector_store=vector_store) # Test pattern creation now = datetime.utcnow() pattern = Pattern( id=uuid4(), name="Test Pattern", description="A test pattern", type=PatternType.CODE, content="def test(): pass", confidence=PatternConfidence.HIGH, created_at=now, updated_at=now ) # Test pattern storage stored_pattern = await kb.add_pattern( name=pattern.name, type=pattern.type, description=pattern.description, content=pattern.content, confidence=pattern.confidence ) # Verify stored pattern assert stored_pattern.name == pattern.name assert stored_pattern.type == pattern.type assert stored_pattern.description == pattern.description assert stored_pattern.content == pattern.content assert stored_pattern.confidence == pattern.confidence @pytest.mark.asyncio async def test_task_manager(test_config: ServerConfig, test_code: str): """Test task manager functions.""" manager = TaskManager( config=test_config, adr_manager=ADRManager(test_config), debug_system=DebugSystem(test_config), doc_manager=DocumentationManager(test_config), knowledge_base=KnowledgeBase(test_config, None), prompt_manager=None ) # Test task creation task = await manager.create_task( type=TaskType.CODE_ANALYSIS, title="Test Task", 
description="Analyze test code", priority=TaskPriority.MEDIUM, context={"code": test_code} ) assert task.title == "Test Task" assert task.status == TaskStatus.PENDING # Test task retrieval retrieved = await manager.get_task(task.id) assert retrieved is not None assert retrieved.id == task.id @pytest.mark.asyncio async def test_metrics_manager(test_config: ServerConfig): """Test metrics manager functions.""" # Override the metrics_enabled setting for this test test_config.metrics_enabled = True manager = MetricsManager(test_config) await manager.initialize() try: # Test metric recording await manager.record_metric( "test_metric", MetricType.COUNTER, 1.0, {"label": "test"} ) # Test metric retrieval metrics = await manager.get_metrics(["test_metric"]) assert len(metrics) == 1 assert "test_metric" in metrics finally: # Cleanup await manager.cleanup() @pytest.mark.asyncio async def test_health_manager(test_config: ServerConfig): """Test health manager functions.""" manager = HealthManager(test_config) # Test health check health = await manager.check_health() assert health.status is not None assert isinstance(health.components, dict) assert isinstance(health.timestamp, datetime) @pytest.mark.asyncio async def test_cache_manager(test_config: ServerConfig): """Test cache manager functions.""" manager = CacheManager(test_config) await manager.initialize() # Initialize the manager try: # Test memory cache manager.put_in_memory("test_key", "test_value") result = manager.get_from_memory("test_key") assert result == "test_value" # Test persistent cache manager.put_in_disk("test_key", "test_value") result = manager.get_from_disk("test_key") assert result == "test_value" # Test combined operations manager.put("combined_key", "combined_value") result = manager.get("combined_key") assert result == "combined_value" # Test removal manager.remove("test_key") assert manager.get("test_key") is None finally: await manager.cleanup() # Clean up after tests @pytest.mark.asyncio async def test_documentation_manager(test_config: ServerConfig): """Test documentation manager functions.""" manager = DocumentationManager(test_config) # Test document creation doc = await manager.add_document( title="Test Doc", content="Test content", type=DocumentationType.REFERENCE ) assert doc.title == "Test Doc" # Test document retrieval retrieved = await manager.get_document(doc.id) assert retrieved is not None assert retrieved.id == doc.id @pytest.mark.asyncio async def test_debug_system(test_config: ServerConfig): """Test debug system functions.""" system = DebugSystem(test_config) # Test issue creation issue = await system.create_issue( title="Test issue", type=IssueType.BUG, description={"message": "Test description", "steps": ["Step 1", "Step 2"]} ) assert issue.title == "Test issue" assert issue.type == IssueType.BUG assert issue.status == IssueStatus.OPEN assert "message" in issue.description assert "steps" in issue.description ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/core/debug.py: -------------------------------------------------------------------------------- ```python """Debug system for issue tracking and analysis.""" import json from datetime import datetime from enum import Enum from pathlib import Path from typing import Dict, List, Optional from uuid import UUID, uuid4 from pydantic import BaseModel class IssueType(str, Enum): """Issue type enumeration.""" BUG = "bug" PERFORMANCE = "performance" SECURITY = "security" DESIGN = "design" DOCUMENTATION = 
"documentation" OTHER = "other" class IssueStatus(str, Enum): """Issue status enumeration.""" OPEN = "open" IN_PROGRESS = "in_progress" RESOLVED = "resolved" CLOSED = "closed" WONT_FIX = "wont_fix" class Issue(BaseModel): """Issue model.""" id: UUID title: str type: IssueType status: IssueStatus description: Dict steps: Optional[List[Dict]] = None created_at: datetime updated_at: datetime resolved_at: Optional[datetime] = None metadata: Optional[Dict[str, str]] = None class DebugSystem: """System for debugging and issue management.""" def __init__(self, config): """Initialize debug system.""" self.config = config self.debug_dir = Path(config.docs_cache_dir) / "debug" self.debug_dir.mkdir(parents=True, exist_ok=True) self.issues: Dict[UUID, Issue] = {} self.initialized = False async def initialize(self) -> None: """Initialize debug system.""" if self.initialized: return try: # Load existing issues if self.debug_dir.exists(): for issue_file in self.debug_dir.glob("*.json"): try: with open(issue_file) as f: data = json.load(f) issue = Issue(**data) self.issues[issue.id] = issue except Exception as e: # Log error but continue loading other issues print(f"Error loading issue {issue_file}: {e}") self.initialized = True except Exception as e: print(f"Error initializing debug system: {e}") await self.cleanup() raise RuntimeError(f"Failed to initialize debug system: {str(e)}") async def cleanup(self) -> None: """Clean up debug system resources.""" if not self.initialized: return try: # Save any pending issues for issue in self.issues.values(): try: await self._save_issue(issue) except Exception as e: print(f"Error saving issue {issue.id}: {e}") # Clear in-memory issues self.issues.clear() except Exception as e: print(f"Error cleaning up debug system: {e}") finally: self.initialized = False async def create_issue( self, title: str, type: str, description: Dict ) -> Issue: """Create a new issue.""" now = datetime.utcnow() issue = Issue( id=uuid4(), title=title, type=IssueType(type), status=IssueStatus.OPEN, description=description, created_at=now, updated_at=now ) await self._save_issue(issue) return issue async def get_issue(self, issue_id: UUID) -> Optional[Issue]: """Get issue by ID.""" issue_path = self.debug_dir / f"{issue_id}.json" if not issue_path.exists(): return None with open(issue_path) as f: data = json.load(f) return Issue(**data) async def update_issue( self, issue_id: UUID, status: Optional[IssueStatus] = None, steps: Optional[List[Dict]] = None, metadata: Optional[Dict[str, str]] = None ) -> Optional[Issue]: """Update issue status and details.""" issue = await self.get_issue(issue_id) if not issue: return None if status: issue.status = status if status == IssueStatus.RESOLVED: issue.resolved_at = datetime.utcnow() if steps: issue.steps = steps if metadata: issue.metadata = {**(issue.metadata or {}), **metadata} issue.updated_at = datetime.utcnow() await self._save_issue(issue) return issue async def list_issues( self, type: Optional[IssueType] = None, status: Optional[IssueStatus] = None ) -> List[Issue]: """List all issues, optionally filtered by type and status.""" issues = [] for path in self.debug_dir.glob("*.json"): with open(path) as f: data = json.load(f) issue = Issue(**data) if (not type or issue.type == type) and (not status or issue.status == status): issues.append(issue) return sorted(issues, key=lambda x: x.created_at) async def analyze_issue(self, issue_id: UUID) -> List[Dict]: """Analyze issue and generate debug steps.""" issue = await self.get_issue(issue_id) if not 
issue: return [] # Generate analysis steps based on issue type steps = [] if issue.type == IssueType.BUG: steps.extend([ {"type": "check", "name": "Reproduce Issue", "description": "Steps to reproduce the issue"}, {"type": "check", "name": "Error Logs", "description": "Check relevant error logs"}, {"type": "check", "name": "Stack Trace", "description": "Analyze stack trace if available"}, {"type": "check", "name": "Code Review", "description": "Review related code sections"} ]) elif issue.type == IssueType.PERFORMANCE: steps.extend([ {"type": "check", "name": "Profiling", "description": "Run performance profiling"}, {"type": "check", "name": "Resource Usage", "description": "Monitor CPU, memory, I/O"}, {"type": "check", "name": "Query Analysis", "description": "Review database queries"}, {"type": "check", "name": "Bottlenecks", "description": "Identify performance bottlenecks"} ]) elif issue.type == IssueType.SECURITY: steps.extend([ {"type": "check", "name": "Vulnerability Scan", "description": "Run security scanners"}, {"type": "check", "name": "Access Control", "description": "Review permissions"}, {"type": "check", "name": "Input Validation", "description": "Check input handling"}, {"type": "check", "name": "Dependencies", "description": "Audit dependencies"} ]) # Update issue with analysis steps await self.update_issue(issue_id, steps=steps) return steps async def _save_issue(self, issue: Issue) -> None: """Save issue to file.""" issue_path = self.debug_dir / f"{issue.id}.json" with open(issue_path, "w") as f: json.dump(issue.model_dump(), f, indent=2, default=str) ``` -------------------------------------------------------------------------------- /docs/features/documentation.md: -------------------------------------------------------------------------------- ```markdown # Documentation Management MCP Codebase Insight provides powerful tools for managing technical documentation, ensuring it stays up-to-date with your codebase and is easily accessible. ## Overview The documentation management feature: - Auto-generates documentation from code - Maintains documentation-code links - Provides semantic search capabilities - Supports multiple documentation formats - Enables documentation validation - Tracks documentation coverage ## Features ### 1. Documentation Generation Automatically generate documentation from code: ```python # Example: Generate documentation for a module response = await client.post( "http://localhost:3000/api/docs/generate", json={ "source": "src/auth/", "output_format": "markdown", "include_private": False, "template": "api-docs" } ) docs = response.json() print(f"Generated {len(docs['files'])} documentation files") ``` ### 2. Documentation Search Search through documentation using semantic understanding: ```python # Example: Search documentation response = await client.get( "http://localhost:3000/api/docs/search", params={ "query": "how to implement authentication", "doc_types": ["guide", "api", "tutorial"], "limit": 5 } ) results = response.json() for doc in results["matches"]: print(f"- {doc['title']} (Score: {doc['score']})") ``` ### 3. Documentation Validation Validate documentation completeness and accuracy: ```python # Example: Validate documentation response = await client.post( "http://localhost:3000/api/docs/validate", json={ "paths": ["docs/api/", "docs/guides/"], "rules": ["broken-links", "code-coverage", "freshness"] } ) validation = response.json() print(f"Found {len(validation['issues'])} issues") ``` ### 4. 
Documentation Crawling Crawl and index external documentation: ```python # Example: Crawl documentation response = await client.post( "http://localhost:3000/api/docs/crawl", json={ "urls": [ "https://api.example.com/docs", "https://wiki.example.com/technical-docs" ], "depth": 2, "include_patterns": ["*.md", "*.html"], "exclude_patterns": ["*draft*", "*private*"] } ) ``` ## Usage ### Basic Documentation Workflow 1. **Generate Documentation** ```bash # Using CLI mcp-codebase-insight docs generate \ --source src/ \ --output docs/api \ --template api-reference ``` 2. **Validate Documentation** ```bash # Check documentation quality mcp-codebase-insight docs validate \ --path docs/ \ --rules all ``` 3. **Update Documentation** ```bash # Update existing documentation mcp-codebase-insight docs update \ --path docs/api \ --sync-with-code ``` 4. **Search Documentation** ```bash # Search in documentation mcp-codebase-insight docs search \ "authentication implementation" \ --type guide \ --limit 5 ``` ### Documentation Templates Create custom documentation templates: ```yaml # templates/docs/api-reference.yaml name: "API Reference Template" sections: - title: "Overview" required: true content: - "Brief description" - "Key features" - "Requirements" - title: "Installation" required: true content: - "Step-by-step instructions" - "Configuration options" - title: "API Methods" required: true for_each: "method" content: - "Method signature" - "Parameters" - "Return values" - "Examples" ``` ## Configuration ### Documentation Settings ```yaml documentation: # Generation settings generation: templates_dir: "./templates/docs" output_dir: "./docs" default_format: "markdown" include_private: false # Validation settings validation: rules: broken_links: true code_coverage: true freshness: true max_age_days: 90 # Search settings search: index_update_interval: "1h" min_score: 0.5 max_results: 10 # Crawling settings crawling: max_depth: 3 timeout: 30 concurrent_requests: 5 respect_robots_txt: true ``` ### Storage Settings ```yaml storage: # File storage files: path: "./docs" backup_path: "./docs/backup" # Vector storage vectors: collection: "documentation" dimension: 384 # Cache settings cache: enabled: true ttl: 3600 max_size: "1GB" ``` ## Best Practices 1. **Documentation Structure** - Use consistent formatting - Follow a clear hierarchy - Include examples - Keep sections focused 2. **Maintenance** - Update regularly - Remove outdated content - Track changes with code - Validate links 3. **Organization** - Use clear categories - Maintain an index - Cross-reference related docs - Version appropriately 4. 
**Quality** - Include code examples - Add diagrams where helpful - Proofread content - Test code samples ## API Reference ### Documentation Endpoints | Endpoint | Method | Description | |----------|--------|-------------| | `/api/docs/generate` | POST | Generate documentation | | `/api/docs/validate` | POST | Validate documentation | | `/api/docs/search` | GET | Search documentation | | `/api/docs/crawl` | POST | Crawl external docs | | `/api/docs/update` | POST | Update documentation | | `/api/docs/stats` | GET | Get documentation stats | ### Response Format ```json { "documentation": { "id": "uuid", "title": "string", "content": "string", "format": "string", "metadata": { "author": "string", "created_at": "datetime", "updated_at": "datetime", "version": "string" }, "related_code": [{ "file": "string", "lines": [int, int], "type": "string" }], "validation": { "status": "string", "issues": [{ "type": "string", "severity": "string", "message": "string" }] } } } ``` ## Integration ### IDE Integration ```python # VS Code Extension Example from mcp.client import Client client = Client.connect() # Document current file async def document_current_file(file_path: str): response = await client.post( "/api/docs/generate", json={ "source": file_path, "template": "code-reference" } ) return response.json() ``` ### CI/CD Integration ```yaml # GitHub Actions Example name: Documentation Check on: [push, pull_request] jobs: validate-docs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Validate Documentation run: | curl -X POST http://localhost:3000/api/docs/validate \ -H "Content-Type: application/json" \ -d '{ "paths": ["docs/"], "rules": ["all"] }' ``` ## Troubleshooting ### Common Issues 1. **Generation Fails** ```bash # Check template validity mcp-codebase-insight docs validate-template \ --template api-reference ``` 2. **Search Not Working** ```bash # Rebuild search index mcp-codebase-insight docs rebuild-index ``` 3. **Validation Errors** ```bash # Get detailed validation report mcp-codebase-insight docs validate \ --path docs/ \ --verbose ``` ## Next Steps - [Documentation Templates](docs/templates.md) - [Style Guide](docs/style-guide.md) - [Advanced Search](docs/search.md) - [Automation Guide](docs/automation.md) ``` -------------------------------------------------------------------------------- /docs/features/adr-management.md: -------------------------------------------------------------------------------- ```markdown # ADR Management Architecture Decision Records (ADRs) are documents that capture important architectural decisions made along with their context and consequences. MCP Codebase Insight provides comprehensive tools for managing ADRs. ## Overview The ADR management feature: - Creates and maintains ADR documents - Tracks decision history and status - Links ADRs to code implementations - Provides templates and workflows - Enables searching and analysis of past decisions ## Features ### 1. 
ADR Creation Create new ADRs with structured templates: ```python # Example: Creating a new ADR response = await client.post( "http://localhost:3000/api/adrs", json={ "title": "Use GraphQL for API", "status": "PROPOSED", "context": { "problem": "Need efficient data fetching", "constraints": [ "Multiple client applications", "Complex data relationships" ] }, "options": [ { "title": "GraphQL", "pros": [ "Flexible data fetching", "Strong typing", "Built-in documentation" ], "cons": [ "Learning curve", "Complex server setup" ] }, { "title": "REST", "pros": [ "Simple and familiar", "Mature ecosystem" ], "cons": [ "Over/under fetching", "Multiple endpoints" ] } ], "decision": "We will use GraphQL", "consequences": [ "Need to train team on GraphQL", "Better client performance", "Simplified API evolution" ] } ) adr = response.json() print(f"Created ADR: {adr['id']}") ``` ### 2. ADR Lifecycle Management Track and update ADR status: ```python # Update ADR status response = await client.patch( f"http://localhost:3000/api/adrs/{adr_id}", json={ "status": "ACCEPTED", "metadata": { "approved_by": "Architecture Board", "approved_date": "2024-03-26" } } ) ``` ### 3. ADR Search and Analysis Search through existing ADRs: ```python # Search ADRs response = await client.get( "http://localhost:3000/api/adrs/search", params={ "query": "authentication", "status": "ACCEPTED", "date_from": "2023-01-01" } ) results = response.json() for adr in results["adrs"]: print(f"- {adr['title']} ({adr['status']})") ``` ### 4. Code Implementation Tracking Link ADRs to code implementations: ```python # Link ADR to code response = await client.post( f"http://localhost:3000/api/adrs/{adr_id}/implementations", json={ "files": ["src/graphql/schema.ts", "src/graphql/resolvers/"], "pull_request": "https://github.com/org/repo/pull/123", "status": "IN_PROGRESS" } ) ``` ## Usage ### Basic ADR Workflow 1. **Create ADR** ```bash # Using CLI mcp-codebase-insight adr new \ --title "Use GraphQL for API" \ --template graphql-decision ``` 2. **Review and Collaborate** ```bash # Get ADR details curl http://localhost:3000/api/adrs/{adr_id} # Add comments curl -X POST http://localhost:3000/api/adrs/{adr_id}/comments \ -d '{"text": "Consider Apollo Federation for microservices"}' ``` 3. **Update Status** ```bash # Update status curl -X PATCH http://localhost:3000/api/adrs/{adr_id} \ -d '{"status": "ACCEPTED"}' ``` 4. **Track Implementation** ```bash # Add implementation details curl -X POST http://localhost:3000/api/adrs/{adr_id}/implementations \ -d '{ "files": ["src/graphql/"], "status": "COMPLETED", "metrics": { "coverage": 95, "performance_impact": "+12%" } }' ``` ### ADR Templates Create custom ADR templates: ```yaml # templates/adr/microservice-decision.yaml name: "Microservice Decision Template" sections: - title: "Service Boundaries" required: true prompts: - "What domain does this service handle?" - "What are the integration points?" - title: "Data Ownership" required: true prompts: - "What data does this service own?" - "How is data shared with other services?" 
- title: "Technical Stack" required: true subsections: - "Language & Framework" - "Database" - "Message Queue" - "Deployment Platform" ``` ## Configuration ### ADR Settings ```yaml adr: # Storage settings storage: path: "./docs/adrs" format: "markdown" naming_convention: "YYYY-MM-DD-title" # Workflow settings workflow: require_approval: true approvers: ["arch-board"] auto_number: true # Templates templates: path: "./templates/adr" default: "basic-decision" # Implementation tracking implementation: require_evidence: true track_metrics: true ``` ### Integration Settings ```yaml integrations: github: enabled: true repo: "org/repo" pr_template: "adr-implementation" labels: ["architecture", "adr"] jira: enabled: true project: "ARCH" issue_type: "Architecture Decision" ``` ## Best Practices 1. **ADR Creation** - Use clear, descriptive titles - Include sufficient context - Document all considered options - Be explicit about consequences 2. **Review Process** - Involve stakeholders early - Document discussions - Consider technical and business impact - Set clear acceptance criteria 3. **Implementation** - Link to concrete evidence - Track metrics and impact - Update status regularly - Document deviations 4. **Maintenance** - Review periodically - Update affected ADRs - Archive superseded decisions - Maintain traceability ## API Reference ### ADR Endpoints | Endpoint | Method | Description | |----------|--------|-------------| | `/api/adrs` | GET | List all ADRs | | `/api/adrs` | POST | Create new ADR | | `/api/adrs/{id}` | GET | Get ADR details | | `/api/adrs/{id}` | PATCH | Update ADR | | `/api/adrs/search` | GET | Search ADRs | | `/api/adrs/{id}/implementations` | POST | Add implementation | | `/api/adrs/{id}/comments` | POST | Add comment | ### Response Format ```json { "id": "uuid", "title": "string", "status": "string", "context": { "problem": "string", "constraints": ["string"] }, "options": [{ "title": "string", "pros": ["string"], "cons": ["string"] }], "decision": "string", "consequences": ["string"], "metadata": { "created_at": "datetime", "updated_at": "datetime", "created_by": "string", "approved_by": "string" }, "implementations": [{ "files": ["string"], "status": "string", "metrics": {} }] } ``` ## Troubleshooting ### Common Issues 1. **Template Not Found** ```bash # Check template directory ls -l templates/adr/ # Verify template path in config cat config.yaml | grep template ``` 2. **Permission Issues** ```bash # Fix ADR directory permissions chmod -R 755 docs/adrs/ ``` 3. 
**Integration Errors** ```bash # Check integration status curl http://localhost:3000/api/status/integrations ``` ## Next Steps - [ADR Templates Guide](adr/templates.md) - [Integration Setup](../integration/index.md) - [Workflow Customization](adr/workflow.md) - [Metrics and Reporting](adr/metrics.md) ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/core/errors.py: -------------------------------------------------------------------------------- ```python """Error handling module.""" from enum import Enum from typing import Any, Dict, Optional class ErrorCode(str, Enum): """Error code enumeration.""" # General errors INTERNAL_ERROR = "internal_error" INVALID_REQUEST = "invalid_request" NOT_FOUND = "not_found" ALREADY_EXISTS = "already_exists" VALIDATION_ERROR = "validation_error" # Component-specific errors VECTOR_STORE_ERROR = "vector_store_error" EMBEDDING_ERROR = "embedding_error" CACHE_ERROR = "cache_error" TASK_ERROR = "task_error" ADR_ERROR = "adr_error" DOCUMENTATION_ERROR = "documentation_error" DEBUG_ERROR = "debug_error" PROMPT_ERROR = "prompt_error" # Resource errors RESOURCE_NOT_FOUND = "resource_not_found" RESOURCE_UNAVAILABLE = "resource_unavailable" RESOURCE_EXHAUSTED = "resource_exhausted" # Authentication/Authorization errors UNAUTHORIZED = "unauthorized" FORBIDDEN = "forbidden" TOKEN_EXPIRED = "token_expired" # Rate limiting errors RATE_LIMITED = "rate_limited" QUOTA_EXCEEDED = "quota_exceeded" # Configuration errors CONFIG_ERROR = "config_error" MISSING_CONFIG = "missing_config" INVALID_CONFIG = "invalid_config" class BaseError(Exception): """Base error class.""" def __init__( self, code: ErrorCode, message: str, details: Optional[Dict[str, Any]] = None ): """Initialize error.""" self.code = code self.message = message self.details = details or {} super().__init__(message) def to_dict(self) -> Dict[str, Any]: """Convert error to dictionary.""" return { "code": self.code, "message": self.message, "details": self.details } class InternalError(BaseError): """Internal server error.""" def __init__( self, message: str = "Internal server error", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.INTERNAL_ERROR, message, details) class InvalidRequestError(BaseError): """Invalid request error.""" def __init__( self, message: str = "Invalid request", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.INVALID_REQUEST, message, details) class NotFoundError(BaseError): """Not found error.""" def __init__( self, message: str = "Resource not found", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.NOT_FOUND, message, details) class AlreadyExistsError(BaseError): """Already exists error.""" def __init__( self, message: str = "Resource already exists", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.ALREADY_EXISTS, message, details) class ValidationError(BaseError): """Validation error.""" def __init__( self, message: str = "Validation error", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.VALIDATION_ERROR, message, details) class VectorStoreError(BaseError): """Vector store error.""" def __init__( self, message: str = "Vector store error", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.VECTOR_STORE_ERROR, message, details) class EmbeddingError(BaseError): 
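# NOTE: EmbeddingError and the wrapper classes that follow all share the same
# shape: each pins a specific ErrorCode while allowing the message and details
# to be overridden at raise time. handle_error() at the bottom of this module
# converts any of them (or a plain Exception) into the code/message/details
# dict returned in API responses.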
"""Embedding error.""" def __init__( self, message: str = "Embedding error", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.EMBEDDING_ERROR, message, details) class CacheError(BaseError): """Cache error.""" def __init__( self, message: str = "Cache error", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.CACHE_ERROR, message, details) class TaskError(BaseError): """Task error.""" def __init__( self, message: str = "Task error", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.TASK_ERROR, message, details) class ADRError(BaseError): """ADR error.""" def __init__( self, message: str = "ADR error", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.ADR_ERROR, message, details) class DocumentationError(BaseError): """Documentation error.""" def __init__( self, message: str = "Documentation error", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.DOCUMENTATION_ERROR, message, details) class DebugError(BaseError): """Debug error.""" def __init__( self, message: str = "Debug error", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.DEBUG_ERROR, message, details) class PromptError(BaseError): """Prompt error.""" def __init__( self, message: str = "Prompt error", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.PROMPT_ERROR, message, details) class ConfigError(BaseError): """Configuration error.""" def __init__( self, message: str = "Configuration error", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.CONFIG_ERROR, message, details) class UnauthorizedError(BaseError): """Unauthorized error.""" def __init__( self, message: str = "Unauthorized", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.UNAUTHORIZED, message, details) class ForbiddenError(BaseError): """Forbidden error.""" def __init__( self, message: str = "Forbidden", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.FORBIDDEN, message, details) class RateLimitedError(BaseError): """Rate limited error.""" def __init__( self, message: str = "Rate limited", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.RATE_LIMITED, message, details) class ResourceNotFoundError(BaseError): """Resource not found error.""" def __init__( self, message: str = "Resource not found", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.RESOURCE_NOT_FOUND, message, details) class ProcessingError(BaseError): """Processing error.""" def __init__( self, message: str = "Processing error", details: Optional[Dict[str, Any]] = None ): """Initialize error.""" super().__init__(ErrorCode.INTERNAL_ERROR, message, details) def handle_error(error: Exception) -> Dict[str, Any]: """Convert error to API response format.""" if isinstance(error, BaseError): return error.to_dict() return { "code": ErrorCode.INTERNAL_ERROR, "message": str(error), "details": {} } ``` -------------------------------------------------------------------------------- /tests/components/test_stdio_components.py: -------------------------------------------------------------------------------- ```python import asyncio import json import pytest from unittest.mock import 
MagicMock, AsyncMock, patch from io import StringIO class MockStdinReader: def __init__(self, input_data): self.input_stream = StringIO(input_data) async def readline(self): return self.input_stream.readline() class MockStdoutWriter: def __init__(self): self.output = StringIO() async def write(self, data): self.output.write(data) async def drain(self): pass def get_output(self): return self.output.getvalue() @pytest.fixture async def mock_stdio(): input_data = '{"type": "register", "tool_id": "test_tool"}\n' reader = MockStdinReader(input_data) writer = MockStdoutWriter() return reader, writer @pytest.mark.asyncio async def test_stdio_registration(mock_stdio): """Test tool registration via stdio.""" reader, writer = await mock_stdio # Process registration message line = await reader.readline() message = json.loads(line) # Verify registration message format assert message["type"] == "register" assert message["tool_id"] == "test_tool" # Send registration acknowledgment response = { "type": "registration_success", "tool_id": message["tool_id"] } await writer.write(json.dumps(response) + "\n") # Verify response was written assert "registration_success" in writer.get_output() assert message["tool_id"] in writer.get_output() @pytest.mark.asyncio async def test_stdio_message_streaming(): """Test streaming messages via stdio.""" # Set up mock streams with multiple messages input_messages = [ {"type": "request", "id": "1", "method": "test", "params": {}}, {"type": "request", "id": "2", "method": "test", "params": {}} ] input_data = "\n".join(json.dumps(msg) for msg in input_messages) + "\n" reader = MockStdinReader(input_data) writer = MockStdoutWriter() # Process messages messages_received = [] while True: line = await reader.readline() if not line: break messages_received.append(json.loads(line)) # Verify all messages were received assert len(messages_received) == len(input_messages) assert all(msg["type"] == "request" for msg in messages_received) @pytest.mark.asyncio async def test_stdio_error_handling(): """Test error handling in stdio communication.""" # Test invalid JSON reader = MockStdinReader("invalid json\n") writer = MockStdoutWriter() line = await reader.readline() try: message = json.loads(line) except json.JSONDecodeError as e: error_response = { "type": "error", "error": "Invalid JSON format" } await writer.write(json.dumps(error_response) + "\n") assert "error" in writer.get_output() assert "Invalid JSON format" in writer.get_output() @pytest.mark.asyncio async def test_stdio_message_ordering(): """Test message ordering and response correlation.""" # Set up messages with sequence numbers input_messages = [ {"type": "request", "id": "1", "sequence": 1}, {"type": "request", "id": "2", "sequence": 2}, {"type": "request", "id": "3", "sequence": 3} ] input_data = "\n".join(json.dumps(msg) for msg in input_messages) + "\n" reader = MockStdinReader(input_data) writer = MockStdoutWriter() # Process messages and send responses sequence = 1 while True: line = await reader.readline() if not line: break message = json.loads(line) assert message["sequence"] == sequence response = { "type": "response", "id": message["id"], "sequence": sequence } await writer.write(json.dumps(response) + "\n") sequence += 1 # Verify response ordering output = writer.get_output() responses = [json.loads(line) for line in output.strip().split("\n")] assert all(resp["sequence"] == idx + 1 for idx, resp in enumerate(responses)) @pytest.mark.asyncio async def test_stdio_large_message(): """Test handling of large 
messages via stdio.""" # Create a large message large_data = "x" * 1024 * 1024 # 1MB of data large_message = { "type": "request", "id": "large", "data": large_data } reader = MockStdinReader(json.dumps(large_message) + "\n") writer = MockStdoutWriter() # Process large message line = await reader.readline() message = json.loads(line) # Verify message was received correctly assert len(message["data"]) == len(large_data) assert message["data"] == large_data # Send large response response = { "type": "response", "id": message["id"], "data": large_data } await writer.write(json.dumps(response) + "\n") # Verify large response was written output = writer.get_output() response_message = json.loads(output) assert len(response_message["data"]) == len(large_data) @pytest.mark.asyncio async def test_stdio_buffer_overflow_handling(): """Test handling of buffer overflow in stdio communication.""" very_large_data = "x" * (10 * 1024 * 1024) very_large_message = { "type": "request", "id": "overflow_test", "data": very_large_data } reader = MockStdinReader(json.dumps(very_large_message) + "\n") writer = MockStdoutWriter() line = await reader.readline() try: message = json.loads(line) assert len(message["data"]) == len(very_large_data) response = { "type": "response", "id": message["id"], "status": "received", "data_size": len(message["data"]) } await writer.write(json.dumps(response) + "\n") assert "received" in writer.get_output() assert str(len(very_large_data)) in writer.get_output() except json.JSONDecodeError: pytest.fail("Failed to parse large JSON message") except MemoryError: pytest.fail("Memory error when processing large message") @pytest.mark.asyncio async def test_stdio_component_unavailability(): """Test stdio behavior when a required component is unavailable.""" reader = MockStdinReader('{"type": "request", "id": "test", "method": "unavailable_component", "params": {}}\n') writer = MockStdoutWriter() line = await reader.readline() message = json.loads(line) component_available = False if component_available: response = { "type": "response", "id": message["id"], "result": "success" } else: response = { "type": "error", "id": message["id"], "error": "Component unavailable", "code": "COMPONENT_UNAVAILABLE" } await writer.write(json.dumps(response) + "\n") output = writer.get_output() assert "error" in output assert "Component unavailable" in output assert "COMPONENT_UNAVAILABLE" in output @pytest.mark.asyncio async def test_stdio_protocol_version_check(): """Test handling of protocol version mismatches in stdio communication.""" reader = MockStdinReader('{"type": "init", "protocol_version": "1.0", "client_id": "test_client"}\n') writer = MockStdoutWriter() supported_versions = ["2.0", "2.1"] line = await reader.readline() message = json.loads(line) client_version = message.get("protocol_version", "unknown") is_compatible = client_version in supported_versions if is_compatible: response = { "type": "init_success", "server_version": supported_versions[-1] } else: response = { "type": "init_error", "error": "Incompatible protocol version", "supported_versions": supported_versions } await writer.write(json.dumps(response) + "\n") output = writer.get_output() assert "init_error" in output assert "Incompatible protocol version" in output assert all(version in output for version in supported_versions) ``` -------------------------------------------------------------------------------- /tests/components/test_knowledge_base.py: 
-------------------------------------------------------------------------------- ```python import sys import os # Ensure the src directory is in the Python path sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))) import pytest import pytest_asyncio from pathlib import Path from typing import AsyncGenerator from src.mcp_codebase_insight.core.knowledge import KnowledgeBase, PatternType, PatternConfidence from src.mcp_codebase_insight.core.config import ServerConfig from src.mcp_codebase_insight.core.vector_store import VectorStore @pytest_asyncio.fixture async def knowledge_base(test_config: ServerConfig, vector_store: VectorStore): kb = KnowledgeBase(test_config, vector_store) await kb.initialize() yield kb await kb.cleanup() @pytest.mark.asyncio async def test_knowledge_base_initialization(knowledge_base: KnowledgeBase): """Test that knowledge base initializes correctly.""" assert knowledge_base is not None assert knowledge_base.vector_store is not None assert knowledge_base.config is not None @pytest.mark.asyncio async def test_add_and_get_pattern(knowledge_base: KnowledgeBase): """Test adding and retrieving patterns.""" # Add pattern pattern_data = { "name": "Test Pattern", "description": "A test pattern", "content": "def test(): pass", # Note: renamed from 'code' to 'content' to match implementation "tags": ["test", "example"] } pattern = await knowledge_base.add_pattern( name=pattern_data["name"], type=PatternType.CODE, description=pattern_data["description"], content=pattern_data["content"], confidence=PatternConfidence.MEDIUM, tags=pattern_data["tags"] ) assert pattern.id is not None # Get pattern retrieved = await knowledge_base.get_pattern(pattern.id) assert retrieved.name == pattern_data["name"] assert retrieved.description == pattern_data["description"] @pytest.mark.asyncio async def test_find_similar_patterns(knowledge_base: KnowledgeBase): """Test finding similar patterns.""" # Add test patterns pattern1_data = { "name": "Test Pattern 1", "description": "First test pattern", "content": "def test1(): pass", "tags": ["test"] } pattern2_data = { "name": "Test Pattern 2", "description": "Second test pattern", "content": "def test2(): pass", "tags": ["test"] } pattern1 = await knowledge_base.add_pattern( name=pattern1_data["name"], type=PatternType.CODE, description=pattern1_data["description"], content=pattern1_data["content"], confidence=PatternConfidence.MEDIUM, tags=pattern1_data["tags"] ) pattern2 = await knowledge_base.add_pattern( name=pattern2_data["name"], type=PatternType.CODE, description=pattern2_data["description"], content=pattern2_data["content"], confidence=PatternConfidence.MEDIUM, tags=pattern2_data["tags"] ) # Search for similar patterns similar = await knowledge_base.find_similar_patterns("test pattern") assert len(similar) > 0 @pytest.mark.asyncio async def test_update_pattern(knowledge_base: KnowledgeBase): """Test updating patterns.""" # Add initial pattern pattern_data = { "name": "Original Pattern", "description": "Original description", "content": "def original(): pass", "tags": ["original"] } pattern = await knowledge_base.add_pattern( name=pattern_data["name"], type=PatternType.CODE, description=pattern_data["description"], content=pattern_data["content"], confidence=PatternConfidence.MEDIUM, tags=pattern_data["tags"] ) # Update pattern updated_data = { "name": "Updated Pattern", "description": "Updated description", "content": "def updated(): pass", "tags": ["updated"] } await knowledge_base.update_pattern( 
pattern_id=pattern.id, description=updated_data["description"], content=updated_data["content"], tags=updated_data["tags"] ) # Verify update retrieved = await knowledge_base.get_pattern(pattern.id) # Name is not updated by the update_pattern method assert retrieved.name == pattern_data["name"] # Original name should remain assert retrieved.description == updated_data["description"] @pytest.mark.asyncio async def test_delete_pattern(knowledge_base: KnowledgeBase): """Test deleting patterns.""" # Add a pattern to delete pattern_data = { "name": "Pattern to Delete", "description": "This pattern will be deleted", "content": "def to_be_deleted(): pass", "tags": ["delete", "test"] } pattern = await knowledge_base.add_pattern( name=pattern_data["name"], type=PatternType.CODE, description=pattern_data["description"], content=pattern_data["content"], confidence=PatternConfidence.MEDIUM, tags=pattern_data["tags"] ) # Verify pattern exists retrieved_before = await knowledge_base.get_pattern(pattern.id) assert retrieved_before is not None # Delete the pattern await knowledge_base.delete_pattern(pattern.id) # Verify pattern no longer exists try: retrieved_after = await knowledge_base.get_pattern(pattern.id) assert retrieved_after is None, "Pattern should have been deleted" except Exception as e: # Either the pattern is None or an exception is raised (both are acceptable) pass @pytest.mark.asyncio async def test_search_patterns_by_tag(knowledge_base: KnowledgeBase): """Test searching patterns by tag.""" # Add patterns with different tags tag1_pattern = await knowledge_base.add_pattern( name="Tag1 Pattern", type=PatternType.CODE, description="Pattern with tag1", content="def tag1_function(): pass", confidence=PatternConfidence.HIGH, tags=["tag1", "common"] ) tag2_pattern = await knowledge_base.add_pattern( name="Tag2 Pattern", type=PatternType.CODE, description="Pattern with tag2", content="def tag2_function(): pass", confidence=PatternConfidence.HIGH, tags=["tag2", "common"] ) # Search by tag1 tag1_results = await knowledge_base.search_patterns(tags=["tag1"]) assert any(p.id == tag1_pattern.id for p in tag1_results) assert not any(p.id == tag2_pattern.id for p in tag1_results) # Search by tag2 tag2_results = await knowledge_base.search_patterns(tags=["tag2"]) assert any(p.id == tag2_pattern.id for p in tag2_results) assert not any(p.id == tag1_pattern.id for p in tag2_results) # Search by common tag common_results = await knowledge_base.search_patterns(tags=["common"]) assert any(p.id == tag1_pattern.id for p in common_results) assert any(p.id == tag2_pattern.id for p in common_results) @pytest.mark.asyncio async def test_pattern_versioning(knowledge_base: KnowledgeBase): """Test pattern versioning functionality.""" # Create initial pattern initial_pattern = await knowledge_base.add_pattern( name="Versioned Pattern", type=PatternType.CODE, description="Initial version", content="def version1(): pass", confidence=PatternConfidence.MEDIUM, tags=["versioned"] ) # Update pattern multiple times to create versions await knowledge_base.update_pattern( pattern_id=initial_pattern.id, description="Version 2", content="def version2(): pass" ) await knowledge_base.update_pattern( pattern_id=initial_pattern.id, description="Version 3", content="def version3(): pass" ) # Get the latest version latest = await knowledge_base.get_pattern(initial_pattern.id) assert latest.description == "Version 3" assert "version3" in latest.content # If versioning is supported, try to get a specific version try: # This might not be 
implemented in all versions of the knowledge base versions = await knowledge_base.get_pattern_versions(initial_pattern.id) if versions and len(versions) > 1: # If we have version history, verify it assert len(versions) >= 3, "Should have at least 3 versions" assert any("Version 2" in v.description for v in versions) assert any("Initial version" in v.description for v in versions) except (AttributeError, NotImplementedError): # Versioning might not be implemented, which is fine pass ``` -------------------------------------------------------------------------------- /test_fix_helper.py: -------------------------------------------------------------------------------- ```python #!/usr/bin/env python3 """ A utility script to help fix common test issues in the MCP Codebase Insight project. This script can: 1. Update import paths in all test files 2. Check for proper dependencies 3. Set up proper Python path in conftest.py files """ import os import re import sys import importlib import subprocess from pathlib import Path from typing import List, Tuple, Dict, Optional def add_python_path_to_conftest(conftest_path: str) -> bool: """Add Python path setting to a conftest.py file.""" if not os.path.exists(conftest_path): print(f"Error: {conftest_path} does not exist") return False with open(conftest_path, 'r') as f: content = f.read() # Check if Python path is already set if "sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))" in content: print(f"Python path already set in {conftest_path}") return True # Add import statements if needed imports_to_add = [] if "import sys" not in content: imports_to_add.append("import sys") if "import os" not in content: imports_to_add.append("import os") # Find a good spot to insert the path setting (after imports) lines = content.split('\n') insert_position = 0 # Find the last import statement for i, line in enumerate(lines): if line.startswith('import ') or line.startswith('from '): insert_position = i + 1 # Insert the Python path setting path_setting = "\n# Ensure the src directory is in the Python path\nsys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))\n" # Add imports if needed if imports_to_add: path_setting = "\n" + "\n".join(imports_to_add) + path_setting # Insert into content new_content = '\n'.join(lines[:insert_position]) + path_setting + '\n'.join(lines[insert_position:]) # Write back to file with open(conftest_path, 'w') as f: f.write(new_content) print(f"Added Python path setting to {conftest_path}") return True def fix_imports_in_file(file_path: str) -> Tuple[int, int]: """Fix import paths in a Python file, changing from 'mcp_codebase_insight' to 'src.mcp_codebase_insight'.""" try: with open(file_path, 'r', encoding='utf-8') as f: content = f.read() except UnicodeDecodeError: # Try with a different encoding or skip the file try: with open(file_path, 'r', encoding='latin-1') as f: content = f.read() except Exception as e: print(f"Error reading {file_path}: {e}") return 0, 0 # Look for the problematic imports pattern = r'from\s+mcp_codebase_insight\.' 
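    # The regex above only matches absolute `from mcp_codebase_insight....` imports;
    # e.g. `from mcp_codebase_insight.core.config import ServerConfig` is rewritten
    # below to `from src.mcp_codebase_insight.core.config import ServerConfig`.
    # Bare `import mcp_codebase_insight` statements are not rewritten.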
matches = re.findall(pattern, content) if not matches: return 0, 0 # No matches found # Replace with correct import path new_content = re.sub(pattern, 'from src.mcp_codebase_insight.', content) # Add sys.path.insert if not already present and there were matches if 'sys.path.insert' not in new_content: import_sys_path = ( "import sys\n" "import os\n\n" "# Ensure the src directory is in the Python path\n" "sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))\n\n" ) # Find a good spot to insert the path setting (before imports) lines = new_content.split('\n') insert_position = 0 # Find the first import statement for i, line in enumerate(lines): if line.startswith('import ') or line.startswith('from '): insert_position = i break # Reconstruct the content with path inserted new_content = '\n'.join(lines[:insert_position]) + '\n' + import_sys_path + '\n'.join(lines[insert_position:]) # Write the changes back to the file with the same encoding we used to read it try: with open(file_path, 'w', encoding='utf-8') as f: f.write(new_content) except UnicodeEncodeError: with open(file_path, 'w', encoding='latin-1') as f: f.write(new_content) return len(matches), 1 # Return number of replacements and files modified def find_and_fix_test_files(root_dir: str = '.') -> Tuple[int, int]: """Find all test files in the project and fix their imports.""" test_files = [] conftest_files = [] # Walk through the directory structure to find test files for root, _, files in os.walk(root_dir): for file in files: if file.startswith('test_') and file.endswith('.py'): test_files.append(os.path.join(root, file)) elif file == 'conftest.py': conftest_files.append(os.path.join(root, file)) # Fix imports in all test files total_replacements = 0 total_files_modified = 0 for file_path in test_files: replacements, files_modified = fix_imports_in_file(file_path) total_replacements += replacements total_files_modified += files_modified if replacements > 0: print(f"Fixed {replacements} imports in {file_path}") # Update conftest files for conftest_path in conftest_files: if add_python_path_to_conftest(conftest_path): total_files_modified += 1 return total_replacements, total_files_modified def check_dependencies() -> bool: """Check if all required dependencies are installed.""" required_packages = [ 'sentence-transformers', 'torch', 'fastapi', 'qdrant-client', 'pytest', 'pytest-asyncio' ] missing_packages = [] for package in required_packages: try: importlib.import_module(package.replace('-', '_')) print(f"✅ {package} is installed") except ImportError: missing_packages.append(package) print(f"❌ {package} is NOT installed") if missing_packages: print("\nMissing packages:") for package in missing_packages: print(f"- {package}") return False return True def install_dependencies() -> bool: """Install missing dependencies.""" try: subprocess.run([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"], check=True) return True except subprocess.CalledProcessError: print("Failed to install dependencies from requirements.txt") return False def create_path_fix_script() -> bool: """Create a script to fix path issues when running tests.""" script_content = """#!/bin/bash # This script runs tests with proper path and environment setup set -e # Activate the virtual environment (or create it if it doesn't exist) if [ ! -d ".venv" ]; then echo "Creating virtual environment..." python3 -m venv .venv fi source .venv/bin/activate # Install required dependencies echo "Installing required dependencies..." 
pip install -e . pip install pytest pytest-asyncio # Set environment variables export MCP_TEST_MODE=1 export QDRANT_URL="http://localhost:6333" export MCP_COLLECTION_NAME="test_collection_$(date +%s)" export PYTHONPATH="$PYTHONPATH:$(pwd)" # Run the tests echo "Running tests..." python -m pytest "$@" """ script_path = 'run_fixed_tests.sh' with open(script_path, 'w') as f: f.write(script_content) # Make the script executable os.chmod(script_path, 0o755) print(f"Created {script_path} - use it to run tests with proper path setup") return True def main(): """Main entry point.""" print("=== MCP Codebase Insight Test Fix Helper ===\n") # Find and fix import issues print("Fixing import paths in test files...") replacements, files_modified = find_and_fix_test_files() print(f"Fixed {replacements} imports in {files_modified} files\n") # Check dependencies print("Checking dependencies...") if not check_dependencies(): print("\nWould you like to install missing dependencies? (y/n)") choice = input().strip().lower() if choice == 'y': install_dependencies() # Create helper script print("\nCreating test runner script...") create_path_fix_script() print("\n=== Fixes Complete ===") print(""" Next steps: 1. Run the tests using: ./run_fixed_tests.sh [test_options] e.g., ./run_fixed_tests.sh tests/components/test_vector_store.py -v 2. If Qdrant collection creation fails, check the Docker container: docker run -d -p 6333:6333 -p 6334:6334 -v $(pwd)/qdrant_data:/qdrant/storage qdrant/qdrant 3. If specific tests still fail, check their requirements individually """) if __name__ == "__main__": main() ``` -------------------------------------------------------------------------------- /.compile-venv-py3.11/bin/Activate.ps1: -------------------------------------------------------------------------------- ``` <# .Synopsis Activate a Python virtual environment for the current PowerShell session. .Description Pushes the python executable for a virtual environment to the front of the $Env:PATH environment variable and sets the prompt to signify that you are in a Python virtual environment. Makes use of the command line switches as well as the `pyvenv.cfg` file values present in the virtual environment. .Parameter VenvDir Path to the directory that contains the virtual environment to activate. The default value for this is the parent of the directory that the Activate.ps1 script is located within. .Parameter Prompt The prompt prefix to display when this virtual environment is activated. By default, this prompt is the name of the virtual environment folder (VenvDir) surrounded by parentheses and followed by a single space (ie. '(.venv) '). .Example Activate.ps1 Activates the Python virtual environment that contains the Activate.ps1 script. .Example Activate.ps1 -Verbose Activates the Python virtual environment that contains the Activate.ps1 script, and shows extra information about the activation as it executes. .Example Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv Activates the Python virtual environment located in the specified location. .Example Activate.ps1 -Prompt "MyPython" Activates the Python virtual environment that contains the Activate.ps1 script, and prefixes the current prompt with the specified string (surrounded in parentheses) while the virtual environment is active. .Notes On Windows, it may be required to enable this Activate.ps1 script by setting the execution policy for the user. 
You can do this by issuing the following PowerShell command: PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser For more information on Execution Policies: https://go.microsoft.com/fwlink/?LinkID=135170 #> Param( [Parameter(Mandatory = $false)] [String] $VenvDir, [Parameter(Mandatory = $false)] [String] $Prompt ) <# Function declarations --------------------------------------------------- #> <# .Synopsis Remove all shell session elements added by the Activate script, including the addition of the virtual environment's Python executable from the beginning of the PATH variable. .Parameter NonDestructive If present, do not remove this function from the global namespace for the session. #> function global:deactivate ([switch]$NonDestructive) { # Revert to original values # The prior prompt: if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) { Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT } # The prior PYTHONHOME: if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) { Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME } # The prior PATH: if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) { Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH Remove-Item -Path Env:_OLD_VIRTUAL_PATH } # Just remove the VIRTUAL_ENV altogether: if (Test-Path -Path Env:VIRTUAL_ENV) { Remove-Item -Path env:VIRTUAL_ENV } # Just remove VIRTUAL_ENV_PROMPT altogether. if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) { Remove-Item -Path env:VIRTUAL_ENV_PROMPT } # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether: if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) { Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force } # Leave deactivate function in the global namespace if requested: if (-not $NonDestructive) { Remove-Item -Path function:deactivate } } <# .Description Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the given folder, and returns them in a map. For each line in the pyvenv.cfg file, if that line can be parsed into exactly two strings separated by `=` (with any amount of whitespace surrounding the =) then it is considered a `key = value` line. The left hand string is the key, the right hand is the value. If the value starts with a `'` or a `"` then the first and last character is stripped from the value before being captured. .Parameter ConfigDir Path to the directory that contains the `pyvenv.cfg` file. #> function Get-PyVenvConfig( [String] $ConfigDir ) { Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg" # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue). $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue # An empty map will be returned if no config file is found. $pyvenvConfig = @{ } if ($pyvenvConfigPath) { Write-Verbose "File exists, parse `key = value` lines" $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath $pyvenvConfigContent | ForEach-Object { $keyval = $PSItem -split "\s*=\s*", 2 if ($keyval[0] -and $keyval[1]) { $val = $keyval[1] # Remove extraneous quotations around a string value. 
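                # For example, a pyvenv.cfg entry written as  prompt = 'my-env'
                # is captured below as the bare value  my-env  (surrounding quotes stripped).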
if ("'""".Contains($val.Substring(0, 1))) { $val = $val.Substring(1, $val.Length - 2) } $pyvenvConfig[$keyval[0]] = $val Write-Verbose "Adding Key: '$($keyval[0])'='$val'" } } } return $pyvenvConfig } <# Begin Activate script --------------------------------------------------- #> # Determine the containing directory of this script $VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition $VenvExecDir = Get-Item -Path $VenvExecPath Write-Verbose "Activation script is located in path: '$VenvExecPath'" Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)" Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)" # Set values required in priority: CmdLine, ConfigFile, Default # First, get the location of the virtual environment, it might not be # VenvExecDir if specified on the command line. if ($VenvDir) { Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values" } else { Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir." $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/") Write-Verbose "VenvDir=$VenvDir" } # Next, read the `pyvenv.cfg` file to determine any required value such # as `prompt`. $pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir # Next, set the prompt from the command line, or the config file, or # just use the name of the virtual environment folder. if ($Prompt) { Write-Verbose "Prompt specified as argument, using '$Prompt'" } else { Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value" if ($pyvenvCfg -and $pyvenvCfg['prompt']) { Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'" $Prompt = $pyvenvCfg['prompt']; } else { Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)" Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'" $Prompt = Split-Path -Path $venvDir -Leaf } } Write-Verbose "Prompt = '$Prompt'" Write-Verbose "VenvDir='$VenvDir'" # Deactivate any currently active virtual environment, but leave the # deactivate function in place. deactivate -nondestructive # Now set the environment variable VIRTUAL_ENV, used by many tools to determine # that there is an activated venv. 
$env:VIRTUAL_ENV = $VenvDir if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) { Write-Verbose "Setting prompt to '$Prompt'" # Set the prompt to include the env name # Make sure _OLD_VIRTUAL_PROMPT is global function global:_OLD_VIRTUAL_PROMPT { "" } Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt function global:prompt { Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) " _OLD_VIRTUAL_PROMPT } $env:VIRTUAL_ENV_PROMPT = $Prompt } # Clear PYTHONHOME if (Test-Path -Path Env:PYTHONHOME) { Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME Remove-Item -Path Env:PYTHONHOME } # Add the venv to the PATH Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH $Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH" ``` -------------------------------------------------------------------------------- /server.py: -------------------------------------------------------------------------------- ```python import json import logging from contextlib import asynccontextmanager from typing import AsyncIterator, Dict, Any, Optional, List from datetime import datetime import time from mcp.server import Server from mcp.server.fastmcp import Context, FastMCP from qdrant_client import QdrantClient from sentence_transformers import SentenceTransformer from .core import ( ServerConfig, EmbeddingProvider, VectorStore, CacheManager, HealthMonitor, MetricsCollector, ErrorContext, handle_error ) from .utils.logger import get_logger logger = get_logger(__name__) class CodebaseAnalyzer: """Analyzes code patterns and architecture.""" def __init__( self, vector_store: VectorStore, cache_manager: CacheManager, metrics_collector: MetricsCollector ): self.vector_store = vector_store self.cache_manager = cache_manager self.metrics_collector = metrics_collector async def analyze_patterns(self, code_text: str) -> Dict[str, Any]: """Analyze code patterns in the given text.""" start_time = time.time() try: # Try cache first cached_result = await self.cache_manager.result_cache.get_result( "analyze_patterns", code_text ) if cached_result: await self.metrics_collector.record_cache_access(hit=True) return cached_result await self.metrics_collector.record_cache_access(hit=False) # Search for similar patterns similar_patterns = await self.vector_store.search( text=code_text, filter_params={"must": [{"key": "type", "match": {"value": "pattern"}}]}, limit=5 ) await self.metrics_collector.record_vector_query() result = { "patterns_found": len(similar_patterns), "matches": [ { "pattern": p.payload.get("pattern_name", "Unknown"), "description": p.payload.get("description", ""), "similarity": p.score, "examples": p.payload.get("examples", []) } for p in similar_patterns ] } # Cache the result await self.cache_manager.result_cache.store_result( "analyze_patterns", result, code_text ) # Record metrics duration = time.time() - start_time await self.metrics_collector.record_request( tool_name="analyze_patterns", duration=duration, success=True, metadata={ "patterns_found": len(similar_patterns) } ) return result except Exception as e: # Record error metrics duration = time.time() - start_time await self.metrics_collector.record_request( tool_name="analyze_patterns", duration=duration, success=False, error=str(e) ) raise async def detect_architecture(self, codebase_path: str) -> Dict[str, Any]: 
"""Detect architectural patterns in a codebase.""" start_time = time.time() try: # Try cache first cached_result = await self.cache_manager.result_cache.get_result( "detect_architecture", codebase_path ) if cached_result: await self.metrics_collector.record_cache_access(hit=True) return cached_result await self.metrics_collector.record_cache_access(hit=False) # This is a placeholder - actual implementation would analyze # the entire codebase structure result = { "architecture": "layered", "patterns": ["MVC", "Repository"], "components": ["controllers", "models", "views"] } # Cache the result await self.cache_manager.result_cache.store_result( "detect_architecture", result, codebase_path ) # Record metrics duration = time.time() - start_time await self.metrics_collector.record_request( tool_name="detect_architecture", duration=duration, success=True ) return result except Exception as e: # Record error metrics duration = time.time() - start_time await self.metrics_collector.record_request( tool_name="detect_architecture", duration=duration, success=False, error=str(e) ) raise @asynccontextmanager async def server_lifespan(server: Server) -> AsyncIterator[Dict]: """Initialize server components and manage their lifecycle.""" config = ServerConfig.from_env() cache_manager = None health_monitor = None metrics_collector = None try: # Initialize vector store embedding_model = SentenceTransformer(config.embedding_model) embedder = EmbeddingProvider(embedding_model) # Initialize Qdrant client qdrant_client = QdrantClient( url=config.qdrant_url, timeout=config.qdrant_timeout ) vector_store = VectorStore(qdrant_client, embedder, config.collection_name) await vector_store.initialize() # Initialize supporting components cache_manager = CacheManager(config.to_dict()) health_monitor = HealthMonitor(config) metrics_collector = MetricsCollector() # Initialize analyzer analyzer = CodebaseAnalyzer( vector_store=vector_store, cache_manager=cache_manager, metrics_collector=metrics_collector ) yield { "config": config, "vector_store": vector_store, "cache_manager": cache_manager, "health_monitor": health_monitor, "metrics_collector": metrics_collector, "analyzer": analyzer } finally: if vector_store: await vector_store.close() if cache_manager: await cache_manager.clear_all() if metrics_collector: await metrics_collector.reset() # Create FastMCP instance with lifespan management mcp = FastMCP(lifespan=server_lifespan) # Tool Schemas analyze_patterns_schema = { "type": "object", "properties": { "code": { "type": "string", "description": "Code text to analyze for patterns", } }, "required": ["code"], } detect_architecture_schema = { "type": "object", "properties": { "path": { "type": "string", "description": "Path to the codebase to analyze", } }, "required": ["path"], } health_check_schema = { "type": "object", "properties": { "force": { "type": "boolean", "description": "Force a new health check", "default": False } } } metrics_schema = { "type": "object", "properties": {} } # Tool Implementations @mcp.tool(name="analyze-patterns", description="Analyze code for common patterns") async def analyze_patterns(ctx: Context, code: str) -> Dict[str, Any]: """Analyze code text for common patterns.""" analyzer: CodebaseAnalyzer = ctx.request_context.lifespan_context["analyzer"] return await analyzer.analyze_patterns(code) @mcp.tool(name="detect-architecture", description="Detect architectural patterns in a codebase") async def detect_architecture(ctx: Context, path: str) -> Dict[str, Any]: """Detect architectural 
patterns in a codebase.""" analyzer: CodebaseAnalyzer = ctx.request_context.lifespan_context["analyzer"] return await analyzer.detect_architecture(path) @mcp.tool(name="health-check", description="Check server health status") async def health_check(ctx: Context, force: bool = False) -> Dict[str, Any]: """Check the health status of server components.""" health_monitor: HealthMonitor = ctx.request_context.lifespan_context["health_monitor"] return await health_monitor.check_health(force) @mcp.tool(name="get-metrics", description="Get server performance metrics") async def get_metrics(ctx: Context) -> Dict[str, Any]: """Get server performance metrics.""" metrics_collector: MetricsCollector = ctx.request_context.lifespan_context["metrics_collector"] return await metrics_collector.get_all_metrics() ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/core/cache.py: -------------------------------------------------------------------------------- ```python """Cache management module.""" import json import os from collections import OrderedDict from datetime import datetime, timedelta from pathlib import Path from typing import Any, Dict, Optional, Union import logging class MemoryCache: """In-memory LRU cache.""" def __init__(self, max_size: int = 1000): """Initialize memory cache.""" self.max_size = max_size self.cache: OrderedDict = OrderedDict() def get(self, key: str) -> Optional[Any]: """Get value from cache.""" if key not in self.cache: return None # Move to end (most recently used) value = self.cache.pop(key) self.cache[key] = value return value def put(self, key: str, value: Any) -> None: """Put value in cache.""" if key in self.cache: # Move to end self.cache.pop(key) elif len(self.cache) >= self.max_size: # Remove oldest self.cache.popitem(last=False) self.cache[key] = value def remove(self, key: str) -> None: """Remove value from cache.""" if key in self.cache: self.cache.pop(key) def clear(self) -> None: """Clear all values from cache.""" self.cache.clear() class DiskCache: """Disk-based cache.""" def __init__( self, cache_dir: Union[str, Path], max_age_days: int = 7 ): """Initialize disk cache.""" self.cache_dir = Path(cache_dir) self.cache_dir.mkdir(parents=True, exist_ok=True) self.max_age = timedelta(days=max_age_days) def get(self, key: str) -> Optional[Any]: """Get value from cache.""" cache_path = self._get_cache_path(key) if not cache_path.exists(): return None # Check if expired if self._is_expired(cache_path): cache_path.unlink() return None try: with open(cache_path) as f: data = json.load(f) return data["value"] except Exception: return None def put(self, key: str, value: Any) -> None: """Put value in cache.""" cache_path = self._get_cache_path(key) try: with open(cache_path, "w") as f: json.dump({ "value": value, "timestamp": datetime.utcnow().isoformat() }, f) except Exception: # Ignore write errors pass def remove(self, key: str) -> None: """Remove value from cache.""" cache_path = self._get_cache_path(key) if cache_path.exists(): cache_path.unlink() def clear(self) -> None: """Clear all values from cache.""" for path in self.cache_dir.glob("*.json"): path.unlink() def cleanup_expired(self) -> None: """Remove expired cache entries.""" for path in self.cache_dir.glob("*.json"): if self._is_expired(path): path.unlink() def _get_cache_path(self, key: str) -> Path: """Get cache file path for key.""" # Use hash of key as filename filename = f"{hash(key)}.json" return self.cache_dir / filename def _is_expired(self, 
path: Path) -> bool: """Check if cache entry is expired.""" try: with open(path) as f: data = json.load(f) timestamp = datetime.fromisoformat(data["timestamp"]) return datetime.utcnow() - timestamp > self.max_age except Exception: return True class CacheManager: """Manager for memory and disk caching.""" def __init__(self, config): """Initialize cache manager.""" self.config = config self.enabled = config.cache_enabled self.memory_cache = None self.disk_cache = None self.initialized = False self.logger = logging.getLogger(__name__) async def initialize(self) -> None: """Initialize cache components.""" if self.initialized: self.logger.debug("Cache manager already initialized") return try: self.logger.debug(f"Initializing cache manager (enabled: {self.enabled})") if self.enabled: self.logger.debug(f"Creating memory cache with size: {self.config.memory_cache_size}") self.memory_cache = MemoryCache( max_size=self.config.memory_cache_size ) # Check if disk cache is configured and enabled if self.config.disk_cache_dir is not None: self.logger.debug(f"Creating disk cache at: {self.config.disk_cache_dir}") # Ensure directory exists (should be created by ServerConfig.create_directories) if not self.config.disk_cache_dir.exists(): self.logger.debug(f"Creating disk cache directory: {self.config.disk_cache_dir}") self.config.disk_cache_dir.mkdir(parents=True, exist_ok=True) self.disk_cache = DiskCache( cache_dir=self.config.disk_cache_dir ) else: self.logger.debug("Disk cache directory not configured, skipping disk cache") else: self.logger.debug("Cache is disabled, not initializing memory or disk cache") self.initialized = True self.logger.debug("Cache manager initialized successfully") except Exception as e: self.logger.error(f"Error initializing cache manager: {e}") await self.cleanup() raise RuntimeError(f"Failed to initialize cache manager: {str(e)}") def get_from_memory(self, key: str) -> Optional[Any]: """Get value from memory cache.""" if not self.enabled or not self.memory_cache: return None return self.memory_cache.get(key) def put_in_memory(self, key: str, value: Any) -> None: """Put value in memory cache.""" if not self.enabled or not self.memory_cache: return self.memory_cache.put(key, value) def get_from_disk(self, key: str) -> Optional[Any]: """Get value from disk cache.""" if not self.enabled or not self.disk_cache: return None return self.disk_cache.get(key) def put_in_disk(self, key: str, value: Any) -> None: """Put value in disk cache.""" if not self.enabled or not self.disk_cache: return self.disk_cache.put(key, value) def get(self, key: str) -> Optional[Any]: """Get value from cache (memory first, then disk).""" if not self.enabled: return None # Try memory cache first value = self.get_from_memory(key) if value is not None: return value # Try disk cache if self.disk_cache: value = self.get_from_disk(key) if value is not None: # Cache in memory for next time self.put_in_memory(key, value) return value return None def put(self, key: str, value: Any) -> None: """Put value in cache (both memory and disk).""" if not self.enabled: return self.put_in_memory(key, value) if self.disk_cache: self.put_in_disk(key, value) def remove(self, key: str) -> None: """Remove value from cache.""" if not self.enabled: return if self.memory_cache: self.memory_cache.remove(key) if self.disk_cache: self.disk_cache.remove(key) def clear(self) -> None: """Clear all values from cache.""" if not self.enabled: return if self.memory_cache: self.memory_cache.clear() if self.disk_cache: self.disk_cache.clear() 
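    # Note: cleanup() below is the async teardown counterpart to clear(): it
    # empties the in-memory LRU cache, prunes expired entries from the disk
    # cache, and marks the manager as uninitialized so initialize() can run again.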
async def cleanup(self) -> None: """Clean up expired cache entries and clear memory cache.""" if not self.initialized: return try: if not self.enabled: return # Clear memory cache if self.memory_cache: self.memory_cache.clear() # Clean up disk cache if self.disk_cache: self.disk_cache.cleanup_expired() except Exception as e: print(f"Error cleaning up cache manager: {e}") finally: self.initialized = False async def clear_all(self) -> None: """Clear all values from cache asynchronously.""" self.clear() ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/core/health.py: -------------------------------------------------------------------------------- ```python """Health monitoring module.""" import asyncio import os import psutil import time from datetime import datetime from enum import Enum from typing import Dict, List, Optional from pydantic import BaseModel import aiohttp class HealthStatus(str, Enum): """Health status enumeration.""" HEALTHY = "healthy" DEGRADED = "degraded" UNHEALTHY = "unhealthy" class ComponentHealth(BaseModel): """Component health model.""" name: str status: HealthStatus message: Optional[str] = None last_check: datetime metrics: Optional[Dict[str, float]] = None class SystemHealth(BaseModel): """System health model.""" status: HealthStatus components: Dict[str, ComponentHealth] system_metrics: Dict[str, float] timestamp: datetime class HealthManager: """Manager for system health monitoring.""" def __init__(self, config): """Initialize health manager.""" self.config = config self.components: Dict[str, ComponentHealth] = {} self.check_interval = 60 # seconds self.running = False self._monitor_task = None self.initialized = False async def initialize(self): """Initialize health monitoring.""" if self.initialized: return try: self.running = True self._monitor_task = asyncio.create_task(self._monitor_health()) # Register core components await self.register_component("qdrant") await self.register_component("disk") await self.register_component("memory") # Initial health check await self.check_health() self.initialized = True except Exception as e: print(f"Error initializing health manager: {e}") await self.cleanup() raise RuntimeError(f"Failed to initialize health manager: {str(e)}") async def cleanup(self): """Clean up health monitoring.""" if not self.initialized: return try: if self.running: self.running = False if self._monitor_task: try: # Wait for the task to finish with a timeout await asyncio.wait_for(self._monitor_task, timeout=5.0) except asyncio.TimeoutError: # If it doesn't finish in time, cancel it self._monitor_task.cancel() try: await self._monitor_task except asyncio.CancelledError: pass finally: self._monitor_task = None self.components.clear() except Exception as e: print(f"Error cleaning up health manager: {e}") finally: self.initialized = False async def check_health(self) -> SystemHealth: """Check system health.""" # Update component health await self._check_components() # Get system metrics system_metrics = await self._get_system_metrics() # Determine overall status status = HealthStatus.HEALTHY if any(c.status == HealthStatus.UNHEALTHY for c in self.components.values()): status = HealthStatus.UNHEALTHY elif any(c.status == HealthStatus.DEGRADED for c in self.components.values()): status = HealthStatus.DEGRADED return SystemHealth( status=status, components=self.components, system_metrics=system_metrics, timestamp=datetime.utcnow() ) async def register_component( self, name: str, 
check_fn=None ) -> None: """Register a component for health monitoring.""" self.components[name] = ComponentHealth( name=name, status=HealthStatus.HEALTHY, last_check=datetime.utcnow(), metrics={} ) async def update_component_health( self, name: str, status: HealthStatus, message: Optional[str] = None, metrics: Optional[Dict[str, float]] = None ) -> None: """Update component health status.""" if name not in self.components: return self.components[name] = ComponentHealth( name=name, status=status, message=message, last_check=datetime.utcnow(), metrics=metrics ) async def _monitor_health(self): """Monitor system health periodically.""" while self.running: try: await self.check_health() except Exception as e: print(f"Error monitoring health: {e}") await asyncio.sleep(self.check_interval) async def _check_components(self): """Check health of all registered components.""" # Check Qdrant connection try: if hasattr(self.config, "qdrant_url"): await self._check_qdrant() except Exception as e: await self.update_component_health( "qdrant", HealthStatus.UNHEALTHY, str(e) ) # Check disk space try: await self._check_disk_space() except Exception as e: await self.update_component_health( "disk", HealthStatus.UNHEALTHY, str(e) ) # Check memory usage try: await self._check_memory() except Exception as e: await self.update_component_health( "memory", HealthStatus.UNHEALTHY, str(e) ) async def _check_qdrant(self): """Check Qdrant connection health.""" try: # Use REST API health endpoint start_time = time.perf_counter() async with aiohttp.ClientSession() as session: async with session.get("http://localhost:6333/healthz") as response: response.raise_for_status() response_time = time.perf_counter() - start_time await self.update_component_health( "qdrant", HealthStatus.HEALTHY, message="Qdrant is responding", metrics={ "response_time": response_time } ) except Exception as e: await self.update_component_health( "qdrant", HealthStatus.UNHEALTHY, message=f"Qdrant health check failed: {str(e)}" ) async def _check_disk_space(self): """Check disk space health.""" disk_path = self.config.docs_cache_dir usage = psutil.disk_usage(disk_path) status = HealthStatus.HEALTHY message = None # Alert if disk usage is high if usage.percent >= 90: status = HealthStatus.UNHEALTHY message = "Disk usage critical" elif usage.percent >= 80: status = HealthStatus.DEGRADED message = "Disk usage high" await self.update_component_health( "disk", status, message, metrics={ "total_gb": usage.total / (1024 ** 3), "used_gb": usage.used / (1024 ** 3), "free_gb": usage.free / (1024 ** 3), "percent_used": usage.percent } ) async def _check_memory(self): """Check memory health.""" memory = psutil.virtual_memory() status = HealthStatus.HEALTHY message = None # Alert if memory usage is high if memory.percent >= 90: status = HealthStatus.UNHEALTHY message = "Memory usage critical" elif memory.percent >= 80: status = HealthStatus.DEGRADED message = "Memory usage high" await self.update_component_health( "memory", status, message, metrics={ "total_gb": memory.total / (1024 ** 3), "used_gb": memory.used / (1024 ** 3), "free_gb": memory.available / (1024 ** 3), "percent_used": memory.percent } ) async def _get_system_metrics(self) -> Dict[str, float]: """Get system metrics.""" cpu_percent = psutil.cpu_percent(interval=1) memory = psutil.virtual_memory() disk = psutil.disk_usage("/") return { "cpu_percent": cpu_percent, "memory_percent": memory.percent, "disk_percent": disk.percent, "load_avg_1min": os.getloadavg()[0], "load_avg_5min": 
os.getloadavg()[1], "load_avg_15min": os.getloadavg()[2] } ``` -------------------------------------------------------------------------------- /PLAN.md: -------------------------------------------------------------------------------- ```markdown # Universal Codebase Analysis System Implementation Plan ## System Overview ```mermaid graph TD subgraph Core Infrastructure A[MCP Server] --> B[Vector Store] A --> C[Cache System] A --> D[Metrics Collector] A --> E[Health Monitor] end subgraph LLM Integration F[Meta Prompt System] --> G[Context Builder] G --> H[Prompt Generator] H --> I[Result Processor] I --> J[Knowledge Updater] end subgraph Analysis Engine K[Code Analyzer] --> L[Pattern Detector] K --> M[Architecture Analyzer] K --> N[Security Analyzer] K --> O[Performance Analyzer] end subgraph Documentation System U[FireCrawl Integration] --> V[Doc Manager] V --> W[Reference Cache] V --> X[Git Integration] end subgraph Task Management P[Task Tracker] --> Q[Debug System] P --> R[Test Manager] P --> S[Doc Generator] P --> T[ADR Creator] end subgraph ADR System AA[Impact Analysis] --> AB[Context Gathering] AB --> AC[Decision Analysis] AC --> AD[ADR Generation] AD --> AE[Implementation Tracking] AE --> AF[Evolution Management] end ``` ## Implementation Phases ### Phase 1: Core Infrastructure (Week 1-2) ```mermaid graph LR A[Setup Project] --> B[Vector Store] B --> C[Cache Layer] C --> D[Health Monitoring] D --> E[Metrics System] ``` #### Components: 1. **Vector Store** - Qdrant integration - Embedding system - Pattern storage - Search functionality 2. **Cache System** - LRU implementation - Result caching - Embedding cache - Performance optimization 3. **Health Monitor** - Component status - Performance metrics - Error tracking - System diagnostics 4. **Metrics Collector** - Usage statistics - Performance data - Error rates - Cache effectiveness ### Phase 2: Documentation & Knowledge Management (Week 3-4) ```mermaid graph TD A[FireCrawl Integration] --> B[Doc Crawler] B --> C[Reference Manager] C --> D[Local Cache] D --> E[Git Management] F[External Sources] --> B F --> G[API Docs] F --> H[Framework Docs] F --> I[Best Practices] ``` #### Components: 1. **Documentation Manager** ```python class DocumentationManager: async def crawl_docs(self, sources: List[str]) -> None: """Crawl and store documentation.""" async def update_references(self) -> None: """Update local documentation cache.""" async def manage_gitignore(self) -> None: """Handle version control for docs.""" ``` 2. **Reference System** ```python class ReferenceSystem: async def index_docs(self) -> None: """Index documentation for search.""" async def find_relevant_docs(self, context: str) -> List[Doc]: """Find relevant documentation.""" ``` ### Phase 3: LLM Integration & ADR System (Week 5-6) ```mermaid graph TD A[Meta Prompt System] --> B[Context Builder] B --> C[Prompt Generator] C --> D[Result Processor] D --> E[Knowledge Base] E --> A F[ADR System] --> G[Impact Analysis] G --> H[Context Gathering] H --> I[Decision Analysis] I --> J[Pattern Learning] ``` #### Components: 1. **Meta Prompt System** ```python class MetaPromptSystem: async def generate_prompt(self, task_type: str) -> str: """Generate task-specific prompts.""" async def evolve_prompts(self, feedback: Dict[str, Any]) -> None: """Evolve prompts based on effectiveness.""" ``` 2. 
**ADR System** ```python class ADRSystem: async def analyze_impact(self, changes: CodeChanges) -> ImpactAnalysis: """Analyze architectural impact of changes.""" async def gather_context(self) -> DecisionContext: """Gather relevant context for decision.""" async def analyze_options(self, options: List[Option]) -> OptionsAnalysis: """Analyze and compare options.""" async def generate_adr(self, context: DecisionContext) -> ADR: """Generate ADR document.""" async def track_implementation(self, adr: ADR) -> Implementation: """Track ADR implementation.""" async def evolve_adr(self, adr: ADR, feedback: Feedback) -> ADR: """Evolve ADR based on implementation feedback.""" async def learn_patterns(self, adr: ADR) -> List[Pattern]: """Extract reusable patterns from ADR.""" ``` ### Phase 4: Debug & Analysis System (Week 7-8) ```mermaid graph TD A[Debug System] --> B[Issue Analysis] B --> C[Pattern Matching] C --> D[Solution Generation] E[Agans Rules] --> F[System Understanding] F --> G[Failure Analysis] G --> H[Solution Verification] ``` #### Components: 1. **Debug System** ```python class DebugSystem: async def analyze_issue(self, issue: Issue) -> Analysis: """Analyze using Agans' 9 Rules.""" async def suggest_solution(self, analysis: Analysis) -> Solution: """Suggest solution approach.""" ``` ## Prompt Templates ### 1. Meta Prompts ```python META_PROMPTS = { "task_analysis": """ Given task: {task_description} Generate optimal analysis prompt considering: 1. Required context 2. Analysis steps 3. Validation criteria 4. Expected outcomes """, "prompt_evolution": """ Original prompt: {original_prompt} Results: {results} Effectiveness: {metrics} Suggest improvements for: 1. Context gathering 2. Analysis depth 3. Result quality 4. Validation accuracy """ } ``` ### 2. ADR Analysis Prompts ```python ADR_PROMPTS = { "impact_analysis": """ Code Changes: {code_changes} Current Architecture: {architecture_context} Historical Decisions: {related_adrs} Analyze: 1. Architectural Impact - Component changes - Interface modifications - Dependency updates 2. Technical Debt Impact - Existing debt affected - Potential new debt - Mitigation strategies 3. Cross-cutting Concerns - Security implications - Performance impact - Scalability considerations """, "decision_analysis": """ Decision Context: {decision_context} Options Considered: {options_analysis} Similar Decisions: {historical_decisions} Analyze each option for: 1. Technical Alignment - Architecture fit - Technology stack - Development practices 2. Business Impact - Development effort - Maintenance cost - Time to market 3. Risk Assessment - Technical risks - Implementation risks - Operational risks """ } ``` ### 3. Debug Analysis Prompts ```python DEBUG_PROMPTS = { "debug_analysis": """ Issue context: {issue_details} System state: {system_state} Following Agans' 9 Rules: 1. System Understanding: - Current architecture - Component relationships - Expected behavior 2. Failure Analysis: - Reproduction steps - Failure conditions - Pattern matching 3. Observable Data: - Error logs - Stack traces - System metrics 4. Component Isolation: - Affected components - Working components - Interface boundaries """ } ``` ## Implementation Strategy 1. **Infrastructure First** - Set up core components - Establish monitoring - Implement caching - Configure vector store 2. **Documentation System** - Integrate FireCrawl - Set up reference management - Configure Git integration - Implement caching 3. 
**LLM & ADR Integration** - Implement meta prompt system - Build ADR analysis system - Create knowledge management - Set up learning loop 4. **Debug & Analysis** - Implement Agans' rules system - Add pattern detection - Create solution generation - Set up verification system ## Success Criteria 1. **System Performance** - Response time < 2s - Cache hit rate > 80% - Pattern match accuracy > 90% - System uptime > 99.9% 2. **Documentation Quality** - Reference freshness < 24h - Documentation coverage > 95% - ADR completeness > 90% - Test coverage > 85% 3. **Analysis Quality** - Pattern detection accuracy > 90% - Architecture analysis precision > 85% - Security issue detection > 95% - Performance insight accuracy > 85% 4. **ADR Quality** - Context completeness > 95% - Decision clarity > 90% - Implementation tracking > 85% - Evolution management > 90% 5. **Debug Effectiveness** - Issue resolution time < 24h - Solution accuracy > 90% - Pattern learning rate > 85% - Knowledge reuse > 80% ## Next Steps 1. Toggle to Act mode to begin implementation 2. Start with core infrastructure 3. Implement documentation system 4. Add LLM & ADR integration 5. Build debug & analysis components 6. Test and refine each component 7. Gather feedback and improve ``` -------------------------------------------------------------------------------- /docs/api.md: -------------------------------------------------------------------------------- ```markdown ## Task Management API The Task Management API provides endpoints for creating, listing, and retrieving information about asynchronous tasks. ### Create Task **Endpoint:** `POST /api/tasks/create` Create a new asynchronous task for processing. **Request Body:** ```json { "type": "code_analysis", "title": "Analyze Repository", "description": "Full code analysis of the repository", "context": { "repository_path": "/path/to/repo" }, "priority": "medium", "metadata": { "requested_by": "user123" } } ``` **Parameters:** - `type` (string, required): Type of task to create (e.g., `code_analysis`, `pattern_extraction`, `documentation`) - `title` (string, required): Title of the task - `description` (string, required): Description of what the task will do - `context` (object, required): Context data for the task, varies based on task type - `priority` (string, optional): Task priority (`low`, `medium`, `high`, `critical`), defaults to `medium` - `metadata` (object, optional): Additional metadata for the task **Response:** ```json { "id": "123e4567-e89b-12d3-a456-426614174000", "type": "code_analysis", "title": "Analyze Repository", "description": "Full code analysis of the repository", "status": "pending", "priority": "medium", "context": { "repository_path": "/path/to/repo" }, "result": null, "error": null, "created_at": "2023-07-10T14:30:00.123456", "updated_at": "2023-07-10T14:30:00.123456", "completed_at": null, "metadata": { "requested_by": "user123" } } ``` ### List Tasks **Endpoint:** `GET /api/tasks` List all tasks with optional filtering. 
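For example, a filtered listing request might look like the following; the host and port (`localhost:8000`) are placeholders for your deployment:

```bash
curl "http://localhost:8000/api/tasks?type=code_analysis&status=completed&limit=10"
```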
**Query Parameters:** - `type` (string, optional): Filter tasks by type - `status` (string, optional): Filter tasks by status (`pending`, `in_progress`, `completed`, `failed`, `cancelled`) - `priority` (string, optional): Filter tasks by priority - `limit` (integer, optional): Maximum number of tasks to return, defaults to 20 **Response:** ```json [ { "id": "123e4567-e89b-12d3-a456-426614174000", "type": "code_analysis", "title": "Analyze Repository", "description": "Full code analysis of the repository", "status": "completed", "priority": "medium", "context": { "repository_path": "/path/to/repo" }, "result": { "files_analyzed": 150, "patterns_identified": 5, "complexity_score": 78 }, "error": null, "created_at": "2023-07-10T14:30:00.123456", "updated_at": "2023-07-10T14:35:20.123456", "completed_at": "2023-07-10T14:35:20.123456", "metadata": { "requested_by": "user123" } }, { "id": "223e4567-e89b-12d3-a456-426614174000", "type": "pattern_extraction", "title": "Extract Design Patterns", "description": "Identify design patterns in codebase", "status": "in_progress", "priority": "high", "context": { "repository_path": "/path/to/repo" }, "result": null, "error": null, "created_at": "2023-07-10T14:40:00.123456", "updated_at": "2023-07-10T14:40:30.123456", "completed_at": null, "metadata": { "requested_by": "user456" } } ] ``` ### Get Task by ID **Endpoint:** `GET /api/tasks/{task_id}` Get detailed information about a specific task. **Path Parameters:** - `task_id` (string, required): The unique identifier of the task **Response:** ```json { "id": "123e4567-e89b-12d3-a456-426614174000", "type": "code_analysis", "title": "Analyze Repository", "description": "Full code analysis of the repository", "status": "completed", "priority": "medium", "context": { "repository_path": "/path/to/repo" }, "result": { "files_analyzed": 150, "patterns_identified": 5, "complexity_score": 78 }, "error": null, "created_at": "2023-07-10T14:30:00.123456", "updated_at": "2023-07-10T14:35:20.123456", "completed_at": "2023-07-10T14:35:20.123456", "metadata": { "requested_by": "user123" } } ``` **Error Responses:** - `400 Bad Request`: Invalid task ID format - `404 Not Found`: Task not found - `500 Internal Server Error`: Server error while retrieving task ## Debug System API The Debug System API provides endpoints for creating, listing, and managing issues for debugging and tracking purposes. ### Create Debug Issue **Endpoint:** `POST /api/debug/issues` Create a new debug issue for tracking and analysis. 
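A minimal example request is sketched below; the host and port are placeholders, and `issue.json` is assumed to contain a body like the one documented next:

```bash
curl -X POST "http://localhost:8000/api/debug/issues" \
  -H "Content-Type: application/json" \
  -d @issue.json
```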
**Request Body:** ```json { "title": "Memory Leak in Data Processing", "type": "performance", "description": { "severity": "high", "steps_to_reproduce": ["Load large dataset", "Run processing function", "Wait 10 minutes"], "expected_behavior": "Memory usage should remain stable", "actual_behavior": "Memory usage increases continuously" } } ``` **Parameters:** - `title` (string, required): Title of the issue - `type` (string, required): Type of the issue - one of: `bug`, `performance`, `security`, `design`, `documentation`, `other` - `description` (object, required): Detailed description of the issue, structure depends on issue type **Response:** ```json { "id": "123e4567-e89b-12d3-a456-426614174000", "title": "Memory Leak in Data Processing", "type": "performance", "status": "open", "description": { "severity": "high", "steps_to_reproduce": ["Load large dataset", "Run processing function", "Wait 10 minutes"], "expected_behavior": "Memory usage should remain stable", "actual_behavior": "Memory usage increases continuously" }, "steps": null, "created_at": "2023-07-10T14:30:00.123456", "updated_at": "2023-07-10T14:30:00.123456", "resolved_at": null, "metadata": null } ``` ### List Debug Issues **Endpoint:** `GET /api/debug/issues` List all debug issues with optional filtering. **Query Parameters:** - `type` (string, optional): Filter issues by type - `status` (string, optional): Filter issues by status (`open`, `in_progress`, `resolved`, `closed`, `wont_fix`) **Response:** ```json [ { "id": "123e4567-e89b-12d3-a456-426614174000", "title": "Memory Leak in Data Processing", "type": "performance", "status": "open", "description": { "severity": "high", "steps_to_reproduce": ["Load large dataset", "Run processing function", "Wait 10 minutes"], "expected_behavior": "Memory usage should remain stable", "actual_behavior": "Memory usage increases continuously" }, "steps": [ { "type": "check", "name": "Profiling", "description": "Run performance profiling" }, { "type": "check", "name": "Resource Usage", "description": "Monitor CPU, memory, I/O" } ], "created_at": "2023-07-10T14:30:00.123456", "updated_at": "2023-07-10T14:35:00.123456", "resolved_at": null, "metadata": { "assigned_to": "developer1" } } ] ``` ### Get Debug Issue **Endpoint:** `GET /api/debug/issues/{issue_id}` Get detailed information about a specific debug issue. **Path Parameters:** - `issue_id` (string, required): The unique identifier of the issue **Response:** ```json { "id": "123e4567-e89b-12d3-a456-426614174000", "title": "Memory Leak in Data Processing", "type": "performance", "status": "open", "description": { "severity": "high", "steps_to_reproduce": ["Load large dataset", "Run processing function", "Wait 10 minutes"], "expected_behavior": "Memory usage should remain stable", "actual_behavior": "Memory usage increases continuously" }, "steps": [ { "type": "check", "name": "Profiling", "description": "Run performance profiling" }, { "type": "check", "name": "Resource Usage", "description": "Monitor CPU, memory, I/O" } ], "created_at": "2023-07-10T14:30:00.123456", "updated_at": "2023-07-10T14:35:00.123456", "resolved_at": null, "metadata": { "assigned_to": "developer1" } } ``` ### Update Debug Issue **Endpoint:** `PUT /api/debug/issues/{issue_id}` Update the status and metadata of a debug issue. 
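For example, to move an issue into progress (host, port, and issue ID are placeholders):

```bash
curl -X PUT "http://localhost:8000/api/debug/issues/123e4567-e89b-12d3-a456-426614174000" \
  -H "Content-Type: application/json" \
  -d '{"status": "in_progress", "metadata": {"assigned_to": "developer1"}}'
```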
**Path Parameters:** - `issue_id` (string, required): The unique identifier of the issue **Request Body:** ```json { "status": "in_progress", "metadata": { "assigned_to": "developer1", "priority": "high" } } ``` **Parameters:** - `status` (string, optional): New status for the issue - one of: `open`, `in_progress`, `resolved`, `closed`, `wont_fix` - `metadata` (object, optional): Updated metadata for the issue **Response:** Same as the Get Debug Issue response, with updated values. ### Analyze Debug Issue **Endpoint:** `POST /api/debug/issues/{issue_id}/analyze` Analyze a debug issue to generate recommended debugging steps based on the issue type. **Path Parameters:** - `issue_id` (string, required): The unique identifier of the issue **Response:** ```json [ { "type": "check", "name": "Profiling", "description": "Run performance profiling" }, { "type": "check", "name": "Resource Usage", "description": "Monitor CPU, memory, I/O" }, { "type": "check", "name": "Query Analysis", "description": "Review database queries" }, { "type": "check", "name": "Bottlenecks", "description": "Identify performance bottlenecks" } ] ``` **Error Responses:** - `400 Bad Request`: Invalid issue ID format - `404 Not Found`: Issue not found - `500 Internal Server Error`: Server error during analysis ``` -------------------------------------------------------------------------------- /.github/workflows/tdd-verification.yml: -------------------------------------------------------------------------------- ```yaml name: TDD Workflow Verification on: push: branches: [ dev, main ] pull_request: branches: [ dev, main ] workflow_dispatch: inputs: python_version: description: 'Python version to use for verification' required: false default: '3.11' jobs: tdd-verify: runs-on: ubuntu-latest strategy: matrix: python-version: ["${{ github.event.inputs.python_version || '3.11' }}"] fail-fast: false name: TDD Verification with Python ${{ matrix.python-version }} environment: name: development url: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} services: qdrant: image: qdrant/qdrant:v1.13.6 ports: - 6333:6333 - 6334:6334 steps: - name: Checkout code uses: actions/checkout@v4 with: fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} uses: actions/[email protected] with: python-version: ${{ matrix.python-version }} cache: 'pip' - name: Wait for Qdrant and verify connection run: | echo "Waiting for Qdrant to start..." chmod +x scripts/check_qdrant_health.sh ./scripts/check_qdrant_health.sh "http://localhost:6333" 20 5 - name: Install dependencies run: | python -m pip install --upgrade pip setuptools wheel \ && pip install -r requirements.txt -r requirements-dev.txt \ && pip install pytest-cov pytest-mock pytest-asyncio factory_boy \ && pip install -e . - name: Set up environment run: | # Create required directories mkdir -p logs knowledge cache { echo "QDRANT_URL=http://localhost:6333" echo "MCP_QDRANT_URL=http://localhost:6333" echo "COLLECTION_NAME=mcp-codebase-insight-tdd-${{ github.run_id }}" echo "MCP_COLLECTION_NAME=mcp-codebase-insight-tdd-${{ github.run_id }}" echo "EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2" echo "PYTHON_VERSION=${{ matrix.python-version }}" } >> "$GITHUB_ENV" - name: Initialize Qdrant collection run: | echo "Creating Qdrant collection for testing..." 
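          # The inline Python below (re)creates the test collection with
          # 384-dimensional cosine vectors, matching the
          # sentence-transformers/all-MiniLM-L6-v2 embedding model used in CI.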
python - <<-'EOF' import os from qdrant_client import QdrantClient from qdrant_client.http import models # Connect to Qdrant client = QdrantClient(url="http://localhost:6333") collection_name = os.environ.get("COLLECTION_NAME", "mcp-codebase-insight-tdd-${{ github.run_id }}") # Check if collection exists collections = client.get_collections().collections collection_names = [c.name for c in collections] if collection_name in collection_names: print(f"Collection {collection_name} already exists, recreating it...") client.delete_collection(collection_name=collection_name) # Create collection with vector size 384 (for all-MiniLM-L6-v2) client.create_collection( collection_name=collection_name, vectors_config=models.VectorParams( size=384, # Dimension for all-MiniLM-L6-v2 distance=models.Distance.COSINE, ), ) print(f"Successfully created collection {collection_name}") EOF - name: Run unit tests run: | echo "Running unit tests with coverage..." python -m pytest tests/components -v -p pytest_asyncio --cov=src --cov-report=xml:coverage-unit.xml --cov-report=term - name: Run integration tests run: | echo "Running integration tests with coverage..." python -m pytest tests/integration -v -p pytest_asyncio --cov=src --cov-report=xml:coverage-integration.xml --cov-report=term - name: Generate full coverage report run: | echo "Generating combined coverage report..." python -m coverage combine coverage-*.xml python -m coverage report python -m coverage xml - name: TDD Verification run: | echo "Performing TDD verification checks..." # Check if tests exist for all modules python - <<-'EOF' import os import sys from pathlib import Path src_dir = Path("src/mcp_codebase_insight") test_dir = Path("tests") # Get all Python modules in src modules = [f for f in src_dir.glob("**/*.py") if "__pycache__" not in str(f)] modules = [str(m.relative_to("src")).replace(".py", "").replace("/", ".") for m in modules] modules = [m for m in modules if not m.endswith("__init__")] # Check for corresponding test files missing_tests = [] for module in modules: module_parts = module.split(".") if len(module_parts) > 2: # Skip __init__ files module_path = "/".join(module_parts[1:]) test_file = test_dir / f"test_{module_path}.py" component_test = test_dir / "components" / f"test_{module_parts[-1]}.py" if not test_file.exists() and not component_test.exists(): missing_tests.append(module) if missing_tests: print("Warning: The following modules don't have corresponding test files:") for m in missing_tests: print(f" - {m}") else: print("All modules have corresponding test files.") EOF # Check test coverage threshold coverage_threshold=40 coverage_result=$(python -m coverage report | grep TOTAL | awk '{print $4}' | sed 's/%//') echo "Current test coverage: ${coverage_result}%" echo "Required minimum coverage: ${coverage_threshold}%" if (( $(echo "$coverage_result < $coverage_threshold" | bc -l) )); then echo "Error: Test coverage is below the required threshold of ${coverage_threshold}%" exit 1 else echo "Test coverage meets the required threshold." fi - name: Upload coverage to Codecov uses: codecov/[email protected] with: files: ./coverage.xml name: codecov-tdd fail_ci_if_error: false - name: Check test structure run: | echo "Validating test structure..." 
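          # The heuristic below treats an assignment as "arrange", a function call
          # as "act", and the literal assert keyword as "assert"; tests missing any
          # of the three are reported as possible violations (warnings only).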
# Check for arrange-act-assert pattern in tests python - <<-'EOF' import os import re from pathlib import Path test_files = list(Path("tests").glob("**/*.py")) violations = [] for test_file in test_files: if test_file.name.startswith("test_") and not test_file.name.startswith("conftest"): with open(test_file, "r") as f: content = f.read() # Check for test functions test_funcs = re.findall(r"def (test_[a-zA-Z0-9_]+)", content) for func in test_funcs: # Extract function body pattern = rf"def {func}.*?:(.*?)(?=\n\S|\Z)" matches = re.search(pattern, content, re.DOTALL) if matches: func_body = matches.group(1) # Simple heuristic for arrange-act-assert if not ( # Look for arranging variables and mocks re.search(r"= [^=]+", func_body) and # Look for function calls (actions) re.search(r"\w+\([^)]*\)", func_body) and # Look for assertions ("assert" in func_body) ): violations.append(f"{test_file}::{func}") if violations: print("Warning: The following tests might not follow the arrange-act-assert pattern:") for v in violations[:10]: # Show first 10 violations print(f" - {v}") if len(violations) > 10: print(f" ... and {len(violations) - 10} more") else: print("All tests appear to follow the arrange-act-assert pattern.") EOF - name: TDD Workflow Summary run: | echo "## TDD Workflow Summary" >> "$GITHUB_STEP_SUMMARY" echo "✅ TDD verification completed" >> "$GITHUB_STEP_SUMMARY" # Add coverage information coverage_result=$(python -m coverage report | grep TOTAL | awk '{print $4}') echo "- Test coverage: ${coverage_result}" >> "$GITHUB_STEP_SUMMARY" # Add test counts unit_tests=$(python -m pytest tests/components --collect-only -q | wc -l) integration_tests=$(python -m pytest tests/integration --collect-only -q | wc -l) echo "- Unit tests: ${unit_tests}" >> "$GITHUB_STEP_SUMMARY" echo "- Integration tests: ${integration_tests}" >> "$GITHUB_STEP_SUMMARY" ```