This is page 1 of 6. Use http://codebase.md/tosin2013/mcp-codebase-insight?page={x} to view the full context. # Directory Structure ``` ├── .bumpversion.cfg ├── .codecov.yml ├── .compile-venv-py3.11 │ ├── bin │ │ ├── activate │ │ ├── activate.csh │ │ ├── activate.fish │ │ ├── Activate.ps1 │ │ ├── coverage │ │ ├── coverage-3.11 │ │ ├── coverage3 │ │ ├── pip │ │ ├── pip-compile │ │ ├── pip-sync │ │ ├── pip3 │ │ ├── pip3.11 │ │ ├── py.test │ │ ├── pyproject-build │ │ ├── pytest │ │ ├── python │ │ ├── python3 │ │ ├── python3.11 │ │ └── wheel │ └── pyvenv.cfg ├── .env.example ├── .github │ └── workflows │ ├── build-verification.yml │ ├── publish.yml │ └── tdd-verification.yml ├── .gitignore ├── async_fixture_wrapper.py ├── CHANGELOG.md ├── CLAUDE.md ├── codebase_structure.txt ├── component_test_runner.py ├── CONTRIBUTING.md ├── core_workflows.txt ├── debug_tests.md ├── Dockerfile ├── docs │ ├── adrs │ │ └── 001_use_docker_for_qdrant.md │ ├── api.md │ ├── components │ │ └── README.md │ ├── cookbook.md │ ├── development │ │ ├── CODE_OF_CONDUCT.md │ │ ├── CONTRIBUTING.md │ │ └── README.md │ ├── documentation_map.md │ ├── documentation_summary.md │ ├── features │ │ ├── adr-management.md │ │ ├── code-analysis.md │ │ └── documentation.md │ ├── getting-started │ │ ├── configuration.md │ │ ├── docker-setup.md │ │ ├── installation.md │ │ ├── qdrant_setup.md │ │ └── quickstart.md │ ├── qdrant_setup.md │ ├── README.md │ ├── SSE_INTEGRATION.md │ ├── system_architecture │ │ └── README.md │ ├── templates │ │ └── adr.md │ ├── testing_guide.md │ ├── troubleshooting │ │ ├── common-issues.md │ │ └── faq.md │ ├── vector_store_best_practices.md │ └── workflows │ └── README.md ├── error_logs.txt ├── examples │ └── use_with_claude.py ├── github-actions-documentation.md ├── Makefile ├── module_summaries │ ├── backend_summary.txt │ ├── database_summary.txt │ └── frontend_summary.txt ├── output.txt ├── package-lock.json ├── package.json ├── PLAN.md ├── prepare_codebase.sh ├── PULL_REQUEST.md 
├── pyproject.toml ├── pytest.ini ├── README.md ├── requirements-3.11.txt ├── requirements-3.11.txt.backup ├── requirements-dev.txt ├── requirements.in ├── requirements.txt ├── run_build_verification.sh ├── run_fixed_tests.sh ├── run_test_with_path_fix.sh ├── run_tests.py ├── scripts │ ├── check_qdrant_health.sh │ ├── compile_requirements.sh │ ├── load_example_patterns.py │ ├── macos_install.sh │ ├── README.md │ ├── setup_qdrant.sh │ ├── start_mcp_server.sh │ ├── store_code_relationships.py │ ├── store_report_in_mcp.py │ ├── validate_knowledge_base.py │ ├── validate_poc.py │ ├── validate_vector_store.py │ └── verify_build.py ├── server.py ├── setup_qdrant_collection.py ├── setup.py ├── src │ └── mcp_codebase_insight │ ├── __init__.py │ ├── __main__.py │ ├── asgi.py │ ├── core │ │ ├── __init__.py │ │ ├── adr.py │ │ ├── cache.py │ │ ├── component_status.py │ │ ├── config.py │ │ ├── debug.py │ │ ├── di.py │ │ ├── documentation.py │ │ ├── embeddings.py │ │ ├── errors.py │ │ ├── health.py │ │ ├── knowledge.py │ │ ├── metrics.py │ │ ├── prompts.py │ │ ├── sse.py │ │ ├── state.py │ │ ├── task_tracker.py │ │ ├── tasks.py │ │ └── vector_store.py │ ├── models.py │ ├── server_test_isolation.py │ ├── server.py │ ├── utils │ │ ├── __init__.py │ │ └── logger.py │ └── version.py ├── start-mcpserver.sh ├── summary_document.txt ├── system-architecture.md ├── system-card.yml ├── test_fix_helper.py ├── test_fixes.md ├── test_function.txt ├── test_imports.py ├── tests │ ├── components │ │ ├── conftest.py │ │ ├── test_core_components.py │ │ ├── test_embeddings.py │ │ ├── test_knowledge_base.py │ │ ├── test_sse_components.py │ │ ├── test_stdio_components.py │ │ ├── test_task_manager.py │ │ └── test_vector_store.py │ ├── config │ │ └── test_config_and_env.py │ ├── conftest.py │ ├── integration │ │ ├── fixed_test2.py │ │ ├── test_api_endpoints.py │ │ ├── test_api_endpoints.py-e │ │ ├── test_communication_integration.py │ │ └── test_server.py │ ├── README.md │ ├── README.test.md │ ├── 
test_build_verifier.py │ └── test_file_relationships.py └── trajectories └── tosinakinosho ├── anthropic_filemap__claude-3-sonnet-20240229__t-0.00__p-1.00__c-3.00___db62b9 │ └── db62b9 │ └── config.yaml ├── default__claude-3-5-sonnet-20240620__t-0.00__p-1.00__c-3.00___03565e │ └── 03565e │ ├── 03565e.traj │ └── config.yaml └── default__openrouter └── anthropic └── claude-3.5-sonnet-20240620:beta__t-0.00__p-1.00__c-3.00___03565e └── 03565e ├── 03565e.pred ├── 03565e.traj └── config.yaml ``` # Files -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- ```yaml codecov: require_ci_to_pass: yes notify: wait_for_ci: yes coverage: precision: 2 round: down range: "70...100" status: project: default: target: 80% threshold: 2% base: auto if_ci_failed: error informational: false only_pulls: false patch: default: target: 80% threshold: 2% base: auto if_ci_failed: error informational: false only_pulls: false parsers: gcov: branch_detection: conditional: yes loop: yes method: no macro: no comment: layout: "reach,diff,flags,files,footer" behavior: default require_changes: false require_base: no require_head: yes branches: - main ignore: - "tests/**/*" - "setup.py" - "docs/**/*" - "examples/**/*" - "scripts/**/*" - "**/version.py" - "**/__init__.py" ``` -------------------------------------------------------------------------------- /.bumpversion.cfg: -------------------------------------------------------------------------------- ``` [bumpversion] current_version = 0.1.0 commit = True tag = True parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)((?P<release>[a-z]+)(?P<build>\d+))? 
serialize = {major}.{minor}.{patch}{release}{build} {major}.{minor}.{patch} [bumpversion:part:release] optional_value = prod first_value = dev values = dev prod [bumpversion:part:build] first_value = 1 [bumpversion:file:pyproject.toml] search = version = "{current_version}" replace = version = "{new_version}" [bumpversion:file:src/mcp_codebase_insight/version.py] search = __version__ = "{current_version}" replace = __version__ = "{new_version}" [bumpversion:file:src/mcp_codebase_insight/version.py] search = VERSION_MAJOR = {current_version.split(".")[0]} replace = VERSION_MAJOR = {new_version.split(".")[0]} [bumpversion:file:src/mcp_codebase_insight/version.py] search = VERSION_MINOR = {current_version.split(".")[1]} replace = VERSION_MINOR = {new_version.split(".")[1]} [bumpversion:file:src/mcp_codebase_insight/version.py] search = VERSION_PATCH = {current_version.split(".")[2]} replace = VERSION_PATCH = {new_version.split(".")[2]} ``` -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- ``` # Server configuration MCP_HOST=127.0.0.1 MCP_PORT=3000 MCP_LOG_LEVEL=INFO MCP_DEBUG=false # Qdrant configuration QDRANT_URL=http://localhost:6333 QDRANT_API_KEY=your-qdrant-api-key-here # Directory configuration MCP_DOCS_CACHE_DIR=docs MCP_ADR_DIR=docs/adrs MCP_KB_STORAGE_DIR=knowledge MCP_DISK_CACHE_DIR=cache # Model configuration MCP_EMBEDDING_MODEL=all-MiniLM-L6-v2 MCP_COLLECTION_NAME=codebase_patterns # Feature flags MCP_METRICS_ENABLED=true MCP_CACHE_ENABLED=true MCP_MEMORY_CACHE_SIZE=1000 # Optional: Authentication (if needed) # MCP_AUTH_ENABLED=false # MCP_AUTH_SECRET_KEY=your-secret-key # MCP_AUTH_TOKEN_EXPIRY=3600 # Optional: Rate limiting (if needed) # MCP_RATE_LIMIT_ENABLED=false # MCP_RATE_LIMIT_REQUESTS=100 # MCP_RATE_LIMIT_WINDOW=60 # Optional: SSL/TLS configuration (if needed) # MCP_SSL_ENABLED=false # MCP_SSL_CERT_FILE=path/to/cert.pem # 
MCP_SSL_KEY_FILE=path/to/key.pem # Optional: Proxy configuration (if needed) # MCP_PROXY_URL=http://proxy:8080 # MCP_NO_PROXY=localhost,127.0.0.1 # Optional: External services (if needed) # MCP_GITHUB_TOKEN=your-github-token # MCP_JIRA_URL=https://your-jira-instance # MCP_JIRA_TOKEN=your-jira-token # Optional: Monitoring (if needed) # MCP_SENTRY_DSN=your-sentry-dsn # MCP_DATADOG_API_KEY=your-datadog-api-key # MCP_PROMETHEUS_ENABLED=false # Test Configuration # These variables are used when running tests MCP_TEST_MODE=1 MCP_TEST_QDRANT_URL=http://localhost:6333 MCP_TEST_COLLECTION_NAME=test_collection MCP_TEST_EMBEDDING_MODEL=all-MiniLM-L6-v2 # Event Loop Debug Mode # Uncomment to enable asyncio debug mode for testing # PYTHONASYNCIODEBUG=1 ``` -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` # Python __pycache__/ *.py[cod] *$py.class *.so .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # Virtual Environment .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # IDE .idea/ .vscode/ *.swp *.swo *~ .project .pydevproject .settings/ # Testing .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ htmlcov/ # Documentation docs/_build/ docs/api/ # Project specific docs/adrs/* !docs/adrs/001_use_docker_for_qdrant.md !docs/adrs/README.md knowledge/* !knowledge/README.md cache/* !cache/README.md logs/* !logs/README.md .test_cache/ test_knowledge/ build_output.txt testreport.txt test_env/ codebase_stats.txt dependency_map.txt vector_relationship_graph.* verification-config.json *.dot *.json.tmp # Jupyter Notebook .ipynb_checkpoints # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg # Installer logs pip-log.txt 
pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # macOS .DS_Store .AppleDouble .LSOverride Icon ._* .DocumentRevisions-V100 .fseventsd .Spotlight-V100 .TemporaryItems .Trashes .VolumeIcon.icns .com.apple.timemachine.donotpresent # Windows Thumbs.db ehthumbs.db Desktop.ini $RECYCLE.BIN/ *.cab *.msi *.msm *.msp *.lnk # Linux *~ .fuse_hidden* .directory .Trash-* .nfs* # Project specific .env .env.* !.env.example *.log logs/ cache/ knowledge/ docs/adrs/* !docs/adrs/001_use_docker_for_qdrant.md # Documentation and ADRs (temporary private) docs/adrs/ docs/private/ docs/internal/ # Cache and Temporary Files cache/ .cache/ tmp/ temp/ *.tmp *.bak *.log # Sensitive Configuration .env* !.env.example *.key *.pem *.crt secrets/ private/ # Vector Database qdrant_storage/ # Knowledge Base (private for now) knowledge/patterns/ knowledge/tasks/ knowledge/private/ # Build and Distribution dist/ build/ *.pyc *.pyo *.pyd .Python *.so # Misc .DS_Store Thumbs.db *.swp *.swo *~ # Project Specific mcp.json .cursor/rules/ module_summaries/ logs/ references/private/ prompts/ # Ignore Qdrant data storage directory qdrant_data/ .aider* ``` 
-------------------------------------------------------------------------------- /tests/README.test.md: -------------------------------------------------------------------------------- ```python import pytest from pathlib import Path @pytest.fixture def readme_content(): readme_path = Path(__file__).parent / "README.md" with open(readme_path, "r") as f: return f.read() ```
**Usage**: ```bash ./check_qdrant_health.sh [qdrant_url] [max_retries] [sleep_seconds] ``` **Parameters**: - `qdrant_url` - URL of the Qdrant service (default: "http://localhost:6333") - `max_retries` - Maximum number of retry attempts (default: 20) - `sleep_seconds` - Seconds to wait between retries (default: 5) **Example**: ```bash ./check_qdrant_health.sh "http://localhost:6333" 30 2 ``` > Note: This script uses `apt-get` and may require `sudo` privileges on Linux systems. Ensure `curl` and `jq` are pre-installed or run with proper permissions. **Exit Codes**: - 0: Qdrant service is accessible and healthy - 1: Qdrant service is not accessible or not healthy ### compile_requirements.sh **Purpose**: Compiles and generates version-specific requirements files for different Python versions. **Usage**: ```bash ./compile_requirements.sh <python-version> ``` **Example**: ```bash ./compile_requirements.sh 3.11 ``` ### load_example_patterns.py **Purpose**: Loads example patterns and ADRs into the knowledge base for demonstration or testing. **Usage**: ```bash python load_example_patterns.py [--help] ``` ### verify_build.py **Purpose**: Verifies the build status and generates a build verification report. **Usage**: ```bash python verify_build.py [--config <file>] [--output <report-file>] ``` ## Usage in GitHub Actions These scripts are used in our GitHub Actions workflows to automate and standardize common tasks. For example, `check_qdrant_health.sh` is used in both the build verification and TDD verification workflows to ensure the Qdrant service is available before running tests. ## Adding New Scripts When adding new scripts to this directory: 1. Make them executable: `chmod +x scripts/your_script.sh` 2. Include a header comment explaining the purpose and usage 3. Add error handling and sensible defaults 4. Update this README with information about the script 5. 
Use parameter validation and help text when appropriate ``` -------------------------------------------------------------------------------- /docs/development/README.md: -------------------------------------------------------------------------------- ```markdown # Development Guide > 🚧 **Documentation In Progress** > > This documentation is being actively developed. More details will be added soon. ## Overview This guide covers development setup, contribution guidelines, and best practices for the MCP Codebase Insight project. ## Development Setup 1. **Clone Repository** ```bash git clone https://github.com/modelcontextprotocol/mcp-codebase-insight cd mcp-codebase-insight ``` 2. **Create Virtual Environment** ```bash python -m venv venv source venv/bin/activate # On Windows: venv\Scripts\activate ``` 3. **Install Development Dependencies** ```bash pip install -e ".[dev]" ``` 4. **Setup Pre-commit Hooks** ```bash pre-commit install ``` ## Project Structure ``` mcp-codebase-insight/ ├── src/ │ └── mcp_codebase_insight/ │ ├── analysis/ # Code analysis modules │ ├── documentation/ # Documentation management │ ├── kb/ # Knowledge base operations │ └── server/ # FastAPI server ├── tests/ │ ├── integration/ # Integration tests │ └── unit/ # Unit tests ├── docs/ # Documentation └── examples/ # Example usage ``` ## Testing ```bash # Run unit tests pytest tests/unit # Run integration tests pytest tests/integration # Run with coverage pytest --cov=src tests/ ``` ## Code Style - Follow PEP 8 - Use type hints - Document functions and classes - Keep functions focused and small - Write tests for new features ## Git Workflow 1. Create feature branch 2. Make changes 3. Run tests 4. 
Submit pull request ## Documentation - Update docs for new features - Include docstrings - Add examples when relevant ## Debugging ### Server Debugging ```python import debugpy debugpy.listen(("0.0.0.0", 5678)) debugpy.wait_for_client() ``` ### VSCode Launch Configuration ```json { "version": "0.2.0", "configurations": [ { "name": "Python: Remote Attach", "type": "python", "request": "attach", "port": 5678, "host": "localhost" } ] } ``` ## Performance Profiling ```bash python -m cProfile -o profile.stats your_script.py python -m snakeviz profile.stats ``` ## Next Steps - [Contributing Guidelines](CONTRIBUTING.md) - [Code of Conduct](CODE_OF_CONDUCT.md) - [API Reference](../api/rest-api.md) ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown # MCP Codebase Insight - WIP > 🚧 **Development in Progress** > > This project is actively under development. Features and documentation are being continuously updated. ## Overview MCP Codebase Insight is a system for analyzing and understanding codebases through semantic analysis, pattern detection, and documentation management. ## Current Development Status ### Completed Features - ✅ Core Vector Store System - ✅ Basic Knowledge Base - ✅ SSE Integration - ✅ Testing Framework - ✅ TDD and Debugging Framework (rules_template integration) ### In Progress - 🔄 Documentation Management System - 🔄 Advanced Pattern Detection - 🔄 Performance Optimization - 🔄 Integration Testing - 🔄 Debugging Utilities Enhancement ### Planned - 📋 Extended API Documentation - 📋 Custom Pattern Plugins - 📋 Advanced Caching Strategies - 📋 Deployment Guides - 📋 Comprehensive Error Tracking System ## Quick Start 1. **Installation** ```bash pip install mcp-codebase-insight ``` 2. 
**Basic Usage** ```python from mcp_codebase_insight import CodebaseAnalyzer analyzer = CodebaseAnalyzer() results = analyzer.analyze_code("path/to/code") ``` 3. **Running Tests** ```bash # Run all tests pytest tests/ # Run unit tests pytest tests/unit/ # Run component tests pytest tests/components/ # Run tests with coverage pytest tests/ --cov=src --cov-report=term-missing ``` 4. **Debugging Utilities** ```python from mcp_codebase_insight.utils.debug_utils import debug_trace, DebugContext, get_error_tracker # Use debug trace decorator @debug_trace def my_function(): ... # Implementation # Use debug context with DebugContext("operation_name"): ... # Code to debug # Track errors try: ... # Risky operation except Exception as e: error_id = get_error_tracker().record_error(e, context={"operation": "description"}) print(f"Error recorded with ID: {error_id}") ``` ## Testing and Debugging ### Test-Driven Development This project follows Test-Driven Development (TDD) principles: 1. Write a failing test first (Red) 2. Write minimal code to make the test pass (Green) 3. Refactor for clean code while keeping tests passing (Refactor) Our TDD documentation can be found in [docs/tdd/workflow.md](docs/tdd/workflow.md). ### Debugging Framework We use Agans' 9 Rules of Debugging: 1. Understand the System 2. Make It Fail 3. Quit Thinking and Look 4. Divide and Conquer 5. Change One Thing at a Time 6. Keep an Audit Trail 7. Check the Plug 8. Get a Fresh View 9. If You Didn't Fix It, It Isn't Fixed Learn more about our debugging approach in [docs/debuggers/agans_9_rules.md](docs/debuggers/agans_9_rules.md). ## Documentation - [System Architecture](docs/system_architecture/README.md) - [Core Components](docs/components/README.md) - [API Reference](docs/api/README.md) - [Development Guide](docs/development/README.md) - [Workflows](docs/workflows/README.md) - [TDD Workflow](docs/tdd/workflow.md) - [Debugging Practices](docs/debuggers/best_practices.md) ## Contributing We welcome contributions!
Please see our [Contributing Guide](CONTRIBUTING.md) for details. ## License This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. ## Support - [Issue Tracker](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues) - [Discussions](https://github.com/modelcontextprotocol/mcp-codebase-insight/discussions) ``` -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- ```markdown # Test Structure This directory contains the test suite for the MCP Codebase Insight project. The tests are organized into the following structure: ## Directory Structure ``` tests/ ├── components/ # Component-level tests │ ├── test_vector_store.py │ ├── test_knowledge_base.py │ ├── test_task_manager.py │ └── ... ├── integration/ # Integration and API tests │ ├── test_api_endpoints.py │ └── test_server.py ├── config/ # Configuration tests │ └── test_config_and_env.py ├── conftest.py # Shared test fixtures └── README.md # This file ``` ## Test Categories 1. **Component Tests** (`components/`) - Unit tests for individual components - Tests component initialization, methods, and cleanup - Isolated from other components where possible 2. **Integration Tests** (`integration/`) - Tests for API endpoints - Server lifecycle tests - Component interaction tests 3. 
**Configuration Tests** (`config/`) - Environment variable handling - Configuration file parsing - Directory setup and permissions ## API Test Coverage The following API endpoints are tested in the integration tests: | Endpoint | Test Status | Test File | |----------|-------------|-----------| | `/health` | ✅ Tested | `test_api_endpoints.py` | | `/api/vector-store/search` | ✅ Tested | `test_api_endpoints.py` | | `/api/docs/adrs` | ✅ Tested | `test_api_endpoints.py` | | `/api/docs/adrs/{adr_id}` | ✅ Tested | `test_api_endpoints.py` | | `/api/docs/patterns` | ✅ Tested | `test_api_endpoints.py` | | `/api/docs/patterns/{pattern_id}` | ✅ Tested | `test_api_endpoints.py` | | `/api/analyze` | ✅ Tested | `test_api_endpoints.py` | | `/api/tasks/create` | ✅ Tested | `test_api_endpoints.py` | | `/api/tasks` | ✅ Tested | `test_api_endpoints.py` | | `/api/tasks/{task_id}` | ✅ Tested | `test_api_endpoints.py` | | `/api/debug/issues` | ✅ Tested | `test_api_endpoints.py` | | `/api/debug/issues/{issue_id}` | ✅ Tested | `test_api_endpoints.py` | | `/api/debug/issues/{issue_id}/analyze` | ✅ Tested | `test_api_endpoints.py` | | `/tools/*` | ✅ Tested | `test_api_endpoints.py` | Each test verifies: - Successful responses with valid input - Error handling with invalid input - Response structure and content validation - Edge cases where applicable ## Running Tests To run all tests: ```bash python -m pytest tests/ ``` To run specific test categories: ```bash # Run component tests python -m pytest tests/components/ # Run integration tests python -m pytest tests/integration/ # Run config tests python -m pytest tests/config/ # Run API endpoint tests only python -m pytest tests/integration/test_api_endpoints.py # Run tests for a specific API endpoint python -m pytest tests/integration/test_api_endpoints.py::test_health_check ``` ## Test Fixtures Shared test fixtures are defined in `conftest.py` and include: - `temp_dir`: Temporary directory for test files - `test_config`: Server configuration 
for testing - `embedder`: Sentence transformer embedder - `vector_store`: Vector store instance - `test_server`: Server instance for testing - `test_client`: FastAPI test client - `test_code`: Sample code for testing - `test_adr`: Sample ADR data - `env_vars`: Environment variables for testing ## Writing New Tests 1. Place new tests in the appropriate directory based on what they're testing 2. Use the shared fixtures from `conftest.py` 3. Follow the existing patterns for async tests and cleanup 4. Add proper docstrings and comments 5. Ensure proper cleanup in fixtures that create resources ## Test Dependencies The test suite has the following dependencies: - pytest - pytest-asyncio - httpx - fastapi - sentence-transformers Make sure these are installed before running tests. ``` -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- ```markdown # MCP Codebase Insight Documentation Welcome to the MCP Codebase Insight documentation. This directory contains detailed information about installation, configuration, usage, and development of the MCP Codebase Insight tool. 
## Documentation Structure ### Getting Started - [Installation Guide](getting-started/installation.md) - Complete installation instructions - [Configuration Guide](getting-started/configuration.md) - Configuration options and environment setup - [Quick Start Tutorial](getting-started/quickstart.md) - Get up and running quickly - [Qdrant Setup](getting-started/qdrant_setup.md) - Vector database setup and configuration ### Core Features - [Code Analysis](features/code-analysis.md) - Understanding code patterns and insights - [ADR Management](features/adr-management.md) - Managing architectural decisions - [Documentation Management](features/documentation.md) - Auto-generation and maintenance - [Knowledge Base](features/knowledge-base.md) - Pattern storage and retrieval - [Debug System](features/debug-system.md) - Intelligent debugging assistance - [Build Verification](features/build-verification.md) - Automated build checks ### API Reference - [REST API](api/rest-api.md) - Complete API endpoint documentation - [SSE Integration](SSE_INTEGRATION.md) - Server-Sent Events integration guide - [Vector Store API](api/vector-store-api.md) - Vector database interaction - [Client Libraries](api/client-libraries.md) - Available client SDKs ### Development - [Contributing Guide](development/contributing.md) - How to contribute to the project - [Architecture Overview](development/architecture.md) - System architecture and design - [Testing Guide](testing_guide.md) - Writing and running tests - [Best Practices](development/best-practices.md) - Coding standards and guidelines ### Deployment - [Production Deployment](deployment/production.md) - Production setup guide - [Docker Deployment](deployment/docker.md) - Container-based deployment - [Scaling Guide](deployment/scaling.md) - Handling increased load - [Monitoring](deployment/monitoring.md) - System monitoring and alerts ### Troubleshooting - [Common Issues](troubleshooting/common-issues.md) - Frequently encountered problems - 
[FAQ](troubleshooting/faq.md) - Frequently asked questions - [Debug Guide](troubleshooting/debug-guide.md) - Advanced debugging techniques - [Support](troubleshooting/support.md) - Getting help and support ## Quick Links - [GitHub Repository](https://github.com/modelcontextprotocol/mcp-codebase-insight) - [Issue Tracker](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues) - [Discussions](https://github.com/modelcontextprotocol/mcp-codebase-insight/discussions) - [Release Notes](CHANGELOG.md) - [License](../LICENSE) ## Contributing to Documentation We welcome contributions to improve this documentation. Please see our [Contributing Guide](development/contributing.md) for details on: - Documentation style guide - How to submit documentation changes - Documentation testing - Building documentation locally ## Documentation Versions This documentation corresponds to the latest stable release of MCP Codebase Insight. For other versions: - [Latest Development](https://github.com/modelcontextprotocol/mcp-codebase-insight/tree/main/docs) - [Version History](https://github.com/modelcontextprotocol/mcp-codebase-insight/releases) ## Support If you need help or have questions: 1. Check the [FAQ](troubleshooting/faq.md) and [Common Issues](troubleshooting/common-issues.md) 2. Search existing [GitHub Issues](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues) 3. Join our [Discussion Forum](https://github.com/modelcontextprotocol/mcp-codebase-insight/discussions) 4. Open a new issue if needed ``` -------------------------------------------------------------------------------- /docs/system_architecture/README.md: -------------------------------------------------------------------------------- ```markdown # System Architecture > 🚧 **Documentation In Progress** > > This documentation is being actively developed. More details will be added soon. 
## Overview This document provides a comprehensive overview of the MCP Codebase Insight system architecture. For detailed workflow information, please see the [Workflows Documentation](../workflows/README.md). ## Architecture Components ### Core Systems - Vector Store System - Knowledge Base - Task Management - Health Monitoring - Error Handling - Metrics Collection - Cache Management ### Documentation - ADR Management - Documentation Tools - API Documentation ### Testing - Test Framework - SSE Testing - Integration Testing ## Detailed Documentation - [Core Components](../components/README.md) - [API Reference](../api/README.md) - [Development Guide](../development/README.md) ## System Overview This document provides a comprehensive overview of the MCP Codebase Insight system architecture, focusing on system interactions, dependencies, and design considerations. ## Core Systems ### 1. Vector Store System (`src/mcp_codebase_insight/core/vector_store.py`) - **Purpose**: Manages code embeddings and semantic search capabilities - **Key Components**: - Qdrant integration for vector storage - Embedding generation and management - Search optimization and caching - **Integration Points**: - Knowledge Base for semantic understanding - Cache Management for performance optimization - Health Monitoring for system status ### 2. Knowledge Base (`src/mcp_codebase_insight/core/knowledge.py`) - **Purpose**: Central repository for code insights and relationships - **Key Components**: - Pattern detection and storage - Relationship mapping - Semantic analysis - **Feedback Loops**: - Updates vector store with new patterns - Receives feedback from code analysis - Improves pattern detection over time ### 3. 
Task Management (`src/mcp_codebase_insight/core/tasks.py`) - **Purpose**: Handles async operations and job scheduling - **Key Components**: - Task scheduling and prioritization - Progress tracking - Resource management - **Bottleneck Mitigation**: - Task queuing strategies - Resource allocation - Error recovery ### 4. Health Monitoring (`src/mcp_codebase_insight/core/health.py`) - **Purpose**: System health and performance monitoring - **Key Components**: - Component status tracking - Performance metrics - Alert system - **Feedback Mechanisms**: - Real-time status updates - Performance optimization triggers - System recovery procedures ### 5. Error Handling (`src/mcp_codebase_insight/core/errors.py`) - **Purpose**: Centralized error management - **Key Components**: - Error classification - Recovery strategies - Logging and reporting - **Resilience Features**: - Graceful degradation - Circuit breakers - Error propagation control ## System Interactions ### Critical Paths 1. **Code Analysis Flow**: ```mermaid sequenceDiagram participant CA as Code Analysis participant KB as Knowledge Base participant VS as Vector Store participant CM as Cache CA->>VS: Request embeddings VS->>CM: Check cache CM-->>VS: Return cached/null VS->>KB: Get patterns KB-->>VS: Return patterns VS-->>CA: Return analysis ``` 2. **Health Monitoring Flow**: ```mermaid sequenceDiagram participant HM as Health Monitor participant CS as Component State participant TM as Task Manager participant EH as Error Handler HM->>CS: Check states CS->>TM: Verify tasks TM-->>CS: Task status CS-->>HM: System status HM->>EH: Report issues ``` ## Performance Considerations ### Caching Strategy - Multi-level caching (memory and disk) - Cache invalidation triggers - Cache size management ### Scalability Points 1. Vector Store: - Horizontal scaling capabilities - Batch processing optimization - Search performance tuning 2. 
Task Management: - Worker pool management - Task prioritization - Resource allocation ## Error Recovery ### Failure Scenarios 1. Vector Store Unavailable: - Fallback to cached results - Graceful degradation of search - Automatic reconnection 2. Task Overload: - Dynamic task throttling - Priority-based scheduling - Resource reallocation ## System Evolution ### Extension Points 1. Knowledge Base: - Plugin system for new patterns - Custom analyzers - External integrations 2. Monitoring: - Custom metrics - Alert integrations - Performance profiling ## Next Steps 1. **Documentation Needs**: - Detailed component interaction guides - Performance tuning documentation - Deployment architecture guides 2. **System Improvements**: - Enhanced caching strategies - More robust error recovery - Better performance monitoring ``` -------------------------------------------------------------------------------- /docs/workflows/README.md: -------------------------------------------------------------------------------- ```markdown # MCP Codebase Insight Workflows ## Overview This document details the various workflows supported by MCP Codebase Insight, including both user-facing and system-level processes. These workflows are designed to help developers effectively use and interact with the system's features. ## Quick Navigation - [User Workflows](#user-workflows) - [Code Analysis](#1-code-analysis-workflow) - [Documentation Management](#2-documentation-management-workflow) - [Testing](#3-testing-workflow) - [System Workflows](#system-workflows) - [Vector Store Operations](#1-vector-store-operations) - [Health Monitoring](#2-health-monitoring) - [Integration Points](#integration-points) - [Best Practices](#best-practices) - [Troubleshooting](#troubleshooting) - [Next Steps](#next-steps) ## User Workflows ### 1. 
Code Analysis Workflow #### Process Flow ```mermaid graph TD A[Developer] -->|Submit Code| B[Analysis Request] B --> C{Analysis Type} C -->|Pattern Detection| D[Pattern Analysis] C -->|Semantic Search| E[Vector Search] C -->|Documentation| F[Doc Analysis] D --> G[Results] E --> G F --> G G -->|Display| A ``` #### Steps 1. **Submit Code** - Upload code files or provide repository URL - Specify analysis parameters - Set analysis scope 2. **Analysis Processing** - Pattern detection runs against known patterns - Semantic search finds similar code - Documentation analysis checks coverage 3. **Results Review** - View detected patterns - Review suggestions - Access related documentation ### 2. Documentation Management Workflow #### Process Flow ```mermaid graph TD A[Developer] -->|Create/Update| B[Documentation] B --> C{Doc Type} C -->|ADR| D[ADR Processing] C -->|API| E[API Docs] C -->|Guide| F[User Guide] D --> G[Link Analysis] E --> G F --> G G -->|Update| H[Doc Map] H -->|Validate| A ``` #### Steps 1. **Create/Update Documentation** - Choose document type - Write content - Add metadata 2. **Processing** - Analyze document relationships - Update documentation map - Validate links 3. **Validation** - Check for broken links - Verify consistency - Update references ### 3. Testing Workflow #### Process Flow ```mermaid graph TD A[Developer] -->|Run Tests| B[Test Suite] B --> C{Test Type} C -->|Unit| D[Unit Tests] C -->|Integration| E[Integration Tests] C -->|SSE| F[SSE Tests] D --> G[Results] E --> G F --> G G -->|Report| A ``` #### Steps 1. **Test Initialization** - Set up test environment - Configure test parameters - Prepare test data 2. **Test Execution** - Run selected test types - Monitor progress - Collect results 3. **Results Analysis** - Review test reports - Analyze failures - Generate coverage reports ## System Workflows ### 1. 
Vector Store Operations #### Process Flow ```mermaid sequenceDiagram participant User participant Server participant Cache participant VectorStore participant Knowledge User->>Server: Request Analysis Server->>Cache: Check Cache Cache-->>Server: Cache Hit/Miss alt Cache Miss Server->>VectorStore: Generate Embeddings VectorStore->>Knowledge: Get Patterns Knowledge-->>VectorStore: Return Patterns VectorStore-->>Server: Return Results Server->>Cache: Update Cache end Server-->>User: Return Analysis ``` #### Components 1. **Cache Layer** - In-memory cache for frequent requests - Disk cache for larger datasets - Cache invalidation strategy 2. **Vector Store** - Embedding generation - Vector search - Pattern matching 3. **Knowledge Base** - Pattern storage - Relationship tracking - Context management ### 2. Health Monitoring #### Process Flow ```mermaid sequenceDiagram participant Monitor participant Components participant Tasks participant Alerts loop Every 30s Monitor->>Components: Check Status Components->>Tasks: Verify Tasks Tasks-->>Components: Task Status alt Issues Detected Components->>Alerts: Raise Alert Alerts->>Monitor: Alert Status end Components-->>Monitor: System Status end ``` #### Components 1. **Monitor** - Regular health checks - Performance monitoring - Resource tracking 2. **Components** - Service status - Resource usage - Error rates 3. **Tasks** - Task queue status - Processing rates - Error handling 4. **Alerts** - Alert generation - Notification routing - Alert history ## Integration Points ### 1. External Systems - Version Control Systems - CI/CD Pipelines - Issue Tracking Systems - Documentation Platforms ### 2. APIs - REST API for main operations - SSE for real-time updates - WebSocket for bi-directional communication ### 3. Storage - Vector Database (Qdrant) - Cache Storage - Document Storage ## Best Practices ### 1. Code Analysis - Regular analysis scheduling - Incremental analysis for large codebases - Pattern customization ### 2. 
Documentation - Consistent formatting - Regular updates - Link validation ### 3. Testing - Comprehensive test coverage - Regular test runs - Performance benchmarking ## Troubleshooting ### Common Issues 1. **Analysis Failures** - Check input validation - Verify system resources - Review error logs 2. **Performance Issues** - Monitor cache hit rates - Check vector store performance - Review resource usage 3. **Integration Issues** - Verify API endpoints - Check authentication - Review connection settings ## Next Steps 1. **Workflow Optimization** - Performance improvements - Enhanced error handling - Better user feedback 2. **New Features** - Custom workflow creation - Advanced analysis options - Extended integration options 3. **Documentation** - Workflow examples - Integration guides - Troubleshooting guides ``` -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- ```markdown # Contributing to MCP Codebase Insight > 🚧 **Documentation In Progress** > > This documentation is being actively developed. More details will be added soon. ## Getting Started 1. Fork the repository 2. Clone your fork 3. Create a new branch 4. Make your changes 5. Submit a pull request ## Development Setup See the [Development Guide](docs/development/README.md) for detailed setup instructions. ## Code Style - Follow PEP 8 guidelines - Use type hints - Write docstrings for all public functions and classes - Keep functions focused and small - Write clear commit messages ## Testing - Write tests for new features - Ensure all tests pass before submitting PR - Include both unit and integration tests - Document test cases ## Documentation - Update documentation for new features - Follow the documentation style guide - Include examples where appropriate - Keep documentation up to date with code ## Pull Request Process 1. Update documentation 2. Add tests 3. 
Update CHANGELOG.md 4. Submit PR with clear description 5. Address review comments ## Code of Conduct Please note that this project is released with a [Code of Conduct](CODE_OF_CONDUCT.md). By participating in this project you agree to abide by its terms. ``` -------------------------------------------------------------------------------- /CLAUDE.md: -------------------------------------------------------------------------------- ```markdown # TechPath Project Guidelines ## Build & Test Commands - **Python**: `make install-dev` (setup), `make start` (run server), `make check` (all checks) - **Python Tests**: `make test` or `pytest tests/test_file.py::test_function_name` (single test) - **Frontend**: `cd project && npm run dev` (development), `npm run build` (production) - **Frontend Tests**: `cd project && npm test` or `npm test -- -t "test name pattern"` (single test) - **Linting**: `make lint` (Python), `cd project && npm run lint` (TypeScript/React) - **Formatting**: `make format` (Python), `prettier --write src/` (Frontend) ## Code Style Guidelines - **Python**: Black (88 chars), isort for imports, type hints required - **TypeScript**: 2-space indent, semicolons, strong typing with interfaces - **Imports**: Group by external then internal, alphabetize - **React**: Functional components with hooks, avoid class components - **Types**: Define interfaces in separate files when reused - **Naming**: camelCase for JS/TS variables, PascalCase for components/types, snake_case for Python - **Error Handling**: Try/catch in async functions, propagate errors with descriptive messages - **Comments**: Document complex logic, interfaces, and function parameters/returns - **Testing**: Unit test coverage required, mock external dependencies ``` -------------------------------------------------------------------------------- /docs/development/CONTRIBUTING.md: -------------------------------------------------------------------------------- ```markdown # Contributing Guidelines > 🚧 
**Documentation In Progress** > > This documentation is being actively developed. More details will be added soon. ## Welcome! Thank you for considering contributing to MCP Codebase Insight! This document provides guidelines and workflows for contributing. ## Code of Conduct Please read and follow our [Code of Conduct](CODE_OF_CONDUCT.md). ## How Can I Contribute? ### Reporting Bugs 1. Check if the bug is already reported in [Issues](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues) 2. If not, create a new issue with: - Clear title - Detailed description - Steps to reproduce - Expected vs actual behavior - Environment details ### Suggesting Enhancements 1. Check existing issues and discussions 2. Create a new issue with: - Clear title - Detailed description - Use cases - Implementation ideas (optional) ### Pull Requests 1. Fork the repository 2. Create a feature branch 3. Make your changes 4. Run tests and linting 5. Submit PR with: - Clear title - Description of changes - Reference to related issues - Updated documentation ## Development Process ### 1. Setup Development Environment Follow the [Development Guide](README.md) for setup instructions. ### 2. Make Changes 1. Create a branch: ```bash git checkout -b feature/your-feature ``` 2. Make changes following our style guide 3. Add tests for new functionality 4. Update documentation ### 3. Test Your Changes ```bash # Run all tests pytest # Run specific test file pytest tests/path/to/test_file.py # Run with coverage pytest --cov=src tests/ ``` ### 4. Submit Changes 1. Push to your fork 2. Create pull request 3. Wait for review 4. 
Address feedback ## Style Guide ### Python Code Style - Follow PEP 8 - Use type hints - Maximum line length: 88 characters - Use docstrings (Google style) ### Commit Messages ``` type(scope): description [optional body] [optional footer] ``` Types: - feat: New feature - fix: Bug fix - docs: Documentation - style: Formatting - refactor: Code restructuring - test: Adding tests - chore: Maintenance ### Documentation - Keep README.md updated - Add docstrings to all public APIs - Update relevant documentation files - Include examples for new features ## Review Process 1. Automated checks must pass 2. At least one maintainer review 3. All feedback addressed 4. Documentation updated 5. Tests added/updated ## Getting Help - Join our [Discord](https://discord.gg/mcp-codebase-insight) - Ask in GitHub Discussions - Contact maintainers ## Recognition Contributors will be: - Listed in CONTRIBUTORS.md - Mentioned in release notes - Credited in documentation Thank you for contributing! ``` -------------------------------------------------------------------------------- /docs/development/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- ```markdown # Code of Conduct > 🚧 **Documentation In Progress** > > This documentation is being actively developed. More details will be added soon. ## Our Pledge We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 
## Our Standards Examples of behavior that contributes to a positive environment for our community include: * Demonstrating empathy and kindness toward other people * Being respectful of differing opinions, viewpoints, and experiences * Giving and gracefully accepting constructive feedback * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience * Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: * The use of sexualized language or imagery, and sexual attention or advances of any kind * Trolling, insulting or derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or email address, without their explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Enforcement Responsibilities Project maintainers are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the project maintainers responsible for enforcement at [INSERT CONTACT METHOD]. All complaints will be reviewed and investigated promptly and fairly. ## Enforcement Guidelines Project maintainers will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct: ### 1. Correction **Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. 
**Consequence**: A private, written warning from project maintainers, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. ### 2. Warning **Community Impact**: A violation through a single incident or series of actions. **Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. ### 3. Temporary Ban **Community Impact**: A serious violation of community standards, including sustained inappropriate behavior. **Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. ### 4. Permanent Ban **Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. **Consequence**: A permanent ban from any sort of public interaction within the community. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 
[homepage]: https://www.contributor-covenant.org ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/core/__init__.py: -------------------------------------------------------------------------------- ```python """Core package initialization.""" from .config import ServerConfig __all__ = ["ServerConfig"] ``` -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- ``` pytest>=8.0 pytest-asyncio>=0.26.0 anyio>=3.0.0 httpx>=0.24.0 fastapi[all]>=0.100.0 qdrant-client>=1.2.0 ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/__init__.py: -------------------------------------------------------------------------------- ```python """MCP Codebase Insight package.""" from .core.config import ServerConfig __version__ = "0.2.2" __all__ = ["ServerConfig"] ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/utils/__init__.py: -------------------------------------------------------------------------------- ```python """Utils package initialization.""" from .logger import Logger, get_logger, logger __all__ = ["Logger", "get_logger", "logger"] ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/asgi.py: -------------------------------------------------------------------------------- ```python """ASGI application entry point.""" from .core.config import ServerConfig from .server import CodebaseAnalysisServer # Create server instance with default config config = ServerConfig() server = CodebaseAnalysisServer(config) # Export the FastAPI app instance app = server.app ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/core/component_status.py: 
-------------------------------------------------------------------------------- ```python """Component status enumeration.""" from enum import Enum class ComponentStatus(str, Enum): """Component status enumeration.""" UNINITIALIZED = "uninitialized" INITIALIZING = "initializing" INITIALIZED = "initialized" FAILED = "failed" CLEANING = "cleaning" CLEANED = "cleaned" ``` -------------------------------------------------------------------------------- /module_summaries/database_summary.txt: -------------------------------------------------------------------------------- ``` # Database Module Summary - **Purpose**: Describe the database's role in the application. - **Key Components**: List database types, schema designs, and any ORM tools used. - **Dependencies**: Mention the relationships with the backend and data sources. - **Largest Files**: Identify the largest database-related files and their purposes. ``` -------------------------------------------------------------------------------- /module_summaries/backend_summary.txt: -------------------------------------------------------------------------------- ``` # Backend Module Summary - **Purpose**: Describe the backend's role in the application. - **Key Components**: List key components such as main frameworks, APIs, and data handling. - **Dependencies**: Mention any database connections and external services it relies on. - **Largest Files**: Identify the largest backend files and their purposes. ``` -------------------------------------------------------------------------------- /module_summaries/frontend_summary.txt: -------------------------------------------------------------------------------- ``` # Frontend Module Summary - **Purpose**: Describe the frontend's role in the application. - **Key Components**: List key components such as main frameworks, libraries, and UI components. - **Dependencies**: Mention any dependencies on backend services or external APIs. 
- **Largest Files**: Identify the largest frontend files and their purposes. ``` -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- ``` [pytest] asyncio_mode = strict asyncio_default_fixture_loop_scope = session testpaths = tests python_files = test_*.py python_classes = Test* python_functions = test_* addopts = -v --cov=src/mcp_codebase_insight --cov-report=term-missing filterwarnings = ignore::DeprecationWarning:pkg_resources.* ignore::DeprecationWarning:importlib.* ignore::DeprecationWarning:pytest_asyncio.* ignore::DeprecationWarning:pydantic.* ignore::pydantic.PydanticDeprecatedSince20 ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/version.py: -------------------------------------------------------------------------------- ```python """Version information.""" __version__ = "0.1.0" __author__ = "MCP Team" __author_email__ = "[email protected]" __description__ = "MCP Codebase Insight Server" __url__ = "https://github.com/modelcontextprotocol/mcp-codebase-insight" __license__ = "MIT" # Version components VERSION_MAJOR = 0 VERSION_MINOR = 1 VERSION_PATCH = 0 VERSION_SUFFIX = "" # Build version tuple VERSION_INFO = (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) # Build version string VERSION = ".".join(map(str, VERSION_INFO)) if VERSION_SUFFIX: VERSION += VERSION_SUFFIX ``` -------------------------------------------------------------------------------- /test_function.txt: -------------------------------------------------------------------------------- ``` async def test_health_check(client: httpx.AsyncClient): """Test the health check endpoint.""" response = await client.get("/health") assert response.status_code == status.HTTP_200_OK data = response.json() # In test environment, we expect partially initialized state assert "status" in data assert "initialized" in data # We don't 
assert on components field since it might be missing # Accept 'ok' status in test environment assert data["status"] in ["healthy", "initializing", "ok"], f"Unexpected status: {data['status']}" # Print status for debugging print(f"Health status: {data}") ``` -------------------------------------------------------------------------------- /tests/integration/fixed_test2.py: -------------------------------------------------------------------------------- ```python async def test_health_check(client: httpx.AsyncClient): """Test the health check endpoint.""" response = await client.get("/health") assert response.status_code == status.HTTP_200_OK data = response.json() # In test environment, we expect partially initialized state assert "status" in data assert "initialized" in data # We don't assert on components field since it might be missing # Accept 'ok' status in test environment assert data["status"] in ["healthy", "initializing", "ok"], f"Unexpected status: {data['status']}" # Print status for debugging print(f"Health status: {data}") ``` -------------------------------------------------------------------------------- /run_fixed_tests.sh: -------------------------------------------------------------------------------- ```bash #!/bin/bash # This script runs tests with proper path and environment setup set -e # Activate the virtual environment source .venv/bin/activate # Install the package in development mode pip install -e . # Set environment variables export MCP_TEST_MODE=1 export QDRANT_URL="http://localhost:6333" export MCP_COLLECTION_NAME="test_collection_$(date +%s)" export PYTHONPATH="$PYTHONPATH:$(pwd)" # Check if we should run a specific test or all tests if [ $# -eq 0 ]; then echo "Running specific vector store tests..."
python component_test_runner.py tests/components/test_vector_store.py else echo "Running specified tests: $*" python component_test_runner.py "$@" fi ``` -------------------------------------------------------------------------------- /debug_tests.md: -------------------------------------------------------------------------------- ```markdown # Debug MCP Codebase Insight Tests ## Problem Statement Debug and fix the test execution issues in the MCP Codebase Insight project. The main test script `run_tests.py` is encountering issues with module imports and test execution. ## Current Issues 1. Module import errors for `mcp_codebase_insight` package 2. Test execution failures 3. Coverage reporting issues ## Expected Behavior - All tests should run successfully - Coverage reports should be generated - No import errors should occur ## Additional Context - The project uses pytest for testing - Coverage reporting is handled through pytest-cov - The project is set up with a virtual environment - Environment variables are set in .env file ``` -------------------------------------------------------------------------------- /docs/templates/adr.md: -------------------------------------------------------------------------------- ```markdown # {title} ## Status {status} ## Context {context} ## Decision Drivers <!-- What forces influenced this decision? 
--> * Technical constraints * Business requirements * Resource constraints * Time constraints ## Considered Options {options} ## Decision {decision} ## Expected Consequences ### Positive Consequences {positive_consequences} ### Negative Consequences {negative_consequences} ## Pros and Cons of the Options {options_details} ## Links <!-- Optional section for links to other decisions, patterns, or resources --> ## Notes {notes} ## Metadata * Created: {created_at} * Last Modified: {updated_at} * Author: {author} * Approvers: {approvers} * Status: {status} * Tags: {tags} {metadata} ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/models.py: -------------------------------------------------------------------------------- ```python """API request and response models.""" from typing import List, Dict, Any, Optional from pydantic import BaseModel class ToolRequest(BaseModel): """Base request model for tool endpoints.""" name: str arguments: Dict[str, Any] class CrawlDocsRequest(BaseModel): """Request model for crawl-docs endpoint.""" urls: List[str] source_type: str class AnalyzeCodeRequest(BaseModel): """Request model for analyze-code endpoint.""" code: str context: Dict[str, Any] class SearchKnowledgeRequest(BaseModel): """Request model for search-knowledge endpoint.""" query: str pattern_type: str limit: int = 5 class CodeAnalysisRequest(BaseModel): """Code analysis request model.""" code: str context: Optional[Dict[str, Any]] = None ``` -------------------------------------------------------------------------------- /core_workflows.txt: -------------------------------------------------------------------------------- ``` # Core Workflows ## User Journeys 1. **Product Browsing**: - Relevant code files: [list of files responsible for navigation, product listing] - File sizes: [line counts for each key file] 2. 
**Checkout Process**: - Relevant code files: [list of files responsible for cart management, payment handling] - File sizes: [line counts for each key file] 3. **User Authentication**: - Relevant code files: [list of files responsible for login, logout, user session management] - File sizes: [line counts for each key file] ### Note: - The workflows and summaries provided are examples. Please modify them to fit the specific use case and structure of your application repository. - Pay special attention to large files, as they may represent core functionality or potential refactoring opportunities. ``` -------------------------------------------------------------------------------- /summary_document.txt: -------------------------------------------------------------------------------- ``` # Application Summary ## Architecture This document provides a summary of the application's architecture, key modules, and their relationships. ## Key Modules - Placeholder for module descriptions. - Include information about the functionality, dependencies, and interaction with other modules. ## Key Files by Size - See codebase_stats.txt for a complete listing of files by line count - The largest files often represent core functionality or areas that might need refactoring ## High-Level Next Steps for LLM 1. Identify and generate module summaries for frontend, backend, and database. 2. Document core workflows and user journeys within the application. 3. Use the LLM relationship prompt (llm_relationship_prompt.txt) to generate a comprehensive relationship analysis. 4. Pay special attention to the largest files and their relationships to other components. 
``` -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- ```yaml name: Publish to PyPI on: push: tags: - 'v*' jobs: deploy: runs-on: ubuntu-latest environment: name: pypi url: https://pypi.org/p/mcp-codebase-insight permissions: id-token: write contents: read steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - name: Set up Python uses: actions/[email protected] with: python-version: '3.x' - name: Install dependencies run: | python -m pip install --upgrade pip pip install build twine - name: Build package run: python -m build - name: Check distribution run: | python -m twine check dist/* ls -l dist/ - name: Publish to PyPI env: TWINE_USERNAME: __token__ TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} run: python -m twine upload dist/* ``` -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- ```json { "name": "vite-react-typescript-starter", "private": true, "version": "0.0.0", "type": "module", "scripts": { "dev": "vite", "build": "tsc && vite build", "lint": "eslint .", "preview": "vite preview" }, "dependencies": { "@supabase/supabase-js": "^2.39.7", "lucide-react": "^0.344.0", "react": "^18.3.1", "react-dom": "^18.3.1", "react-router-dom": "^6.22.0", "recharts": "^2.12.1" }, "devDependencies": { "@eslint/js": "^9.9.1", "@tsconfig/recommended": "^1.0.3", "@types/node": "^20.11.24", "@types/react": "^18.3.5", "@types/react-dom": "^18.3.0", "@vitejs/plugin-react": "^4.3.1", "autoprefixer": "^10.4.18", "eslint": "^9.9.1", "eslint-plugin-react-hooks": "^5.1.0-rc.0", "eslint-plugin-react-refresh": "^0.4.11", "globals": "^15.9.0", "postcss": "^8.4.35", "tailwindcss": "^3.4.1", "typescript": "^5.5.3", "typescript-eslint": "^8.3.0", "vite": "^5.4.2" } } ``` 
-------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- ```dockerfile # Use Python 3.11 slim image FROM python:3.11-slim # Set working directory WORKDIR /app # Set environment variables ENV PYTHONUNBUFFERED=1 \ PYTHONDONTWRITEBYTECODE=1 \ PIP_NO_CACHE_DIR=1 \ PIP_DISABLE_PIP_VERSION_CHECK=1 # Install system dependencies RUN apt-get update \ && apt-get install -y --no-install-recommends \ build-essential \ curl \ git \ && rm -rf /var/lib/apt/lists/* # Install Rust (needed for pydantic) RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y ENV PATH="/root/.cargo/bin:${PATH}" # Copy requirements file COPY requirements.txt . # Install Python dependencies RUN pip install --no-cache-dir -r requirements.txt # Copy source code COPY src/ src/ COPY scripts/ scripts/ # Copy configuration files COPY .env.example .env # Create necessary directories RUN mkdir -p \ docs/adrs \ knowledge \ cache \ logs # Set permissions RUN chmod +x scripts/start_mcp_server.sh # Expose port EXPOSE 3000 # Set entrypoint ENTRYPOINT ["scripts/start_mcp_server.sh"] # Set default command CMD ["--host", "0.0.0.0", "--port", "3000"] ``` -------------------------------------------------------------------------------- /docs/getting-started/qdrant_setup.md: -------------------------------------------------------------------------------- ```markdown # Qdrant Setup Guide > 🚧 **Documentation In Progress** > > This documentation is being actively developed. More details will be added soon. ## Overview This guide covers setting up Qdrant vector database for MCP Codebase Insight. ## Installation Methods ### 1. Using Docker (Recommended) ```bash # Pull the Qdrant image docker pull qdrant/qdrant # Start Qdrant container docker run -p 6333:6333 -v $(pwd)/qdrant_storage:/qdrant/storage qdrant/qdrant ``` ### 2. 
@pytest.mark.asyncio
async def test_embedder_initialization():
    """Verify the embedder starts up with a loaded model of the expected width."""
    instance = SentenceTransformerEmbedding()
    try:
        # Bound the model download/load so a hung network cannot stall CI.
        await asyncio.wait_for(instance.initialize(), timeout=60.0)
        assert instance.model is not None
        # all-MiniLM-L6-v2 (the default model) emits 384-dimensional vectors.
        assert instance.vector_size == 384
    except asyncio.TimeoutError:
        pytest.fail("Embedder initialization timed out")
    except Exception as e:
        pytest.fail(f"Embedder initialization failed: {str(e)}")
def run_with_async_fixture_support():
    """Run pytest on a single test with strict asyncio mode.

    Expects ``sys.argv`` to hold the target module path and the test name
    (used as a ``-k`` expression); any further arguments are forwarded to
    pytest verbatim.  Exits the process with pytest's return code, or 1 on
    bad usage.
    """
    # Require at least <module_path> and <test_name>.
    if len(sys.argv) < 3:
        print("Usage: python async_fixture_wrapper.py <module_path> <test_name>")
        sys.exit(1)

    module_path = sys.argv[1]
    test_name = sys.argv[2]

    # Configure event loop policy for macOS if needed.
    if sys.platform == 'darwin':
        import platform
        try:
            major = int(platform.mac_ver()[0].split('.')[0])
        except ValueError:
            # mac_ver() can return an empty version string in some sandboxed
            # environments; previously this raised ValueError. Leave the
            # policy untouched in that case.
            major = 0
        if major >= 10:
            asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())

    # Make the package containing the test module importable
    # (tests/<suite>/<module>.py -> repo root two levels up).
    base_dir = str(Path(module_path).parent.parent)
    sys.path.insert(0, base_dir)

    # Strict asyncio mode forces async fixtures to be declared properly.
    pytest_args = [module_path, f"-k={test_name}", "--asyncio-mode=strict"]

    # Forward any extra CLI arguments straight to pytest.
    if len(sys.argv) > 3:
        pytest_args.extend(sys.argv[3:])

    exit_code = pytest.main(pytest_args)
    sys.exit(exit_code)
## Overview This PR aims to improve the GitHub Actions workflows in our repository by: 1. **Documenting** all existing workflows 2. **Addressing** the test pattern issue in build-verification.yml 3. **Extracting** common functionality into reusable scripts 4. **Standardizing** practices across different workflows ## Changes - Added comprehensive documentation of all GitHub Actions workflows - Fixed the wildcard pattern issue (`test_*`) in build-verification.yml - Extracted Qdrant health check logic into a reusable script - Added README for the scripts directory ## Benefits - **Maintainability**: Common logic is now in a single location - **Readability**: Workflows are cleaner and better documented - **Reliability**: Fixed test pattern ensures more consistent test execution - **Extensibility**: Easier to add new workflows or modify existing ones ## Request for Review @coderabbit, I'm particularly interested in your feedback on: 1. Workflow structure and organization 2. Any redundancies or inefficiencies you notice 3. Any missing best practices 4. Suggestions for further improvements ## Future Improvements We're planning to implement additional enhancements based on your feedback: - Extract more common functionality into reusable actions - Standardize environment variables across workflows - Improve caching strategies - Add workflow dependencies to avoid redundant work Thank you for your time and expertise! 
``` -------------------------------------------------------------------------------- /run_test_with_path_fix.sh: -------------------------------------------------------------------------------- ```bash #!/bin/bash # This script runs tests with a fix for the Python path issue set -e # Activate the virtual environment source .venv/bin/activate # Setup environment for Qdrant export MCP_TEST_MODE=1 export QDRANT_URL="http://localhost:6333" export MCP_COLLECTION_NAME="test_collection_$(date +%s)" export PYTHONPATH="$PYTHONPATH:$(pwd)" # Initialize Qdrant collection for testing echo "Creating Qdrant collection for testing..." python - << EOF import os from qdrant_client import QdrantClient from qdrant_client.http import models # Connect to Qdrant client = QdrantClient(url="http://localhost:6333") collection_name = os.environ.get("MCP_COLLECTION_NAME") # Check if collection exists collections = client.get_collections().collections collection_names = [c.name for c in collections] if collection_name in collection_names: print(f"Collection {collection_name} already exists, recreating it...") client.delete_collection(collection_name=collection_name) # Create collection with vector size 384 (for all-MiniLM-L6-v2) client.create_collection( collection_name=collection_name, vectors_config=models.VectorParams( size=384, # Dimension for all-MiniLM-L6-v2 distance=models.Distance.COSINE, ), ) # Create test directory that might be needed os.makedirs("qdrant_storage", exist_ok=True) print(f"Successfully created collection {collection_name}") EOF # Run all component tests in vector_store echo "Running all vector store tests with component_test_runner.py..." 
#!/usr/bin/env python3
"""
Test script to verify imports work correctly
"""
import sys
import importlib
import os


def test_import(module_name):
    """Try to import *module_name*; print the outcome and return success as bool."""
    try:
        importlib.import_module(module_name)
        print(f"✅ Successfully imported {module_name}")
        return True
    except ImportError as e:
        print(f"❌ Failed to import {module_name}: {e}")
        return False


def print_path():
    """Dump sys.path entries with their positions, for import debugging."""
    print("\nPython Path:")
    for i, path in enumerate(sys.path):
        print(f"{i}: {path}")


def main():
    """Check stdlib, ML, web, database and project imports, then summarize failures."""
    print("=== Testing Package Imports ===")

    print("\nEnvironment:")
    print(f"Python version: {sys.version}")
    print(f"Working directory: {os.getcwd()}")

    print("\nTesting core package imports:")
    # Ensure the project root is importable before testing project modules.
    sys.path.insert(0, os.getcwd())
    print_path()

    print("\nTesting imports:")
    modules = [
        # Basic Python imports
        "os",
        "sys",
        # ML/NLP packages
        "torch",
        "numpy",
        "transformers",
        "sentence_transformers",
        # FastAPI and web packages
        "fastapi",
        "starlette",
        "pydantic",
        # Database packages
        "qdrant_client",
        # Project specific modules
        "src.mcp_codebase_insight.core.config",
        "src.mcp_codebase_insight.core.embeddings",
        "src.mcp_codebase_insight.core.vector_store",
    ]
    # Previously the boolean results were discarded; collect them so CI logs
    # end with an actionable summary.
    failures = [name for name in modules if not test_import(name)]

    print("\n=== Testing Complete ===")
    if failures:
        print(f"Failed imports: {', '.join(failures)}")


if __name__ == "__main__":
    main()
Color echo "Setting up Qdrant for MCP Codebase Insight..." # Check if Docker is running if ! docker info > /dev/null 2>&1; then echo -e "${RED}Error: Docker is not running${NC}" exit 1 fi # Check if port 6333 is available if lsof -Pi :6333 -sTCP:LISTEN -t >/dev/null ; then echo -e "${RED}Warning: Port 6333 is already in use${NC}" echo "Checking if it's a Qdrant instance..." if curl -s http://localhost:6333/health > /dev/null; then echo -e "${GREEN}Existing Qdrant instance detected and healthy${NC}" exit 0 else echo -e "${RED}Port 6333 is in use by another service${NC}" exit 1 fi fi # Create data directory if it doesn't exist mkdir -p ./qdrant_data # Stop and remove existing container if it exists if docker ps -a | grep -q mcp-qdrant; then echo "Removing existing mcp-qdrant container..." docker stop mcp-qdrant || true docker rm mcp-qdrant || true fi # Pull latest Qdrant image echo "Pulling latest Qdrant image..." docker pull qdrant/qdrant:latest # Start Qdrant container echo "Starting Qdrant container..." docker run -d \ --name mcp-qdrant \ -p 6333:6333 \ -v "$(pwd)/qdrant_data:/qdrant/storage" \ qdrant/qdrant # Wait for Qdrant to be ready echo "Waiting for Qdrant to be ready..." for i in {1..30}; do if curl -s http://localhost:6333/health > /dev/null; then echo -e "${GREEN}Qdrant is ready!${NC}" exit 0 fi echo "Waiting... 
#!/bin/bash
set -euo pipefail

# Script to check if Qdrant service is available and healthy
# Usage: ./check_qdrant_health.sh [qdrant_url] [max_retries] [sleep_seconds]

# Default values
QDRANT_URL=${1:-"http://localhost:6333"}
MAX_RETRIES=${2:-20}
SLEEP_SECONDS=${3:-5}

echo "Checking Qdrant health at $QDRANT_URL (max $MAX_RETRIES attempts with $SLEEP_SECONDS seconds delay)"

# Install dependencies if not present (best effort; may lack privileges)
if ! command -v curl &> /dev/null || ! command -v jq &> /dev/null; then
    echo "Installing required dependencies..."
    apt-get update &> /dev/null && apt-get install -y curl jq &> /dev/null || true
fi

# curl is mandatory; jq is optional (JSON validation is skipped without it)
if ! command -v curl &> /dev/null; then
    echo "Error: curl command not found and could not be installed"
    exit 1
fi

if ! command -v jq &> /dev/null; then
    echo "Warning: jq command not found and could not be installed. JSON validation will be skipped."
    JQ_AVAILABLE=false
else
    JQ_AVAILABLE=true
fi

# Wait for Qdrant to become available.
# --max-time keeps a hung endpoint from stalling the whole check.
retry_count=0
until [ "$(curl -s --max-time 10 -o /dev/null -w "%{http_code}" "$QDRANT_URL/collections")" -eq 200 ] || [ "$retry_count" -eq "$MAX_RETRIES" ]
do
    # Report attempts 1-based (previously printed "attempt 0 of N").
    echo "Waiting for Qdrant... (attempt $((retry_count + 1)) of $MAX_RETRIES)"
    sleep "$SLEEP_SECONDS"
    retry_count=$((retry_count+1))
done

if [ "$retry_count" -eq "$MAX_RETRIES" ]; then
    echo "Qdrant service failed to become available after $((MAX_RETRIES * SLEEP_SECONDS)) seconds"
    exit 1
fi

# Check for valid JSON response if jq is available
if [ "$JQ_AVAILABLE" = true ]; then
    if ! curl -s --max-time 10 "$QDRANT_URL/collections" | jq . > /dev/null; then
        echo "Qdrant did not return valid JSON."
        exit 1
    fi
fi

echo "Qdrant service is accessible and healthy."
exit 0
def setup_collection():
    """Create (or recreate) the Qdrant collection used by the MCP server.

    Connection details are read from the environment:
      * ``QDRANT_URL``     -- Qdrant endpoint (default: http://localhost:6333)
      * ``QDRANT_API_KEY`` -- API key, if the target instance requires one

    SECURITY: a previous revision embedded a cloud URL and a live API key
    directly in this file; that token should be revoked, and credentials
    must come from the environment instead of source control.
    """
    import os

    # Connect to Qdrant using environment-supplied credentials.
    client = QdrantClient(
        url=os.environ.get("QDRANT_URL", "http://localhost:6333"),
        api_key=os.environ.get("QDRANT_API_KEY") or None,
    )

    collection_name = "mcp-codebase-insight"

    try:
        # Check if collection exists; recreate it from scratch if so.
        collections = client.get_collections().collections
        exists = any(c.name == collection_name for c in collections)

        if exists:
            print(f"\nRemoving existing collection '{collection_name}'")
            client.delete_collection(collection_name=collection_name)

        print(f"\nCreating collection '{collection_name}' with named vectors")

        # Named vector configuration for the default MCP server embedding
        # model: all-MiniLM-L6-v2 produces 384-dimensional vectors.
        vectors_config = {
            "fast-all-minilm-l6-v2": VectorParams(
                size=384,
                distance=Distance.COSINE,
            )
        }

        client.create_collection(
            collection_name=collection_name,
            vectors_config=vectors_config,
        )

        # Verify the collection was created properly.
        collection_info = client.get_collection(collection_name=collection_name)
        print(f"\nCollection '{collection_name}' created successfully")
        print(f"Vector configuration: {collection_info.config.params.vectors}")
        print("\nCollection is ready for the MCP server")
    except Exception as e:
        print(f"\nError setting up collection: {e}")
finally: await store.cleanup() await store.close() ``` ## Vector Names and Dimensions - Use consistent vector dimensions (384 for all-MiniLM-L6-v2) - Be careful when overriding the vector_name parameter - Ensure embedder and vector store are compatible ## Error Handling - Check for component availability before use - Handle initialization errors gracefully - Log failures with meaningful messages ## Testing Guidelines 1. Use isolated test collections with unique names 2. Clean up all test data after tests 3. Verify metadata structure in tests 4. Use standardized test data fixtures 5. Test both positive and negative paths By following these guidelines, you can avoid common issues like the "KeyError: 'type'" problem that was occurring in the codebase. ``` -------------------------------------------------------------------------------- /scripts/macos_install.sh: -------------------------------------------------------------------------------- ```bash #!/bin/bash # Exit on error set -e echo "Installing MCP Codebase Insight development environment..." # Check for Homebrew if ! command -v brew &> /dev/null; then echo "Installing Homebrew..." /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" else echo "Homebrew already installed, updating..." brew update fi # Check for Python if ! command -v python3 &> /dev/null; then echo "Installing Python..." brew install [email protected] else echo "Python already installed" fi # Check for Docker if ! command -v docker &> /dev/null; then echo "Installing Docker..." brew install --cask docker echo "Starting Docker..." open -a Docker # Wait for Docker to start echo "Waiting for Docker to start..." while ! docker info &> /dev/null; do sleep 1 done else echo "Docker already installed" fi # Create virtual environment echo "Creating virtual environment..." python3.11 -m venv .venv # Activate virtual environment echo "Activating virtual environment..." 
#!/bin/bash
# This script starts the MCP Qdrant server with SSE transport
set -x
source .venv/bin/activate

# Set the PATH to include the local bin directory
export PATH="$HOME/.local/bin:$PATH"

# Define environment variables
export COLLECTION_NAME="mcp-codebase-insight"
export EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"
export QDRANT_URL="${QDRANT_URL:-http://localhost:6333}"
# SECURITY: the API key must come from the caller's environment. A previous
# revision committed a literal Qdrant cloud JWT here; that token should be
# revoked. Empty is acceptable for unauthenticated local instances.
export QDRANT_API_KEY="${QDRANT_API_KEY:-}"

# Define tool descriptions
# NOTE(review): these variables are defined but not explicitly passed to the
# server below — presumably mcp-server-qdrant reads them from the
# environment; confirm, or export them.
TOOL_STORE_DESCRIPTION="Store reusable code snippets and test results. 'information' contains a description. 'metadata' is a dictionary with a 'type' key: 'code' for code snippets, 'test_result' for test results. For 'code', 'metadata' includes a 'code' key with the code. For 'test_result', 'metadata' includes 'test_name', 'status' (pass/fail), and 'error_message'."
TOOL_FIND_DESCRIPTION="Search for code snippets and test results. The 'query' parameter describes what you're looking for. Returned results will have a 'metadata' field with a 'type' key indicating 'code' or 'test_result'. Use this to find code or analyze test failures."

# Default port for the SSE transport (can be overridden with PORT env var)
PORT="${PORT:-8000}"

# Determine transport type (default to sse if not specified)
TRANSPORT="${TRANSPORT:-sse}"

# Check if uvx and mcp-server-qdrant are installed
if ! command -v uvx &> /dev/null; then
    echo "Error: uvx is not installed. Please install it with: pip install uvx"
    exit 1
fi

if ! python -c "import importlib.util; print(importlib.util.find_spec('mcp_server_qdrant') is not None)" | grep -q "True"; then
    echo "Error: mcp-server-qdrant is not installed. Please install it with: pip install mcp-server-qdrant"
    exit 1
fi

echo "Starting MCP Qdrant server with $TRANSPORT transport on port $PORT..."

# Run the MCP Qdrant server with the specified transport
if [ "$TRANSPORT" = "sse" ]; then
    # For SSE transport, we need to specify the port (quoted to be safe)
    uvx mcp-server-qdrant --transport sse --port "$PORT"
else
    # For other transports (e.g. stdio which is the default)
    uvx mcp-server-qdrant
fi
**`test_client`** - Use for asynchronous tests - Returns an `AsyncClient` from httpx - Must be used with `await` for requests - Must be used with `@pytest.mark.asyncio` decorator 2. **`sync_test_client`** - Use for synchronous tests - Returns a `TestClient` from FastAPI - Used for simpler tests where async is not needed - No need for await or asyncio decorators ### Example: Async Test ```python import pytest @pytest.mark.asyncio async def test_my_endpoint(test_client): """Test an endpoint asynchronously.""" response = await test_client.get("/my-endpoint") assert response.status_code == 200 data = response.json() assert "result" in data ``` ### Example: Sync Test ```python def test_simple_endpoint(sync_test_client): """Test an endpoint synchronously.""" response = sync_test_client.get("/simple-endpoint") assert response.status_code == 200 ``` ### Common Issues 1. **Using TestClient with async:** The error `'TestClient' object does not support the asynchronous context manager protocol` occurs when trying to use TestClient in an async context. Always use the `test_client` fixture for async tests. 2. **Mixing async/sync:** Don't mix async and sync patterns in the same test. 3. **Missing asyncio mark:** Always add `@pytest.mark.asyncio` to async test functions. ## Test Isolation Tests should be isolated to prevent state interference between tests: 1. Each test gets its own server instance with isolated state 2. Vector store tests use unique collection names 3. Cleanup is performed automatically after tests ## Running Tests Run tests using pytest: ```bash # Run all tests pytest # Run specific test file pytest tests/test_file_relationships.py # Run specific test function pytest tests/test_file_relationships.py::test_create_file_relationship ``` For more advanced test running options, use the `run_tests.py` script in the project root. 
``` -------------------------------------------------------------------------------- /.compile-venv-py3.11/bin/activate.fish: -------------------------------------------------------------------------------- ``` # This file must be used with "source <venv>/bin/activate.fish" *from fish* # (https://fishshell.com/); you cannot run it directly. function deactivate -d "Exit virtual environment and return to normal shell environment" # reset old environment variables if test -n "$_OLD_VIRTUAL_PATH" set -gx PATH $_OLD_VIRTUAL_PATH set -e _OLD_VIRTUAL_PATH end if test -n "$_OLD_VIRTUAL_PYTHONHOME" set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME set -e _OLD_VIRTUAL_PYTHONHOME end if test -n "$_OLD_FISH_PROMPT_OVERRIDE" set -e _OLD_FISH_PROMPT_OVERRIDE # prevents error when using nested fish instances (Issue #93858) if functions -q _old_fish_prompt functions -e fish_prompt functions -c _old_fish_prompt fish_prompt functions -e _old_fish_prompt end end set -e VIRTUAL_ENV set -e VIRTUAL_ENV_PROMPT if test "$argv[1]" != "nondestructive" # Self-destruct! functions -e deactivate end end # Unset irrelevant variables. deactivate nondestructive set -gx VIRTUAL_ENV /Users/tosinakinosho/workspaces/mcp-codebase-insight/.compile-venv-py3.11 set -gx _OLD_VIRTUAL_PATH $PATH set -gx PATH "$VIRTUAL_ENV/"bin $PATH # Unset PYTHONHOME if set. if set -q PYTHONHOME set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME set -e PYTHONHOME end if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" # fish uses a function instead of an env var to generate the prompt. # Save the current fish_prompt function as the function _old_fish_prompt. functions -c fish_prompt _old_fish_prompt # With the original prompt function renamed, we can override with our own. function fish_prompt # Save the return status of the last command. set -l old_status $status # Output the venv prompt; color taken from the blue of the Python logo. 
from setuptools import setup, find_packages
import re
import os

# Read version from __init__.py so it is defined in exactly one place.
with open(os.path.join("src", "mcp_codebase_insight", "__init__.py"), "r") as f:
    version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", f.read(), re.M)
if version_match:
    version = version_match.group(1)
else:
    raise RuntimeError("Unable to find version string")

# Read the long description with an explicit encoding and a context manager
# (previously a bare open() that leaked the file handle and used the
# platform-default encoding).
with open("README.md", encoding="utf-8") as f:
    long_description = f.read()

setup(
    name="mcp-codebase-insight",
    version=version,
    description="Model Context Protocol (MCP) server for codebase analysis and insights",
    long_description=long_description,
    long_description_content_type="text/markdown",
    author="Model Context Protocol",
    author_email="[email protected]",
    url="https://github.com/modelcontextprotocol/mcp-codebase-insight",
    packages=find_packages(where="src"),
    package_dir={"": "src"},
    install_requires=[
        "fastapi>=0.103.2,<0.104.0",
        "uvicorn>=0.23.2,<0.24.0",
        "pydantic>=2.4.2,<3.0.0",
        "starlette>=0.27.0,<0.28.0",
        # NOTE(review): "asyncio" on PyPI is a stale Python-2-era backport that
        # can shadow the stdlib module — confirm it is intentional.
        "asyncio>=3.4.3",
        "aiohttp>=3.9.0,<4.0.0",
        "qdrant-client>=1.13.3",
        "sentence-transformers>=2.2.2",
        "torch>=2.0.0",
        "transformers>=4.34.0,<5.0.0",
        "python-frontmatter>=1.0.0",
        "markdown>=3.4.4",
        "PyYAML>=6.0.1",
        "structlog>=23.1.0",
        "psutil>=5.9.5",
        "python-dotenv>=1.0.0",
        "requests>=2.31.0",
        "beautifulsoup4>=4.12.0",
        "scipy>=1.11.0",
        "numpy>=1.24.0",
        "python-slugify>=8.0.0",
        # NOTE(review): "slugify" duplicates python-slugify above — confirm
        # both are really required.
        "slugify>=0.0.1",
        # Temporarily commented out for development installation
        # "uvx>=0.4.0",
        "mcp-server-qdrant>=0.2.0",
        "mcp==1.5.0",
    ],
    python_requires=">=3.9",
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
    entry_points={
        "console_scripts": [
            "mcp-codebase-insight=mcp_codebase_insight.server:run",
        ],
    },
)
# Main startup sequence: run environment checks, parse CLI flags, then hand
# the process over to the Python server module via exec.
main() {
    log "Starting MCP Codebase Insight Server"

    # Perform checks
    check_python
    setup_env
    check_qdrant

    # Parse command line arguments
    local host="0.0.0.0"
    local port="3000"

    while [[ $# -gt 0 ]]; do
        case $1 in
            --host)
                host="$2"
                shift 2
                ;;
            --port)
                port="$2"
                shift 2
                ;;
            *)
                log "Unknown option: $1"
                exit 1
                ;;
        esac
    done

    # Start server
    # NOTE(review): $host/$port are logged but not forwarded to the Python
    # process below — presumably the module reads MCP_HOST/MCP_PORT from the
    # environment instead; confirm, or pass them explicitly.
    log "Starting server on $host:$port"
    exec python3 -m mcp_codebase_insight
}

# Run main function with all arguments
main "$@"
def main():
    """Run the server.

    Loads configuration, builds the FastAPI app, and hands control to
    Uvicorn. Any startup failure is logged and converted to exit code 1.
    """
    try:
        config = get_config()
        app = create_app(config)

        # Announce the effective bind address and runtime flags up front so
        # misconfiguration is visible immediately in the logs.
        logger.info(
            f"Starting MCP Codebase Insight Server on {config.host}:{config.port} "
            f"(log level: {config.log_level}, debug mode: {config.debug_mode})"
        )

        # Collect Uvicorn options in one place; a single worker is used
        # because the app object is passed directly (not an import string).
        server_options = {
            "app": app,
            "host": config.host,
            "port": config.port,
            "log_level": config.log_level.lower(),
            "loop": "auto",
            "lifespan": "on",
            "workers": 1,
        }
        uvicorn.run(**server_options)
    except Exception as e:
        logger.error(f"Server error: {e}", exc_info=True)
        sys.exit(1)
async def validate_knowledge_base(config: dict) -> bool:
    """Validate knowledge base operations.

    Runs three checks against the local knowledge-base service —
    entity CRUD, entity relations, and query matching — and returns
    True only when all three succeed.
    """
    logger.info("Testing knowledge base operations...")

    # Shared endpoint/auth fields for every check request.
    endpoint = {
        "url": "http://localhost:8001",
        "auth_token": config["API_KEY"],
    }

    # Test basic knowledge operations (create/persist entities).
    ops_result = await test_knowledge_operations({
        **endpoint,
        "test_entities": [
            {"name": "TestClass", "type": "class"},
            {"name": "test_method", "type": "method"},
            {"name": "test_variable", "type": "variable"}
        ],
        "verify_persistence": True
    })

    # Validate entity relations in both directions.
    relations_result = await validate_entity_relations({
        **endpoint,
        "test_relations": [
            {"from": "TestClass", "to": "test_method", "type": "contains"},
            {"from": "test_method", "to": "test_variable", "type": "uses"}
        ],
        "verify_bidirectional": True
    })

    # Verify query functionality against expected matches.
    query_result = await verify_query_results({
        **endpoint,
        "test_queries": [
            "find classes that use test_variable",
            "find methods in TestClass",
            "find variables used by test_method"
        ],
        "expected_matches": {
            "classes": ["TestClass"],
            "methods": ["test_method"],
            "variables": ["test_variable"]
        }
    })

    # Pair each result with its failure message; insertion order matches
    # the original reporting order (ops, relations, query).
    checks = {
        "Knowledge operations failed": ops_result,
        "Entity relations validation failed": relations_result,
        "Query validation failed": query_result,
    }

    if all(result.success for result in checks.values()):
        logger.info("Knowledge base validation successful")
        return True

    logger.error("Knowledge base validation failed")
    for failure_message, result in checks.items():
        if not result.success:
            logger.error(failure_message)
    return False
load_config config = load_config() success = asyncio.run(validate_knowledge_base(config)) sys.exit(0 if success else 1) ``` -------------------------------------------------------------------------------- /test_fixes.md: -------------------------------------------------------------------------------- ```markdown # MCP Codebase Insight Test Fixes ## Identified Issues 1. **Package Import Problems** - The tests were trying to import from `mcp_codebase_insight` directly, but the package needed to be imported from `src.mcp_codebase_insight` - The Python path wasn't correctly set up to include the project root directory 2. **Missing Dependencies** - The `sentence-transformers` package was installed in the wrong Python environment (Python 3.13 instead of 3.11) - Had to explicitly install it in the correct environment 3. **Test Isolation Problems** - Tests were failing due to not being properly isolated - The `component_test_runner.py` script needed fixes to properly load test modules 4. **Qdrant Server Issue** - The `test_vector_store_cleanup` test failed due to permission issues in the Qdrant server - The server couldn't create a collection directory for the test ## Applied Fixes 1. **Fixed Import Paths** - Modified test files to use `from src.mcp_codebase_insight...` instead of `from mcp_codebase_insight...` - Added code to explicitly set `sys.path` to include the project root directory 2. **Fixed Dependency Issues** - Ran `python3.11 -m pip install sentence-transformers` to install the package in the correct environment - Verified all dependencies were properly installed 3. **Created a Test Runner Script** - Created `run_test_with_path_fix.sh` to set up the proper environment variables and paths - Modified `component_test_runner.py` to better handle module loading 4. 
**Fixed Test Module Loading** - Added a `load_test_module` function to properly handle import paths - Ensured the correct Python path is set before importing test modules ## Results - Successfully ran 2 out of 3 vector store tests: - ✅ `test_vector_store_initialization` - ✅ `test_vector_store_add_and_search` - ❌ `test_vector_store_cleanup` (still failing due to Qdrant server issue) ## Recommendations for Remaining Issue The `test_vector_store_cleanup` test is failing due to the Qdrant server not being able to create a directory for the collection. This could be fixed by: 1. Checking the Qdrant server configuration to ensure it has proper permissions to create directories 2. Creating the necessary directories beforehand 3. Modifying the test to use a collection name that already exists or mock the collection creation The error message suggests a file system permission issue: ``` "Can't create directory for collection cleanup_test_db679546. Error: No such file or directory (os error 2)" ``` A simpler fix for testing purposes might be to modify the Qdrant Docker run command to include a volume mount with proper permissions: ```bash docker run -d -p 6333:6333 -p 6334:6334 -v $(pwd)/qdrant_data:/qdrant/storage qdrant/qdrant ``` This would ensure the storage directory exists and has the right permissions. 
"""Structured logging module."""

import logging
import sys
from typing import Any, Dict, Optional

import structlog

# Configure structlog once at import time; every Logger instance shares this
# processor pipeline (JSON output with ISO timestamps and exception info).
structlog.configure(
    processors=[
        structlog.stdlib.filter_by_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.processors.UnicodeDecoder(),
        structlog.processors.JSONRenderer()
    ],
    context_class=dict,
    logger_factory=structlog.stdlib.LoggerFactory(),
    wrapper_class=structlog.stdlib.BoundLogger,
    cache_logger_on_first_use=True,
)


class Logger:
    """Structured logger that carries persistent context fields.

    Wraps a structlog logger; every log call merges the instance's
    ``extra`` context with the call's keyword arguments.
    """

    def __init__(
        self,
        name: str,
        level: str = "INFO",
        extra: Optional[Dict[str, Any]] = None
    ):
        """Initialize logger.

        Args:
            name: Logger name, typically the module's ``__name__``.
            level: Log level name (e.g. "INFO", "DEBUG").
            extra: Context fields included in every log record.
        """
        # BUG FIX: keep name and level on this wrapper. The previous bind()
        # read self.logger.name and self.logger.level, but structlog's lazy
        # logger proxy does not expose those attributes, so bind() raised
        # AttributeError at runtime.
        self.name = name
        self.level = level.upper()

        # NOTE(review): basicConfig only takes effect on the first call
        # process-wide; later Logger instances won't change the root level.
        logging.basicConfig(
            format="%(message)s",
            stream=sys.stdout,
            level=getattr(logging, self.level),
        )

        self.logger = structlog.get_logger(name)
        self.extra = extra or {}

    def bind(self, **kwargs) -> "Logger":
        """Create new logger with additional context fields merged in."""
        return Logger(
            name=self.name,
            level=self.level,
            extra={**self.extra, **kwargs}
        )

    def debug(self, event: str, **kwargs):
        """Log debug message."""
        self.logger.debug(event, **{**self.extra, **kwargs})

    def info(self, event: str, **kwargs):
        """Log info message."""
        self.logger.info(event, **{**self.extra, **kwargs})

    def warning(self, event: str, **kwargs):
        """Log warning message."""
        self.logger.warning(event, **{**self.extra, **kwargs})

    def error(self, event: str, **kwargs):
        """Log error message."""
        self.logger.error(event, **{**self.extra, **kwargs})

    def exception(self, event: str, exc_info: bool = True, **kwargs):
        """Log exception message with traceback (call from an except block)."""
        self.logger.exception(
            event,
            exc_info=exc_info,
            **{**self.extra, **kwargs}
        )

    def critical(self, event: str, **kwargs):
        """Log critical message."""
        self.logger.critical(event, **{**self.extra, **kwargs})


def get_logger(
    name: str,
    level: str = "INFO",
    extra: Optional[Dict[str, Any]] = None
) -> Logger:
    """Get logger instance."""
    return Logger(name, level, extra)


# Default logger for the package.
logger = get_logger("mcp_codebase_insight")
embedder.embed(test_text) # Store vector await vector_store.add_vector( text=test_text, metadata={"type": "code", "content": test_text} ) logger.info("Vector storage test passed") # Search for similar vectors logger.info("Searching for similar vectors") results = await vector_store.search_similar( query=test_text, limit=1 ) if not results or len(results) == 0: logger.error("Vector search test failed: No results found") return False logger.info("Vector search test passed") # Verify result metadata result = results[0] if not result.metadata or result.metadata.get("type") != "code": logger.error("Vector metadata test failed: Invalid metadata") return False logger.info("Vector metadata test passed") return True except Exception as e: logger.error(f"Vector store validation failed: {e}") return False if __name__ == "__main__": # Load config from environment or .env file from dotenv import load_dotenv load_dotenv() import os config = { "QDRANT_URL": os.getenv("QDRANT_URL", "http://localhost:6333"), "COLLECTION_NAME": os.getenv("COLLECTION_NAME", "mcp-codebase-insight"), "QDRANT_API_KEY": os.getenv("QDRANT_API_KEY", "") } success = asyncio.run(validate_vector_store(config)) sys.exit(0 if success else 1) ``` -------------------------------------------------------------------------------- /tests/components/conftest.py: -------------------------------------------------------------------------------- ```python """ Component Test Fixture Configuration. This file defines fixtures specifically for component tests that might have different scope requirements than the main test fixtures. 
""" import pytest import pytest_asyncio import sys import os from pathlib import Path import uuid from typing import Dict # Import required components from src.mcp_codebase_insight.core.config import ServerConfig from src.mcp_codebase_insight.core.vector_store import VectorStore from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding from src.mcp_codebase_insight.core.knowledge import KnowledgeBase from src.mcp_codebase_insight.core.tasks import TaskManager # Ensure the src directory is in the Python path sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../'))) @pytest.fixture def test_config(): """Create a server configuration for tests. This is an alias for test_server_config that allows component tests to use their expected fixture name. """ config = ServerConfig( host="localhost", port=8000, log_level="DEBUG", qdrant_url="http://localhost:6333", docs_cache_dir=Path(".test_cache") / "docs", adr_dir=Path(".test_cache") / "docs/adrs", kb_storage_dir=Path(".test_cache") / "knowledge", embedding_model="all-MiniLM-L6-v2", collection_name=f"test_collection_{uuid.uuid4().hex[:8]}", debug_mode=True, metrics_enabled=False, cache_enabled=True, memory_cache_size=1000, disk_cache_dir=Path(".test_cache") / "cache" ) return config @pytest.fixture def test_metadata() -> Dict: """Standard test metadata for consistency across tests.""" return { "type": "code", "language": "python", "title": "Test Code", "description": "Test code snippet for vector store testing", "tags": ["test", "vector"] } @pytest_asyncio.fixture async def embedder(): """Create an embedder for tests.""" return SentenceTransformerEmbedding() @pytest_asyncio.fixture async def vector_store(test_config, embedder): """Create a vector store for tests.""" store = VectorStore(test_config.qdrant_url, embedder) await store.initialize() yield store await store.cleanup() @pytest_asyncio.fixture async def task_manager(test_config): """Create a task manager for tests.""" 
manager = TaskManager(test_config) await manager.initialize() yield manager await manager.cleanup() @pytest.fixture def test_code(): """Provide sample code for testing task-related functionality.""" return """ def example_function(): \"\"\"This is a test function for task manager tests.\"\"\" return "Hello, world!" class TestClass: def __init__(self): self.value = 42 def method(self): return self.value """ @pytest_asyncio.fixture async def knowledge_base(test_config, vector_store): """Create a knowledge base for tests.""" kb = KnowledgeBase(test_config, vector_store) await kb.initialize() yield kb await kb.cleanup() ``` -------------------------------------------------------------------------------- /tests/test_file_relationships.py: -------------------------------------------------------------------------------- ```python import pytest @pytest.mark.asyncio async def test_create_file_relationship(client): """Test creating a file relationship.""" relationship_data = { "source_file": "src/main.py", "target_file": "src/utils.py", "relationship_type": "imports", "description": "Main imports utility functions", "metadata": {"importance": "high"} } response = await client.post("/relationships", json=relationship_data) assert response.status_code == 200 data = response.json() assert data["source_file"] == relationship_data["source_file"] assert data["target_file"] == relationship_data["target_file"] assert data["relationship_type"] == relationship_data["relationship_type"] @pytest.mark.asyncio async def test_get_file_relationships(client): """Test getting file relationships.""" # Create a test relationship first relationship_data = { "source_file": "src/test.py", "target_file": "src/helper.py", "relationship_type": "depends_on" } await client.post("/relationships", json=relationship_data) # Test getting all relationships response = await client.get("/relationships") assert response.status_code == 200 data = response.json() assert len(data) > 0 assert isinstance(data, 
list) # Test filtering by source file response = await client.get("/relationships", params={"source_file": "src/test.py"}) assert response.status_code == 200 data = response.json() assert all(r["source_file"] == "src/test.py" for r in data) @pytest.mark.asyncio async def test_create_web_source(client): """Test creating a web source.""" source_data = { "url": "https://example.com/docs", "title": "API Documentation", "content_type": "documentation", "description": "External API documentation", "tags": ["api", "docs"], "metadata": {"version": "1.0"} } response = await client.post("/web-sources", json=source_data) assert response.status_code == 200 data = response.json() assert data["url"] == source_data["url"] assert data["title"] == source_data["title"] assert data["content_type"] == source_data["content_type"] @pytest.mark.asyncio async def test_get_web_sources(client): """Test getting web sources.""" # Create a test web source first source_data = { "url": "https://example.com/tutorial", "title": "Tutorial", "content_type": "tutorial", "tags": ["guide", "tutorial"] } await client.post("/web-sources", json=source_data) # Test getting all web sources response = await client.get("/web-sources") assert response.status_code == 200 data = response.json() assert len(data) > 0 assert isinstance(data, list) # Test filtering by content type response = await client.get("/web-sources", params={"content_type": "tutorial"}) assert response.status_code == 200 data = response.json() assert all(s["content_type"] == "tutorial" for s in data) # Test filtering by tags response = await client.get("/web-sources", params={"tags": ["guide"]}) assert response.status_code == 200 data = response.json() assert any("guide" in s["tags"] for s in data) ``` -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- ```toml [build-system] requires = ["setuptools>=61.0", "wheel"] 
build-backend = "setuptools.build_meta" [project] name = "mcp-codebase-insight" dynamic = ["version"] description = "MCP Codebase Insight Server" readme = "README.md" requires-python = ">=3.10" license = {text = "MIT"} authors = [ {name = "Tosin Akinosho"} ] classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Topic :: Software Development :: Libraries :: Python Modules", ] dependencies = [ "fastapi>=0.109.0", "uvicorn>=0.23.2", "pydantic>=2.4.2", "starlette>=0.35.0", "asyncio>=3.4.3", "aiohttp>=3.9.0", "qdrant-client>=1.13.3", "sentence-transformers>=2.2.2", "torch>=2.0.0", "transformers>=4.34.0", "python-frontmatter>=1.0.0", "markdown>=3.4.4", "PyYAML>=6.0.1", "structlog>=23.1.0", "psutil>=5.9.5", "python-dotenv>=1.0.0", "requests>=2.31.0", "beautifulsoup4>=4.12.0", "scipy>=1.11.0", "python-slugify>=8.0.0", "slugify>=0.0.1", "numpy>=1.24.0", # "uvx>=0.4.0", # Temporarily commented out for development installation "mcp-server-qdrant>=0.2.0", "mcp>=1.5.0,<1.6.0", # Pin to MCP 1.5.0 for API compatibility ] [project.optional-dependencies] test = [ "pytest>=7.4.2", "pytest-asyncio>=0.21.1", "pytest-cov>=4.1.0", "httpx>=0.25.0", ] dev = [ "black>=23.9.1", "isort>=5.12.0", "mypy>=1.5.1", "flake8>=6.1.0", "bump2version>=1.0.1", "pre-commit>=3.5.0", "pdoc>=14.1.0", ] [project.urls] Homepage = "https://github.com/tosin2013/mcp-codebase-insight" Documentation = "https://github.com/tosin2013/mcp-codebase-insight/docs" Repository = "https://github.com/tosin2013/mcp-codebase-insight.git" Issues = "https://github.com/tosin2013/mcp-codebase-insight/issues" [project.scripts] mcp-codebase-insight = "mcp_codebase_insight.server:run" [tool.setuptools] package-dir = {"" = "src"} 
[tool.setuptools.packages.find] where = ["src"] include = ["mcp_codebase_insight*"] [tool.black] line-length = 88 target-version = ['py311'] include = '\.pyi?$' [tool.isort] profile = "black" multi_line_output = 3 include_trailing_comma = true force_grid_wrap = 0 use_parentheses = true ensure_newline_before_comments = true line_length = 88 [tool.mypy] python_version = "3.11" warn_return_any = true warn_unused_configs = true disallow_untyped_defs = true check_untyped_defs = true disallow_untyped_decorators = true no_implicit_optional = true warn_redundant_casts = true warn_unused_ignores = true warn_no_return = true warn_unreachable = true [tool.pytest.ini_options] minversion = "6.0" addopts = "-ra -q --cov=src --cov-report=term-missing" testpaths = ["tests"] asyncio_mode = "auto" [tool.coverage.run] source = ["src"] branch = true [tool.coverage.report] exclude_lines = [ "pragma: no cover", "def __repr__", "if self.debug:", "raise NotImplementedError", "if __name__ == .__main__.:", "pass", "raise ImportError", ] ignore_errors = true omit = ["tests/*", "setup.py"] ``` -------------------------------------------------------------------------------- /tests/components/test_task_manager.py: -------------------------------------------------------------------------------- ```python import sys import os import pytest import pytest_asyncio from pathlib import Path from typing import AsyncGenerator from src.mcp_codebase_insight.core.tasks import TaskManager, TaskType, TaskStatus from src.mcp_codebase_insight.core.config import ServerConfig @pytest_asyncio.fixture async def task_manager(test_config: ServerConfig): manager = TaskManager(test_config) await manager.initialize() yield manager await manager.cleanup() @pytest.mark.asyncio async def test_task_manager_initialization(task_manager: TaskManager): """Test that task manager initializes correctly.""" assert task_manager is not None assert task_manager.config is not None @pytest.mark.asyncio async def 
test_create_and_get_task(task_manager: TaskManager, test_code: str): """Test creating and retrieving tasks.""" # Create task task = await task_manager.create_task( type="code_analysis", title="Test task", description="Test task description", context={"code": test_code} ) assert task is not None # Get task retrieved_task = await task_manager.get_task(task.id) assert retrieved_task.context["code"] == test_code assert retrieved_task.type == TaskType.CODE_ANALYSIS assert retrieved_task.description == "Test task description" @pytest.mark.asyncio async def test_task_status_updates(task_manager: TaskManager, test_code: str): """Test task status updates.""" # Create task task = await task_manager.create_task( type="code_analysis", title="Status Test", description="Test task status updates", context={"code": test_code} ) # Update status await task_manager.update_task(task.id, status=TaskStatus.IN_PROGRESS) updated_task = await task_manager.get_task(task.id) assert updated_task.status == TaskStatus.IN_PROGRESS await task_manager.update_task(task.id, status=TaskStatus.COMPLETED) completed_task = await task_manager.get_task(task.id) assert completed_task.status == TaskStatus.COMPLETED @pytest.mark.asyncio async def test_task_result_updates(task_manager: TaskManager, test_code: str): """Test updating task results.""" # Create task task = await task_manager.create_task( type="code_analysis", title="Result Test", description="Test task result updates", context={"code": test_code} ) # Update result result = {"analysis": "Test analysis result"} await task_manager.update_task(task.id, result=result) # Verify result updated_task = await task_manager.get_task(task.id) assert updated_task.result == result @pytest.mark.asyncio async def test_list_tasks(task_manager: TaskManager, test_code: str): """Test listing tasks.""" # Create multiple tasks tasks = [] for i in range(3): task = await task_manager.create_task( type="code_analysis", title=f"List Test {i}", description=f"Test task {i}", 
context={"code": test_code} ) tasks.append(task) # List tasks task_list = await task_manager.list_tasks() assert len(task_list) >= 3 # Verify task descriptions descriptions = [task.description for task in task_list] for i in range(3): assert f"Test task {i}" in descriptions ``` -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- ```markdown # Changelog All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] ### Added - Initial project setup - Core server implementation - ADR management system - Documentation management - Knowledge base with vector search - Debug system - Task management - Metrics and health monitoring - Caching system - Structured logging - Docker support - CI/CD pipeline - Test suite ### Changed - None ### Deprecated - None ### Removed - None ### Fixed - None ### Security - None ## [0.2.2] - 2025-03-25 ### Added - Implemented single source of truth for versioning ### Changed - Moved version to the package's __init__.py file as the canonical source - Updated setup.py to dynamically read version from __init__.py - Updated pyproject.toml to use dynamic versioning - Synchronized dependencies between setup.py, pyproject.toml and requirements.in ### Fixed - Missing dependencies in setup.py and pyproject.toml ## [0.2.1] - 2025-03-25 ### Added - Integrated Qdrant Docker container in CI/CD workflow for more realistic testing - Added collection initialization step for proper Qdrant setup in CI/CD - Created shared Qdrant client fixture for improved test reliability ### Changed - Updated Python version requirement from >=3.11 to >=3.9 for broader compatibility - Enhanced test fixture scoping to resolve event_loop fixture scope mismatches - 
Improved connection verification for Qdrant in GitHub Actions workflow ### Fixed - Resolved fixture scope mismatches in async tests - Fixed environment variable handling in test configuration ### Removed - None ### Security - None ## [0.2.0] - 2025-03-24 ### Added - None ### Changed - Improved async test fixture handling in component tests - Enhanced test discovery to properly distinguish between test functions and fixtures - Updated component test runner for better isolation and resource management ### Fixed - Resolved fixture scope mismatches in async tests - Fixed async event loop handling in component tests - Corrected test_metadata fixture identification in test_vector_store.py ### Removed - None ### Security - None ## [0.1.0] - 2025-03-19 ### Added - Initial release - Basic server functionality - Core components: - ADR management - Documentation handling - Knowledge base - Vector search - Task management - Health monitoring - Metrics collection - Caching - Logging - Docker support - CI/CD pipeline with GitHub Actions - Test coverage with pytest - Code quality tools: - Black - isort - flake8 - mypy - Documentation: - README - API documentation - Contributing guidelines - ADR templates - Development tools: - Makefile - Docker compose - Environment configuration - Version management [Unreleased]: https://github.com/modelcontextprotocol/mcp-codebase-insight/compare/v0.2.2...HEAD [0.2.2]: https://github.com/modelcontextprotocol/mcp-codebase-insight/compare/v0.2.1...v0.2.2 [0.2.1]: https://github.com/modelcontextprotocol/mcp-codebase-insight/releases/tag/v0.2.1 [0.2.0]: https://github.com/modelcontextprotocol/mcp-codebase-insight/releases/tag/v0.2.0 [0.1.0]: https://github.com/modelcontextprotocol/mcp-codebase-insight/releases/tag/v0.1.0 ``` -------------------------------------------------------------------------------- /docs/documentation_map.md: -------------------------------------------------------------------------------- ```markdown # Documentation 
Relationship Map ```mermaid graph TD %% ADRs ADR1[ADR-0001: Testing Strategy] ADR2[ADR-0002: SSE Testing] ADR3[ADR-0003: Comprehensive Testing] ADR4[ADR-0004: Documentation Linking] %% Core Systems CS1[Vector Store System] CS2[Knowledge Base] CS3[Task Management] CS4[Health Monitoring] CS5[Error Handling] CS6[Metrics Collection] CS7[Cache Management] %% Features FA[Code Analysis] FB[ADR Management] FC[Documentation Management] %% Testing TA[Server Testing] TB[SSE Testing] %% Components C1[Server Framework] C2[Testing Framework] C3[Documentation Tools] %% Implementation Files I1[test_server_instance.py] I2[SSETestManager.py] I3[ServerTestFramework.py] I4[DocNode.py] I5[DocumentationMap.py] %% Core Classes CC1[ServerConfig] CC2[ErrorCode] CC3[ComponentState] CC4[TaskTracker] CC5[DocumentationType] %% Relationships - Core Systems CS1 --> CC1 CS2 --> CS1 CS2 --> CS7 CS3 --> CC4 CS4 --> CC3 CS5 --> CC2 %% Relationships - ADRs ADR1 --> I1 ADR1 --> C1 ADR2 --> I2 ADR2 --> TB ADR3 --> I3 ADR3 --> C2 ADR4 --> I4 ADR4 --> I5 ADR4 --> C3 %% Relationships - Features FA --> CS2 FA --> CS1 FB --> ADR1 FB --> ADR2 FB --> ADR3 FB --> ADR4 FC --> C3 FC --> CC5 %% Relationships - Testing TA --> I1 TA --> I3 TB --> I2 TB --> ADR2 %% Component Relationships C1 --> CC1 C1 --> CS4 C2 --> I2 C2 --> I3 C3 --> I4 C3 --> I5 %% Error Handling CS5 --> FA CS5 --> FB CS5 --> FC CS5 --> CS1 CS5 --> CS2 CS5 --> CS3 %% Styling classDef adr fill:#f9f,stroke:#333,stroke-width:2px classDef feature fill:#bbf,stroke:#333,stroke-width:2px classDef testing fill:#bfb,stroke:#333,stroke-width:2px classDef component fill:#fbb,stroke:#333,stroke-width:2px classDef implementation fill:#ddd,stroke:#333,stroke-width:1px classDef core fill:#ffd,stroke:#333,stroke-width:2px classDef class fill:#dff,stroke:#333,stroke-width:1px class ADR1,ADR2,ADR3,ADR4 adr class FA,FB,FC feature class TA,TB testing class C1,C2,C3 component class I1,I2,I3,I4,I5 implementation class CS1,CS2,CS3,CS4,CS5,CS6,CS7 core class 
CC1,CC2,CC3,CC4,CC5 class ``` ## Documentation Map Legend ### Node Types - **Purple**: Architecture Decision Records (ADRs) - **Blue**: Feature Documentation - **Green**: Testing Documentation - **Red**: Key Components - **Gray**: Implementation Files - **Yellow**: Core Systems - **Light Blue**: Core Classes ### Relationship Types - Arrows indicate dependencies or references between documents - Direct connections show implementation relationships - Indirect connections show conceptual relationships ### Key Areas 1. **Core Systems** - Vector Store and Knowledge Base - Task Management and Health Monitoring - Error Handling and Metrics Collection - Cache Management 2. **Testing Infrastructure** - Centered around ADR-0001 and ADR-0002 - Connected to Server and SSE testing implementations 3. **Documentation Management** - Focused on ADR-0004 - Links to Documentation Tools and models 4. **Feature Implementation** - Shows how features connect to components - Demonstrates implementation dependencies 5. **Error Handling** - Centralized error management - Connected to all major systems - Standardized error codes and types ``` -------------------------------------------------------------------------------- /src/mcp_codebase_insight/server_test_isolation.py: -------------------------------------------------------------------------------- ```python """Test isolation for ServerState. This module provides utilities to create isolated ServerState instances for testing, preventing state conflicts between parallel test runs. """ from typing import Dict, Optional import asyncio import uuid import logging from .core.state import ServerState from .utils.logger import get_logger logger = get_logger(__name__) # Store of server states keyed by instance ID _server_states: Dict[str, ServerState] = {} def get_isolated_server_state(instance_id: Optional[str] = None) -> ServerState: """Get or create an isolated ServerState instance for tests. 
def get_isolated_server_state(instance_id: Optional[str] = None) -> ServerState:
    """Return the ServerState registered under *instance_id*, creating it on demand.

    Args:
        instance_id: Optional unique ID for the server state. When omitted,
            a fresh temporary ID is generated for this lookup.

    Returns:
        An isolated ServerState instance tracked in the module registry.
    """
    global _server_states

    key = instance_id if instance_id is not None else f"temp_{uuid.uuid4().hex}"

    state = _server_states.get(key)
    if state is None:
        logger.debug(f"Creating new isolated ServerState with ID: {key}")
        state = ServerState()
        _server_states[key] = state

    return state


async def cleanup_all_server_states():
    """Clean up all tracked server states and empty the registry."""
    global _server_states

    logger.debug(f"Cleaning up {len(_server_states)} isolated server states")

    # Snapshot the registry so cleanup cannot race with concurrent mutation.
    scheduled = []
    for key, state in list(_server_states.items()):
        try:
            logger.debug(f"Cleaning up ServerState: {key}")
            if not state.initialized:
                # Nothing was started for this state; nothing to tear down.
                logger.debug(f"Skipping uninitialized ServerState: {key}")
                continue

            active_tasks = state.get_active_tasks()
            if active_tasks:
                logger.debug(
                    f"Found {len(active_tasks)} active tasks for {key}"
                )

            # Bound each state's cleanup so one hang cannot block the rest.
            scheduled.append(
                (
                    key,
                    asyncio.create_task(
                        asyncio.wait_for(state.cleanup(), timeout=5.0)
                    ),
                )
            )
        except Exception as e:
            logger.error(
                f"Error preparing cleanup for ServerState {key}: {e}",
                exc_info=True
            )

    # Await every scheduled cleanup, reporting stragglers and timeouts.
    for key, task in scheduled:
        try:
            await task
            logger.debug(f"State {key} cleaned up successfully")
            state = _server_states.get(key)
            if state and state.get_task_count() > 0:
                logger.warning(
                    f"State {key} still has {state.get_task_count()} "
                    "active tasks after cleanup"
                )
        except asyncio.TimeoutError:
            logger.warning(f"State cleanup timed out for {key}")
            # Force the state out of "initialized" so later checks see it as down.
            state = _server_states.get(key)
            if state:
                state.initialized = False
        except Exception as e:
            logger.error(f"Error during state cleanup for {key}: {e}")

    # Drop every entry regardless of individual cleanup outcome.
    _server_states.clear()
    logger.debug("All server states cleaned up")
"""Task tracking and management for async operations."""

import asyncio
import logging
from typing import Set, Optional
from datetime import datetime

try:
    from ..utils.logger import get_logger
    logger = get_logger(__name__)
except ImportError:  # Fallback so the module also works outside the package.
    logger = logging.getLogger(__name__)


class TaskTracker:
    """Tracks and manages async tasks with improved error handling and logging."""

    def __init__(self):
        """Initialize the task tracker bound to the current event loop."""
        self._tasks: Set[asyncio.Task] = set()
        # get_event_loop() (not get_running_loop()) is kept deliberately so the
        # tracker can also be constructed before a loop is running.
        self._loop = asyncio.get_event_loop()
        self._loop_id = id(self._loop)
        self._start_time = datetime.utcnow()
        logger.debug(f"TaskTracker initialized with loop ID: {self._loop_id}")

    def track_task(self, task: asyncio.Task) -> None:
        """Track a new task and set up completion handling.

        Args:
            task: The asyncio.Task to track
        """
        if id(asyncio.get_event_loop()) != self._loop_id:
            logger.warning(
                f"Task created in different event loop context. "
                f"Expected: {self._loop_id}, Got: {id(asyncio.get_event_loop())}"
            )

        self._tasks.add(task)
        task.add_done_callback(self._handle_task_completion)
        logger.debug(f"Tracking new task: {task.get_name()}")

    def _handle_task_completion(self, task: asyncio.Task) -> None:
        """Handle task completion and cleanup.

        Args:
            task: The completed task

        Note:
            Bug fix: ``task.exception()`` raises ``CancelledError`` when called
            on a cancelled task, which previously propagated out of this
            done-callback every time ``cancel_all_tasks`` ran. Cancellation is
            now checked first.
        """
        self._tasks.discard(task)
        if task.cancelled():
            logger.debug(f"Task {task.get_name()} was cancelled")
        elif task.exception():
            logger.error(
                f"Task {task.get_name()} failed with error: {task.exception()}",
                exc_info=True
            )
        else:
            logger.debug(f"Task {task.get_name()} completed successfully")

    async def cancel_all_tasks(self, timeout: float = 5.0) -> None:
        """Cancel all tracked tasks and wait for completion.

        Args:
            timeout: Maximum time to wait for tasks to cancel
        """
        if not self._tasks:
            logger.debug("No tasks to cancel")
            return

        logger.debug(f"Cancelling {len(self._tasks)} tasks")
        for task in self._tasks:
            if not task.done() and not task.cancelled():
                task.cancel()

        try:
            # gather() snapshots the task set at call time, so completion
            # callbacks may safely discard entries while we wait.
            await asyncio.wait_for(
                asyncio.gather(*self._tasks, return_exceptions=True),
                timeout=timeout
            )
            logger.debug("All tasks cancelled successfully")
        except asyncio.TimeoutError:
            logger.warning(f"Task cancellation timed out after {timeout} seconds")
        except Exception as e:
            logger.error(f"Error during task cancellation: {e}", exc_info=True)

    def get_active_tasks(self) -> Set[asyncio.Task]:
        """Get all currently active tasks.

        Returns:
            Set of active asyncio.Task objects
        """
        return self._tasks.copy()

    def get_task_count(self) -> int:
        """Get the number of currently tracked tasks.

        Returns:
            Number of active tasks
        """
        return len(self._tasks)

    def get_uptime(self) -> float:
        """Get the uptime of the task tracker in seconds.

        Returns:
            Uptime in seconds
        """
        return (datetime.utcnow() - self._start_time).total_seconds()

    def __del__(self):
        """Warn if the tracker is destroyed while tasks are still tracked."""
        if self._tasks:
            logger.warning(
                f"TaskTracker destroyed with {len(self._tasks)} "
                "unfinished tasks"
            )
## Prerequisites Before installing MCP Codebase Insight, ensure you have the following: - Python 3.11 or higher - pip (Python package installer) - Git - Docker (optional, for containerized deployment) - 4GB RAM minimum (8GB recommended) - 2GB free disk space ## System Requirements ### Operating Systems - Linux (Ubuntu 20.04+, CentOS 8+) - macOS (10.15+) - Windows 10/11 with WSL2 ### Python Dependencies - FastAPI - Pydantic - httpx - sentence-transformers - qdrant-client ## Installation Methods ### 1. Using pip (Recommended) ```bash # Create and activate a virtual environment python -m venv venv source venv/bin/activate # On Windows: venv\Scripts\activate # Install MCP Codebase Insight pip install mcp-codebase-insight # Verify installation mcp-codebase-insight --version ``` ### 2. Using Docker ```bash # Pull the Docker image docker pull modelcontextprotocol/mcp-codebase-insight # Create necessary directories mkdir -p docs knowledge cache # Run the container docker run -p 3000:3000 \ --env-file .env \ -v $(pwd)/docs:/app/docs \ -v $(pwd)/knowledge:/app/knowledge \ -v $(pwd)/cache:/app/cache \ modelcontextprotocol/mcp-codebase-insight ``` ### 3. From Source ```bash # Clone the repository git clone https://github.com/modelcontextprotocol/mcp-codebase-insight.git cd mcp-codebase-insight # Create and activate virtual environment python -m venv venv source venv/bin/activate # On Windows: venv\Scripts\activate # Install dependencies pip install -r requirements.txt # Install in development mode pip install -e . ``` ## Environment Setup 1. Create a `.env` file in your project root: ```bash MCP_HOST=127.0.0.1 MCP_PORT=3000 QDRANT_URL=http://localhost:6333 MCP_DOCS_CACHE_DIR=./docs MCP_ADR_DIR=./docs/adrs MCP_KB_STORAGE_DIR=./knowledge MCP_DISK_CACHE_DIR=./cache LOG_LEVEL=INFO ``` 2. Create required directories: ```bash mkdir -p docs/adrs knowledge cache ``` ## Post-Installation Steps 1. 
**Vector Database Setup** - Follow the [Qdrant Setup Guide](qdrant_setup.md) to install and configure Qdrant 2. **Verify Installation** ```bash # Start the server mcp-codebase-insight --host 127.0.0.1 --port 3000 # In another terminal, test the health endpoint curl http://localhost:3000/health ``` 3. **Initial Configuration** - Configure authentication (if needed) - Set up logging - Configure metrics collection ## Common Installation Issues ### 1. Dependencies Installation Fails ```bash # Try upgrading pip pip install --upgrade pip # Install wheel pip install wheel # Retry installation pip install mcp-codebase-insight ``` ### 2. Port Already in Use ```bash # Check what's using port 3000 lsof -i :3000 # On Linux/macOS netstat -ano | findstr :3000 # On Windows # Use a different port mcp-codebase-insight --port 3001 ``` ### 3. Permission Issues ```bash # Fix directory permissions chmod -R 755 docs knowledge cache ``` ## Next Steps - Read the [Configuration Guide](configuration.md) for detailed setup options - Follow the [Quick Start Tutorial](quickstart.md) to begin using the system - Check the [Best Practices](../development/best-practices.md) for optimal usage - Follow the [Qdrant Setup](qdrant_setup.md) to set up the vector database ## Support If you encounter any issues during installation: 1. Check the [Troubleshooting Guide](../troubleshooting/common-issues.md) 2. Search existing [GitHub Issues](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues) 3. Open a new issue if needed ``` -------------------------------------------------------------------------------- /docs/SSE_INTEGRATION.md: -------------------------------------------------------------------------------- ```markdown # Server-Sent Events (SSE) Integration This document explains the Server-Sent Events (SSE) integration in the MCP Codebase Insight server, including its purpose, architecture, and usage instructions. 
## Overview The SSE integration enables real-time, bidirectional communication between the MCP Codebase Insight server and clients using the Model Context Protocol (MCP). This allows clients to receive live updates for long-running operations and establish persistent connections for continuous data flow. ## Architecture The SSE integration is built as a modular component within the MCP Codebase Insight system, following these design principles: 1. **Separation of Concerns**: The SSE transport layer is isolated from the core application logic 2. **Non-Interference**: SSE endpoints operate alongside existing REST API endpoints without disruption 3. **Shared Resources**: Both REST and SSE interfaces use the same underlying components and state ### Key Components - **MCP_CodebaseInsightServer**: Manages the MCP protocol server and exposes system functionality as MCP tools - **FastMCP**: The core MCP protocol implementation that handles messaging format and protocol features - **SseServerTransport**: Implements the SSE protocol for persistent connections - **Starlette Integration**: Low-level ASGI application that handles SSE connections ### Endpoint Structure - `/mcp/sse/`: Establishes the SSE connection for real-time events - `/mcp/messages/`: Handles incoming messages from clients via HTTP POST ### Data Flow ``` Client <---> SSE Connection (/mcp/sse/) <---> MCP Server <---> Core Components <---> Message POST (/mcp/messages/) <--/ ``` ## Available Tools The SSE integration exposes these core system capabilities as MCP tools: 1. **vector-search**: Search for code snippets semantically similar to a query text 2. **knowledge-search**: Search for patterns in the knowledge base 3. **adr-list**: Retrieve architectural decision records 4. 
**task-status**: Check status of long-running tasks ## Usage Instructions ### Client Configuration To connect to the SSE endpoint, configure your MCP client as follows: ```json { "mcpClients": { "codebase-insight-sse": { "url": "http://localhost:8000/mcp", "transport": "sse" } } } ``` ### Example: Connecting with MCP Client ```python from mcp.client import Client # Connect to the SSE endpoint client = Client.connect("codebase-insight-sse") # Use vector search tool results = await client.call_tool( "vector-search", {"query": "function that parses JSON", "limit": 5} ) ``` ## Testing The SSE implementation includes tests to verify: 1. Connection establishment and maintenance 2. Tool registration and execution 3. Error handling and reconnection behavior Run SSE-specific tests with: ```bash pytest tests/integration/test_sse.py -v ``` ## Security Considerations The SSE integration inherits the security model of the main application. When security features like authentication are enabled, they apply to SSE connections as well. ## Performance Considerations SSE connections are persistent and can consume server resources. Consider these guidelines: - Implement client-side reconnection strategies with exponential backoff - Set reasonable timeouts for idle connections - Monitor connection counts in production environments ## Troubleshooting Common issues and solutions: 1. **Connection Refused**: Ensure the server is running and the client is using the correct URL 2. **Invalid SSE Format**: Check for middleware that might buffer responses 3. 
**Connection Drops**: Verify network stability and implement reconnection logic ``` -------------------------------------------------------------------------------- /docs/adrs/001_use_docker_for_qdrant.md: -------------------------------------------------------------------------------- ```markdown # Use Docker for Qdrant Vector Database ## Status Accepted ## Context We need a vector database to store and search through code patterns and documentation embeddings. Qdrant is chosen as our vector database solution, and we need to determine the best way to deploy and manage it. ## Decision Drivers * Ease of deployment and setup * Development environment consistency * Production readiness * Resource management * Scalability * Maintainability ## Considered Options ### Option 1: Docker Container * Use official Qdrant Docker image * Run as containerized service * Manage with Docker Compose for local development * Use Kubernetes for production deployment ### Option 2: Native Installation * Install Qdrant directly on host system * Manage as system service * Configure through system files * Handle updates through package manager ### Option 3: Cloud-Hosted Solution * Use managed Qdrant Cloud service * Pay per usage * Managed infrastructure * Automatic updates and maintenance ## Decision We will use Docker for running Qdrant. This decision is based on several factors: 1. **Development Environment**: Docker provides consistent environment across all developer machines 2. **Easy Setup**: Simple `docker run` command to get started 3. **Resource Isolation**: Container ensures clean resource management 4. **Version Control**: Easy version management through Docker tags 5. **Production Ready**: Same container can be used in production 6. 
**Scaling**: Can be deployed to Kubernetes when needed ## Expected Consequences ### Positive Consequences * Consistent environment across development and production * Easy setup process for new developers * Clean isolation from other system components * Simple version management * Clear resource boundaries * Easy backup and restore procedures * Portable across different platforms ### Negative Consequences * Additional Docker knowledge required * Small performance overhead from containerization * Need to manage container resources carefully * Additional complexity in monitoring setup ## Pros and Cons of the Options ### Docker Container * ✅ Consistent environment * ✅ Easy setup and teardown * ✅ Good isolation * ✅ Version control * ✅ Production ready * ❌ Container overhead * ❌ Requires Docker knowledge ### Native Installation * ✅ Direct system access * ✅ No containerization overhead * ✅ Full control over configuration * ❌ System-dependent setup * ❌ Potential conflicts with system packages * ❌ More complex version management ### Cloud-Hosted Solution * ✅ No infrastructure management * ✅ Automatic scaling * ✅ Managed backups * ❌ Higher cost * ❌ Less control * ❌ Internet dependency * ❌ Potential latency issues ## Implementation ### Docker Run Command ```bash docker run -d -p 6333:6333 -p 6334:6334 \ -v $(pwd)/qdrant_storage:/qdrant/storage \ qdrant/qdrant ``` ### Docker Compose Configuration ```yaml version: '3.8' services: qdrant: image: qdrant/qdrant ports: - "6333:6333" - "6334:6334" volumes: - qdrant_storage:/qdrant/storage environment: - RUST_LOG=info volumes: qdrant_storage: ``` ## Notes * Monitor container resource usage in production * Set up proper backup procedures for the storage volume * Consider implementing health checks * Document recovery procedures ## Metadata * Created: 2025-03-19 * Last Modified: 2025-03-19 * Author: Development Team * Approvers: Technical Lead, Infrastructure Team * Status: Accepted * Tags: infrastructure, database, docker, 
vector-search * References: * [Qdrant Docker Documentation](https://qdrant.tech/documentation/guides/installation/#docker) * [Docker Best Practices](https://docs.docker.com/develop/develop-images/dockerfile_best-practices/) ``` -------------------------------------------------------------------------------- /tests/components/test_vector_store.py: -------------------------------------------------------------------------------- ```python import pytest import pytest_asyncio import uuid import sys import os from pathlib import Path from typing import AsyncGenerator, Dict from fastapi.testclient import TestClient # Ensure the src directory is in the Python path sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))) from src.mcp_codebase_insight.core.vector_store import VectorStore from src.mcp_codebase_insight.core.config import ServerConfig from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding import logging logger = logging.getLogger(__name__) @pytest_asyncio.fixture async def test_metadata() -> Dict: """Standard test metadata for consistency across tests.""" return { "type": "code", "language": "python", "title": "Test Code", "description": "Test code snippet for vector store testing", "tags": ["test", "vector"] } @pytest_asyncio.fixture async def embedder(): return SentenceTransformerEmbedding() @pytest_asyncio.fixture async def vector_store(test_config: ServerConfig, embedder): store = VectorStore(test_config.qdrant_url, embedder) await store.initialize() yield store await store.cleanup() @pytest.mark.asyncio async def test_vector_store_initialization(vector_store: VectorStore): """Test that vector store initializes correctly.""" assert vector_store is not None assert vector_store.embedder is not None assert vector_store.client is not None assert vector_store.initialized is True logger.info("Vector store initialization test passed") @pytest.mark.asyncio async def 
@pytest.mark.asyncio
async def test_vector_store_add_and_search(vector_store: VectorStore, test_metadata: Dict):
    """Test adding and searching vectors."""
    sample_text = "Test code snippet with unique identifier"

    # Insert one vector, then query it back by similarity.
    logger.info("Adding vector to store")
    vector_id = await vector_store.add_vector(sample_text, test_metadata)
    assert vector_id is not None

    logger.info("Searching for similar vectors")
    results = await vector_store.search_similar(sample_text, limit=1)
    assert len(results) > 0
    # get() with a default keeps the assertion safe if "type" is absent.
    assert results[0].metadata.get("type", "unknown") == "code"

    logger.info(f"Original metadata: {test_metadata}")
    logger.info(f"Retrieved metadata: {results[0].metadata}")

    # Every key stored must survive the round trip.
    missing_keys = [key for key in test_metadata if key not in results[0].metadata]
    assert not missing_keys, f"Metadata is missing expected keys: {missing_keys}"
    logger.info("Vector store add and search test passed")

@pytest.mark.asyncio
async def test_vector_store_cleanup(test_config: ServerConfig, embedder: SentenceTransformerEmbedding):
    """Test that cleanup works correctly."""
    # Prefer the collection name configured for this run so the test hits
    # the properly initialized collection.
    collection_name = os.environ.get("MCP_COLLECTION_NAME", test_config.collection_name)
    store = VectorStore(
        test_config.qdrant_url,
        embedder,
        collection_name=collection_name
    )

    logger.info(f"Initializing vector store with collection {collection_name}")
    await store.initialize()
    assert store.initialized is True

    # Seed one vector so cleanup has something to remove.
    await store.add_vector("Test cleanup text", {"type": "test"})

    logger.info(f"Cleaning up vector store with collection {collection_name}")
    await store.cleanup()
    assert store.initialized is False

    # Release any remaining client resources.
    await store.close()
    logger.info("Vector store cleanup test passed")
cleanup test passed") ``` -------------------------------------------------------------------------------- /system-card.yml: -------------------------------------------------------------------------------- ```yaml name: MCP Codebase Insight version: 0.1.0 description: A system for analyzing and understanding codebases through semantic analysis, pattern detection, and documentation management. poc_scope: - Vector-based code analysis and similarity search - Pattern detection and knowledge base operations - Dual-transport architecture (SSE and stdio) - Task management and tracking - Memory operations and persistence environment: requirements: python: ">=3.11" docker: ">=20.10.0" ram_gb: 4 cpu_cores: 2 disk_space_gb: 20 dependencies: core: - mcp-firecrawl - httpx-sse - python-frontmatter - qdrant-client>=1.13.3 - fastapi>=0.115.12 - numpy>=2.2.4 transport: - mcp-transport - mcp-stdio - mcp-sse development: - pytest - black - isort - mypy - pip-tools - bump2version configuration: env_vars: required: - QDRANT_HOST - QDRANT_PORT - API_KEY - TRANSPORT_MODE optional: - DEBUG_MODE - LOG_LEVEL files: required: - .env - docker-compose.yml optional: - .env.local setup: steps: 1_environment: - Create and activate Python virtual environment - Install dependencies from requirements.txt - Copy .env.example to .env and configure 2_services: - Start Docker - Run docker-compose up for Qdrant - Wait for services to be ready 3_validation: - Run main PoC validation script - Check individual component validations if needed - Verify transport configurations validation: scripts: main: path: scripts/validate_poc.py description: "Main validation script that orchestrates all component checks" components: vector_store: path: scripts/validate_vector_store.py description: "Validates vector store operations and search functionality" knowledge_base: path: scripts/validate_knowledge_base.py description: "Tests knowledge base operations and entity relations" transport: description: "Transport 
validation is included in the main PoC script" health_checks: services: qdrant: endpoint: http://localhost:6333/health method: GET api: endpoint: http://localhost:8000/health method: GET headers: Authorization: "Bearer ${API_KEY}" functional_checks: vector_store: - Test vector operations with sample code - Validate embedding dimensions - Verify search functionality knowledge_base: - Create and verify test entities - Test entity relations - Validate query operations transport: sse: - Verify event stream connection - Test bidirectional communication - Check error handling stdio: - Verify process communication - Test command execution - Validate response format troubleshooting: environment: - Check Python and Docker versions - Verify system resources - Validate dependency installation services: - Check Docker container status - View service logs - Verify port availability transport: - Test SSE endpoint connectivity - Verify stdio binary functionality - Check authentication configuration data: - Verify Qdrant collection status - Check knowledge base connectivity - Test data persistence metrics: collection: - System resource usage - Request latency - Transport performance - Operation success rates monitoring: - Component health status - Error rates and types - Resource utilization - Transport switching events documentation: references: - docs/system_architecture/README.md - docs/api/README.md - docs/adrs/006_transport_protocols.md - docs/development/README.md ``` -------------------------------------------------------------------------------- /examples/use_with_claude.py: -------------------------------------------------------------------------------- ```python """Example of using MCP Codebase Insight with Claude.""" import json import httpx import os from typing import Dict, Any import asyncio # Configure server URL SERVER_URL = os.getenv("MCP_SERVER_URL", "http://localhost:3000") async def call_tool(name: str, arguments: Dict[str, Any]) -> Dict[str, Any]: """Call a 
async def call_tool(name: str, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Call a tool endpoint on the server."""
    payload = {"name": name, "arguments": arguments}
    async with httpx.AsyncClient() as client:
        response = await client.post(f"{SERVER_URL}/tools/{name}", json=payload)
        response.raise_for_status()
        return response.json()

async def analyze_code(code: str, context: Dict[str, Any] = None) -> Dict[str, Any]:
    """Analyze code using the server."""
    return await call_tool("analyze-code", {"code": code, "context": context or {}})

async def search_knowledge(query: str, pattern_type: str = None) -> Dict[str, Any]:
    """Search knowledge base."""
    arguments = {"query": query, "type": pattern_type, "limit": 5}
    return await call_tool("search-knowledge", arguments)

async def create_adr(
    title: str,
    context: Dict[str, Any],
    options: list,
    decision: str
) -> Dict[str, Any]:
    """Create an ADR."""
    arguments = {
        "title": title,
        "context": context,
        "options": options,
        "decision": decision,
    }
    return await call_tool("create-adr", arguments)

async def debug_issue(
    description: str,
    issue_type: str = None,
    context: Dict[str, Any] = None
) -> Dict[str, Any]:
    """Debug an issue."""
    arguments = {
        "description": description,
        "type": issue_type,
        "context": context or {},
    }
    return await call_tool("debug-issue", arguments)

async def get_task_status(task_id: str) -> Dict[str, Any]:
    """Get task status and results."""
    return await call_tool("get-task", {"task_id": task_id})
"constraints": ["Must be async", "Must have good documentation"] }, options=[ { "title": "FastAPI", "pros": ["Async by default", "Great docs", "Type hints"], "cons": ["Newer framework"] }, { "title": "Flask", "pros": ["Mature", "Simple"], "cons": ["Not async by default"] } ], decision="We will use FastAPI for its async support and type hints" ) print(json.dumps(result, indent=2)) # Example debugging print("\nDebugging issue...") result = await debug_issue( description="Application crashes when processing large files", issue_type="performance", context={ "file_size": "2GB", "memory_usage": "8GB", "error": "MemoryError" } ) print(json.dumps(result, indent=2)) except Exception as e: print(f"Error: {e}") if __name__ == "__main__": asyncio.run(main()) ``` -------------------------------------------------------------------------------- /system-architecture.md: -------------------------------------------------------------------------------- ```markdown # System Architecture - MCP Codebase Insight This document outlines the system architecture of the MCP Codebase Insight project using various diagrams to illustrate different aspects of the system. 
## High-Level System Architecture ```mermaid graph TB Client[Client Applications] --> API[FastAPI Server] API --> Core[Core Services] subgraph Core Services CodeAnalysis[Code Analysis Service] ADR[ADR Management] Doc[Documentation Service] Knowledge[Knowledge Base] Debug[Debug System] Metrics[Metrics & Health] Cache[Caching System] end Core --> VectorDB[(Qdrant Vector DB)] Core --> FileSystem[(File System)] CodeAnalysis --> VectorDB Knowledge --> VectorDB ADR --> FileSystem Doc --> FileSystem ``` ## Component Relationships ```mermaid graph LR subgraph Core Components Embeddings[Embeddings Service] VectorStore[Vector Store Service] Knowledge[Knowledge Service] Tasks[Tasks Service] Prompts[Prompts Service] Debug[Debug Service] Health[Health Service] Config[Config Service] Cache[Cache Service] end Embeddings --> VectorStore Knowledge --> VectorStore Knowledge --> Embeddings Tasks --> Knowledge Debug --> Knowledge Prompts --> Tasks Health --> Cache %% Instead of linking to "Core Components", link to each node individually Config --> Embeddings Config --> VectorStore Config --> Knowledge Config --> Tasks Config --> Prompts Config --> Debug Config --> Health Config --> Cache ``` ## Data Flow Architecture ```mermaid sequenceDiagram participant Client participant API participant Knowledge participant Embeddings participant VectorStore participant Cache Client->>API: Request Analysis API->>Cache: Check Cache alt Cache Hit Cache-->>API: Return Cached Result else Cache Miss API->>Knowledge: Process Request Knowledge->>Embeddings: Generate Embeddings Embeddings->>VectorStore: Store/Query Vectors VectorStore-->>Knowledge: Vector Results Knowledge-->>API: Analysis Results API->>Cache: Store Results API-->>Client: Return Results end ``` ## Directory Structure ```mermaid graph TD Root[mcp-codebase-insight] --> Src[src/] Root --> Tests[tests/] Root --> Docs[docs/] Root --> Scripts[scripts/] Root --> Knowledge[knowledge/] Src --> Core[core/] Src --> Utils[utils/] Core --> 
Components{Core Components} Components --> ADR[adr.py] Components --> Cache[cache.py] Components --> Config[config.py] Components --> Debug[debug.py] Components --> Doc[documentation.py] Components --> Embed[embeddings.py] Components --> Know[knowledge.py] Components --> Vector[vector_store.py] Knowledge --> Patterns[patterns/] Knowledge --> Tasks[tasks/] Knowledge --> Prompts[prompts/] ``` ## Security and Authentication Flow ```mermaid graph TD Request[Client Request] --> Auth[Authentication Layer] Auth --> Validation[Request Validation] Validation --> RateLimit[Rate Limiting] RateLimit --> Processing[Request Processing] subgraph Security Measures Auth Validation RateLimit Logging[Audit Logging] end Processing --> Logging Processing --> Response[API Response] ``` This architecture documentation illustrates the main components and their interactions within the MCP Codebase Insight system. The system is designed to be modular, scalable, and maintainable, with clear separation of concerns between different components. Key architectural decisions: 1. Use of FastAPI for high-performance API endpoints 2. Vector database (Qdrant) for efficient similarity search 3. Modular core services for different functionalities 4. Caching layer for improved performance 5. Clear separation between data storage and business logic 6. Comprehensive security measures 7. 
Structured knowledge management system ``` -------------------------------------------------------------------------------- /docs/documentation_summary.md: -------------------------------------------------------------------------------- ```markdown # MCP Codebase Insight Documentation Structure ## Architecture Decision Records (ADRs) ### Testing Strategy (ADR-0001) Core decisions about testing infrastructure, focusing on: - Server management and startup - Test client configuration - SSE testing approach Implemented by: - `tests.integration.test_sse.test_server_instance` - `tests.integration.test_sse.test_client` - `src.mcp_codebase_insight.server.lifespan` ### SSE Testing Strategy (ADR-0002) Detailed approach to testing Server-Sent Events, covering: - Connection management - Event handling - Test patterns Implemented by: - `tests.framework.sse.SSETestManager` - `tests.integration.test_sse.test_sse_message_flow` ### Comprehensive Testing Strategy (ADR-0003) Framework for testing all components: - Server testing framework - SSE test management - Test client configuration - Integration patterns Implemented by: - `tests.framework.server.ServerTestFramework` - `tests.framework.sse.SSETestManager` - `tests.conftest.configured_test_client` ### Documentation Linking Strategy (ADR-0004) System for maintaining documentation-code relationships: - Documentation node management - Code element tracking - Link validation Implemented by: - `src.mcp_codebase_insight.documentation.models.DocNode` - `src.mcp_codebase_insight.documentation.models.DocumentationMap` - `src.mcp_codebase_insight.documentation.loader.DocLoader` ## Feature Documentation ### Code Analysis Overview of code analysis capabilities: - Pattern detection - Quality analysis - Dependency tracking Implemented by: - `src.mcp_codebase_insight.analysis` ### ADR Management Tools for managing Architecture Decision Records: - ADR creation - Status tracking - Implementation linking Implemented by: - 
`src.mcp_codebase_insight.adr` ### Documentation Management Documentation tooling and processes: - Documentation-code linking - Validation tools - Generation utilities Implemented by: - `src.mcp_codebase_insight.documentation` - `src.mcp_codebase_insight.documentation.annotations` ## Testing Documentation ### Server Testing Framework and patterns for server testing: - Server lifecycle management - Health checking - Configuration testing Implemented by: - `tests.framework.server.ServerTestFramework` - `tests.conftest.configured_test_client` ### SSE Testing Patterns and tools for SSE testing: - Connection management - Event verification - Integration testing Implemented by: - `tests.framework.sse.SSETestManager` - `tests.integration.test_sse.test_sse_connection` - `tests.integration.test_sse.test_sse_message_flow` ## Key Components ### Server Framework - Server configuration and lifecycle management - Health check endpoints - SSE infrastructure Key files: - `src.mcp_codebase_insight.server.ServerConfig` - `src.mcp_codebase_insight.server.lifespan` ### Testing Framework - Test client configuration - Server test fixtures - SSE test utilities Key files: - `tests.framework.server.ServerTestFramework` - `tests.framework.sse.SSETestManager` - `tests.conftest.configured_test_client` ### Documentation Tools - Documentation-code linking - Validation utilities - Generation tools Key files: - `src.mcp_codebase_insight.documentation.models` - `src.mcp_codebase_insight.documentation.loader` - `src.mcp_codebase_insight.documentation.annotations` ## Documentation Coverage ### Well-Documented Areas 1. Testing infrastructure - Server testing framework - SSE testing components - Test client configuration 2. Documentation management - Documentation models - Loading and validation - Code annotations ### Areas Needing More Documentation 1. Code analysis features - Implementation details - Usage patterns - Configuration options 2. 
ADR management tools - CLI interface - Template system - Integration features ## Next Steps 1. **Documentation Improvements** - Add more code examples - Create API reference docs - Expand configuration guides 2. **Testing Enhancements** - Add performance test docs - Document error scenarios - Create debugging guides 3. **Feature Documentation** - Complete code analysis docs - Expand ADR management docs - Add integration guides ``` -------------------------------------------------------------------------------- /docs/troubleshooting/faq.md: -------------------------------------------------------------------------------- ```markdown # Frequently Asked Questions > 🚧 **Documentation In Progress** > > This documentation is being actively developed. More details will be added soon. ## General Questions ### What is MCP Codebase Insight? MCP Codebase Insight is a tool for analyzing and understanding codebases through semantic analysis, pattern detection, and documentation management. ### What are the system requirements? - Python 3.11 or higher - 4GB RAM minimum (8GB recommended) - 2GB free disk space - Docker (optional, for containerized deployment) ### Which operating systems are supported? - Linux (Ubuntu 20.04+, CentOS 8+) - macOS (10.15+) - Windows 10/11 with WSL2 ## Installation ### Do I need to install Qdrant separately? Yes, Qdrant is required for vector storage. You can install it via Docker (recommended) or from source. See the [Qdrant Setup Guide](../getting-started/qdrant_setup.md). ### Can I use a different vector database? Currently, only Qdrant is supported. Support for other vector databases may be added in future releases. ### Why am I getting permission errors during installation? This usually happens when trying to install in system directories. Try: 1. Using a virtual environment 2. Installing with `--user` flag 3. Using proper permissions for directories ## Usage ### How do I start analyzing my codebase? 1. Install MCP Codebase Insight 2. 
Set up Qdrant 3. Configure your environment 4. Run the server 5. Use the API or CLI to analyze your code ### Can I analyze multiple repositories at once? Yes, you can analyze multiple repositories by: 1. Using batch analysis 2. Creating separate collections 3. Merging results afterward ### How do I customize the analysis? You can customize: - Analysis patterns - Vector search parameters - Documentation generation - Output formats See the [Configuration Guide](../getting-started/configuration.md). ## Performance ### Why is vector search slow? Common reasons: 1. Large vector collection 2. Limited memory 3. Network latency 4. Insufficient CPU resources Solutions: 1. Enable disk storage 2. Adjust batch size 3. Optimize search parameters 4. Scale hardware resources ### How much memory do I need? Memory requirements depend on: - Codebase size - Vector collection size - Batch processing size - Concurrent operations Minimum: 4GB RAM Recommended: 8GB+ RAM ### Can I run it in production? Yes, but consider: 1. Setting up authentication 2. Configuring CORS 3. Using SSL/TLS 4. Implementing monitoring 5. Setting up backups ## Features ### Does it support my programming language? Currently supported: - Python - JavaScript/TypeScript - Java - Go - Ruby More languages planned for future releases. ### Can it generate documentation? Yes, it can: 1. Generate API documentation 2. Create architecture diagrams 3. Maintain ADRs 4. Build knowledge bases ### How does pattern detection work? Pattern detection uses: 1. Vector embeddings 2. AST analysis 3. Semantic search 4. Machine learning models ## Integration ### Can I integrate with my IDE? Yes, through: 1. REST API 2. Language Server Protocol 3. Custom extensions ### Does it work with CI/CD pipelines? Yes, you can: 1. Run analysis in CI 2. Generate reports 3. Enforce patterns 4. Update documentation ### Can I use it with existing tools? Integrates with: 1. Git 2. Documentation generators 3. Code quality tools 4. 
Issue trackers ## Troubleshooting ### Where are the log files? Default locations: - Server logs: `./logs/server.log` - Access logs: `./logs/access.log` - Debug logs: `./logs/debug.log` ### How do I report bugs? 1. Check [existing issues](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues) 2. Create new issue with: - Clear description - Steps to reproduce - System information - Log files ### How do I get support? Support options: 1. [Documentation](../README.md) 2. [GitHub Issues](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues) 3. [Discussion Forum](https://github.com/modelcontextprotocol/mcp-codebase-insight/discussions) 4. [Discord Community](https://discord.gg/mcp-codebase-insight) ## Next Steps - [Common Issues](common-issues.md) - [Installation Guide](../getting-started/installation.md) - [Configuration Guide](../getting-started/configuration.md) ``` -------------------------------------------------------------------------------- /docs/getting-started/docker-setup.md: -------------------------------------------------------------------------------- ```markdown # Docker Setup Guide This guide covers how to set up and run MCP Codebase Insight using Docker. ## Prerequisites - Docker installed on your system - Basic knowledge of Docker commands - Qdrant vector database accessible from your Docker network (required) ## Running Qdrant Container MCP Codebase Insight requires a running Qdrant instance. Make sure to start Qdrant before running the MCP container: ```bash # Create a directory for Qdrant data mkdir -p qdrant_data # Pull and run Qdrant container docker pull qdrant/qdrant docker run -d --name qdrant \ -p 6333:6333 -p 6334:6334 \ -v $(pwd)/qdrant_data:/qdrant/storage \ qdrant/qdrant ``` You can access the Qdrant web UI dashboard at http://localhost:6333/dashboard to verify it's running correctly (port 6334 serves the gRPC API). 
## Quick Start with Docker ```bash # Pull the image docker pull tosin2013/mcp-codebase-insight # Run the container docker run -p 3000:3000 \ --env-file .env \ -v $(pwd)/docs:/app/docs \ -v $(pwd)/knowledge:/app/knowledge \ tosin2013/mcp-codebase-insight ``` ## Creating a .env File for Docker Create a `.env` file in your project directory with the following content: ``` MCP_HOST=0.0.0.0 MCP_PORT=3000 MCP_LOG_LEVEL=INFO QDRANT_URL=http://host.docker.internal:6333 MCP_DOCS_CACHE_DIR=/app/docs MCP_ADR_DIR=/app/docs/adrs MCP_KB_STORAGE_DIR=/app/knowledge MCP_DISK_CACHE_DIR=/app/cache ``` > **Note:** When using Docker, the host is set to `0.0.0.0` to allow connections from outside the container. If your Qdrant instance is running on the host machine, use `host.docker.internal` instead of `localhost`. ## Volume Mounts The Docker command mounts several directories from your host system into the container: - `$(pwd)/docs:/app/docs`: Maps your local docs directory to the container's docs directory - `$(pwd)/knowledge:/app/knowledge`: Maps your local knowledge directory to the container's knowledge directory Make sure these directories exist on your host system before running the container: ```bash mkdir -p docs/adrs knowledge ``` ## Using Docker Compose For a more manageable setup, you can use Docker Compose. 
Create a `docker-compose.yml` file in your project directory: ```yaml version: '3' services: mcp-codebase-insight: image: tosin2013/mcp-codebase-insight ports: - "3000:3000" volumes: - ./docs:/app/docs - ./knowledge:/app/knowledge - ./cache:/app/cache env_file: - .env networks: - mcp-network qdrant: image: qdrant/qdrant ports: - "6333:6333" volumes: - ./qdrant_data:/qdrant/storage networks: - mcp-network networks: mcp-network: driver: bridge ``` Then start the services: ```bash docker-compose up -d ``` ## Advanced Docker Configuration ### Using Custom Embedding Models To use a custom embedding model, add the model path to your volume mounts and update the environment configuration: ```bash docker run -p 3000:3000 \ --env-file .env \ -v $(pwd)/docs:/app/docs \ -v $(pwd)/knowledge:/app/knowledge \ -v $(pwd)/models:/app/models \ -e MCP_EMBEDDING_MODEL=/app/models/custom-model \ tosin2013/mcp-codebase-insight ``` ### Securing Your Docker Deployment For production environments: 1. Use Docker networks to isolate the MCP and Qdrant services 2. Don't expose the Qdrant port to the public internet 3. Set up proper authentication for both services 4. Use Docker secrets for sensitive information 5. Consider using a reverse proxy with HTTPS for the API ## Troubleshooting Docker Issues ### Connection Refused to Qdrant If you're getting connection errors to Qdrant, check: - Is Qdrant running? (`docker ps | grep qdrant`) - Is the URL correct in the `.env` file? - Are both services on the same Docker network? 
- Try using the service name instead of `host.docker.internal` if using Docker Compose ### Container Exits Immediately If the container exits immediately: - Check the Docker logs: `docker logs <container_id>` - Ensure all required environment variables are set - Verify that the mounted directories have correct permissions ### Out of Memory Errors If you encounter out of memory errors: - Increase the memory limit for the container - Reduce the vector dimension or batch size in your configuration - Consider using a more efficient embedding model ``` -------------------------------------------------------------------------------- /docs/troubleshooting/common-issues.md: -------------------------------------------------------------------------------- ```markdown # Troubleshooting Guide > 🚧 **Documentation In Progress** > > This documentation is being actively developed. More details will be added soon. ## Common Issues ### Installation Issues #### 1. Dependencies Installation Fails ```bash Error: Failed building wheel for sentence-transformers ``` **Solution:** ```bash # Update pip and install wheel pip install --upgrade pip pip install wheel # Try installing with specific version pip install sentence-transformers==2.2.2 # If still failing, install system dependencies # Ubuntu/Debian: sudo apt-get install python3-dev build-essential # CentOS/RHEL: sudo yum install python3-devel gcc ``` #### 2. Permission Denied ```bash PermissionError: [Errno 13] Permission denied: '/usr/local/lib/python3.11/site-packages' ``` **Solution:** ```bash # Install in user space pip install --user mcp-codebase-insight # Or fix directory permissions sudo chown -R $USER:$USER venv/ ``` ### Server Issues #### 1. 
Port Already in Use ```bash [Errno 48] Address already in use ``` **Solution:** ```bash # Find process using the port lsof -i :3000 # On Linux/macOS netstat -ano | findstr :3000 # On Windows # Kill the process kill -9 <PID> # Or use a different port mcp-codebase-insight --port 3001 ``` #### 2. Server Won't Start ```bash ERROR: [Errno 2] No such file or directory: './docs' ``` **Solution:** ```bash # Create required directories mkdir -p docs/adrs knowledge cache # Fix permissions chmod -R 755 docs knowledge cache ``` ### Vector Store Issues #### 1. Qdrant Connection Failed ```bash ConnectionError: Failed to connect to Qdrant server ``` **Solution:** ```bash # Check if Qdrant is running curl http://localhost:6333/health # Start Qdrant if not running docker start qdrant # Verify environment variable echo $QDRANT_URL # Should be: http://localhost:6333 ``` #### 2. Collection Creation Failed ```bash Error: Collection 'code_vectors' already exists ``` **Solution:** ```bash # List existing collections curl http://localhost:6333/collections # Delete existing collection if needed curl -X DELETE http://localhost:6333/collections/code_vectors # Create new collection with correct parameters python -c " from qdrant_client import QdrantClient client = QdrantClient('localhost', port=6333) client.recreate_collection( collection_name='code_vectors', vectors_config={'size': 384, 'distance': 'Cosine'} ) " ``` ### Memory Issues #### 1. Out of Memory ```bash MemoryError: Unable to allocate array with shape (1000000, 384) ``` **Solution:** ```yaml # Adjust batch size in config.yaml vector_store: batch_size: 100 # Reduce from default # Or set environment variable export MCP_BATCH_SIZE=100 ``` #### 2. Slow Performance ```bash WARNING: Vector search taking longer than expected ``` **Solution:** ```yaml # Enable disk storage in config.yaml vector_store: on_disk: true # Adjust cache size performance: cache_size: 1000 ``` ### Documentation Issues #### 1. 
Documentation Map Failed ```bash Error: Unable to create documentation map: Invalid directory structure ``` **Solution:** ```bash # Verify directory structure tree docs/ # Create required structure mkdir -p docs/{adrs,api,components} touch docs/index.md ``` #### 2. Search Not Working ```bash Error: Search index not found ``` **Solution:** ```bash # Rebuild search index curl -X POST http://localhost:3000/api/docs/rebuild-index # Verify index exists ls -l docs/.search_index ``` ## Debugging Tips ### 1. Enable Debug Logging ```bash # Set environment variable export MCP_LOG_LEVEL=DEBUG # Or use command line flag mcp-codebase-insight --debug ``` ### 2. Check System Resources ```bash # Check memory usage free -h # Check disk space df -h # Check CPU usage top ``` ### 3. Verify Configuration ```bash # Print current config mcp-codebase-insight show-config # Validate config file mcp-codebase-insight validate-config --config config.yaml ``` ## Getting Help If you're still experiencing issues: 1. Check the [GitHub Issues](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues) 2. Join our [Discussion Forum](https://github.com/modelcontextprotocol/mcp-codebase-insight/discussions) 3. Review the [FAQ](faq.md) 4. Contact Support: - Discord: [Join Server](https://discord.gg/mcp-codebase-insight) - Email: [email protected] ## Next Steps - [Installation Guide](../getting-started/installation.md) - [Configuration Guide](../getting-started/configuration.md) - [Development Guide](../development/README.md) ``` -------------------------------------------------------------------------------- /docs/getting-started/configuration.md: -------------------------------------------------------------------------------- ```markdown # Configuration Guide > 🚧 **Documentation In Progress** > > This documentation is being actively developed. More details will be added soon. ## Configuration Methods MCP Codebase Insight can be configured through: 1. Environment variables 2. 
Configuration file 3. Command-line arguments Priority order (highest to lowest): 1. Command-line arguments 2. Environment variables 3. Configuration file 4. Default values ## Environment Variables ### Required Variables ```bash # Server Configuration MCP_HOST=127.0.0.1 MCP_PORT=3000 # Vector Store QDRANT_URL=http://localhost:6333 # Storage Paths MCP_DOCS_CACHE_DIR=./docs MCP_ADR_DIR=./docs/adrs MCP_KB_STORAGE_DIR=./knowledge MCP_DISK_CACHE_DIR=./cache ``` ### Optional Variables ```bash # Logging MCP_LOG_LEVEL=INFO MCP_LOG_FORMAT=json # Performance MCP_CACHE_SIZE=1000 MCP_WORKER_COUNT=4 ``` ## Configuration File Create `config.yaml` in your project root: ```yaml server: host: 127.0.0.1 port: 3000 workers: 4 vector_store: url: http://localhost:6333 collection: code_vectors storage: docs_cache: ./docs adr_dir: ./docs/adrs kb_storage: ./knowledge disk_cache: ./cache logging: level: INFO format: json ``` ## Command-line Arguments ```bash mcp-codebase-insight --help Options: --host TEXT Server host address --port INTEGER Server port --log-level TEXT Logging level --debug Enable debug mode --config PATH Path to config file --qdrant-url TEXT Qdrant server URL --docs-dir PATH Documentation directory --adr-dir PATH ADR directory --kb-dir PATH Knowledge base directory --cache-dir PATH Cache directory --workers INTEGER Number of workers --batch-size INTEGER Batch size --timeout INTEGER Request timeout --auth Enable authentication --metrics Enable metrics --help Show this message and exit ``` ## Feature-specific Configuration ### 1. Vector Store Configuration ```yaml vector_store: # Embedding model settings model: name: all-MiniLM-L6-v2 dimension: 384 normalize: true # Collection settings collection: name: mcp_vectors distance: Cosine on_disk: false # Search settings search: limit: 10 threshold: 0.75 ``` ### 2. 
Documentation Management ```yaml documentation: # Auto-generation settings auto_generate: true min_confidence: 0.8 # Crawling settings crawl: max_depth: 3 timeout: 30 exclude_patterns: ["*.git*", "node_modules"] # Storage settings storage: format: markdown index_file: _index.md ``` ### 3. ADR Management ```yaml adr: # Template settings template_dir: templates/adr default_template: default.md # Workflow settings require_approval: true auto_number: true # Storage settings storage: format: markdown naming: date-title ``` ## Environment-specific Configurations ### Development ```yaml debug: true log_level: DEBUG metrics: enabled: false vector_store: on_disk: false ``` ### Production ```yaml debug: false log_level: INFO security: auth_enabled: true allowed_origins: ["https://your-domain.com"] metrics: enabled: true vector_store: on_disk: true ``` ### Testing ```yaml debug: true log_level: DEBUG vector_store: collection_name: test_vectors storage: docs_cache_dir: ./test/docs ``` ## Best Practices 1. **Security** - Always enable authentication in production - Use environment variables for sensitive values - Restrict CORS origins in production 2. **Performance** - Adjust worker count based on CPU cores - Enable disk storage for large vector collections - Configure appropriate batch sizes 3. **Monitoring** - Enable metrics in production - Set appropriate log levels - Configure health check endpoints 4. **Storage** - Use absolute paths in production - Implement backup strategies - Monitor disk usage ## Validation To validate your configuration: ```bash mcp-codebase-insight validate-config --config config.yaml ``` ## Troubleshooting Common configuration issues and solutions: 1. **Permission Denied** ```bash # Fix directory permissions chmod -R 755 docs knowledge cache ``` 2. **Port Already in Use** ```bash # Use different port export MCP_PORT=3001 ``` 3. 
class SentenceTransformerEmbedding:
    """Async wrapper around a sentence-transformers text embedding model.

    The model is loaded lazily — and off the event loop — on first use via
    ``initialize()``; every ``embed*`` method triggers initialization when
    needed. All returned vectors are L2-normalized plain Python lists so
    they are JSON-serializable.
    """

    # Per-attempt cap (seconds) on model loading. Keeps initialize() from
    # hanging its caller indefinitely on a slow model download.
    DEFAULT_LOAD_TIMEOUT = 60.0

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """Record the model name; the model itself is not loaded here."""
        self.model_name = model_name
        self.model = None          # set by initialize()
        self.vector_size = None    # embedding dimension, set by initialize()
        self.initialized = False

    async def initialize(self):
        """Load the sentence-transformers model with retries and a timeout.

        Retries up to three times with exponential backoff on timeout.

        Raises:
            RuntimeError: if the model cannot be loaded (timeout exhausted
                or any loading error).
        """
        if self.initialized:
            return

        max_retries = 3
        retry_delay = 2.0

        for attempt in range(max_retries):
            try:
                # Model loading is blocking, so it runs in a worker thread.
                def load_model():
                    logger.debug(f"Loading model {self.model_name}")
                    model = SentenceTransformer(self.model_name)
                    vector_size = model.get_sentence_embedding_dimension()
                    return model, vector_size

                logger.debug(f"Starting model loading attempt {attempt + 1}/{max_retries}")
                # Fix: the previous code awaited asyncio.to_thread() directly,
                # which can never raise asyncio.TimeoutError, so the retry
                # branch below was unreachable dead code. wait_for() enforces
                # a real per-attempt timeout. (The worker thread itself cannot
                # be killed, but the caller stops waiting and can retry.)
                model, vector_size = await asyncio.wait_for(
                    asyncio.to_thread(load_model),
                    timeout=self.DEFAULT_LOAD_TIMEOUT,
                )
                self.model = model
                self.vector_size = vector_size
                self.initialized = True
                logger.debug(f"Model loaded successfully with vector size {self.vector_size}")
                return
            except asyncio.TimeoutError:
                if attempt < max_retries - 1:
                    logger.warning(f"Timeout loading model on attempt {attempt + 1}, retrying in {retry_delay}s")
                    await asyncio.sleep(retry_delay)
                    retry_delay *= 2  # exponential backoff
                else:
                    logger.error(f"Failed to load model after {max_retries} attempts")
                    raise RuntimeError(f"Failed to load embedding model {self.model_name}: Timeout after {max_retries} attempts")
            except Exception as e:
                logger.error(f"Failed to load embedding model {self.model_name}: {str(e)}")
                raise RuntimeError(f"Failed to load embedding model {self.model_name}: {str(e)}")

    async def embed(self, text: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
        """Generate embeddings for a string or a list of strings.

        Returns:
            A single vector (list of floats) for a ``str`` input, or a list
            of vectors for a list input.

        Raises:
            RuntimeError: if embedding generation fails.
        """
        if not self.initialized:
            await self.initialize()

        try:
            # Normalize to a list so single strings and batches share a path.
            texts = [text] if isinstance(text, str) else text

            embeddings = self.model.encode(
                texts,
                convert_to_tensor=False,  # Return numpy array
                normalize_embeddings=True  # L2 normalize embeddings
            )

            # Convert numpy arrays to lists for JSON serialization.
            if isinstance(text, str):
                return embeddings[0].tolist()
            return [embedding.tolist() for embedding in embeddings]
        except Exception as e:
            logger.error(f"Failed to generate embeddings: {str(e)}")
            raise RuntimeError(f"Failed to generate embeddings: {str(e)}")

    async def embed_batch(self, texts: List[str], batch_size: int = 32) -> List[List[float]]:
        """Generate embeddings for many texts in fixed-size batches.

        Args:
            texts: Input strings to embed.
            batch_size: Number of texts encoded per model call.

        Raises:
            RuntimeError: if embedding generation fails.
        """
        if not self.initialized:
            await self.initialize()

        try:
            all_embeddings = []
            for i in range(0, len(texts), batch_size):
                batch = texts[i:i + batch_size]
                embeddings = self.model.encode(
                    batch,
                    convert_to_tensor=False,
                    normalize_embeddings=True,
                    batch_size=batch_size
                )
                all_embeddings.extend(embeddings.tolist())
            return all_embeddings
        except Exception as e:
            logger.error(f"Failed to generate batch embeddings: {str(e)}")
            raise RuntimeError(f"Failed to generate batch embeddings: {str(e)}")

    async def embed_with_cache(
        self,
        text: str,
        cache_manager = None
    ) -> List[float]:
        """Generate an embedding for text, consulting the cache first.

        Falls back to a plain embed() call when no cache manager is given.

        NOTE(review): the cache key uses the builtin hash(), which is salted
        per process (PYTHONHASHSEED). That is fine for the in-memory cache
        used here, but the key is not stable across restarts — confirm before
        ever persisting these keys.
        """
        if not cache_manager:
            return await self.embed(text)

        # Try the in-memory cache before paying for model inference.
        cache_key = f"embedding:{hash(text)}"
        cached = cache_manager.get_from_memory(cache_key)
        if cached:
            return cached

        # Generate new embedding
        embedding = await self.embed(text)

        # Cache the result for subsequent lookups.
        cache_manager.put_in_memory(cache_key, embedding)
        return embedding

    def get_vector_size(self) -> int:
        """Return the embedding dimension (None until initialize() has run)."""
        return self.vector_size
@dataclass
class ServerConfig:
    """Server configuration.

    Holds connection settings, storage paths, cache behavior, and a small
    mutable state dictionary used to track runtime status of components.
    Paths given as strings are coerced to ``Path`` objects after init, and
    ``disk_cache_dir`` is kept consistent with ``cache_enabled``.
    """

    host: str = "127.0.0.1"
    port: int = 3000
    log_level: str = "INFO"
    qdrant_url: str = "http://localhost:6333"
    qdrant_api_key: Optional[str] = None
    docs_cache_dir: Path = Path("docs")
    adr_dir: Path = Path("docs/adrs")
    kb_storage_dir: Path = Path("knowledge")
    embedding_model: str = "all-MiniLM-L6-v2"
    collection_name: str = "codebase_patterns"
    debug_mode: bool = False
    metrics_enabled: bool = True
    cache_enabled: bool = True
    memory_cache_size: int = 1000
    disk_cache_dir: Optional[Path] = Path("cache")  # Default to "cache" instead of None
    _state: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Normalize path fields, reconcile cache settings, and seed state."""
        # Callers may pass plain strings; storage locations must be Paths.
        for name in ("docs_cache_dir", "adr_dir", "kb_storage_dir"):
            value = getattr(self, name)
            if value is not None and not isinstance(value, Path):
                setattr(self, name, Path(value))

        # disk_cache_dir is tied to cache_enabled: a disabled cache forces it
        # to None; an enabled cache guarantees a usable Path (default "cache").
        if not self.cache_enabled:
            self.disk_cache_dir = None
            logging.getLogger(__name__).debug("Cache disabled, setting disk_cache_dir to None")
        elif self.disk_cache_dir is None:
            self.disk_cache_dir = Path("cache")
            logging.getLogger(__name__).debug(f"Setting default disk_cache_dir to {self.disk_cache_dir}")
        elif not isinstance(self.disk_cache_dir, Path):
            self.disk_cache_dir = Path(self.disk_cache_dir)

        # Fresh runtime-state structure for every instance.
        self._state = {
            "initialized": False,
            "components": {},
            "metrics": {},
            "errors": []
        }

    @classmethod
    def from_env(cls) -> 'ServerConfig':
        """Build a configuration from MCP_* / QDRANT_* environment variables."""
        env = os.getenv
        use_cache = env("MCP_CACHE_ENABLED", "true").lower() == "true"
        cache_path = env("MCP_DISK_CACHE_DIR", "cache")
        return cls(
            host=env("MCP_HOST", "127.0.0.1"),
            port=int(env("MCP_PORT", "3000")),
            log_level=env("MCP_LOG_LEVEL", "INFO"),
            qdrant_url=env("QDRANT_URL", "http://localhost:6333"),
            qdrant_api_key=env("QDRANT_API_KEY"),
            embedding_model=env("MCP_EMBEDDING_MODEL", "all-MiniLM-L6-v2"),
            collection_name=env("MCP_COLLECTION_NAME", "codebase_patterns"),
            docs_cache_dir=Path(env("MCP_DOCS_CACHE_DIR", "docs")),
            adr_dir=Path(env("MCP_ADR_DIR", "docs/adrs")),
            kb_storage_dir=Path(env("MCP_KB_STORAGE_DIR", "knowledge")),
            disk_cache_dir=Path(cache_path) if use_cache else None,
            debug_mode=env("MCP_DEBUG", "false").lower() == "true",
            metrics_enabled=env("MCP_METRICS_ENABLED", "true").lower() == "true",
            cache_enabled=use_cache,
            memory_cache_size=int(env("MCP_MEMORY_CACHE_SIZE", "1000"))
        )

    def create_directories(self) -> None:
        """Create all required directories for the server.

        Call during server initialization so every needed directory exists
        before components are initialized.
        """
        log = logging.getLogger(__name__)
        log.debug("Creating required directories")

        # Standard storage locations.
        for directory in (self.docs_cache_dir, self.adr_dir, self.kb_storage_dir):
            directory.mkdir(parents=True, exist_ok=True)

        # Disk cache directory only when caching is on and configured.
        if self.cache_enabled and self.disk_cache_dir is not None:
            log.debug(f"Creating disk cache directory: {self.disk_cache_dir}")
            self.disk_cache_dir.mkdir(parents=True, exist_ok=True)
        elif not self.cache_enabled:
            log.debug("Cache is disabled, skipping disk cache directory creation")

        log.debug("All required directories created")

    def get_state(self, key: str, default: Any = None) -> Any:
        """Return the state value for *key*, or *default* if absent."""
        return self._state.get(key, default)

    def set_state(self, key: str, value: Any):
        """Store *value* under *key* in the state dictionary."""
        self._state[key] = value

    def update_state(self, updates: Dict[str, Any]):
        """Merge *updates* into the state dictionary."""
        self._state.update(updates)

    def clear_state(self):
        """Reset state to its initial empty structure."""
        self._state.clear()
        self._state = {
            "initialized": False,
            "components": {},
            "metrics": {},
            "errors": []
        }