This is page 1 of 7. Use http://codebase.md/tosin2013/mcp-codebase-insight?page={x} to view the full context.
# Directory Structure
```
├── .bumpversion.cfg
├── .codecov.yml
├── .compile-venv-py3.11
│   ├── bin
│   │   ├── activate
│   │   ├── activate.csh
│   │   ├── activate.fish
│   │   ├── Activate.ps1
│   │   ├── coverage
│   │   ├── coverage-3.11
│   │   ├── coverage3
│   │   ├── pip
│   │   ├── pip-compile
│   │   ├── pip-sync
│   │   ├── pip3
│   │   ├── pip3.11
│   │   ├── py.test
│   │   ├── pyproject-build
│   │   ├── pytest
│   │   ├── python
│   │   ├── python3
│   │   ├── python3.11
│   │   └── wheel
│   └── pyvenv.cfg
├── .env.example
├── .github
│   ├── agents
│   │   ├── DebugAgent.agent.md
│   │   ├── DocAgent.agent.md
│   │   ├── README.md
│   │   ├── TestAgent.agent.md
│   │   └── VectorStoreAgent.agent.md
│   ├── copilot-instructions.md
│   └── workflows
│       ├── build-verification.yml
│       ├── publish.yml
│       └── tdd-verification.yml
├── .gitignore
├── async_fixture_wrapper.py
├── CHANGELOG.md
├── CLAUDE.md
├── codebase_structure.txt
├── component_test_runner.py
├── CONTRIBUTING.md
├── core_workflows.txt
├── create_release_issues.sh
├── debug_tests.md
├── Dockerfile
├── docs
│   ├── adrs
│   │   └── 001_use_docker_for_qdrant.md
│   ├── api.md
│   ├── components
│   │   └── README.md
│   ├── cookbook.md
│   ├── development
│   │   ├── CODE_OF_CONDUCT.md
│   │   ├── CONTRIBUTING.md
│   │   └── README.md
│   ├── documentation_map.md
│   ├── documentation_summary.md
│   ├── features
│   │   ├── adr-management.md
│   │   ├── code-analysis.md
│   │   └── documentation.md
│   ├── getting-started
│   │   ├── configuration.md
│   │   ├── docker-setup.md
│   │   ├── installation.md
│   │   ├── qdrant_setup.md
│   │   └── quickstart.md
│   ├── qdrant_setup.md
│   ├── README.md
│   ├── SSE_INTEGRATION.md
│   ├── system_architecture
│   │   └── README.md
│   ├── templates
│   │   └── adr.md
│   ├── testing_guide.md
│   ├── troubleshooting
│   │   ├── common-issues.md
│   │   └── faq.md
│   ├── vector_store_best_practices.md
│   └── workflows
│       └── README.md
├── error_logs.txt
├── examples
│   └── use_with_claude.py
├── github-actions-documentation.md
├── Makefile
├── module_summaries
│   ├── backend_summary.txt
│   ├── database_summary.txt
│   └── frontend_summary.txt
├── output.txt
├── package-lock.json
├── package.json
├── PLAN.md
├── prepare_codebase.sh
├── PULL_REQUEST.md
├── pyproject.toml
├── pytest.ini
├── README.md
├── requirements-3.11.txt
├── requirements-3.11.txt.backup
├── requirements-dev.txt
├── requirements.in
├── requirements.txt
├── run_build_verification.sh
├── run_fixed_tests.sh
├── run_test_with_path_fix.sh
├── run_tests.py
├── scripts
│   ├── check_qdrant_health.sh
│   ├── compile_requirements.sh
│   ├── load_example_patterns.py
│   ├── macos_install.sh
│   ├── README.md
│   ├── setup_qdrant.sh
│   ├── start_mcp_server.sh
│   ├── store_code_relationships.py
│   ├── store_report_in_mcp.py
│   ├── validate_knowledge_base.py
│   ├── validate_poc.py
│   ├── validate_vector_store.py
│   └── verify_build.py
├── server.py
├── setup_qdrant_collection.py
├── setup.py
├── src
│   └── mcp_codebase_insight
│       ├── __init__.py
│       ├── __main__.py
│       ├── asgi.py
│       ├── core
│       │   ├── __init__.py
│       │   ├── adr.py
│       │   ├── cache.py
│       │   ├── component_status.py
│       │   ├── config.py
│       │   ├── debug.py
│       │   ├── di.py
│       │   ├── documentation.py
│       │   ├── embeddings.py
│       │   ├── errors.py
│       │   ├── health.py
│       │   ├── knowledge.py
│       │   ├── metrics.py
│       │   ├── prompts.py
│       │   ├── sse.py
│       │   ├── state.py
│       │   ├── task_tracker.py
│       │   ├── tasks.py
│       │   └── vector_store.py
│       ├── models.py
│       ├── server_test_isolation.py
│       ├── server.py
│       ├── utils
│       │   ├── __init__.py
│       │   └── logger.py
│       └── version.py
├── start-mcpserver.sh
├── summary_document.txt
├── system-architecture.md
├── system-card.yml
├── test_fix_helper.py
├── test_fixes.md
├── test_function.txt
├── test_imports.py
├── tests
│   ├── components
│   │   ├── conftest.py
│   │   ├── test_core_components.py
│   │   ├── test_embeddings.py
│   │   ├── test_knowledge_base.py
│   │   ├── test_sse_components.py
│   │   ├── test_stdio_components.py
│   │   ├── test_task_manager.py
│   │   └── test_vector_store.py
│   ├── config
│   │   └── test_config_and_env.py
│   ├── conftest.py
│   ├── integration
│   │   ├── fixed_test2.py
│   │   ├── test_api_endpoints.py
│   │   ├── test_api_endpoints.py-e
│   │   ├── test_communication_integration.py
│   │   └── test_server.py
│   ├── README.md
│   ├── README.test.md
│   ├── test_build_verifier.py
│   └── test_file_relationships.py
└── trajectories
    └── tosinakinosho
        ├── anthropic_filemap__claude-3-sonnet-20240229__t-0.00__p-1.00__c-3.00___db62b9
        │   └── db62b9
        │       └── config.yaml
        ├── default__claude-3-5-sonnet-20240620__t-0.00__p-1.00__c-3.00___03565e
        │   └── 03565e
        │       ├── 03565e.traj
        │       └── config.yaml
        └── default__openrouter
            └── anthropic
                └── claude-3.5-sonnet-20240620:beta__t-0.00__p-1.00__c-3.00___03565e
                    └── 03565e
                        ├── 03565e.pred
                        ├── 03565e.traj
                        └── config.yaml
```
# Files
--------------------------------------------------------------------------------
/.codecov.yml:
--------------------------------------------------------------------------------
```yaml
codecov:
  require_ci_to_pass: yes
  notify:
    wait_for_ci: yes

coverage:
  precision: 2
  round: down
  range: "70...100"
  status:
    project:
      default:
        target: 80%
        threshold: 2%
        base: auto
        if_ci_failed: error
        informational: false
        only_pulls: false
    patch:
      default:
        target: 80%
        threshold: 2%
        base: auto
        if_ci_failed: error
        informational: false
        only_pulls: false

parsers:
  gcov:
    branch_detection:
      conditional: yes
      loop: yes
      method: no
      macro: no

comment:
  layout: "reach,diff,flags,files,footer"
  behavior: default
  require_changes: false
  require_base: no
  require_head: yes
  branches:
    - main

ignore:
  - "tests/**/*"
  - "setup.py"
  - "docs/**/*"
  - "examples/**/*"
  - "scripts/**/*"
  - "**/version.py"
  - "**/__init__.py"
```
--------------------------------------------------------------------------------
/.bumpversion.cfg:
--------------------------------------------------------------------------------
```
[bumpversion]
current_version = 0.1.0
commit = True
tag = True
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)((?P<release>[a-z]+)(?P<build>\d+))?
serialize =
    {major}.{minor}.{patch}{release}{build}
    {major}.{minor}.{patch}

[bumpversion:part:release]
optional_value = prod
first_value = dev
values =
    dev
    prod

[bumpversion:part:build]
first_value = 1

[bumpversion:file:pyproject.toml]
search = version = "{current_version}"
replace = version = "{new_version}"

[bumpversion:file:src/mcp_codebase_insight/version.py]
search = __version__ = "{current_version}"
replace = __version__ = "{new_version}"

[bumpversion:file:src/mcp_codebase_insight/version.py]
search = VERSION_MAJOR = {current_version.split(".")[0]}
replace = VERSION_MAJOR = {new_version.split(".")[0]}

[bumpversion:file:src/mcp_codebase_insight/version.py]
search = VERSION_MINOR = {current_version.split(".")[1]}
replace = VERSION_MINOR = {new_version.split(".")[1]}

[bumpversion:file:src/mcp_codebase_insight/version.py]
search = VERSION_PATCH = {current_version.split(".")[2]}
replace = VERSION_PATCH = {new_version.split(".")[2]}
```
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
```
# Server configuration
MCP_HOST=127.0.0.1
MCP_PORT=3000
MCP_LOG_LEVEL=INFO
MCP_DEBUG=false
# Qdrant configuration
QDRANT_URL=http://localhost:6333
QDRANT_API_KEY=your-qdrant-api-key-here
# Directory configuration
MCP_DOCS_CACHE_DIR=docs
MCP_ADR_DIR=docs/adrs
MCP_KB_STORAGE_DIR=knowledge
MCP_DISK_CACHE_DIR=cache
# Model configuration
MCP_EMBEDDING_MODEL=all-MiniLM-L6-v2
MCP_COLLECTION_NAME=codebase_patterns
# Feature flags
MCP_METRICS_ENABLED=true
MCP_CACHE_ENABLED=true
MCP_MEMORY_CACHE_SIZE=1000
# Optional: Authentication (if needed)
# MCP_AUTH_ENABLED=false
# MCP_AUTH_SECRET_KEY=your-secret-key
# MCP_AUTH_TOKEN_EXPIRY=3600
# Optional: Rate limiting (if needed)
# MCP_RATE_LIMIT_ENABLED=false
# MCP_RATE_LIMIT_REQUESTS=100
# MCP_RATE_LIMIT_WINDOW=60
# Optional: SSL/TLS configuration (if needed)
# MCP_SSL_ENABLED=false
# MCP_SSL_CERT_FILE=path/to/cert.pem
# MCP_SSL_KEY_FILE=path/to/key.pem
# Optional: Proxy configuration (if needed)
# MCP_PROXY_URL=http://proxy:8080
# MCP_NO_PROXY=localhost,127.0.0.1
# Optional: External services (if needed)
# MCP_GITHUB_TOKEN=your-github-token
# MCP_JIRA_URL=https://your-jira-instance
# MCP_JIRA_TOKEN=your-jira-token
# Optional: Monitoring (if needed)
# MCP_SENTRY_DSN=your-sentry-dsn
# MCP_DATADOG_API_KEY=your-datadog-api-key
# MCP_PROMETHEUS_ENABLED=false
# Test Configuration
# These variables are used when running tests
MCP_TEST_MODE=1
MCP_TEST_QDRANT_URL=http://localhost:6333
MCP_TEST_COLLECTION_NAME=test_collection
MCP_TEST_EMBEDDING_MODEL=all-MiniLM-L6-v2
# Event Loop Debug Mode
# Uncomment to enable asyncio debug mode for testing
# PYTHONASYNCIODEBUG=1
```
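For orientation, here is a minimal sketch of how a config loader might consume these variables (illustrative only; the variable names come from the file above, but the field names are assumptions, not the actual `ServerConfig` API):
```python
import os

# Illustrative: read the variables defined in .env.example.
host = os.getenv("MCP_HOST", "127.0.0.1")
port = int(os.getenv("MCP_PORT", "3000"))
debug = os.getenv("MCP_DEBUG", "false").lower() == "true"
qdrant_url = os.getenv("QDRANT_URL", "http://localhost:6333")
collection = os.getenv("MCP_COLLECTION_NAME", "codebase_patterns")
```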
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# Virtual Environment
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# IDE
.idea/
.vscode/
*.swp
*.swo
*~
.project
.pydevproject
.settings/
# Testing
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
htmlcov/
# Documentation
docs/_build/
docs/api/
# Project specific
docs/adrs/*
!docs/adrs/001_use_docker_for_qdrant.md
!docs/adrs/README.md
knowledge/*
!knowledge/README.md
cache/*
!cache/README.md
logs/*
!logs/README.md
.test_cache/
test_knowledge/
build_output.txt
testreport.txt
test_env/
codebase_stats.txt
dependency_map.txt
vector_relationship_graph.*
verification-config.json
*.dot
*.json.tmp
# Jupyter Notebook
.ipynb_checkpoints
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# macOS
.DS_Store
.AppleDouble
.LSOverride
Icon
._*
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Windows
Thumbs.db
ehthumbs.db
Desktop.ini
$RECYCLE.BIN/
*.cab
*.msi
*.msm
*.msp
*.lnk
# Linux
*~
.fuse_hidden*
.directory
.Trash-*
.nfs*
# Project specific
.env
.env.*
!.env.example
*.log
logs/
cache/
knowledge/
docs/adrs/*
!docs/adrs/001_use_docker_for_qdrant.md
# Documentation and ADRs (temporary private)
docs/adrs/
docs/private/
docs/internal/
# Cache and Temporary Files
cache/
.cache/
tmp/
temp/
*.tmp
*.bak
*.log
# Sensitive Configuration
.env*
!.env.example
*.key
*.pem
*.crt
secrets/
private/
# Vector Database
qdrant_storage/
# Knowledge Base (private for now)
knowledge/patterns/
knowledge/tasks/
knowledge/private/
# Build and Distribution
dist/
build/
*.pyc
*.pyo
*.pyd
.Python
*.so
# Misc
.DS_Store
Thumbs.db
*.swp
*.swo
*~
# Project Specific
mcp.json
.cursor/rules/
module_summaries/
logs/
references/private/
prompts/
# Ignore Qdrant data storage directory
qdrant_data/
.aider*
```
--------------------------------------------------------------------------------
/tests/README.test.md:
--------------------------------------------------------------------------------
```markdown
import pytest
from pathlib import Path


@pytest.fixture
def readme_content():
    readme_path = Path(__file__).parent / "README.md"
    with open(readme_path, "r") as f:
        return f.read()
```
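A test in the same module could consume this fixture as follows (a hypothetical example, not present in the repository):
```python
# Hypothetical usage of the readme_content fixture defined above.
def test_readme_is_not_empty(readme_content):
    assert len(readme_content) > 0
```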
--------------------------------------------------------------------------------
/docs/components/README.md:
--------------------------------------------------------------------------------
```markdown
# Core Components
> 🚧 **Documentation In Progress**
>
> This documentation is being actively developed. More details will be added soon.
## Overview
This document details the core components of the MCP Codebase Insight system. For workflow information, please see the [Workflows Documentation](../workflows/README.md).
## Components
### Server Framework
- API endpoint management
- Request validation
- Response formatting
- Server lifecycle management
### Testing Framework
- Test environment management
- Component-level testing
- Integration test support
- Performance testing tools
### Documentation Tools
- Documentation generation
- Relationship analysis
- Validation tools
- Integration with code analysis
## Implementation Details
See the [System Architecture](../system_architecture/README.md) for more details on how these components interact.
```
--------------------------------------------------------------------------------
/scripts/README.md:
--------------------------------------------------------------------------------
```markdown
# Utility Scripts
This directory contains utility scripts for the MCP Codebase Insight project.
## Available Scripts
### check_qdrant_health.sh
**Purpose**: Checks if the Qdrant vector database service is available and healthy.
**Usage**:
```bash
./check_qdrant_health.sh [qdrant_url] [max_retries] [sleep_seconds]
```
**Parameters**:
- `qdrant_url` - URL of the Qdrant service (default: "http://localhost:6333")
- `max_retries` - Maximum number of retry attempts (default: 20)
- `sleep_seconds` - Seconds to wait between retries (default: 5)
**Example**:
```bash
./check_qdrant_health.sh "http://localhost:6333" 30 2
```
> Note: This script uses `apt-get` and may require `sudo` privileges on Linux systems. Ensure `curl` and `jq` are pre-installed or run with proper permissions.
**Exit Codes**:
- 0: Qdrant service is accessible and healthy
- 1: Qdrant service is not accessible or not healthy
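For Python-based tooling, an equivalent readiness check might look like the sketch below (assumptions: Qdrant's `/healthz` endpoint and the `requests` package; the shell script itself uses `curl`):
```python
import time

import requests  # assumption: available in the tooling environment


def wait_for_qdrant(url="http://localhost:6333", max_retries=20, sleep_seconds=5):
    """Poll Qdrant until it responds, mirroring check_qdrant_health.sh."""
    for _ in range(max_retries):
        try:
            if requests.get(f"{url}/healthz", timeout=5).ok:
                return True
        except requests.RequestException:
            pass  # not up yet; retry after a pause
        time.sleep(sleep_seconds)
    return False
```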
### compile_requirements.sh
**Purpose**: Compiles and generates version-specific requirements files for different Python versions.
**Usage**:
```bash
./compile_requirements.sh <python-version>
```
**Example**:
```bash
./compile_requirements.sh 3.11
```
### load_example_patterns.py
**Purpose**: Loads example patterns and ADRs into the knowledge base for demonstration or testing.
**Usage**:
```bash
python load_example_patterns.py [--help]
```
### verify_build.py
**Purpose**: Verifies the build status and generates a build verification report.
**Usage**:
```bash
python verify_build.py [--config <file>] [--output <report-file>]
```
## Usage in GitHub Actions
These scripts are used in our GitHub Actions workflows to automate and standardize common tasks. For example, `check_qdrant_health.sh` is used in both the build verification and TDD verification workflows to ensure the Qdrant service is available before running tests.
## Adding New Scripts
When adding new scripts to this directory:
1. Make them executable: `chmod +x scripts/your_script.sh`
2. Include a header comment explaining the purpose and usage
3. Add error handling and sensible defaults
4. Update this README with information about the script
5. Use parameter validation and help text when appropriate
```
--------------------------------------------------------------------------------
/docs/development/README.md:
--------------------------------------------------------------------------------
```markdown
# Development Guide
> 🚧 **Documentation In Progress**
>
> This documentation is being actively developed. More details will be added soon.
## Overview
This guide covers development setup, contribution guidelines, and best practices for the MCP Codebase Insight project.
## Development Setup
1. **Clone Repository**
```bash
git clone https://github.com/modelcontextprotocol/mcp-codebase-insight
cd mcp-codebase-insight
```
2. **Create Virtual Environment**
```bash
python -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```
3. **Install Development Dependencies**
```bash
pip install -e ".[dev]"
```
4. **Setup Pre-commit Hooks**
```bash
pre-commit install
```
## Project Structure
```
mcp-codebase-insight/
├── src/
│   └── mcp_codebase_insight/
│       ├── analysis/       # Code analysis modules
│       ├── documentation/  # Documentation management
│       ├── kb/             # Knowledge base operations
│       └── server/         # FastAPI server
├── tests/
│   ├── integration/        # Integration tests
│   └── unit/               # Unit tests
├── docs/                   # Documentation
└── examples/               # Example usage
```
## Testing
```bash
# Run unit tests
pytest tests/unit
# Run integration tests
pytest tests/integration
# Run with coverage
pytest --cov=src tests/
```
## Code Style
- Follow PEP 8
- Use type hints
- Document functions and classes
- Keep functions focused and small
- Write tests for new features
## Git Workflow
1. Create feature branch
2. Make changes
3. Run tests
4. Submit pull request
## Documentation
- Update docs for new features
- Include docstrings
- Add examples when relevant
## Debugging
### Server Debugging
```python
import debugpy

# Listen for a debugger client on port 5678 (all interfaces)
debugpy.listen(("0.0.0.0", 5678))
# Block until a client (e.g., the VS Code config below) attaches
debugpy.wait_for_client()
```
### VSCode Launch Configuration
```json
{
"version": "0.2.0",
"configurations": [
{
"name": "Python: Remote Attach",
"type": "python",
"request": "attach",
"port": 5678,
"host": "localhost"
}
]
}
```
## Performance Profiling
```bash
python -m cProfile -o profile.stats your_script.py
python -m snakeviz profile.stats
```
## Next Steps
- [Contributing Guidelines](CONTRIBUTING.md)
- [Code of Conduct](CODE_OF_CONDUCT.md)
- [API Reference](../api/rest-api.md)
```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
```markdown
# MCP Codebase Insight - WIP
> 🚧 **Development in Progress**
>
> This project is actively under development. Features and documentation are being continuously updated.
## Overview
MCP Codebase Insight is a system for analyzing and understanding codebases through semantic analysis, pattern detection, and documentation management.
## Current Development Status
### Completed Features
- ✅ Core Vector Store System
- ✅ Basic Knowledge Base
- ✅ SSE Integration
- ✅ Testing Framework
- ✅ TDD and Debugging Framework (rules_template integration)
### In Progress
- 🔄 Documentation Management System
- 🔄 Advanced Pattern Detection
- 🔄 Performance Optimization
- 🔄 Integration Testing
- 🔄 Debugging Utilities Enhancement
### Planned
- 📋 Extended API Documentation
- 📋 Custom Pattern Plugins
- 📋 Advanced Caching Strategies
- 📋 Deployment Guides
- 📋 Comprehensive Error Tracking System
## Quick Start
1. **Installation**
```bash
pip install mcp-codebase-insight
```
2. **Basic Usage**
```python
from mcp_codebase_insight import CodebaseAnalyzer
analyzer = CodebaseAnalyzer()
results = analyzer.analyze_code("path/to/code")
```
3. **Running Tests**
```bash
# Run all tests
pytest tests/
# Run unit tests
pytest tests/unit/
# Run component tests
pytest tests/components/
# Run tests with coverage
pytest tests/ --cov=src --cov-report=term-missing
```
4. **Debugging Utilities**
```python
from mcp_codebase_insight.utils.debug_utils import debug_trace, DebugContext, get_error_tracker

# Use debug trace decorator
@debug_trace
def my_function():
    ...  # Implementation

# Use debug context
with DebugContext("operation_name"):
    ...  # Code to debug

# Track errors
try:
    ...  # Risky operation
except Exception as e:
    error_id = get_error_tracker().record_error(e, context={"operation": "description"})
    print(f"Error recorded with ID: {error_id}")
```
## Testing and Debugging
### Test-Driven Development
This project follows Test-Driven Development (TDD) principles:
1. Write a failing test first (Red)
2. Write minimal code to make the test pass (Green)
3. Refactor for clean code while keeping tests passing (Refactor)
Our TDD documentation can be found in [docs/tdd/workflow.md](docs/tdd/workflow.md).
### Debugging Framework
We use Agans' 9 Rules of Debugging:
1. Understand the System
2. Make It Fail
3. Quit Thinking and Look
4. Divide and Conquer
5. Change One Thing at a Time
6. Keep an Audit Trail
7. Check the Plug
8. Get a Fresh View
9. If You Didn't Fix It, It Isn't Fixed
Learn more about our debugging approach in [docs/debuggers/agans_9_rules.md](docs/debuggers/agans_9_rules.md).
## Documentation
- [System Architecture](docs/system_architecture/README.md)
- [Core Components](docs/components/README.md)
- [API Reference](docs/api/README.md)
- [Development Guide](docs/development/README.md)
- [Workflows](docs/workflows/README.md)
- [TDD Workflow](docs/tdd/workflow.md)
- [Debugging Practices](docs/debuggers/best_practices.md)
## Contributing
We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details.
## License
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
## Support
- [Issue Tracker](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues)
- [Discussions](https://github.com/modelcontextprotocol/mcp-codebase-insight/discussions)
```
--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
```markdown
# Test Structure
This directory contains the test suite for the MCP Codebase Insight project. The tests are organized into the following structure:
## Directory Structure
```
tests/
├── components/              # Component-level tests
│   ├── test_vector_store.py
│   ├── test_knowledge_base.py
│   ├── test_task_manager.py
│   └── ...
├── integration/             # Integration and API tests
│   ├── test_api_endpoints.py
│   └── test_server.py
├── config/                  # Configuration tests
│   └── test_config_and_env.py
├── conftest.py              # Shared test fixtures
└── README.md                # This file
```
## Test Categories
1. **Component Tests** (`components/`)
- Unit tests for individual components
- Tests component initialization, methods, and cleanup
- Isolated from other components where possible
2. **Integration Tests** (`integration/`)
- Tests for API endpoints
- Server lifecycle tests
- Component interaction tests
3. **Configuration Tests** (`config/`)
- Environment variable handling
- Configuration file parsing
- Directory setup and permissions
## API Test Coverage
The following API endpoints are tested in the integration tests:
| Endpoint | Test Status | Test File |
|----------|-------------|-----------|
| `/health` | ✅ Tested | `test_api_endpoints.py` |
| `/api/vector-store/search` | ✅ Tested | `test_api_endpoints.py` |
| `/api/docs/adrs` | ✅ Tested | `test_api_endpoints.py` |
| `/api/docs/adrs/{adr_id}` | ✅ Tested | `test_api_endpoints.py` |
| `/api/docs/patterns` | ✅ Tested | `test_api_endpoints.py` |
| `/api/docs/patterns/{pattern_id}` | ✅ Tested | `test_api_endpoints.py` |
| `/api/analyze` | ✅ Tested | `test_api_endpoints.py` |
| `/api/tasks/create` | ✅ Tested | `test_api_endpoints.py` |
| `/api/tasks` | ✅ Tested | `test_api_endpoints.py` |
| `/api/tasks/{task_id}` | ✅ Tested | `test_api_endpoints.py` |
| `/api/debug/issues` | ✅ Tested | `test_api_endpoints.py` |
| `/api/debug/issues/{issue_id}` | ✅ Tested | `test_api_endpoints.py` |
| `/api/debug/issues/{issue_id}/analyze` | ✅ Tested | `test_api_endpoints.py` |
| `/tools/*` | ✅ Tested | `test_api_endpoints.py` |
Each test verifies:
- Successful responses with valid input
- Error handling with invalid input
- Response structure and content validation
- Edge cases where applicable
## Running Tests
To run all tests:
```bash
python -m pytest tests/
```
To run specific test categories:
```bash
# Run component tests
python -m pytest tests/components/
# Run integration tests
python -m pytest tests/integration/
# Run config tests
python -m pytest tests/config/
# Run API endpoint tests only
python -m pytest tests/integration/test_api_endpoints.py
# Run tests for a specific API endpoint
python -m pytest tests/integration/test_api_endpoints.py::test_health_check
```
## Test Fixtures
Shared test fixtures are defined in `conftest.py` and include:
- `temp_dir`: Temporary directory for test files
- `test_config`: Server configuration for testing
- `embedder`: Sentence transformer embedder
- `vector_store`: Vector store instance
- `test_server`: Server instance for testing
- `test_client`: FastAPI test client
- `test_code`: Sample code for testing
- `test_adr`: Sample ADR data
- `env_vars`: Environment variables for testing
## Writing New Tests
1. Place new tests in the appropriate directory based on what they're testing
2. Use the shared fixtures from `conftest.py`
3. Follow the existing patterns for async tests and cleanup
4. Add proper docstrings and comments
5. Ensure proper cleanup in fixtures that create resources
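A new component test following these conventions might be skeletoned like this (illustrative only; `vector_store` and `test_code` are the shared fixtures listed above, and the test body is left for the author to fill in):
```python
import pytest

# Illustrative skeleton; adapt to the component under test.
@pytest.mark.asyncio
async def test_vector_store_roundtrip(vector_store, test_code):
    """Store a sample snippet, then search for it."""
    ...  # call vector_store methods here and assert on the results
```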
## Test Dependencies
The test suite has the following dependencies:
- pytest
- pytest-asyncio
- httpx
- fastapi
- sentence-transformers
Make sure these are installed before running tests.
```
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
```markdown
# MCP Codebase Insight Documentation
Welcome to the MCP Codebase Insight documentation. This directory contains detailed information about installation, configuration, usage, and development of the MCP Codebase Insight tool.
## Documentation Structure
### Getting Started
- [Installation Guide](getting-started/installation.md) - Complete installation instructions
- [Configuration Guide](getting-started/configuration.md) - Configuration options and environment setup
- [Quick Start Tutorial](getting-started/quickstart.md) - Get up and running quickly
- [Qdrant Setup](getting-started/qdrant_setup.md) - Vector database setup and configuration
### Core Features
- [Code Analysis](features/code-analysis.md) - Understanding code patterns and insights
- [ADR Management](features/adr-management.md) - Managing architectural decisions
- [Documentation Management](features/documentation.md) - Auto-generation and maintenance
- [Knowledge Base](features/knowledge-base.md) - Pattern storage and retrieval
- [Debug System](features/debug-system.md) - Intelligent debugging assistance
- [Build Verification](features/build-verification.md) - Automated build checks
### API Reference
- [REST API](api/rest-api.md) - Complete API endpoint documentation
- [SSE Integration](SSE_INTEGRATION.md) - Server-Sent Events integration guide
- [Vector Store API](api/vector-store-api.md) - Vector database interaction
- [Client Libraries](api/client-libraries.md) - Available client SDKs
### Development
- [Contributing Guide](development/contributing.md) - How to contribute to the project
- [Architecture Overview](development/architecture.md) - System architecture and design
- [Testing Guide](testing_guide.md) - Writing and running tests
- [Best Practices](development/best-practices.md) - Coding standards and guidelines
### Deployment
- [Production Deployment](deployment/production.md) - Production setup guide
- [Docker Deployment](deployment/docker.md) - Container-based deployment
- [Scaling Guide](deployment/scaling.md) - Handling increased load
- [Monitoring](deployment/monitoring.md) - System monitoring and alerts
### Troubleshooting
- [Common Issues](troubleshooting/common-issues.md) - Frequently encountered problems
- [FAQ](troubleshooting/faq.md) - Frequently asked questions
- [Debug Guide](troubleshooting/debug-guide.md) - Advanced debugging techniques
- [Support](troubleshooting/support.md) - Getting help and support
## Quick Links
- [GitHub Repository](https://github.com/modelcontextprotocol/mcp-codebase-insight)
- [Issue Tracker](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues)
- [Discussions](https://github.com/modelcontextprotocol/mcp-codebase-insight/discussions)
- [Release Notes](../CHANGELOG.md)
- [License](../LICENSE)
## Contributing to Documentation
We welcome contributions to improve this documentation. Please see our [Contributing Guide](development/contributing.md) for details on:
- Documentation style guide
- How to submit documentation changes
- Documentation testing
- Building documentation locally
## Documentation Versions
This documentation corresponds to the latest stable release of MCP Codebase Insight. For other versions:
- [Latest Development](https://github.com/modelcontextprotocol/mcp-codebase-insight/tree/main/docs)
- [Version History](https://github.com/modelcontextprotocol/mcp-codebase-insight/releases)
## Support
If you need help or have questions:
1. Check the [FAQ](troubleshooting/faq.md) and [Common Issues](troubleshooting/common-issues.md)
2. Search existing [GitHub Issues](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues)
3. Join our [Discussion Forum](https://github.com/modelcontextprotocol/mcp-codebase-insight/discussions)
4. Open a new issue if needed
```
--------------------------------------------------------------------------------
/docs/system_architecture/README.md:
--------------------------------------------------------------------------------
```markdown
# System Architecture
> 🚧 **Documentation In Progress**
>
> This documentation is being actively developed. More details will be added soon.
## Overview
This document provides a comprehensive overview of the MCP Codebase Insight system architecture. For detailed workflow information, please see the [Workflows Documentation](../workflows/README.md).
## Architecture Components
### Core Systems
- Vector Store System
- Knowledge Base
- Task Management
- Health Monitoring
- Error Handling
- Metrics Collection
- Cache Management
### Documentation
- ADR Management
- Documentation Tools
- API Documentation
### Testing
- Test Framework
- SSE Testing
- Integration Testing
## Detailed Documentation
- [Core Components](../components/README.md)
- [API Reference](../api/README.md)
- [Development Guide](../development/README.md)
## System Overview
This document provides a comprehensive overview of the MCP Codebase Insight system architecture, focusing on system interactions, dependencies, and design considerations.
## Core Systems
### 1. Vector Store System (`src/mcp_codebase_insight/core/vector_store.py`)
- **Purpose**: Manages code embeddings and semantic search capabilities
- **Key Components**:
- Qdrant integration for vector storage
- Embedding generation and management
- Search optimization and caching
- **Integration Points**:
- Knowledge Base for semantic understanding
- Cache Management for performance optimization
- Health Monitoring for system status
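To make the integration concrete, the following sketch shows the kind of semantic lookup this system performs (assuming the `qdrant-client` and `sentence-transformers` packages; the model and collection names come from `.env.example`, and this is not the project's actual `VectorStore` API):
```python
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

client = QdrantClient(url="http://localhost:6333")
model = SentenceTransformer("all-MiniLM-L6-v2")

# Embed a code snippet and search for semantically similar stored patterns
query_vector = model.encode("def parse_config(path): ...").tolist()
hits = client.search(
    collection_name="codebase_patterns",
    query_vector=query_vector,
    limit=5,
)
for hit in hits:
    print(hit.id, hit.score)
```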
### 2. Knowledge Base (`src/mcp_codebase_insight/core/knowledge.py`)
- **Purpose**: Central repository for code insights and relationships
- **Key Components**:
- Pattern detection and storage
- Relationship mapping
- Semantic analysis
- **Feedback Loops**:
- Updates vector store with new patterns
- Receives feedback from code analysis
- Improves pattern detection over time
### 3. Task Management (`src/mcp_codebase_insight/core/tasks.py`)
- **Purpose**: Handles async operations and job scheduling
- **Key Components**:
- Task scheduling and prioritization
- Progress tracking
- Resource management
- **Bottleneck Mitigation**:
- Task queuing strategies
- Resource allocation
- Error recovery
### 4. Health Monitoring (`src/mcp_codebase_insight/core/health.py`)
- **Purpose**: System health and performance monitoring
- **Key Components**:
- Component status tracking
- Performance metrics
- Alert system
- **Feedback Mechanisms**:
- Real-time status updates
- Performance optimization triggers
- System recovery procedures
### 5. Error Handling (`src/mcp_codebase_insight/core/errors.py`)
- **Purpose**: Centralized error management
- **Key Components**:
- Error classification
- Recovery strategies
- Logging and reporting
- **Resilience Features**:
- Graceful degradation
- Circuit breakers
- Error propagation control
## System Interactions
### Critical Paths
1. **Code Analysis Flow**:
```mermaid
sequenceDiagram
participant CA as Code Analysis
participant KB as Knowledge Base
participant VS as Vector Store
participant CM as Cache
CA->>VS: Request embeddings
VS->>CM: Check cache
CM-->>VS: Return cached/null
VS->>KB: Get patterns
KB-->>VS: Return patterns
VS-->>CA: Return analysis
```
2. **Health Monitoring Flow**:
```mermaid
sequenceDiagram
participant HM as Health Monitor
participant CS as Component State
participant TM as Task Manager
participant EH as Error Handler
HM->>CS: Check states
CS->>TM: Verify tasks
TM-->>CS: Task status
CS-->>HM: System status
HM->>EH: Report issues
```
## Performance Considerations
### Caching Strategy
- Multi-level caching (memory and disk)
- Cache invalidation triggers
- Cache size management
### Scalability Points
1. Vector Store:
- Horizontal scaling capabilities
- Batch processing optimization
- Search performance tuning
2. Task Management:
- Worker pool management
- Task prioritization
- Resource allocation
## Error Recovery
### Failure Scenarios
1. Vector Store Unavailable:
- Fallback to cached results
- Graceful degradation of search
- Automatic reconnection
2. Task Overload:
- Dynamic task throttling
- Priority-based scheduling
- Resource reallocation
## System Evolution
### Extension Points
1. Knowledge Base:
- Plugin system for new patterns
- Custom analyzers
- External integrations
2. Monitoring:
- Custom metrics
- Alert integrations
- Performance profiling
## Next Steps
1. **Documentation Needs**:
- Detailed component interaction guides
- Performance tuning documentation
- Deployment architecture guides
2. **System Improvements**:
- Enhanced caching strategies
- More robust error recovery
- Better performance monitoring
```
--------------------------------------------------------------------------------
/.github/agents/README.md:
--------------------------------------------------------------------------------
```markdown
# Custom Agents for MCP Codebase Insight
This directory contains specialized AI agent instructions tailored for the MCP Codebase Insight project. Each agent has deep knowledge of specific aspects of the codebase and can help you work more effectively.
## Available Agents
### 🧪 [TestAgent](./TestAgent.agent.md)
**Expertise**: Testing, test runner, async test patterns, debugging test failures
**Use when:**
- Writing new tests for features or bug fixes
- Running tests with the custom test runner
- Debugging test failures, especially async/event loop issues
- Improving test coverage
**Key Knowledge:**
- Custom `./run_tests.py` test runner usage
- Test isolation and event loop management
- pytest-asyncio patterns and fixtures
- Component and integration test structures
---
### 🔍 [VectorStoreAgent](./VectorStoreAgent.agent.md)
**Expertise**: Qdrant vector store, embeddings, semantic search, performance optimization
**Use when:**
- Working with the vector store (add, search, update, delete)
- Managing embeddings and collections
- Optimizing vector search performance
- Debugging Qdrant connection issues
**Key Knowledge:**
- VectorStore and EmbeddingProvider APIs
- Qdrant version compatibility
- Batch operations and filters
- Performance best practices
---
### 📝 [DocAgent](./DocAgent.agent.md)
**Expertise**: Documentation, ADRs, API docs, code comments, architecture diagrams
**Use when:**
- Creating or updating documentation
- Writing Architecture Decision Records (ADRs)
- Documenting APIs and code examples
- Creating architecture diagrams
**Key Knowledge:**
- ADR management system
- Documentation structure and templates
- Docstring format (Google style)
- Mermaid diagram syntax
---
### 🐛 [DebugAgent](./DebugAgent.agent.md)
**Expertise**: Debugging, issue diagnosis, error handling, Agans' 9 Rules
**Use when:**
- Debugging complex issues systematically
- Handling async/event loop errors
- Diagnosing Qdrant connection problems
- Investigating memory leaks or resource issues
**Key Knowledge:**
- Agans' 9 Rules of Debugging
- Common async/event loop issues
- Configuration and environment problems
- Systematic debugging workflows
---
## How to Use These Agents
### In VS Code with GitHub Copilot
1. **Open the agent file** you need (e.g., `TestAgent.agent.md`)
2. **Reference it in Copilot Chat**: "Using @TestAgent, help me write tests for the new feature"
3. **Ask specific questions**: "How do I run integration tests in isolation?"
### In Claude or Other AI Tools
1. **Copy the agent content** into your conversation
2. **Provide context**: "You are the TestAgent for this project..."
3. **Ask your question** in the same conversation
### General Workflow
```mermaid
graph LR
A[Need Help] --> B{What Type?}
B -->|Testing| C[TestAgent]
B -->|Vector Store| D[VectorStoreAgent]
B -->|Documentation| E[DocAgent]
B -->|Debugging| F[DebugAgent]
C --> G[Get Specialized Help]
D --> G
E --> G
F --> G
```
## Agent Selection Guide
| Task | Recommended Agent | Why |
|------|------------------|-----|
| Write unit tests | TestAgent | Knows test patterns and runner |
| Fix failing tests | TestAgent + DebugAgent | Testing expertise + debugging |
| Add vector search | VectorStoreAgent | Deep Qdrant knowledge |
| Optimize queries | VectorStoreAgent | Performance expertise |
| Create ADR | DocAgent | ADR system expert |
| Update API docs | DocAgent | Documentation specialist |
| Debug async error | DebugAgent | Async troubleshooting expert |
| Qdrant won't connect | VectorStoreAgent + DebugAgent | Both have relevant knowledge |
| Memory leak | DebugAgent | Resource debugging specialist |
## Multi-Agent Collaboration
For complex tasks, you can use multiple agents:
**Example: Adding a New Feature**
1. **VectorStoreAgent**: Implement vector store operations
2. **TestAgent**: Write comprehensive tests
3. **DocAgent**: Document the feature and create ADR
4. **DebugAgent**: Help if issues arise during development
**Example Workflow:**
```bash
# 1. Implement feature with VectorStoreAgent
# "Help me add batch delete operation to VectorStore"
# 2. Write tests with TestAgent
# "Create tests for the batch delete operation"
# 3. Debug issues with DebugAgent
# "Tests failing with event loop errors, help debug"
# 4. Document with DocAgent
# "Document the new batch delete feature and create an ADR"
```
## Creating Your Own Agent
If you need a specialized agent for a specific domain:
```markdown
# [YourAgent] Agent
You are a specialized [domain] agent for MCP Codebase Insight.
## Your Responsibilities
1. [Primary responsibility]
2. [Secondary responsibility]
## Critical Knowledge
- [Key concept 1]
- [Key concept 2]
## Common Operations
[Examples and patterns]
## When to Escalate
[Limitations and handoff criteria]
```
## Agent Updates
These agents are living documents. Update them when:
- New patterns emerge in the codebase
- Common issues are discovered and solved
- APIs change significantly
- New best practices are established
## Feedback
If an agent:
- Gives incorrect information → Update the agent file
- Is missing important context → Add it to the agent
- Doesn't cover your use case → Create a new agent or extend existing one
## Related
- [Main Copilot Instructions](../copilot-instructions.md) - General project guidance
- [Contributing Guide](../../CONTRIBUTING.md) - How to contribute
- [Testing Guide](../../docs/testing_guide.md) - Detailed testing information
- [Architecture Docs](../../docs/system_architecture/) - System design
```
--------------------------------------------------------------------------------
/docs/workflows/README.md:
--------------------------------------------------------------------------------
```markdown
# MCP Codebase Insight Workflows
## Overview
This document details the various workflows supported by MCP Codebase Insight, including both user-facing and system-level processes. These workflows are designed to help developers effectively use and interact with the system's features.
## Quick Navigation
- [User Workflows](#user-workflows)
- [Code Analysis](#1-code-analysis-workflow)
- [Documentation Management](#2-documentation-management-workflow)
- [Testing](#3-testing-workflow)
- [System Workflows](#system-workflows)
- [Vector Store Operations](#1-vector-store-operations)
- [Health Monitoring](#2-health-monitoring)
- [Integration Points](#integration-points)
- [Best Practices](#best-practices)
- [Troubleshooting](#troubleshooting)
- [Next Steps](#next-steps)
## User Workflows
### 1. Code Analysis Workflow
#### Process Flow
```mermaid
graph TD
A[Developer] -->|Submit Code| B[Analysis Request]
B --> C{Analysis Type}
C -->|Pattern Detection| D[Pattern Analysis]
C -->|Semantic Search| E[Vector Search]
C -->|Documentation| F[Doc Analysis]
D --> G[Results]
E --> G
F --> G
G -->|Display| A
```
#### Steps
1. **Submit Code**
- Upload code files or provide repository URL
- Specify analysis parameters
- Set analysis scope
2. **Analysis Processing**
- Pattern detection runs against known patterns
- Semantic search finds similar code
- Documentation analysis checks coverage
3. **Results Review**
- View detected patterns
- Review suggestions
- Access related documentation
### 2. Documentation Management Workflow
#### Process Flow
```mermaid
graph TD
A[Developer] -->|Create/Update| B[Documentation]
B --> C{Doc Type}
C -->|ADR| D[ADR Processing]
C -->|API| E[API Docs]
C -->|Guide| F[User Guide]
D --> G[Link Analysis]
E --> G
F --> G
G -->|Update| H[Doc Map]
H -->|Validate| A
```
#### Steps
1. **Create/Update Documentation**
- Choose document type
- Write content
- Add metadata
2. **Processing**
- Analyze document relationships
- Update documentation map
- Validate links
3. **Validation**
- Check for broken links
- Verify consistency
- Update references
### 3. Testing Workflow
#### Process Flow
```mermaid
graph TD
A[Developer] -->|Run Tests| B[Test Suite]
B --> C{Test Type}
C -->|Unit| D[Unit Tests]
C -->|Integration| E[Integration Tests]
C -->|SSE| F[SSE Tests]
D --> G[Results]
E --> G
F --> G
G -->|Report| A
```
#### Steps
1. **Test Initialization**
- Set up test environment
- Configure test parameters
- Prepare test data
2. **Test Execution**
- Run selected test types
- Monitor progress
- Collect results
3. **Results Analysis**
- Review test reports
- Analyze failures
- Generate coverage reports
## System Workflows
### 1. Vector Store Operations
#### Process Flow
```mermaid
sequenceDiagram
participant User
participant Server
participant Cache
participant VectorStore
participant Knowledge
User->>Server: Request Analysis
Server->>Cache: Check Cache
Cache-->>Server: Cache Hit/Miss
alt Cache Miss
Server->>VectorStore: Generate Embeddings
VectorStore->>Knowledge: Get Patterns
Knowledge-->>VectorStore: Return Patterns
VectorStore-->>Server: Return Results
Server->>Cache: Update Cache
end
Server-->>User: Return Analysis
```
#### Components
1. **Cache Layer**
- In-memory cache for frequent requests
- Disk cache for larger datasets
- Cache invalidation strategy
2. **Vector Store**
- Embedding generation
- Vector search
- Pattern matching
3. **Knowledge Base**
- Pattern storage
- Relationship tracking
- Context management
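The cache-miss branch in the diagram above follows a standard cache-aside pattern; a minimal sketch (all names hypothetical, not the project's actual interfaces):
```python
# Hypothetical cache-aside flow for an analysis request.
async def analyze(request_key, cache, vector_store, knowledge):
    cached = await cache.get(request_key)
    if cached is not None:
        return cached                                   # cache hit
    embedding = await vector_store.embed(request_key)   # generate embeddings
    patterns = await knowledge.get_patterns(embedding)  # get patterns
    result = {"patterns": patterns}
    await cache.set(request_key, result)                # update cache
    return result
```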
### 2. Health Monitoring
#### Process Flow
```mermaid
sequenceDiagram
participant Monitor
participant Components
participant Tasks
participant Alerts
loop Every 30s
Monitor->>Components: Check Status
Components->>Tasks: Verify Tasks
Tasks-->>Components: Task Status
alt Issues Detected
Components->>Alerts: Raise Alert
Alerts->>Monitor: Alert Status
end
Components-->>Monitor: System Status
end
```
#### Components
1. **Monitor**
- Regular health checks
- Performance monitoring
- Resource tracking
2. **Components**
- Service status
- Resource usage
- Error rates
3. **Tasks**
- Task queue status
- Processing rates
- Error handling
4. **Alerts**
- Alert generation
- Notification routing
- Alert history
## Integration Points
### 1. External Systems
- Version Control Systems
- CI/CD Pipelines
- Issue Tracking Systems
- Documentation Platforms
### 2. APIs
- REST API for main operations
- SSE for real-time updates
- WebSocket for bi-directional communication
### 3. Storage
- Vector Database (Qdrant)
- Cache Storage
- Document Storage
## Best Practices
### 1. Code Analysis
- Regular analysis scheduling
- Incremental analysis for large codebases
- Pattern customization
### 2. Documentation
- Consistent formatting
- Regular updates
- Link validation
### 3. Testing
- Comprehensive test coverage
- Regular test runs
- Performance benchmarking
## Troubleshooting
### Common Issues
1. **Analysis Failures**
- Check input validation
- Verify system resources
- Review error logs
2. **Performance Issues**
- Monitor cache hit rates
- Check vector store performance
- Review resource usage
3. **Integration Issues**
- Verify API endpoints
- Check authentication
- Review connection settings
## Next Steps
1. **Workflow Optimization**
- Performance improvements
- Enhanced error handling
- Better user feedback
2. **New Features**
- Custom workflow creation
- Advanced analysis options
- Extended integration options
3. **Documentation**
- Workflow examples
- Integration guides
- Troubleshooting guides
```
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
```markdown
# Contributing to MCP Codebase Insight
> 🚧 **Documentation In Progress**
>
> This documentation is being actively developed. More details will be added soon.
## Getting Started
1. Fork the repository
2. Clone your fork
3. Create a new branch
4. Make your changes
5. Submit a pull request
## Development Setup
See the [Development Guide](docs/development/README.md) for detailed setup instructions.
## Code Style
- Follow PEP 8 guidelines
- Use type hints
- Write docstrings for all public functions and classes
- Keep functions focused and small
- Write clear commit messages
## Testing
- Write tests for new features
- Ensure all tests pass before submitting PR
- Include both unit and integration tests
- Document test cases
## Documentation
- Update documentation for new features
- Follow the documentation style guide
- Include examples where appropriate
- Keep documentation up to date with code
## Pull Request Process
1. Update documentation
2. Add tests
3. Update CHANGELOG.md
4. Submit PR with clear description
5. Address review comments
## Code of Conduct
Please note that this project is released with a [Code of Conduct](CODE_OF_CONDUCT.md). By participating in this project you agree to abide by its terms.
```
--------------------------------------------------------------------------------
/CLAUDE.md:
--------------------------------------------------------------------------------
```markdown
# TechPath Project Guidelines
## Build & Test Commands
- **Python**: `make install-dev` (setup), `make start` (run server), `make check` (all checks)
- **Python Tests**: `make test` or `pytest tests/test_file.py::test_function_name` (single test)
- **Frontend**: `cd project && npm run dev` (development), `npm run build` (production)
- **Frontend Tests**: `cd project && npm test` or `npm test -- -t "test name pattern"` (single test)
- **Linting**: `make lint` (Python), `cd project && npm run lint` (TypeScript/React)
- **Formatting**: `make format` (Python), `prettier --write src/` (Frontend)
## Code Style Guidelines
- **Python**: Black (88 chars), isort for imports, type hints required
- **TypeScript**: 2-space indent, semicolons, strong typing with interfaces
- **Imports**: Group by external then internal, alphabetize
- **React**: Functional components with hooks, avoid class components
- **Types**: Define interfaces in separate files when reused
- **Naming**: camelCase for JS/TS variables, PascalCase for components/types, snake_case for Python
- **Error Handling**: Try/catch in async functions, propagate errors with descriptive messages
- **Comments**: Document complex logic, interfaces, and function parameters/returns
- **Testing**: Unit test coverage required, mock external dependencies
```
--------------------------------------------------------------------------------
/docs/development/CONTRIBUTING.md:
--------------------------------------------------------------------------------
```markdown
# Contributing Guidelines
> 🚧 **Documentation In Progress**
>
> This documentation is being actively developed. More details will be added soon.
## Welcome!
Thank you for considering contributing to MCP Codebase Insight! This document provides guidelines and workflows for contributing.
## Code of Conduct
Please read and follow our [Code of Conduct](CODE_OF_CONDUCT.md).
## How Can I Contribute?
### Reporting Bugs
1. Check if the bug is already reported in [Issues](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues)
2. If not, create a new issue with:
- Clear title
- Detailed description
- Steps to reproduce
- Expected vs actual behavior
- Environment details
### Suggesting Enhancements
1. Check existing issues and discussions
2. Create a new issue with:
- Clear title
- Detailed description
- Use cases
- Implementation ideas (optional)
### Pull Requests
1. Fork the repository
2. Create a feature branch
3. Make your changes
4. Run tests and linting
5. Submit PR with:
- Clear title
- Description of changes
- Reference to related issues
- Updated documentation
## Development Process
### 1. Setup Development Environment
Follow the [Development Guide](README.md) for setup instructions.
### 2. Make Changes
1. Create a branch:
```bash
git checkout -b feature/your-feature
```
2. Make changes following our style guide
3. Add tests for new functionality
4. Update documentation
### 3. Test Your Changes
```bash
# Run all tests
pytest
# Run specific test file
pytest tests/path/to/test_file.py
# Run with coverage
pytest --cov=src tests/
```
### 4. Submit Changes
1. Push to your fork
2. Create pull request
3. Wait for review
4. Address feedback
## Style Guide
### Python Code Style
- Follow PEP 8
- Use type hints
- Maximum line length: 88 characters
- Use docstrings (Google style)
### Commit Messages
```
type(scope): description

[optional body]

[optional footer]
```
Types:
- feat: New feature
- fix: Bug fix
- docs: Documentation
- style: Formatting
- refactor: Code restructuring
- test: Adding tests
- chore: Maintenance
### Documentation
- Keep README.md updated
- Add docstrings to all public APIs
- Update relevant documentation files
- Include examples for new features
## Review Process
1. Automated checks must pass
2. At least one maintainer review
3. All feedback addressed
4. Documentation updated
5. Tests added/updated
## Getting Help
- Join our [Discord](https://discord.gg/mcp-codebase-insight)
- Ask in GitHub Discussions
- Contact maintainers
## Recognition
Contributors will be:
- Listed in CONTRIBUTORS.md
- Mentioned in release notes
- Credited in documentation
Thank you for contributing!
```
--------------------------------------------------------------------------------
/docs/development/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
```markdown
# Code of Conduct
> 🚧 **Documentation In Progress**
>
> This documentation is being actively developed. More details will be added soon.
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, religion, or sexual identity
and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the
overall community
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or
advances of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email
address, without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Project maintainers are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the project maintainers responsible for enforcement at
[INSERT CONTACT METHOD].
All complaints will be reviewed and investigated promptly and fairly.
## Enforcement Guidelines
Project maintainers will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
**Consequence**: A private, written warning from project maintainers, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate.
### 2. Warning
**Community Impact**: A violation through a single incident or series
of actions.
**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time.
### 3. Temporary Ban
**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.
**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time.
### 4. Permanent Ban
**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
**Consequence**: A permanent ban from any sort of public interaction within
the community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.0, available at
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
[homepage]: https://www.contributor-covenant.org
```
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/__init__.py:
--------------------------------------------------------------------------------
```python
"""Core package initialization."""
from .config import ServerConfig
__all__ = ["ServerConfig"]
```
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
```
pytest>=8.0
pytest-asyncio>=0.26.0
anyio>=3.0.0
httpx>=0.24.0
fastapi[all]>=0.100.0
qdrant-client>=1.2.0
```
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/__init__.py:
--------------------------------------------------------------------------------
```python
"""MCP Codebase Insight package."""
from .core.config import ServerConfig
__version__ = "0.2.2"
__all__ = ["ServerConfig"]
```
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/utils/__init__.py:
--------------------------------------------------------------------------------
```python
"""Utils package initialization."""
from .logger import Logger, get_logger, logger
__all__ = ["Logger", "get_logger", "logger"]
```
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/asgi.py:
--------------------------------------------------------------------------------
```python
"""ASGI application entry point."""
from .core.config import ServerConfig
from .server import CodebaseAnalysisServer
# Create server instance with default config
config = ServerConfig()
server = CodebaseAnalysisServer(config)
# Export the FastAPI app instance
app = server.app
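# Typical invocation (uvicorn is a project dependency; host/port illustrative):
#   uvicorn mcp_codebase_insight.asgi:app --host 0.0.0.0 --port 3000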
```
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/component_status.py:
--------------------------------------------------------------------------------
```python
"""Component status enumeration."""
from enum import Enum
class ComponentStatus(str, Enum):
"""Component status enumeration."""
UNINITIALIZED = "uninitialized"
INITIALIZING = "initializing"
INITIALIZED = "initialized"
FAILED = "failed"
CLEANING = "cleaning"
CLEANED = "cleaned"
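# Because the enum also subclasses str, members compare equal to their string
# values and serialize cleanly in JSON responses (illustrative):
#   ComponentStatus.FAILED == "failed"                     # True
#   ComponentStatus("cleaned") is ComponentStatus.CLEANED  # True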
```
--------------------------------------------------------------------------------
/module_summaries/database_summary.txt:
--------------------------------------------------------------------------------
```
# Database Module Summary
- **Purpose**: Describe the database's role in the application.
- **Key Components**: List database types, schema designs, and any ORM tools used.
- **Dependencies**: Mention the relationships with the backend and data sources.
- **Largest Files**: Identify the largest database-related files and their purposes.
```
--------------------------------------------------------------------------------
/module_summaries/backend_summary.txt:
--------------------------------------------------------------------------------
```
# Backend Module Summary
- **Purpose**: Describe the backend's role in the application.
- **Key Components**: List key components such as main frameworks, APIs, and data handling.
- **Dependencies**: Mention any database connections and external services it relies on.
- **Largest Files**: Identify the largest backend files and their purposes.
```
--------------------------------------------------------------------------------
/module_summaries/frontend_summary.txt:
--------------------------------------------------------------------------------
```
# Frontend Module Summary
- **Purpose**: Describe the frontend's role in the application.
- **Key Components**: List key components such as main frameworks, libraries, and UI components.
- **Dependencies**: Mention any dependencies on backend services or external APIs.
- **Largest Files**: Identify the largest frontend files and their purposes.
```
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
```
[pytest]
asyncio_mode = strict
asyncio_default_fixture_loop_scope = session
testpaths = tests
python_files = test_*.py
python_classes = Test*
python_functions = test_*
addopts = -v --cov=src/mcp_codebase_insight --cov-report=term-missing
filterwarnings =
ignore::DeprecationWarning:pkg_resources.*
ignore::DeprecationWarning:importlib.*
ignore::DeprecationWarning:pytest_asyncio.*
ignore::DeprecationWarning:pydantic.*
ignore::pydantic.PydanticDeprecatedSince20
```
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/version.py:
--------------------------------------------------------------------------------
```python
"""Version information."""
__version__ = "0.1.0"
__author__ = "MCP Team"
__author_email__ = "[email protected]"
__description__ = "MCP Codebase Insight Server"
__url__ = "https://github.com/modelcontextprotocol/mcp-codebase-insight"
__license__ = "MIT"
# Version components
VERSION_MAJOR = 0
VERSION_MINOR = 1
VERSION_PATCH = 0
VERSION_SUFFIX = ""
# Build version tuple
VERSION_INFO = (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
# Build version string
VERSION = ".".join(map(str, VERSION_INFO))
if VERSION_SUFFIX:
VERSION += VERSION_SUFFIX
```
--------------------------------------------------------------------------------
/test_function.txt:
--------------------------------------------------------------------------------
```
async def test_health_check(client: httpx.AsyncClient):
"""Test the health check endpoint."""
response = await client.get("/health")
assert response.status_code == status.HTTP_200_OK
data = response.json()
# In test environment, we expect partially initialized state
assert "status" in data
assert "initialized" in data
# We don't assert on components field since it might be missing
# Accept 'ok' status in test environment
assert data["status"] in ["healthy", "initializing", "ok"], f"Unexpected status: {data['status']}"
# Print status for debugging
print(f"Health status: {data}")
```
--------------------------------------------------------------------------------
/tests/integration/fixed_test2.py:
--------------------------------------------------------------------------------
```python
import httpx
import pytest
from fastapi import status
@pytest.mark.asyncio
async def test_health_check(client: httpx.AsyncClient):
"""Test the health check endpoint."""
response = await client.get("/health")
assert response.status_code == status.HTTP_200_OK
data = response.json()
# In test environment, we expect partially initialized state
assert "status" in data
assert "initialized" in data
# We don't assert on components field since it might be missing
# Accept 'ok' status in test environment
assert data["status"] in ["healthy", "initializing", "ok"], f"Unexpected status: {data['status']}"
# Print status for debugging
print(f"Health status: {data}")
```
--------------------------------------------------------------------------------
/run_fixed_tests.sh:
--------------------------------------------------------------------------------
```bash
#!/bin/bash
# This script runs tests with proper path and environment setup
set -e
# Activate the virtual environment
source .venv/bin/activate
# Install the package in development mode
pip install -e .
# Set environment variables
export MCP_TEST_MODE=1
export QDRANT_URL="http://localhost:6333"
export MCP_COLLECTION_NAME="test_collection_$(date +%s)"
export PYTHONPATH="$PYTHONPATH:$(pwd)"
# Check if we should run a specific test or all tests
if [ $# -eq 0 ]; then
echo "Running specific vector store tests..."
python component_test_runner.py tests/components/test_vector_store.py
else
echo "Running specified tests: $*"
python component_test_runner.py "$@"
fi
```
--------------------------------------------------------------------------------
/debug_tests.md:
--------------------------------------------------------------------------------
```markdown
# Debug MCP Codebase Insight Tests
## Problem Statement
Debug and fix the test execution issues in the MCP Codebase Insight project. The main test script `run_tests.py` is encountering issues with module imports and test execution.
## Current Issues
1. Module import errors for `mcp_codebase_insight` package
2. Test execution failures
3. Coverage reporting issues
## Expected Behavior
- All tests should run successfully
- Coverage reports should be generated
- No import errors should occur
## Additional Context
- The project uses pytest for testing
- Coverage reporting is handled through pytest-cov
- The project is set up with a virtual environment
- Environment variables are set in .env file
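## Quick Diagnostics
A few commands that usually narrow down import problems (paths reflect the project defaults; adjust as needed):
```bash
source .venv/bin/activate
pip install -e .                              # make the package importable
python -c "import src.mcp_codebase_insight"   # verify the import path works
pytest --collect-only                         # confirm tests are discovered
```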
```
--------------------------------------------------------------------------------
/docs/templates/adr.md:
--------------------------------------------------------------------------------
```markdown
# {title}
## Status
{status}
## Context
{context}
## Decision Drivers
<!-- What forces influenced this decision? -->
* Technical constraints
* Business requirements
* Resource constraints
* Time constraints
## Considered Options
{options}
## Decision
{decision}
## Expected Consequences
### Positive Consequences
{positive_consequences}
### Negative Consequences
{negative_consequences}
## Pros and Cons of the Options
{options_details}
## Links
<!-- Optional section for links to other decisions, patterns, or resources -->
## Notes
{notes}
## Metadata
* Created: {created_at}
* Last Modified: {updated_at}
* Author: {author}
* Approvers: {approvers}
* Status: {status}
* Tags: {tags}
{metadata}
```
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/models.py:
--------------------------------------------------------------------------------
```python
"""API request and response models."""
from typing import List, Dict, Any, Optional
from pydantic import BaseModel
class ToolRequest(BaseModel):
"""Base request model for tool endpoints."""
name: str
arguments: Dict[str, Any]
class CrawlDocsRequest(BaseModel):
"""Request model for crawl-docs endpoint."""
urls: List[str]
source_type: str
class AnalyzeCodeRequest(BaseModel):
"""Request model for analyze-code endpoint."""
code: str
context: Dict[str, Any]
class SearchKnowledgeRequest(BaseModel):
"""Request model for search-knowledge endpoint."""
query: str
pattern_type: str
limit: int = 5
class CodeAnalysisRequest(BaseModel):
"""Code analysis request model."""
code: str
context: Optional[Dict[str, Any]] = None
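# Illustrative usage (not part of the module): pydantic validates incoming
# payloads and raises ValidationError when required fields are missing, e.g.
#   SearchKnowledgeRequest(query="auth flow", pattern_type="code")   # ok
#   SearchKnowledgeRequest(query="auth flow")                        # ValidationError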
```
--------------------------------------------------------------------------------
/core_workflows.txt:
--------------------------------------------------------------------------------
```
# Core Workflows
## User Journeys
1. **Product Browsing**:
- Relevant code files: [list of files responsible for navigation, product listing]
- File sizes: [line counts for each key file]
2. **Checkout Process**:
- Relevant code files: [list of files responsible for cart management, payment handling]
- File sizes: [line counts for each key file]
3. **User Authentication**:
- Relevant code files: [list of files responsible for login, logout, user session management]
- File sizes: [line counts for each key file]
### Note:
- The workflows and summaries provided are examples. Please modify them to fit the specific use case and structure of your application repository.
- Pay special attention to large files, as they may represent core functionality or potential refactoring opportunities.
```
--------------------------------------------------------------------------------
/summary_document.txt:
--------------------------------------------------------------------------------
```
# Application Summary
## Architecture
This document provides a summary of the application's architecture, key modules, and their relationships.
## Key Modules
- Placeholder for module descriptions.
- Include information about the functionality, dependencies, and interaction with other modules.
## Key Files by Size
- See codebase_stats.txt for a complete listing of files by line count
- The largest files often represent core functionality or areas that might need refactoring
## High-Level Next Steps for LLM
1. Identify and generate module summaries for frontend, backend, and database.
2. Document core workflows and user journeys within the application.
3. Use the LLM relationship prompt (llm_relationship_prompt.txt) to generate a comprehensive relationship analysis.
4. Pay special attention to the largest files and their relationships to other components.
```
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
```yaml
name: Publish to PyPI
on:
push:
tags:
- 'v*'
jobs:
deploy:
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/mcp-codebase-insight
permissions:
id-token: write
contents: read
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install build twine
- name: Build package
run: python -m build
- name: Check distribution
run: |
python -m twine check dist/*
ls -l dist/
- name: Publish to PyPI
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
run: python -m twine upload dist/*
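# Example release flow (tag value illustrative; any tag matching 'v*' triggers this job):
#   git tag v0.2.3
#   git push origin v0.2.3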
```
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
```json
{
"name": "vite-react-typescript-starter",
"private": true,
"version": "0.0.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc && vite build",
"lint": "eslint .",
"preview": "vite preview"
},
"dependencies": {
"@supabase/supabase-js": "^2.39.7",
"lucide-react": "^0.344.0",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-router-dom": "^6.22.0",
"recharts": "^2.12.1"
},
"devDependencies": {
"@eslint/js": "^9.9.1",
"@tsconfig/recommended": "^1.0.3",
"@types/node": "^20.11.24",
"@types/react": "^18.3.5",
"@types/react-dom": "^18.3.0",
"@vitejs/plugin-react": "^4.3.1",
"autoprefixer": "^10.4.18",
"eslint": "^9.9.1",
"eslint-plugin-react-hooks": "^5.1.0-rc.0",
"eslint-plugin-react-refresh": "^0.4.11",
"globals": "^15.9.0",
"postcss": "^8.4.35",
"tailwindcss": "^3.4.1",
"typescript": "^5.5.3",
"typescript-eslint": "^8.3.0",
"vite": "^5.4.2"
}
}
```
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
```dockerfile
# Use Python 3.11 slim image
FROM python:3.11-slim
# Set working directory
WORKDIR /app
# Set environment variables
ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
PIP_NO_CACHE_DIR=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1
# Install system dependencies
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
build-essential \
curl \
git \
&& rm -rf /var/lib/apt/lists/*
# Install Rust (needed for pydantic)
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"
# Copy requirements file
COPY requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy source code
COPY src/ src/
COPY scripts/ scripts/
# Copy configuration files
COPY .env.example .env
# Create necessary directories
RUN mkdir -p \
docs/adrs \
knowledge \
cache \
logs
# Set permissions
RUN chmod +x scripts/start_mcp_server.sh
# Expose port
EXPOSE 3000
# Set entrypoint
ENTRYPOINT ["scripts/start_mcp_server.sh"]
# Set default command
CMD ["--host", "0.0.0.0", "--port", "3000"]
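# Example build/run (image tag illustrative):
#   docker build -t mcp-codebase-insight .
#   docker run -p 3000:3000 mcp-codebase-insight --host 0.0.0.0 --port 3000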
```
--------------------------------------------------------------------------------
/docs/getting-started/qdrant_setup.md:
--------------------------------------------------------------------------------
```markdown
# Qdrant Setup Guide
> 🚧 **Documentation In Progress**
>
> This documentation is being actively developed. More details will be added soon.
## Overview
This guide covers setting up Qdrant vector database for MCP Codebase Insight.
## Installation Methods
### 1. Using Docker (Recommended)
```bash
# Pull the Qdrant image
docker pull qdrant/qdrant
# Start Qdrant container
docker run -p 6333:6333 -v $(pwd)/qdrant_storage:/qdrant/storage qdrant/qdrant
```
### 2. From Binary
Download from [Qdrant Releases](https://github.com/qdrant/qdrant/releases)
### 3. From Source
```bash
git clone https://github.com/qdrant/qdrant
cd qdrant
cargo build --release
```
## Configuration
1. **Create Collection**
```python
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams
client = QdrantClient("localhost", port=6333)
client.create_collection(
    collection_name="code_vectors",
    vectors_config=VectorParams(size=384, distance=Distance.COSINE)
)
```
2. **Verify Setup**
```bash
curl http://localhost:6333/collections/code_vectors
```
## Next Steps
- [Configuration Guide](configuration.md)
- [Quick Start Guide](quickstart.md)
- [API Reference](../api/rest-api.md)
```
--------------------------------------------------------------------------------
/tests/components/test_embeddings.py:
--------------------------------------------------------------------------------
```python
import sys
import os
# Ensure the src directory is in the Python path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
import pytest
import asyncio
from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding
@pytest.mark.asyncio
async def test_embedder_initialization():
"""Test that embedder initializes correctly."""
embedder = SentenceTransformerEmbedding()
try:
await asyncio.wait_for(embedder.initialize(), timeout=60.0)
assert embedder.model is not None
assert embedder.vector_size == 384 # Default size for all-MiniLM-L6-v2
except asyncio.TimeoutError:
pytest.fail("Embedder initialization timed out")
except Exception as e:
pytest.fail(f"Embedder initialization failed: {str(e)}")
@pytest.mark.asyncio
async def test_embedder_embedding():
"""Test that embedder can generate embeddings."""
embedder = SentenceTransformerEmbedding()
await embedder.initialize()
# Test single text embedding
text = "Test text"
embedding = await embedder.embed(text)
assert len(embedding) == embedder.vector_size
# Test batch embedding
texts = ["Test text 1", "Test text 2"]
embeddings = await embedder.embed_batch(texts)
assert len(embeddings) == 2
assert all(len(emb) == embedder.vector_size for emb in embeddings)
```
--------------------------------------------------------------------------------
/async_fixture_wrapper.py:
--------------------------------------------------------------------------------
```python
"""
Async Fixture Wrapper for Component Tests
This script serves as a wrapper for running component tests with complex async fixtures
to ensure they are properly awaited in isolated test mode.
"""
import os
import sys
import asyncio
import pytest
import importlib
from pathlib import Path
def run_with_async_fixture_support():
"""Run pytest with proper async fixture support."""
# Get the module path and test name from command line arguments
if len(sys.argv) < 3:
print("Usage: python async_fixture_wrapper.py <module_path> <test_name>")
sys.exit(1)
module_path = sys.argv[1]
test_name = sys.argv[2]
# Configure event loop policy for macOS if needed
if sys.platform == 'darwin':
import platform
if int(platform.mac_ver()[0].split('.')[0]) >= 10:
# macOS 10+ - use the right event loop policy
asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
# Ensure PYTHONPATH is set correctly
base_dir = str(Path(module_path).parent.parent)
sys.path.insert(0, base_dir)
# Build pytest args
pytest_args = [module_path, f"-k={test_name}", "--asyncio-mode=strict"]
# Add any additional args
if len(sys.argv) > 3:
pytest_args.extend(sys.argv[3:])
# Run the test
exit_code = pytest.main(pytest_args)
sys.exit(exit_code)
if __name__ == "__main__":
run_with_async_fixture_support()
```
--------------------------------------------------------------------------------
/PULL_REQUEST.md:
--------------------------------------------------------------------------------
```markdown
# GitHub Actions Workflow Improvements
@coderabbit I'd like to request your detailed review of our GitHub Actions workflows.
## Overview
This PR aims to improve the GitHub Actions workflows in our repository by:
1. **Documenting** all existing workflows
2. **Addressing** the test pattern issue in build-verification.yml
3. **Extracting** common functionality into reusable scripts
4. **Standardizing** practices across different workflows
## Changes
- Added comprehensive documentation of all GitHub Actions workflows
- Fixed the wildcard pattern issue (`test_*`) in build-verification.yml
- Extracted Qdrant health check logic into a reusable script
- Added README for the scripts directory
## Benefits
- **Maintainability**: Common logic is now in a single location
- **Readability**: Workflows are cleaner and better documented
- **Reliability**: Fixed test pattern ensures more consistent test execution
- **Extensibility**: Easier to add new workflows or modify existing ones
## Request for Review
@coderabbit, I'm particularly interested in your feedback on:
1. Workflow structure and organization
2. Any redundancies or inefficiencies you notice
3. Any missing best practices
4. Suggestions for further improvements
## Future Improvements
We're planning to implement additional enhancements based on your feedback:
- Extract more common functionality into reusable actions
- Standardize environment variables across workflows
- Improve caching strategies
- Add workflow dependencies to avoid redundant work
Thank you for your time and expertise!
```
--------------------------------------------------------------------------------
/run_test_with_path_fix.sh:
--------------------------------------------------------------------------------
```bash
#!/bin/bash
# This script runs tests with a fix for the Python path issue
set -e
# Activate the virtual environment
source .venv/bin/activate
# Setup environment for Qdrant
export MCP_TEST_MODE=1
export QDRANT_URL="http://localhost:6333"
export MCP_COLLECTION_NAME="test_collection_$(date +%s)"
export PYTHONPATH="$PYTHONPATH:$(pwd)"
# Initialize Qdrant collection for testing
echo "Creating Qdrant collection for testing..."
python - << EOF
import os
from qdrant_client import QdrantClient
from qdrant_client.http import models
# Connect to Qdrant
client = QdrantClient(url="http://localhost:6333")
collection_name = os.environ.get("MCP_COLLECTION_NAME")
# Check if collection exists
collections = client.get_collections().collections
collection_names = [c.name for c in collections]
if collection_name in collection_names:
print(f"Collection {collection_name} already exists, recreating it...")
client.delete_collection(collection_name=collection_name)
# Create collection with vector size 384 (for all-MiniLM-L6-v2)
client.create_collection(
collection_name=collection_name,
vectors_config=models.VectorParams(
size=384, # Dimension for all-MiniLM-L6-v2
distance=models.Distance.COSINE,
),
)
# Create test directory that might be needed
os.makedirs("qdrant_storage", exist_ok=True)
print(f"Successfully created collection {collection_name}")
EOF
# Run all component tests in vector_store
echo "Running all vector store tests with component_test_runner.py..."
python component_test_runner.py tests/components/test_vector_store.py
```
--------------------------------------------------------------------------------
/test_imports.py:
--------------------------------------------------------------------------------
```python
#!/usr/bin/env python3
"""
Test script to verify imports work correctly
"""
import sys
import importlib
import os
def test_import(module_name):
try:
module = importlib.import_module(module_name)
print(f"✅ Successfully imported {module_name}")
return True
except ImportError as e:
print(f"❌ Failed to import {module_name}: {e}")
return False
def print_path():
print("\nPython Path:")
for i, path in enumerate(sys.path):
print(f"{i}: {path}")
def main():
print("=== Testing Package Imports ===")
print("\nEnvironment:")
print(f"Python version: {sys.version}")
print(f"Working directory: {os.getcwd()}")
print("\nTesting core package imports:")
# First ensure the parent directory is in the path
sys.path.insert(0, os.getcwd())
print_path()
print("\nTesting imports:")
# Test basic Python imports
test_import("os")
test_import("sys")
# Test ML/NLP packages
test_import("torch")
test_import("numpy")
test_import("transformers")
test_import("sentence_transformers")
# Test FastAPI and web packages
test_import("fastapi")
test_import("starlette")
test_import("pydantic")
# Test database packages
test_import("qdrant_client")
# Test project specific modules
test_import("src.mcp_codebase_insight.core.config")
test_import("src.mcp_codebase_insight.core.embeddings")
test_import("src.mcp_codebase_insight.core.vector_store")
print("\n=== Testing Complete ===")
if __name__ == "__main__":
main()
```
--------------------------------------------------------------------------------
/scripts/setup_qdrant.sh:
--------------------------------------------------------------------------------
```bash
#!/bin/bash
# Script to set up Qdrant for MCP Codebase Insight
set -e
# Colors for output
GREEN='\033[0;32m'
RED='\033[0;31m'
NC='\033[0m' # No Color
echo "Setting up Qdrant for MCP Codebase Insight..."
# Check if Docker is running
if ! docker info > /dev/null 2>&1; then
echo -e "${RED}Error: Docker is not running${NC}"
exit 1
fi
# Check if port 6333 is available
if lsof -Pi :6333 -sTCP:LISTEN -t >/dev/null ; then
echo -e "${RED}Warning: Port 6333 is already in use${NC}"
echo "Checking if it's a Qdrant instance..."
if curl -sf http://localhost:6333/healthz > /dev/null; then
echo -e "${GREEN}Existing Qdrant instance detected and healthy${NC}"
exit 0
else
echo -e "${RED}Port 6333 is in use by another service${NC}"
exit 1
fi
fi
# Create data directory if it doesn't exist
mkdir -p ./qdrant_data
# Stop and remove existing container if it exists
if docker ps -a | grep -q mcp-qdrant; then
echo "Removing existing mcp-qdrant container..."
docker stop mcp-qdrant || true
docker rm mcp-qdrant || true
fi
# Pull latest Qdrant image
echo "Pulling latest Qdrant image..."
docker pull qdrant/qdrant:latest
# Start Qdrant container
echo "Starting Qdrant container..."
docker run -d \
--name mcp-qdrant \
-p 6333:6333 \
-v "$(pwd)/qdrant_data:/qdrant/storage" \
qdrant/qdrant
# Wait for Qdrant to be ready
echo "Waiting for Qdrant to be ready..."
for i in {1..30}; do
if curl -sf http://localhost:6333/healthz > /dev/null; then
echo -e "${GREEN}Qdrant is ready!${NC}"
exit 0
fi
echo "Waiting... ($i/30)"
sleep 1
done
echo -e "${RED}Error: Qdrant failed to start within 30 seconds${NC}"
exit 1
```
--------------------------------------------------------------------------------
/scripts/check_qdrant_health.sh:
--------------------------------------------------------------------------------
```bash
#!/bin/bash
set -euo pipefail
# Script to check if Qdrant service is available and healthy
# Usage: ./check_qdrant_health.sh [qdrant_url] [max_retries] [sleep_seconds]
# Default values
QDRANT_URL=${1:-"http://localhost:6333"}
MAX_RETRIES=${2:-20}
SLEEP_SECONDS=${3:-5}
echo "Checking Qdrant health at $QDRANT_URL (max $MAX_RETRIES attempts with $SLEEP_SECONDS seconds delay)"
# Install dependencies if not present
if ! command -v curl &> /dev/null || ! command -v jq &> /dev/null; then
echo "Installing required dependencies..."
apt-get update &> /dev/null && apt-get install -y curl jq &> /dev/null || true
fi
# Check if dependencies are available
if ! command -v curl &> /dev/null; then
echo "Error: curl command not found and could not be installed"
exit 1
fi
if ! command -v jq &> /dev/null; then
echo "Warning: jq command not found and could not be installed. JSON validation will be skipped."
JQ_AVAILABLE=false
else
JQ_AVAILABLE=true
fi
# Wait for Qdrant to be available
retry_count=0
until [ "$(curl -s -o /dev/null -w "%{http_code}" "$QDRANT_URL/collections")" -eq 200 ] || [ "$retry_count" -eq "$MAX_RETRIES" ]
do
echo "Waiting for Qdrant... (attempt $retry_count of $MAX_RETRIES)"
sleep "$SLEEP_SECONDS"
retry_count=$((retry_count+1))
done
if [ "$retry_count" -eq "$MAX_RETRIES" ]; then
echo "Qdrant service failed to become available after $((MAX_RETRIES * SLEEP_SECONDS)) seconds"
exit 1
fi
# Check for valid JSON response if jq is available
if [ "$JQ_AVAILABLE" = true ]; then
if ! curl -s "$QDRANT_URL/collections" | jq . > /dev/null; then
echo "Qdrant did not return valid JSON."
exit 1
fi
fi
echo "Qdrant service is accessible and healthy."
exit 0
```
--------------------------------------------------------------------------------
/docs/qdrant_setup.md:
--------------------------------------------------------------------------------
```markdown
# Qdrant Setup Guide
## Overview
This document outlines the setup and maintenance procedures for the Qdrant vector database instance required for running tests and development.
## Prerequisites
- Docker installed and running
- Port 6333 available on localhost
- Python 3.8+ with pip
## Setup Options
### Option 1: Docker Container (Recommended for Development)
```bash
# Pull the latest Qdrant image
docker pull qdrant/qdrant:latest
# Run Qdrant container
docker run -d \
--name mcp-qdrant \
-p 6333:6333 \
-v $(pwd)/qdrant_data:/qdrant/storage \
qdrant/qdrant
# Verify the instance is running
curl http://localhost:6333/healthz
```
### Option 2: Pre-existing Instance
If using a pre-existing Qdrant instance:
1. Ensure it's accessible at `localhost:6333`
2. Verify health status
3. Configure environment variables if needed:
```bash
export QDRANT_HOST=localhost
export QDRANT_PORT=6333
```
## Health Check
```python
from qdrant_client import QdrantClient
client = QdrantClient(host="localhost", port=6333)
# The Python client exposes no dedicated health() method; listing
# collections doubles as a liveness probe.
health = client.get_collections()
print(f"Qdrant is reachable; collections: {health}")
```
## Maintenance
- Regular health checks are automated in CI/CD pipeline
- Database backups are stored in `./qdrant_data`
- Version updates should be coordinated with the team
## Troubleshooting
1. If container fails to start:
```bash
# Check logs
docker logs mcp-qdrant
# Verify port availability
lsof -i :6333
```
2. If connection fails:
```bash
# Restart container
docker restart mcp-qdrant
# Check container status
docker ps -a | grep mcp-qdrant
```
## Responsible Parties
- Primary maintainer: DevOps Team
- Documentation updates: Development Team Lead
- Testing coordination: QA Team Lead
## Version Control
- Document version: 1.0
- Last updated: 2025-03-24
- Next review: 2025-06-24
```
--------------------------------------------------------------------------------
/setup_qdrant_collection.py:
--------------------------------------------------------------------------------
```python
import os
from qdrant_client import QdrantClient
from qdrant_client.http import models
from qdrant_client.http.models import Distance, VectorParams
def setup_collection():
    # Connect to Qdrant; read the endpoint and credentials from the
    # environment instead of committing them to source control.
    client = QdrantClient(
        url=os.environ.get("QDRANT_URL", "http://localhost:6333"),
        api_key=os.environ.get("QDRANT_API_KEY")
    )
collection_name = "mcp-codebase-insight"
try:
# Check if collection exists
collections = client.get_collections().collections
exists = any(c.name == collection_name for c in collections)
# If collection exists, recreate it
if exists:
print(f"\nRemoving existing collection '{collection_name}'")
client.delete_collection(collection_name=collection_name)
# Create a new collection with named vector configurations
print(f"\nCreating collection '{collection_name}' with named vectors")
# Create named vectors configuration
vectors_config = {
# For the default MCP server embedding model (all-MiniLM-L6-v2)
"fast-all-minilm-l6-v2": VectorParams(
size=384, # all-MiniLM-L6-v2 produces 384-dimensional vectors
distance=Distance.COSINE
)
}
client.create_collection(
collection_name=collection_name,
vectors_config=vectors_config
)
# Verify the collection was created properly
collection_info = client.get_collection(collection_name=collection_name)
print(f"\nCollection '{collection_name}' created successfully")
print(f"Vector configuration: {collection_info.config.params.vectors}")
print("\nCollection is ready for the MCP server")
except Exception as e:
print(f"\nError setting up collection: {e}")
if __name__ == '__main__':
setup_collection()
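# Note (illustrative): with named vectors, later upserts must reference the
# same vector name, e.g.
#   models.PointStruct(id=1, vector={"fast-all-minilm-l6-v2": embedding},
#                      payload={"type": "code"})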
```
--------------------------------------------------------------------------------
/docs/vector_store_best_practices.md:
--------------------------------------------------------------------------------
```markdown
# VectorStore Best Practices
This document outlines best practices for working with the VectorStore component in the MCP Codebase Insight project.
## Metadata Structure
To ensure consistency and prevent `KeyError` exceptions, always follow these metadata structure guidelines:
### Required Fields
Always include these fields in your metadata when adding vectors:
- `type`: The type of content (e.g., "code", "documentation", "pattern")
- `language`: Programming language if applicable (e.g., "python", "javascript")
- `title`: Short descriptive title
- `description`: Longer description of the content
### Accessing Metadata
Always use the `.get()` method with a default value when accessing metadata fields:
```python
# Good - safe access pattern
result.metadata.get("type", "code")
# Bad - can cause KeyError
result.metadata["type"]
```
## Initialization and Cleanup
Follow these best practices for proper initialization and cleanup:
1. Always `await vector_store.initialize()` before using a VectorStore
2. Always `await vector_store.cleanup()` in test teardown/finally blocks
3. Use unique collection names in tests to prevent conflicts
4. Check `vector_store.initialized` status before operations
Example:
```python
try:
store = VectorStore(url, embedder, collection_name=unique_name)
await store.initialize()
# Use the store...
finally:
await store.cleanup()
await store.close()
```
## Vector Names and Dimensions
- Use consistent vector dimensions (384 for all-MiniLM-L6-v2)
- Be careful when overriding the vector_name parameter
- Ensure embedder and vector store are compatible
## Error Handling
- Check for component availability before use
- Handle initialization errors gracefully
- Log failures with meaningful messages (see the sketch below)
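A minimal sketch of these three practices (the `search_similar` call and `initialized` flag mirror the VectorStore API used elsewhere in this project; the helper itself is illustrative):
```python
import logging
logger = logging.getLogger(__name__)

async def safe_search(store, query: str, limit: int = 5):
    # Check component availability before use
    if not store.initialized:
        logger.error("Vector store not initialized; skipping search")
        return []
    try:
        results = await store.search_similar(query=query, limit=limit)
        # Safe metadata access with a default value
        return [r.metadata.get("type", "code") for r in results]
    except Exception as exc:
        # Log failures with a meaningful message
        logger.error("Vector search failed for %r: %s", query, exc)
        return []
```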
## Testing Guidelines
1. Use isolated test collections with unique names
2. Clean up all test data after tests
3. Verify metadata structure in tests
4. Use standardized test data fixtures
5. Test both positive and negative paths
By following these guidelines, you can avoid common issues like the `KeyError: 'type'` failure that previously occurred in the codebase.
```
--------------------------------------------------------------------------------
/scripts/macos_install.sh:
--------------------------------------------------------------------------------
```bash
#!/bin/bash
# Exit on error
set -e
echo "Installing MCP Codebase Insight development environment..."
# Check for Homebrew
if ! command -v brew &> /dev/null; then
echo "Installing Homebrew..."
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
else
echo "Homebrew already installed, updating..."
brew update
fi
# Check for Python
if ! command -v python3 &> /dev/null; then
echo "Installing Python..."
brew install python@3.11
else
echo "Python already installed"
fi
# Check for Docker
if ! command -v docker &> /dev/null; then
echo "Installing Docker..."
brew install --cask docker
echo "Starting Docker..."
open -a Docker
# Wait for Docker to start
echo "Waiting for Docker to start..."
while ! docker info &> /dev/null; do
sleep 1
done
else
echo "Docker already installed"
fi
# Create virtual environment
echo "Creating virtual environment..."
python3.11 -m venv .venv
# Activate virtual environment
echo "Activating virtual environment..."
source .venv/bin/activate
# Install dependencies
echo "Installing Python dependencies..."
pip install --upgrade pip
pip install -r requirements.txt
# Start Qdrant
echo "Starting Qdrant container..."
if ! docker ps | grep -q qdrant; then
docker run -d -p 6333:6333 -p 6334:6334 \
-v $(pwd)/qdrant_storage:/qdrant/storage \
qdrant/qdrant
echo "Qdrant container started"
else
echo "Qdrant container already running"
fi
# Create required directories
echo "Creating project directories..."
mkdir -p docs/adrs
mkdir -p docs/templates
mkdir -p knowledge/patterns
mkdir -p references
mkdir -p logs/debug
# Copy environment file if it doesn't exist
if [ ! -f .env ]; then
echo "Creating .env file..."
cp .env.example .env
echo "Please update .env with your settings"
fi
# Load example patterns
echo "Loading example patterns..."
python scripts/load_example_patterns.py
echo "
Installation complete! 🎉
To start development:
1. Update .env with your settings
2. Activate the virtual environment:
source .venv/bin/activate
3. Start the server:
make run
For more information, see the README.md file.
"
```
--------------------------------------------------------------------------------
/start-mcpserver.sh:
--------------------------------------------------------------------------------
```bash
#!/bin/bash
# This script starts the MCP Qdrant server with SSE transport
set -x
source .venv/bin/activate
# Set the PATH to include the local bin directory
export PATH="$HOME/.local/bin:$PATH"
# Define environment variables
export COLLECTION_NAME="mcp-codebase-insight"
export EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"
export QDRANT_URL="${QDRANT_URL:-http://localhost:6333}"
# Never commit real credentials; require the key from the caller's environment.
export QDRANT_API_KEY="${QDRANT_API_KEY:?QDRANT_API_KEY must be set}"
# Define tool descriptions
TOOL_STORE_DESCRIPTION="Store reusable code snippets and test results. 'information' contains a description. 'metadata' is a dictionary with a 'type' key: 'code' for code snippets, 'test_result' for test results. For 'code', 'metadata' includes a 'code' key with the code. For 'test_result', 'metadata' includes 'test_name', 'status' (pass/fail), and 'error_message'."
TOOL_FIND_DESCRIPTION="Search for code snippets and test results. The 'query' parameter describes what you're looking for. Returned results will have a 'metadata' field with a 'type' key indicating 'code' or 'test_result'. Use this to find code or analyze test failures."
# Default port for the SSE transport (can be overridden with PORT env var)
PORT="${PORT:-8000}"
# Determine transport type (default to sse if not specified)
TRANSPORT="${TRANSPORT:-sse}"
# Check if uvx and mcp-server-qdrant are installed
if ! command -v uvx &> /dev/null; then
echo "Error: uvx is not installed. Please install it with: pip install uvx"
exit 1
fi
if ! python -c "import importlib.util; print(importlib.util.find_spec('mcp_server_qdrant') is not None)" | grep -q "True"; then
echo "Error: mcp-server-qdrant is not installed. Please install it with: pip install mcp-server-qdrant"
exit 1
fi
echo "Starting MCP Qdrant server with $TRANSPORT transport on port $PORT..."
# Run the MCP Qdrant server with the specified transport
if [ "$TRANSPORT" = "sse" ]; then
# For SSE transport, we need to specify the port
uvx mcp-server-qdrant --transport sse --port "$PORT"
else
# For other transports (e.g., stdio which is the default)
uvx mcp-server-qdrant
fi
```
--------------------------------------------------------------------------------
/docs/testing_guide.md:
--------------------------------------------------------------------------------
```markdown
# Testing Guide for MCP Codebase Insight
## Asynchronous Testing
The MCP Codebase Insight project uses asynchronous APIs and should be tested using proper async test clients. Here are guidelines for testing:
### Async vs Sync Testing Clients
The project provides two test client fixtures:
1. **`test_client`** - Use for asynchronous tests
- Returns an `AsyncClient` from httpx
- Must be used with `await` for requests
- Must be used with `@pytest.mark.asyncio` decorator
2. **`sync_test_client`** - Use for synchronous tests
- Returns a `TestClient` from FastAPI
- Used for simpler tests where async is not needed
- No need for await or asyncio decorators
### Example: Async Test
```python
import pytest
@pytest.mark.asyncio
async def test_my_endpoint(test_client):
"""Test an endpoint asynchronously."""
response = await test_client.get("/my-endpoint")
assert response.status_code == 200
data = response.json()
assert "result" in data
```
### Example: Sync Test
```python
def test_simple_endpoint(sync_test_client):
"""Test an endpoint synchronously."""
response = sync_test_client.get("/simple-endpoint")
assert response.status_code == 200
```
### Common Issues
1. **Using TestClient with async:** The error `'TestClient' object does not support the asynchronous context manager protocol` occurs when trying to use TestClient in an async context. Always use the `test_client` fixture for async tests.
2. **Mixing async/sync:** Don't mix async and sync patterns in the same test.
3. **Missing asyncio mark:** Always add `@pytest.mark.asyncio` to async test functions.
## Test Isolation
Tests should be isolated to prevent state interference between tests:
1. Each test gets its own server instance with isolated state
2. Vector store tests use unique collection names (see the sketch below)
3. Cleanup is performed automatically after tests
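For example, a unique collection name per test can come from a small fixture like this (a sketch; the fixture name is illustrative):
```python
import uuid
import pytest

@pytest.fixture
def unique_collection_name() -> str:
    """Isolated collection name so vector store tests never share state."""
    return f"test_collection_{uuid.uuid4().hex}"
```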
## Running Tests
Run tests using pytest:
```bash
# Run all tests
pytest
# Run specific test file
pytest tests/test_file_relationships.py
# Run specific test function
pytest tests/test_file_relationships.py::test_create_file_relationship
```
For more advanced test running options, use the `run_tests.py` script in the project root.
```
--------------------------------------------------------------------------------
/.compile-venv-py3.11/bin/activate.fish:
--------------------------------------------------------------------------------
```
# This file must be used with "source <venv>/bin/activate.fish" *from fish*
# (https://fishshell.com/); you cannot run it directly.
function deactivate -d "Exit virtual environment and return to normal shell environment"
# reset old environment variables
if test -n "$_OLD_VIRTUAL_PATH"
set -gx PATH $_OLD_VIRTUAL_PATH
set -e _OLD_VIRTUAL_PATH
end
if test -n "$_OLD_VIRTUAL_PYTHONHOME"
set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
set -e _OLD_VIRTUAL_PYTHONHOME
end
if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
set -e _OLD_FISH_PROMPT_OVERRIDE
# prevents error when using nested fish instances (Issue #93858)
if functions -q _old_fish_prompt
functions -e fish_prompt
functions -c _old_fish_prompt fish_prompt
functions -e _old_fish_prompt
end
end
set -e VIRTUAL_ENV
set -e VIRTUAL_ENV_PROMPT
if test "$argv[1]" != "nondestructive"
# Self-destruct!
functions -e deactivate
end
end
# Unset irrelevant variables.
deactivate nondestructive
set -gx VIRTUAL_ENV /Users/tosinakinosho/workspaces/mcp-codebase-insight/.compile-venv-py3.11
set -gx _OLD_VIRTUAL_PATH $PATH
set -gx PATH "$VIRTUAL_ENV/"bin $PATH
# Unset PYTHONHOME if set.
if set -q PYTHONHOME
set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
set -e PYTHONHOME
end
if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
# fish uses a function instead of an env var to generate the prompt.
# Save the current fish_prompt function as the function _old_fish_prompt.
functions -c fish_prompt _old_fish_prompt
# With the original prompt function renamed, we can override with our own.
function fish_prompt
# Save the return status of the last command.
set -l old_status $status
# Output the venv prompt; color taken from the blue of the Python logo.
printf "%s%s%s" (set_color 4B8BBE) '(.compile-venv-py3.11) ' (set_color normal)
# Restore the return status of the previous command.
echo "exit $old_status" | .
# Output the original/"old" prompt.
_old_fish_prompt
end
set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
set -gx VIRTUAL_ENV_PROMPT '(.compile-venv-py3.11) '
end
```
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
```python
from setuptools import setup, find_packages
import re
import os
# Read version from __init__.py
with open(os.path.join("src", "mcp_codebase_insight", "__init__.py"), "r") as f:
version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", f.read(), re.M)
if version_match:
version = version_match.group(1)
else:
raise RuntimeError("Unable to find version string")
setup(
name="mcp-codebase-insight",
version=version,
description="Model Context Protocol (MCP) server for codebase analysis and insights",
long_description=open("README.md").read(),
long_description_content_type="text/markdown",
author="Model Context Protocol",
author_email="[email protected]",
url="https://github.com/modelcontextprotocol/mcp-codebase-insight",
packages=find_packages(where="src"),
package_dir={"": "src"},
install_requires=[
"fastapi>=0.103.2,<0.104.0",
"uvicorn>=0.23.2,<0.24.0",
"pydantic>=2.4.2,<3.0.0",
"starlette>=0.27.0,<0.28.0",
"asyncio>=3.4.3",
"aiohttp>=3.9.0,<4.0.0",
"qdrant-client>=1.13.3",
"sentence-transformers>=2.2.2",
"torch>=2.0.0",
"transformers>=4.34.0,<5.0.0",
"python-frontmatter>=1.0.0",
"markdown>=3.4.4",
"PyYAML>=6.0.1",
"structlog>=23.1.0",
"psutil>=5.9.5",
"python-dotenv>=1.0.0",
"requests>=2.31.0",
"beautifulsoup4>=4.12.0",
"scipy>=1.11.0",
"numpy>=1.24.0",
"python-slugify>=8.0.0",
"slugify>=0.0.1",
# Temporarily commented out for development installation
# "uvx>=0.4.0",
"mcp-server-qdrant>=0.2.0",
"mcp==1.5.0",
],
python_requires=">=3.9",
classifiers=[
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Topic :: Software Development :: Libraries :: Python Modules",
],
entry_points={
"console_scripts": [
"mcp-codebase-insight=mcp_codebase_insight.server:run",
],
},
)
```
--------------------------------------------------------------------------------
/scripts/start_mcp_server.sh:
--------------------------------------------------------------------------------
```bash
#!/bin/bash
set -e
# Function to log messages
log() {
echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1"
}
# Function to check if Qdrant is available
check_qdrant() {
local url="${QDRANT_URL:-http://localhost:6333}"
local max_attempts=30
local attempt=1
log "Checking Qdrant connection at $url"
while [ $attempt -le $max_attempts ]; do
if curl -s -f "$url/healthz" > /dev/null 2>&1; then
log "Qdrant is available"
return 0
fi
log "Waiting for Qdrant (attempt $attempt/$max_attempts)..."
sleep 2
attempt=$((attempt + 1))
done
log "Error: Could not connect to Qdrant"
return 1
}
# Function to check Python environment
check_python() {
if ! command -v python3 &> /dev/null; then
log "Error: Python 3 is not installed"
exit 1
fi
if ! python3 -c "import pkg_resources; pkg_resources.require('fastapi>=0.103.2')" &> /dev/null; then
log "Error: Required Python packages are not installed"
exit 1
fi
}
# Function to setup environment
setup_env() {
# Create required directories if they don't exist
mkdir -p docs/adrs knowledge cache logs
# Copy example env file if .env doesn't exist
if [ ! -f .env ] && [ -f .env.example ]; then
cp .env.example .env
log "Created .env from example"
fi
# Set default environment variables if not set
export MCP_HOST=${MCP_HOST:-0.0.0.0}
export MCP_PORT=${MCP_PORT:-3000}
export MCP_LOG_LEVEL=${MCP_LOG_LEVEL:-INFO}
log "Environment setup complete"
}
# Main startup sequence
main() {
log "Starting MCP Codebase Insight Server"
# Perform checks
check_python
setup_env
check_qdrant
# Parse command line arguments
local host="0.0.0.0"
local port="3000"
while [[ $# -gt 0 ]]; do
case $1 in
--host)
host="$2"
shift 2
;;
--port)
port="$2"
shift 2
;;
*)
log "Unknown option: $1"
exit 1
;;
esac
done
# Start server
log "Starting server on $host:$port"
exec python3 -m mcp_codebase_insight
}
# Run main function with all arguments
main "$@"
```
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/__main__.py:
--------------------------------------------------------------------------------
```python
"""Main entry point for MCP server."""
import os
from pathlib import Path
import sys
import logging
import uvicorn
from dotenv import load_dotenv
from .core.config import ServerConfig
from .server import create_app
from .utils.logger import get_logger
# Configure logging
logger = get_logger(__name__)
def get_config() -> ServerConfig:
"""Get server configuration."""
try:
# Load environment variables
load_dotenv()
config = ServerConfig(
host=os.getenv("MCP_HOST", "127.0.0.1"),
port=int(os.getenv("MCP_PORT", "3000")),
log_level=os.getenv("MCP_LOG_LEVEL", "INFO"),
qdrant_url=os.getenv("QDRANT_URL", "http://localhost:6333"),
docs_cache_dir=Path(os.getenv("MCP_DOCS_CACHE_DIR", "docs")),
adr_dir=Path(os.getenv("MCP_ADR_DIR", "docs/adrs")),
kb_storage_dir=Path(os.getenv("MCP_KB_STORAGE_DIR", "knowledge")),
embedding_model=os.getenv("MCP_EMBEDDING_MODEL", "all-MiniLM-L6-v2"),
collection_name=os.getenv("MCP_COLLECTION_NAME", "codebase_patterns"),
debug_mode=os.getenv("MCP_DEBUG", "false").lower() == "true",
metrics_enabled=os.getenv("MCP_METRICS_ENABLED", "true").lower() == "true",
cache_enabled=os.getenv("MCP_CACHE_ENABLED", "true").lower() == "true",
memory_cache_size=int(os.getenv("MCP_MEMORY_CACHE_SIZE", "1000")),
disk_cache_dir=Path(os.getenv("MCP_DISK_CACHE_DIR", "cache")) if os.getenv("MCP_DISK_CACHE_DIR") else None
)
logger.info("Configuration loaded successfully")
return config
except Exception as e:
logger.error(f"Failed to load configuration: {e}", exc_info=True)
raise
def main():
"""Run the server."""
try:
# Get configuration
config = get_config()
# Create FastAPI app
app = create_app(config)
# Log startup message
logger.info(
f"Starting MCP Codebase Insight Server on {config.host}:{config.port} "
f"(log level: {config.log_level}, debug mode: {config.debug_mode})"
)
# Run using Uvicorn directly
uvicorn.run(
app=app,
host=config.host,
port=config.port,
log_level=config.log_level.lower(),
loop="auto",
lifespan="on",
workers=1
)
except Exception as e:
logger.error(f"Server error: {e}", exc_info=True)
sys.exit(1)
if __name__ == "__main__":
# Run main directly without asyncio.run()
main()
```
--------------------------------------------------------------------------------
/scripts/validate_knowledge_base.py:
--------------------------------------------------------------------------------
```python
#!/usr/bin/env python3
"""
Knowledge Base Validation Script
Tests knowledge base operations using Firecrawl MCP.
"""
import asyncio
import logging
from mcp_firecrawl import (
test_knowledge_operations,
validate_entity_relations,
verify_query_results
)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
async def validate_knowledge_base(config: dict) -> bool:
"""Validate knowledge base operations."""
logger.info("Testing knowledge base operations...")
# Test basic knowledge operations
ops_result = await test_knowledge_operations({
"url": "http://localhost:8001",
"auth_token": config["API_KEY"],
"test_entities": [
{"name": "TestClass", "type": "class"},
{"name": "test_method", "type": "method"},
{"name": "test_variable", "type": "variable"}
],
"verify_persistence": True
})
# Validate entity relations
relations_result = await validate_entity_relations({
"url": "http://localhost:8001",
"auth_token": config["API_KEY"],
"test_relations": [
{"from": "TestClass", "to": "test_method", "type": "contains"},
{"from": "test_method", "to": "test_variable", "type": "uses"}
],
"verify_bidirectional": True
})
# Verify query functionality
query_result = await verify_query_results({
"url": "http://localhost:8001",
"auth_token": config["API_KEY"],
"test_queries": [
"find classes that use test_variable",
"find methods in TestClass",
"find variables used by test_method"
],
"expected_matches": {
"classes": ["TestClass"],
"methods": ["test_method"],
"variables": ["test_variable"]
}
})
all_passed = all([
ops_result.success,
relations_result.success,
query_result.success
])
if all_passed:
logger.info("Knowledge base validation successful")
else:
logger.error("Knowledge base validation failed")
if not ops_result.success:
logger.error("Knowledge operations failed")
if not relations_result.success:
logger.error("Entity relations validation failed")
if not query_result.success:
logger.error("Query validation failed")
return all_passed
if __name__ == "__main__":
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).parent.parent))
from scripts.config import load_config
config = load_config()
success = asyncio.run(validate_knowledge_base(config))
sys.exit(0 if success else 1)
```
--------------------------------------------------------------------------------
/test_fixes.md:
--------------------------------------------------------------------------------
```markdown
# MCP Codebase Insight Test Fixes
## Identified Issues
1. **Package Import Problems**
- The tests were trying to import from `mcp_codebase_insight` directly, but the package needed to be imported from `src.mcp_codebase_insight`
- The Python path wasn't correctly set up to include the project root directory
2. **Missing Dependencies**
- The `sentence-transformers` package was installed in the wrong Python environment (Python 3.13 instead of 3.11)
- Had to explicitly install it in the correct environment
3. **Test Isolation Problems**
- Tests were failing due to not being properly isolated
- The `component_test_runner.py` script needed fixes to properly load test modules
4. **Qdrant Server Issue**
- The `test_vector_store_cleanup` test failed due to permission issues in the Qdrant server
- The server couldn't create a collection directory for the test
## Applied Fixes
1. **Fixed Import Paths**
- Modified test files to use `from src.mcp_codebase_insight...` instead of `from mcp_codebase_insight...`
- Added code to explicitly set `sys.path` to include the project root directory
2. **Fixed Dependency Issues**
- Ran `python3.11 -m pip install sentence-transformers` to install the package in the correct environment
- Verified all dependencies were properly installed
3. **Created a Test Runner Script**
- Created `run_test_with_path_fix.sh` to set up the proper environment variables and paths
- Modified `component_test_runner.py` to better handle module loading
4. **Fixed Test Module Loading**
- Added a `load_test_module` function to properly handle import paths
- Ensured the correct Python path is set before importing test modules
## Results
- Successfully ran 2 out of 3 vector store tests:
- ✅ `test_vector_store_initialization`
- ✅ `test_vector_store_add_and_search`
- ❌ `test_vector_store_cleanup` (still failing due to Qdrant server issue)
## Recommendations for Remaining Issue
The `test_vector_store_cleanup` test is failing due to the Qdrant server not being able to create a directory for the collection. This could be fixed by:
1. Checking the Qdrant server configuration to ensure it has proper permissions to create directories
2. Creating the necessary directories beforehand
3. Modifying the test to use a collection name that already exists or mock the collection creation
The error message suggests a file system permission issue:
```
"Can't create directory for collection cleanup_test_db679546. Error: No such file or directory (os error 2)"
```
A simpler fix for testing purposes might be to modify the Qdrant Docker run command to include a volume mount with proper permissions:
```bash
docker run -d -p 6333:6333 -p 6334:6334 -v $(pwd)/qdrant_data:/qdrant/storage qdrant/qdrant
```
This would ensure the storage directory exists and has the right permissions.
```
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/utils/logger.py:
--------------------------------------------------------------------------------
```python
"""Structured logging module."""
import logging
import sys
from typing import Any, Dict, Optional
import structlog
# Configure structlog
structlog.configure(
processors=[
structlog.stdlib.filter_by_level,
structlog.stdlib.add_logger_name,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.TimeStamper(fmt="iso"),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.processors.UnicodeDecoder(),
structlog.processors.JSONRenderer()
],
context_class=dict,
logger_factory=structlog.stdlib.LoggerFactory(),
wrapper_class=structlog.stdlib.BoundLogger,
cache_logger_on_first_use=True,
)
class Logger:
"""Structured logger."""
def __init__(
self,
name: str,
level: str = "INFO",
extra: Optional[Dict[str, Any]] = None
):
"""Initialize logger."""
# Set log level
log_level = getattr(logging, level.upper())
logging.basicConfig(
format="%(message)s",
stream=sys.stdout,
level=log_level,
)
# Create logger
self.logger = structlog.get_logger(name)
self.extra = extra or {}
def bind(self, **kwargs) -> "Logger":
"""Create new logger with additional context."""
extra = {**self.extra, **kwargs}
return Logger(
name=self.logger.name,
level=logging.getLevelName(self.logger.level),
extra=extra
)
def debug(self, event: str, **kwargs):
"""Log debug message."""
self.logger.debug(
event,
**{**self.extra, **kwargs}
)
def info(self, event: str, **kwargs):
"""Log info message."""
self.logger.info(
event,
**{**self.extra, **kwargs}
)
def warning(self, event: str, **kwargs):
"""Log warning message."""
self.logger.warning(
event,
**{**self.extra, **kwargs}
)
def error(self, event: str, **kwargs):
"""Log error message."""
self.logger.error(
event,
**{**self.extra, **kwargs}
)
def exception(self, event: str, exc_info: bool = True, **kwargs):
"""Log exception message."""
self.logger.exception(
event,
exc_info=exc_info,
**{**self.extra, **kwargs}
)
def critical(self, event: str, **kwargs):
"""Log critical message."""
self.logger.critical(
event,
**{**self.extra, **kwargs}
)
def get_logger(
name: str,
level: str = "INFO",
extra: Optional[Dict[str, Any]] = None
) -> Logger:
"""Get logger instance."""
return Logger(name, level, extra)
# Default logger
logger = get_logger("mcp_codebase_insight")
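# Illustrative usage (a sketch, not part of the module):
#
#     log = get_logger("my_component", level="DEBUG")
#     log = log.bind(request_id="abc123")   # new Logger with added context
#     log.info("request_started", path="/health")
#
# Each call renders a single JSON object via structlog's JSONRenderer.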
```
--------------------------------------------------------------------------------
/scripts/validate_vector_store.py:
--------------------------------------------------------------------------------
```python
#!/usr/bin/env python3
"""
Vector Store Validation Script
Tests vector store operations using local codebase.
"""
import asyncio
import logging
from pathlib import Path
import sys
# Add the src directory to the Python path
sys.path.append(str(Path(__file__).parent.parent / "src"))
from mcp_codebase_insight.core.vector_store import VectorStore
from mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
async def validate_vector_store(config: dict) -> bool:
"""Validate vector store operations."""
logger.info("Testing vector store operations...")
try:
# Initialize embedder
embedder = SentenceTransformerEmbedding(
model_name="sentence-transformers/all-MiniLM-L6-v2"
)
await embedder.initialize()
logger.info("Embedder initialized successfully")
# Initialize vector store
vector_store = VectorStore(
url=config.get("QDRANT_URL", "http://localhost:6333"),
embedder=embedder,
collection_name=config.get("COLLECTION_NAME", "mcp-codebase-insight"),
api_key=config.get("QDRANT_API_KEY", ""),
vector_name="default"
)
await vector_store.initialize()
logger.info("Vector store initialized successfully")
# Test vector operations
test_text = "def test_function():\n pass"
        embedding = await embedder.embed(test_text)  # direct embedder smoke test (result not reused below)
# Store vector
await vector_store.add_vector(
text=test_text,
metadata={"type": "code", "content": test_text}
)
logger.info("Vector storage test passed")
# Search for similar vectors
logger.info("Searching for similar vectors")
results = await vector_store.search_similar(
query=test_text,
limit=1
)
if not results or len(results) == 0:
logger.error("Vector search test failed: No results found")
return False
logger.info("Vector search test passed")
# Verify result metadata
result = results[0]
if not result.metadata or result.metadata.get("type") != "code":
logger.error("Vector metadata test failed: Invalid metadata")
return False
logger.info("Vector metadata test passed")
return True
except Exception as e:
logger.error(f"Vector store validation failed: {e}")
return False
if __name__ == "__main__":
# Load config from environment or .env file
from dotenv import load_dotenv
load_dotenv()
import os
config = {
"QDRANT_URL": os.getenv("QDRANT_URL", "http://localhost:6333"),
"COLLECTION_NAME": os.getenv("COLLECTION_NAME", "mcp-codebase-insight"),
"QDRANT_API_KEY": os.getenv("QDRANT_API_KEY", "")
}
success = asyncio.run(validate_vector_store(config))
sys.exit(0 if success else 1)
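# Typical invocation, assuming a local Qdrant instance is already running:
#     QDRANT_URL=http://localhost:6333 python scripts/validate_vector_store.py
# Exit code 0 means every vector store check passed.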
```
--------------------------------------------------------------------------------
/tests/components/conftest.py:
--------------------------------------------------------------------------------
```python
"""
Component Test Fixture Configuration.
This file defines fixtures specifically for component tests that might have different
scope requirements than the main test fixtures.
"""
import pytest
import pytest_asyncio
import sys
import os
from pathlib import Path
import uuid
from typing import Dict
# Ensure the project root is on the Python path before importing project modules
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
# Import required components
from src.mcp_codebase_insight.core.config import ServerConfig
from src.mcp_codebase_insight.core.vector_store import VectorStore
from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding
from src.mcp_codebase_insight.core.knowledge import KnowledgeBase
from src.mcp_codebase_insight.core.tasks import TaskManager
@pytest.fixture
def test_config():
"""Create a server configuration for tests.
This is an alias for test_server_config that allows component tests to use
their expected fixture name.
"""
config = ServerConfig(
host="localhost",
port=8000,
log_level="DEBUG",
qdrant_url="http://localhost:6333",
docs_cache_dir=Path(".test_cache") / "docs",
adr_dir=Path(".test_cache") / "docs/adrs",
kb_storage_dir=Path(".test_cache") / "knowledge",
embedding_model="all-MiniLM-L6-v2",
collection_name=f"test_collection_{uuid.uuid4().hex[:8]}",
debug_mode=True,
metrics_enabled=False,
cache_enabled=True,
memory_cache_size=1000,
disk_cache_dir=Path(".test_cache") / "cache"
)
return config
@pytest.fixture
def test_metadata() -> Dict:
"""Standard test metadata for consistency across tests."""
return {
"type": "code",
"language": "python",
"title": "Test Code",
"description": "Test code snippet for vector store testing",
"tags": ["test", "vector"]
}
@pytest_asyncio.fixture
async def embedder():
"""Create an embedder for tests."""
return SentenceTransformerEmbedding()
@pytest_asyncio.fixture
async def vector_store(test_config, embedder):
"""Create a vector store for tests."""
store = VectorStore(test_config.qdrant_url, embedder)
await store.initialize()
yield store
await store.cleanup()
@pytest_asyncio.fixture
async def task_manager(test_config):
"""Create a task manager for tests."""
manager = TaskManager(test_config)
await manager.initialize()
yield manager
await manager.cleanup()
@pytest.fixture
def test_code():
"""Provide sample code for testing task-related functionality."""
return """
def example_function():
\"\"\"This is a test function for task manager tests.\"\"\"
return "Hello, world!"
class TestClass:
def __init__(self):
self.value = 42
def method(self):
return self.value
"""
@pytest_asyncio.fixture
async def knowledge_base(test_config, vector_store):
"""Create a knowledge base for tests."""
kb = KnowledgeBase(test_config, vector_store)
await kb.initialize()
yield kb
await kb.cleanup()
```
--------------------------------------------------------------------------------
/tests/test_file_relationships.py:
--------------------------------------------------------------------------------
```python
import pytest
@pytest.mark.asyncio
async def test_create_file_relationship(client):
"""Test creating a file relationship."""
relationship_data = {
"source_file": "src/main.py",
"target_file": "src/utils.py",
"relationship_type": "imports",
"description": "Main imports utility functions",
"metadata": {"importance": "high"}
}
response = await client.post("/relationships", json=relationship_data)
assert response.status_code == 200
data = response.json()
assert data["source_file"] == relationship_data["source_file"]
assert data["target_file"] == relationship_data["target_file"]
assert data["relationship_type"] == relationship_data["relationship_type"]
@pytest.mark.asyncio
async def test_get_file_relationships(client):
"""Test getting file relationships."""
# Create a test relationship first
relationship_data = {
"source_file": "src/test.py",
"target_file": "src/helper.py",
"relationship_type": "depends_on"
}
await client.post("/relationships", json=relationship_data)
# Test getting all relationships
response = await client.get("/relationships")
assert response.status_code == 200
data = response.json()
assert len(data) > 0
assert isinstance(data, list)
# Test filtering by source file
response = await client.get("/relationships", params={"source_file": "src/test.py"})
assert response.status_code == 200
data = response.json()
assert all(r["source_file"] == "src/test.py" for r in data)
@pytest.mark.asyncio
async def test_create_web_source(client):
"""Test creating a web source."""
source_data = {
"url": "https://example.com/docs",
"title": "API Documentation",
"content_type": "documentation",
"description": "External API documentation",
"tags": ["api", "docs"],
"metadata": {"version": "1.0"}
}
response = await client.post("/web-sources", json=source_data)
assert response.status_code == 200
data = response.json()
assert data["url"] == source_data["url"]
assert data["title"] == source_data["title"]
assert data["content_type"] == source_data["content_type"]
@pytest.mark.asyncio
async def test_get_web_sources(client):
"""Test getting web sources."""
# Create a test web source first
source_data = {
"url": "https://example.com/tutorial",
"title": "Tutorial",
"content_type": "tutorial",
"tags": ["guide", "tutorial"]
}
await client.post("/web-sources", json=source_data)
# Test getting all web sources
response = await client.get("/web-sources")
assert response.status_code == 200
data = response.json()
assert len(data) > 0
assert isinstance(data, list)
# Test filtering by content type
response = await client.get("/web-sources", params={"content_type": "tutorial"})
assert response.status_code == 200
data = response.json()
assert all(s["content_type"] == "tutorial" for s in data)
# Test filtering by tags
response = await client.get("/web-sources", params={"tags": ["guide"]})
assert response.status_code == 200
data = response.json()
assert any("guide" in s["tags"] for s in data)
```
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
```toml
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "mcp-codebase-insight"
dynamic = ["version"]
description = "MCP Codebase Insight Server"
readme = "README.md"
requires-python = ">=3.10"
license = {text = "MIT"}
authors = [
{name = "Tosin Akinosho"}
]
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Topic :: Software Development :: Libraries :: Python Modules",
]
dependencies = [
"fastapi>=0.109.0",
"uvicorn>=0.23.2",
"pydantic>=2.4.2",
"starlette>=0.35.0",
"asyncio>=3.4.3",
"aiohttp>=3.9.0",
"qdrant-client>=1.13.3",
"sentence-transformers>=2.2.2",
"torch>=2.0.0",
"transformers>=4.34.0",
"python-frontmatter>=1.0.0",
"markdown>=3.4.4",
"PyYAML>=6.0.1",
"structlog>=23.1.0",
"psutil>=5.9.5",
"python-dotenv>=1.0.0",
"requests>=2.31.0",
"beautifulsoup4>=4.12.0",
"scipy>=1.11.0",
"python-slugify>=8.0.0",
"slugify>=0.0.1",
"numpy>=1.24.0",
# "uvx>=0.4.0", # Temporarily commented out for development installation
"mcp-server-qdrant>=0.2.0",
"mcp>=1.5.0,<1.6.0", # Pin to MCP 1.5.0 for API compatibility
]
[project.optional-dependencies]
test = [
"pytest>=7.4.2",
"pytest-asyncio>=0.21.1",
"pytest-cov>=4.1.0",
"httpx>=0.25.0",
]
dev = [
"black>=23.9.1",
"isort>=5.12.0",
"mypy>=1.5.1",
"flake8>=6.1.0",
"bump2version>=1.0.1",
"pre-commit>=3.5.0",
"pdoc>=14.1.0",
]
[project.urls]
Homepage = "https://github.com/tosin2013/mcp-codebase-insight"
Documentation = "https://github.com/tosin2013/mcp-codebase-insight/docs"
Repository = "https://github.com/tosin2013/mcp-codebase-insight.git"
Issues = "https://github.com/tosin2013/mcp-codebase-insight/issues"
[project.scripts]
mcp-codebase-insight = "mcp_codebase_insight.server:run"
[tool.setuptools]
package-dir = {"" = "src"}
[tool.setuptools.packages.find]
where = ["src"]
include = ["mcp_codebase_insight*"]
[tool.black]
line-length = 88
target-version = ['py311']
include = '\.pyi?$'
[tool.isort]
profile = "black"
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
ensure_newline_before_comments = true
line_length = 88
[tool.mypy]
python_version = "3.11"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
check_untyped_defs = true
disallow_untyped_decorators = true
no_implicit_optional = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_no_return = true
warn_unreachable = true
[tool.pytest.ini_options]
minversion = "6.0"
addopts = "-ra -q --cov=src --cov-report=term-missing"
testpaths = ["tests"]
asyncio_mode = "auto"
[tool.coverage.run]
source = ["src"]
branch = true
[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"def __repr__",
"if self.debug:",
"raise NotImplementedError",
"if __name__ == .__main__.:",
"pass",
"raise ImportError",
]
ignore_errors = true
omit = ["tests/*", "setup.py"]
```
--------------------------------------------------------------------------------
/tests/components/test_task_manager.py:
--------------------------------------------------------------------------------
```python
import sys
import os
import pytest
import pytest_asyncio
from pathlib import Path
from typing import AsyncGenerator
from src.mcp_codebase_insight.core.tasks import TaskManager, TaskType, TaskStatus
from src.mcp_codebase_insight.core.config import ServerConfig
@pytest_asyncio.fixture
async def task_manager(test_config: ServerConfig):
manager = TaskManager(test_config)
await manager.initialize()
yield manager
await manager.cleanup()
@pytest.mark.asyncio
async def test_task_manager_initialization(task_manager: TaskManager):
"""Test that task manager initializes correctly."""
assert task_manager is not None
assert task_manager.config is not None
@pytest.mark.asyncio
async def test_create_and_get_task(task_manager: TaskManager, test_code: str):
"""Test creating and retrieving tasks."""
# Create task
task = await task_manager.create_task(
type="code_analysis",
title="Test task",
description="Test task description",
context={"code": test_code}
)
assert task is not None
# Get task
retrieved_task = await task_manager.get_task(task.id)
assert retrieved_task.context["code"] == test_code
assert retrieved_task.type == TaskType.CODE_ANALYSIS
assert retrieved_task.description == "Test task description"
@pytest.mark.asyncio
async def test_task_status_updates(task_manager: TaskManager, test_code: str):
"""Test task status updates."""
# Create task
task = await task_manager.create_task(
type="code_analysis",
title="Status Test",
description="Test task status updates",
context={"code": test_code}
)
# Update status
await task_manager.update_task(task.id, status=TaskStatus.IN_PROGRESS)
updated_task = await task_manager.get_task(task.id)
assert updated_task.status == TaskStatus.IN_PROGRESS
await task_manager.update_task(task.id, status=TaskStatus.COMPLETED)
completed_task = await task_manager.get_task(task.id)
assert completed_task.status == TaskStatus.COMPLETED
@pytest.mark.asyncio
async def test_task_result_updates(task_manager: TaskManager, test_code: str):
"""Test updating task results."""
# Create task
task = await task_manager.create_task(
type="code_analysis",
title="Result Test",
description="Test task result updates",
context={"code": test_code}
)
# Update result
result = {"analysis": "Test analysis result"}
await task_manager.update_task(task.id, result=result)
# Verify result
updated_task = await task_manager.get_task(task.id)
assert updated_task.result == result
@pytest.mark.asyncio
async def test_list_tasks(task_manager: TaskManager, test_code: str):
"""Test listing tasks."""
# Create multiple tasks
tasks = []
for i in range(3):
task = await task_manager.create_task(
type="code_analysis",
title=f"List Test {i}",
description=f"Test task {i}",
context={"code": test_code}
)
tasks.append(task)
# List tasks
task_list = await task_manager.list_tasks()
assert len(task_list) >= 3
# Verify task descriptions
descriptions = [task.description for task in task_list]
for i in range(3):
assert f"Test task {i}" in descriptions
```
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
```markdown
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Added
- Initial project setup
- Core server implementation
- ADR management system
- Documentation management
- Knowledge base with vector search
- Debug system
- Task management
- Metrics and health monitoring
- Caching system
- Structured logging
- Docker support
- CI/CD pipeline
- Test suite
### Changed
- None
### Deprecated
- None
### Removed
- None
### Fixed
- None
### Security
- None
## [0.2.2] - 2025-03-25
### Added
- Implemented single source of truth for versioning
### Changed
- Moved version to the package's __init__.py file as the canonical source
- Updated setup.py to dynamically read version from __init__.py
- Updated pyproject.toml to use dynamic versioning
- Synchronized dependencies between setup.py, pyproject.toml and requirements.in
### Fixed
- Missing dependencies in setup.py and pyproject.toml
## [0.2.1] - 2025-03-25
### Added
- Integrated Qdrant Docker container in CI/CD workflow for more realistic testing
- Added collection initialization step for proper Qdrant setup in CI/CD
- Created shared Qdrant client fixture for improved test reliability
### Changed
- Updated Python version requirement from >=3.11 to >=3.9 for broader compatibility
- Enhanced test fixture scoping to resolve event_loop fixture scope mismatches
- Improved connection verification for Qdrant in GitHub Actions workflow
### Fixed
- Resolved fixture scope mismatches in async tests
- Fixed environment variable handling in test configuration
### Removed
- None
### Security
- None
## [0.2.0] - 2025-03-24
### Added
- None
### Changed
- Improved async test fixture handling in component tests
- Enhanced test discovery to properly distinguish between test functions and fixtures
- Updated component test runner for better isolation and resource management
### Fixed
- Resolved fixture scope mismatches in async tests
- Fixed async event loop handling in component tests
- Corrected test_metadata fixture identification in test_vector_store.py
### Removed
- None
### Security
- None
## [0.1.0] - 2025-03-19
### Added
- Initial release
- Basic server functionality
- Core components:
- ADR management
- Documentation handling
- Knowledge base
- Vector search
- Task management
- Health monitoring
- Metrics collection
- Caching
- Logging
- Docker support
- CI/CD pipeline with GitHub Actions
- Test coverage with pytest
- Code quality tools:
- Black
- isort
- flake8
- mypy
- Documentation:
- README
- API documentation
- Contributing guidelines
- ADR templates
- Development tools:
- Makefile
- Docker compose
- Environment configuration
- Version management
[Unreleased]: https://github.com/modelcontextprotocol/mcp-codebase-insight/compare/v0.2.2...HEAD
[0.2.2]: https://github.com/modelcontextprotocol/mcp-codebase-insight/compare/v0.2.1...v0.2.2
[0.2.1]: https://github.com/modelcontextprotocol/mcp-codebase-insight/releases/tag/v0.2.1
[0.2.0]: https://github.com/modelcontextprotocol/mcp-codebase-insight/releases/tag/v0.2.0
[0.1.0]: https://github.com/modelcontextprotocol/mcp-codebase-insight/releases/tag/v0.1.0
```
--------------------------------------------------------------------------------
/docs/documentation_map.md:
--------------------------------------------------------------------------------
```markdown
# Documentation Relationship Map
```mermaid
graph TD
%% ADRs
ADR1[ADR-0001: Testing Strategy]
ADR2[ADR-0002: SSE Testing]
ADR3[ADR-0003: Comprehensive Testing]
ADR4[ADR-0004: Documentation Linking]
%% Core Systems
CS1[Vector Store System]
CS2[Knowledge Base]
CS3[Task Management]
CS4[Health Monitoring]
CS5[Error Handling]
CS6[Metrics Collection]
CS7[Cache Management]
%% Features
FA[Code Analysis]
FB[ADR Management]
FC[Documentation Management]
%% Testing
TA[Server Testing]
TB[SSE Testing]
%% Components
C1[Server Framework]
C2[Testing Framework]
C3[Documentation Tools]
%% Implementation Files
I1[test_server_instance.py]
I2[SSETestManager.py]
I3[ServerTestFramework.py]
I4[DocNode.py]
I5[DocumentationMap.py]
%% Core Classes
CC1[ServerConfig]
CC2[ErrorCode]
CC3[ComponentState]
CC4[TaskTracker]
CC5[DocumentationType]
%% Relationships - Core Systems
CS1 --> CC1
CS2 --> CS1
CS2 --> CS7
CS3 --> CC4
CS4 --> CC3
CS5 --> CC2
%% Relationships - ADRs
ADR1 --> I1
ADR1 --> C1
ADR2 --> I2
ADR2 --> TB
ADR3 --> I3
ADR3 --> C2
ADR4 --> I4
ADR4 --> I5
ADR4 --> C3
%% Relationships - Features
FA --> CS2
FA --> CS1
FB --> ADR1
FB --> ADR2
FB --> ADR3
FB --> ADR4
FC --> C3
FC --> CC5
%% Relationships - Testing
TA --> I1
TA --> I3
TB --> I2
TB --> ADR2
%% Component Relationships
C1 --> CC1
C1 --> CS4
C2 --> I2
C2 --> I3
C3 --> I4
C3 --> I5
%% Error Handling
CS5 --> FA
CS5 --> FB
CS5 --> FC
CS5 --> CS1
CS5 --> CS2
CS5 --> CS3
%% Styling
classDef adr fill:#f9f,stroke:#333,stroke-width:2px
classDef feature fill:#bbf,stroke:#333,stroke-width:2px
classDef testing fill:#bfb,stroke:#333,stroke-width:2px
classDef component fill:#fbb,stroke:#333,stroke-width:2px
classDef implementation fill:#ddd,stroke:#333,stroke-width:1px
classDef core fill:#ffd,stroke:#333,stroke-width:2px
classDef coreClass fill:#dff,stroke:#333,stroke-width:1px
class ADR1,ADR2,ADR3,ADR4 adr
class FA,FB,FC feature
class TA,TB testing
class C1,C2,C3 component
class I1,I2,I3,I4,I5 implementation
class CS1,CS2,CS3,CS4,CS5,CS6,CS7 core
class CC1,CC2,CC3,CC4,CC5 coreClass
```
## Documentation Map Legend
### Node Types
- **Purple**: Architecture Decision Records (ADRs)
- **Blue**: Feature Documentation
- **Green**: Testing Documentation
- **Red**: Key Components
- **Gray**: Implementation Files
- **Yellow**: Core Systems
- **Light Blue**: Core Classes
### Relationship Types
- Arrows indicate dependencies or references between documents
- Direct connections show implementation relationships
- Indirect connections show conceptual relationships
### Key Areas
1. **Core Systems**
- Vector Store and Knowledge Base
- Task Management and Health Monitoring
- Error Handling and Metrics Collection
- Cache Management
2. **Testing Infrastructure**
- Centered around ADR-0001 and ADR-0002
- Connected to Server and SSE testing implementations
3. **Documentation Management**
- Focused on ADR-0004
- Links to Documentation Tools and models
4. **Feature Implementation**
- Shows how features connect to components
- Demonstrates implementation dependencies
5. **Error Handling**
- Centralized error management
- Connected to all major systems
- Standardized error codes and types
```
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/server_test_isolation.py:
--------------------------------------------------------------------------------
```python
"""Test isolation for ServerState.
This module provides utilities to create isolated ServerState instances for testing,
preventing state conflicts between parallel test runs.
"""
from typing import Dict, Optional
import asyncio
import uuid
import logging
from .core.state import ServerState
from .utils.logger import get_logger
logger = get_logger(__name__)
# Store of server states keyed by instance ID
_server_states: Dict[str, ServerState] = {}
def get_isolated_server_state(instance_id: Optional[str] = None) -> ServerState:
"""Get or create an isolated ServerState instance for tests.
Args:
instance_id: Optional unique ID for the server state
Returns:
An isolated ServerState instance
"""
global _server_states
    if instance_id is None:
        # No ID was supplied: generate a unique one so this state is isolated.
        # It is still registered below so cleanup_all_server_states() can reach it.
        instance_id = f"temp_{uuid.uuid4().hex}"
if instance_id not in _server_states:
logger.debug(f"Creating new isolated ServerState with ID: {instance_id}")
_server_states[instance_id] = ServerState()
return _server_states[instance_id]
async def cleanup_all_server_states():
"""Clean up all tracked server states."""
global _server_states
logger.debug(f"Cleaning up {len(_server_states)} isolated server states")
# Make a copy of the states to avoid modification during iteration
states_to_clean = list(_server_states.items())
cleanup_tasks = []
for instance_id, state in states_to_clean:
try:
logger.debug(f"Cleaning up ServerState: {instance_id}")
if state.initialized:
# Get active tasks before cleanup
active_tasks = state.get_active_tasks()
if active_tasks:
logger.debug(
f"Found {len(active_tasks)} active tasks for {instance_id}"
)
# Schedule state cleanup with increased timeout
cleanup_task = asyncio.create_task(
asyncio.wait_for(state.cleanup(), timeout=5.0)
)
cleanup_tasks.append((instance_id, cleanup_task))
else:
logger.debug(f"Skipping uninitialized ServerState: {instance_id}")
except Exception as e:
logger.error(
f"Error preparing cleanup for ServerState {instance_id}: {e}",
exc_info=True
)
# Wait for all cleanup tasks to complete
if cleanup_tasks:
for instance_id, task in cleanup_tasks:
try:
await task
logger.debug(f"State {instance_id} cleaned up successfully")
# Verify no tasks remain
state = _server_states.get(instance_id)
if state and state.get_task_count() > 0:
logger.warning(
f"State {instance_id} still has {state.get_task_count()} "
"active tasks after cleanup"
)
except asyncio.TimeoutError:
logger.warning(f"State cleanup timed out for {instance_id}")
# Force cleanup
state = _server_states.get(instance_id)
if state:
state.initialized = False
except Exception as e:
logger.error(f"Error during state cleanup for {instance_id}: {e}")
# Clear all states from global store
_server_states.clear()
logger.debug("All server states cleaned up")
```
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/task_tracker.py:
--------------------------------------------------------------------------------
```python
"""Task tracking and management for async operations."""
import asyncio
import logging
from typing import Set, Optional
from datetime import datetime
from ..utils.logger import get_logger
logger = get_logger(__name__)
class TaskTracker:
"""Tracks and manages async tasks with improved error handling and logging."""
def __init__(self):
"""Initialize the task tracker."""
self._tasks: Set[asyncio.Task] = set()
self._loop = asyncio.get_event_loop()
self._loop_id = id(self._loop)
self._start_time = datetime.utcnow()
logger.debug(f"TaskTracker initialized with loop ID: {self._loop_id}")
def track_task(self, task: asyncio.Task) -> None:
"""Track a new task and set up completion handling.
Args:
task: The asyncio.Task to track
"""
if id(asyncio.get_event_loop()) != self._loop_id:
logger.warning(
f"Task created in different event loop context. "
f"Expected: {self._loop_id}, Got: {id(asyncio.get_event_loop())}"
)
self._tasks.add(task)
task.add_done_callback(self._handle_task_completion)
logger.debug(f"Tracking new task: {task.get_name()}")
    def _handle_task_completion(self, task: asyncio.Task) -> None:
        """Handle task completion and cleanup.
        Args:
            task: The completed task
        """
        self._tasks.discard(task)
        if task.cancelled():
            # Check cancellation first: .exception() raises CancelledError
            # when called on a cancelled task.
            logger.debug(f"Task {task.get_name()} was cancelled")
        elif task.exception():
            logger.error(
                f"Task {task.get_name()} failed with error: {task.exception()}",
                exc_info=True
            )
        else:
            logger.debug(f"Task {task.get_name()} completed successfully")
async def cancel_all_tasks(self, timeout: float = 5.0) -> None:
"""Cancel all tracked tasks and wait for completion.
Args:
timeout: Maximum time to wait for tasks to cancel
"""
if not self._tasks:
logger.debug("No tasks to cancel")
return
logger.debug(f"Cancelling {len(self._tasks)} tasks")
for task in self._tasks:
if not task.done() and not task.cancelled():
task.cancel()
try:
await asyncio.wait_for(
asyncio.gather(*self._tasks, return_exceptions=True),
timeout=timeout
)
logger.debug("All tasks cancelled successfully")
except asyncio.TimeoutError:
logger.warning(f"Task cancellation timed out after {timeout} seconds")
except Exception as e:
logger.error(f"Error during task cancellation: {e}", exc_info=True)
def get_active_tasks(self) -> Set[asyncio.Task]:
"""Get all currently active tasks.
Returns:
Set of active asyncio.Task objects
"""
return self._tasks.copy()
def get_task_count(self) -> int:
"""Get the number of currently tracked tasks.
Returns:
Number of active tasks
"""
return len(self._tasks)
def get_uptime(self) -> float:
"""Get the uptime of the task tracker in seconds.
Returns:
Uptime in seconds
"""
return (datetime.utcnow() - self._start_time).total_seconds()
def __del__(self):
"""Cleanup when the tracker is destroyed."""
if self._tasks:
logger.warning(
f"TaskTracker destroyed with {len(self._tasks)} "
"unfinished tasks"
)
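# Illustrative usage (a sketch; `worker` is a hypothetical coroutine):
#
#     tracker = TaskTracker()
#     tracker.track_task(asyncio.create_task(worker(), name="worker-1"))
#     ...
#     await tracker.cancel_all_tasks(timeout=2.0)
#     assert tracker.get_task_count() == 0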
```
--------------------------------------------------------------------------------
/docs/getting-started/installation.md:
--------------------------------------------------------------------------------
```markdown
# Installation Guide
> 🚧 **Documentation In Progress**
>
> This documentation is being actively developed. More details will be added soon.
## Prerequisites
Before installing MCP Codebase Insight, ensure you have the following:
- Python 3.11 or higher
- pip (Python package installer)
- Git
- Docker (optional, for containerized deployment)
- 4GB RAM minimum (8GB recommended)
- 2GB free disk space
## System Requirements
### Operating Systems
- Linux (Ubuntu 20.04+, CentOS 8+)
- macOS (10.15+)
- Windows 10/11 with WSL2
### Python Dependencies
- FastAPI
- Pydantic
- httpx
- sentence-transformers
- qdrant-client
## Installation Methods
### 1. Using pip (Recommended)
```bash
# Create and activate a virtual environment
python -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
# Install MCP Codebase Insight
pip install mcp-codebase-insight
# Verify installation
mcp-codebase-insight --version
```
### 2. Using Docker
```bash
# Pull the Docker image
docker pull modelcontextprotocol/mcp-codebase-insight
# Create necessary directories
mkdir -p docs knowledge cache
# Run the container
docker run -p 3000:3000 \
--env-file .env \
-v $(pwd)/docs:/app/docs \
-v $(pwd)/knowledge:/app/knowledge \
-v $(pwd)/cache:/app/cache \
modelcontextprotocol/mcp-codebase-insight
```
### 3. From Source
```bash
# Clone the repository
git clone https://github.com/modelcontextprotocol/mcp-codebase-insight.git
cd mcp-codebase-insight
# Create and activate virtual environment
python -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
# Install dependencies
pip install -r requirements.txt
# Install in development mode
pip install -e .
```
## Environment Setup
1. Create a `.env` file in your project root:
```bash
MCP_HOST=127.0.0.1
MCP_PORT=3000
QDRANT_URL=http://localhost:6333
MCP_DOCS_CACHE_DIR=./docs
MCP_ADR_DIR=./docs/adrs
MCP_KB_STORAGE_DIR=./knowledge
MCP_DISK_CACHE_DIR=./cache
LOG_LEVEL=INFO
```
2. Create required directories:
```bash
mkdir -p docs/adrs knowledge cache
```
## Post-Installation Steps
1. **Vector Database Setup**
- Follow the [Qdrant Setup Guide](qdrant_setup.md) to install and configure Qdrant
2. **Verify Installation**
```bash
# Start the server
mcp-codebase-insight --host 127.0.0.1 --port 3000
# In another terminal, test the health endpoint
curl http://localhost:3000/health
```
3. **Initial Configuration**
- Configure authentication (if needed)
- Set up logging
- Configure metrics collection
## Common Installation Issues
### 1. Dependencies Installation Fails
```bash
# Try upgrading pip
pip install --upgrade pip
# Install wheel
pip install wheel
# Retry installation
pip install mcp-codebase-insight
```
### 2. Port Already in Use
```bash
# Check what's using port 3000
lsof -i :3000 # On Linux/macOS
netstat -ano | findstr :3000 # On Windows
# Use a different port
mcp-codebase-insight --port 3001
```
### 3. Permission Issues
```bash
# Fix directory permissions
chmod -R 755 docs knowledge cache
```
## Next Steps
- Read the [Configuration Guide](configuration.md) for detailed setup options
- Follow the [Quick Start Tutorial](quickstart.md) to begin using the system
- Check the [Best Practices](../development/best-practices.md) for optimal usage
- Follow the [Qdrant Setup](qdrant_setup.md) to set up the vector database
## Support
If you encounter any issues during installation:
1. Check the [Troubleshooting Guide](../troubleshooting/common-issues.md)
2. Search existing [GitHub Issues](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues)
3. Open a new issue if needed
```
--------------------------------------------------------------------------------
/docs/SSE_INTEGRATION.md:
--------------------------------------------------------------------------------
```markdown
# Server-Sent Events (SSE) Integration
This document explains the Server-Sent Events (SSE) integration in the MCP Codebase Insight server, including its purpose, architecture, and usage instructions.
## Overview
The SSE integration enables real-time, bidirectional communication between the MCP Codebase Insight server and clients using the Model Context Protocol (MCP): the SSE stream carries server-to-client events, while a companion HTTP POST endpoint carries client-to-server messages. Together they let clients receive live updates for long-running operations and maintain persistent connections for continuous data flow.
## Architecture
The SSE integration is built as a modular component within the MCP Codebase Insight system, following these design principles:
1. **Separation of Concerns**: The SSE transport layer is isolated from the core application logic
2. **Non-Interference**: SSE endpoints operate alongside existing REST API endpoints without disruption
3. **Shared Resources**: Both REST and SSE interfaces use the same underlying components and state
### Key Components
- **MCP_CodebaseInsightServer**: Manages the MCP protocol server and exposes system functionality as MCP tools
- **FastMCP**: The core MCP protocol implementation that handles messaging format and protocol features
- **SseServerTransport**: Implements the SSE protocol for persistent connections
- **Starlette Integration**: Low-level ASGI application that handles SSE connections
### Endpoint Structure
- `/mcp/sse/`: Establishes the SSE connection for real-time events
- `/mcp/messages/`: Handles incoming messages from clients via HTTP POST
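For a quick manual check of the stream endpoint above (assuming the default port 8000 used in the client configuration below):
```bash
# -N disables output buffering so events are printed as they arrive
curl -N http://localhost:8000/mcp/sse/
```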
### Data Flow
```
Client <--- SSE events ---- (/mcp/sse/) ------ MCP Server <---> Core Components
Client ---- HTTP POST ----> (/mcp/messages/) -> MCP Server
```
## Available Tools
The SSE integration exposes these core system capabilities as MCP tools:
1. **vector-search**: Search for code snippets semantically similar to a query text
2. **knowledge-search**: Search for patterns in the knowledge base
3. **adr-list**: Retrieve architectural decision records
4. **task-status**: Check status of long-running tasks
## Usage Instructions
### Client Configuration
To connect to the SSE endpoint, configure your MCP client as follows:
```json
{
"mcpClients": {
"codebase-insight-sse": {
"url": "http://localhost:8000/mcp",
"transport": "sse"
}
}
}
```
### Example: Connecting with MCP Client
```python
from mcp.client import Client
# Connect to the SSE endpoint
client = Client.connect("codebase-insight-sse")
# Use vector search tool
results = await client.call_tool(
"vector-search",
{"query": "function that parses JSON", "limit": 5}
)
```
## Testing
The SSE implementation includes tests to verify:
1. Connection establishment and maintenance
2. Tool registration and execution
3. Error handling and reconnection behavior
Run SSE-specific tests with:
```bash
pytest tests/integration/test_sse.py -v
```
## Security Considerations
The SSE integration inherits the security model of the main application. When security features like authentication are enabled, they apply to SSE connections as well.
## Performance Considerations
SSE connections are persistent and can consume server resources. Consider these guidelines:
- Implement client-side reconnection strategies with exponential backoff (see the sketch after this list)
- Set reasonable timeouts for idle connections
- Monitor connection counts in production environments
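A minimal sketch of such a reconnection loop, assuming the `httpx-sse` client library (listed in this project's system card); names like `listen_with_backoff` are illustrative:
```python
import asyncio
import httpx
from httpx_sse import aconnect_sse

async def listen_with_backoff(url: str, max_delay: float = 30.0) -> None:
    """Consume an SSE stream, reconnecting with exponential backoff."""
    delay = 1.0
    while True:
        try:
            async with httpx.AsyncClient(timeout=None) as client:
                async with aconnect_sse(client, "GET", url) as source:
                    delay = 1.0  # reset backoff after a successful connect
                    async for event in source.aiter_sse():
                        print(event.event, event.data)
        except httpx.HTTPError:
            await asyncio.sleep(delay)
            delay = min(delay * 2, max_delay)  # back off up to max_delay

# asyncio.run(listen_with_backoff("http://localhost:8000/mcp/sse/"))
```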
## Troubleshooting
Common issues and solutions:
1. **Connection Refused**: Ensure the server is running and the client is using the correct URL
2. **Invalid SSE Format**: Check for middleware that might buffer responses
3. **Connection Drops**: Verify network stability and implement reconnection logic
```
--------------------------------------------------------------------------------
/docs/adrs/001_use_docker_for_qdrant.md:
--------------------------------------------------------------------------------
```markdown
# Use Docker for Qdrant Vector Database
## Status
Accepted
## Context
We need a vector database to store and search through code patterns and documentation embeddings. Qdrant is chosen as our vector database solution, and we need to determine the best way to deploy and manage it.
## Decision Drivers
* Ease of deployment and setup
* Development environment consistency
* Production readiness
* Resource management
* Scalability
* Maintainability
## Considered Options
### Option 1: Docker Container
* Use official Qdrant Docker image
* Run as containerized service
* Manage with Docker Compose for local development
* Use Kubernetes for production deployment
### Option 2: Native Installation
* Install Qdrant directly on host system
* Manage as system service
* Configure through system files
* Handle updates through package manager
### Option 3: Cloud-Hosted Solution
* Use managed Qdrant Cloud service
* Pay per usage
* Managed infrastructure
* Automatic updates and maintenance
## Decision
We will use Docker for running Qdrant. This decision is based on several factors:
1. **Development Environment**: Docker provides consistent environment across all developer machines
2. **Easy Setup**: Simple `docker run` command to get started
3. **Resource Isolation**: Container ensures clean resource management
4. **Version Control**: Easy version management through Docker tags
5. **Production Ready**: Same container can be used in production
6. **Scaling**: Can be deployed to Kubernetes when needed
## Expected Consequences
### Positive Consequences
* Consistent environment across development and production
* Easy setup process for new developers
* Clean isolation from other system components
* Simple version management
* Clear resource boundaries
* Easy backup and restore procedures
* Portable across different platforms
### Negative Consequences
* Additional Docker knowledge required
* Small performance overhead from containerization
* Need to manage container resources carefully
* Additional complexity in monitoring setup
## Pros and Cons of the Options
### Docker Container
* ✅ Consistent environment
* ✅ Easy setup and teardown
* ✅ Good isolation
* ✅ Version control
* ✅ Production ready
* ❌ Container overhead
* ❌ Requires Docker knowledge
### Native Installation
* ✅ Direct system access
* ✅ No containerization overhead
* ✅ Full control over configuration
* ❌ System-dependent setup
* ❌ Potential conflicts with system packages
* ❌ More complex version management
### Cloud-Hosted Solution
* ✅ No infrastructure management
* ✅ Automatic scaling
* ✅ Managed backups
* ❌ Higher cost
* ❌ Less control
* ❌ Internet dependency
* ❌ Potential latency issues
## Implementation
### Docker Run Command
```bash
docker run -d -p 6333:6333 -p 6334:6334 \
-v $(pwd)/qdrant_storage:/qdrant/storage \
qdrant/qdrant
```
### Docker Compose Configuration
```yaml
version: '3.8'
services:
qdrant:
image: qdrant/qdrant
ports:
- "6333:6333"
- "6334:6334"
volumes:
- qdrant_storage:/qdrant/storage
environment:
- RUST_LOG=info
volumes:
qdrant_storage:
```
## Notes
* Monitor container resource usage in production
* Set up proper backup procedures for the storage volume
* Consider implementing health checks (see the sketch below)
* Document recovery procedures
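A hypothetical healthcheck for the Compose service above (assumes curl is available in the image and that `/health` matches your Qdrant version; adjust as needed):
```yaml
services:
  qdrant:
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:6333/health"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s
```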
## Metadata
* Created: 2025-03-19
* Last Modified: 2025-03-19
* Author: Development Team
* Approvers: Technical Lead, Infrastructure Team
* Status: Accepted
* Tags: infrastructure, database, docker, vector-search
* References:
* [Qdrant Docker Documentation](https://qdrant.tech/documentation/guides/installation/#docker)
* [Docker Best Practices](https://docs.docker.com/develop/develop-images/dockerfile_best-practices/)
```
--------------------------------------------------------------------------------
/tests/components/test_vector_store.py:
--------------------------------------------------------------------------------
```python
import pytest
import pytest_asyncio
import uuid
import sys
import os
from pathlib import Path
from typing import AsyncGenerator, Dict
from fastapi.testclient import TestClient
# Ensure the src directory is in the Python path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
from src.mcp_codebase_insight.core.vector_store import VectorStore
from src.mcp_codebase_insight.core.config import ServerConfig
from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding
import logging
logger = logging.getLogger(__name__)
@pytest_asyncio.fixture
async def test_metadata() -> Dict:
"""Standard test metadata for consistency across tests."""
return {
"type": "code",
"language": "python",
"title": "Test Code",
"description": "Test code snippet for vector store testing",
"tags": ["test", "vector"]
}
@pytest_asyncio.fixture
async def embedder():
return SentenceTransformerEmbedding()
@pytest_asyncio.fixture
async def vector_store(test_config: ServerConfig, embedder):
store = VectorStore(test_config.qdrant_url, embedder)
await store.initialize()
yield store
await store.cleanup()
@pytest.mark.asyncio
async def test_vector_store_initialization(vector_store: VectorStore):
"""Test that vector store initializes correctly."""
assert vector_store is not None
assert vector_store.embedder is not None
assert vector_store.client is not None
assert vector_store.initialized is True
logger.info("Vector store initialization test passed")
@pytest.mark.asyncio
async def test_vector_store_add_and_search(vector_store: VectorStore, test_metadata: Dict):
"""Test adding and searching vectors."""
# Test data
test_text = "Test code snippet with unique identifier"
# Add vector
logger.info("Adding vector to store")
vector_id = await vector_store.add_vector(test_text, test_metadata)
assert vector_id is not None
# Search for similar vectors
logger.info("Searching for similar vectors")
results = await vector_store.search_similar(test_text, limit=1)
assert len(results) > 0
# Use get() with default value for safety
assert results[0].metadata.get("type", "unknown") == "code"
# Log metadata for debugging
logger.info(f"Original metadata: {test_metadata}")
logger.info(f"Retrieved metadata: {results[0].metadata}")
# Verify all expected metadata fields are present
missing_keys = []
for key in test_metadata:
if key not in results[0].metadata:
missing_keys.append(key)
assert not missing_keys, f"Metadata is missing expected keys: {missing_keys}"
logger.info("Vector store add and search test passed")
@pytest.mark.asyncio
async def test_vector_store_cleanup(test_config: ServerConfig, embedder: SentenceTransformerEmbedding):
"""Test that cleanup works correctly."""
# Use the configured collection name for this test
# This ensures we're using the properly initialized collection
collection_name = os.environ.get("MCP_COLLECTION_NAME", test_config.collection_name)
store = VectorStore(
test_config.qdrant_url,
embedder,
collection_name=collection_name
)
logger.info(f"Initializing vector store with collection {collection_name}")
await store.initialize()
assert store.initialized is True
# Add a vector to verify there's something to clean up
await store.add_vector("Test cleanup text", {"type": "test"})
# Now clean up
logger.info(f"Cleaning up vector store with collection {collection_name}")
await store.cleanup()
# Verify the store is no longer initialized
assert store.initialized is False
# Clean up remaining resources
await store.close()
logger.info("Vector store cleanup test passed")
```
--------------------------------------------------------------------------------
/system-card.yml:
--------------------------------------------------------------------------------
```yaml
name: MCP Codebase Insight
version: 0.1.0
description: A system for analyzing and understanding codebases through semantic analysis, pattern detection, and documentation management.
poc_scope:
- Vector-based code analysis and similarity search
- Pattern detection and knowledge base operations
- Dual-transport architecture (SSE and stdio)
- Task management and tracking
- Memory operations and persistence
environment:
requirements:
python: ">=3.11"
docker: ">=20.10.0"
ram_gb: 4
cpu_cores: 2
disk_space_gb: 20
dependencies:
core:
- mcp-firecrawl
- httpx-sse
- python-frontmatter
- qdrant-client>=1.13.3
- fastapi>=0.115.12
- numpy>=2.2.4
transport:
- mcp-transport
- mcp-stdio
- mcp-sse
development:
- pytest
- black
- isort
- mypy
- pip-tools
- bump2version
configuration:
env_vars:
required:
- QDRANT_HOST
- QDRANT_PORT
- API_KEY
- TRANSPORT_MODE
optional:
- DEBUG_MODE
- LOG_LEVEL
files:
required:
- .env
- docker-compose.yml
optional:
- .env.local
setup:
steps:
1_environment:
- Create and activate Python virtual environment
- Install dependencies from requirements.txt
- Copy .env.example to .env and configure
2_services:
- Start Docker
- Run docker-compose up for Qdrant
- Wait for services to be ready
3_validation:
- Run main PoC validation script
- Check individual component validations if needed
- Verify transport configurations
validation:
scripts:
main:
path: scripts/validate_poc.py
description: "Main validation script that orchestrates all component checks"
components:
vector_store:
path: scripts/validate_vector_store.py
description: "Validates vector store operations and search functionality"
knowledge_base:
path: scripts/validate_knowledge_base.py
description: "Tests knowledge base operations and entity relations"
transport:
description: "Transport validation is included in the main PoC script"
health_checks:
services:
qdrant:
endpoint: http://localhost:6333/health
method: GET
api:
endpoint: http://localhost:8000/health
method: GET
headers:
Authorization: "Bearer ${API_KEY}"
functional_checks:
vector_store:
- Test vector operations with sample code
- Validate embedding dimensions
- Verify search functionality
knowledge_base:
- Create and verify test entities
- Test entity relations
- Validate query operations
transport:
sse:
- Verify event stream connection
- Test bidirectional communication
- Check error handling
stdio:
- Verify process communication
- Test command execution
- Validate response format
troubleshooting:
environment:
- Check Python and Docker versions
- Verify system resources
- Validate dependency installation
services:
- Check Docker container status
- View service logs
- Verify port availability
transport:
- Test SSE endpoint connectivity
- Verify stdio binary functionality
- Check authentication configuration
data:
- Verify Qdrant collection status
- Check knowledge base connectivity
- Test data persistence
metrics:
collection:
- System resource usage
- Request latency
- Transport performance
- Operation success rates
monitoring:
- Component health status
- Error rates and types
- Resource utilization
- Transport switching events
documentation:
references:
- docs/system_architecture/README.md
- docs/api/README.md
- docs/adrs/006_transport_protocols.md
- docs/development/README.md
```
--------------------------------------------------------------------------------
/examples/use_with_claude.py:
--------------------------------------------------------------------------------
```python
"""Example of using MCP Codebase Insight with Claude."""
import json
import httpx
import os
from typing import Any, Dict, Optional
import asyncio
# Configure server URL
SERVER_URL = os.getenv("MCP_SERVER_URL", "http://localhost:3000")
async def call_tool(name: str, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Call a tool endpoint on the server."""
async with httpx.AsyncClient() as client:
response = await client.post(
f"{SERVER_URL}/tools/{name}",
json={
"name": name,
"arguments": arguments
}
)
response.raise_for_status()
return response.json()
async def analyze_code(code: str, context: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""Analyze code using the server."""
return await call_tool("analyze-code", {
"code": code,
"context": context or {}
})
async def search_knowledge(query: str, pattern_type: Optional[str] = None) -> Dict[str, Any]:
"""Search knowledge base."""
return await call_tool("search-knowledge", {
"query": query,
"type": pattern_type,
"limit": 5
})
async def create_adr(
title: str,
context: Dict[str, Any],
options: list,
decision: str
) -> Dict[str, Any]:
"""Create an ADR."""
return await call_tool("create-adr", {
"title": title,
"context": context,
"options": options,
"decision": decision
})
async def debug_issue(
description: str,
    issue_type: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""Debug an issue."""
return await call_tool("debug-issue", {
"description": description,
"type": issue_type,
"context": context or {}
})
async def get_task_status(task_id: str) -> Dict[str, Any]:
"""Get task status and results."""
return await call_tool("get-task", {
"task_id": task_id
})
async def main():
"""Example usage."""
try:
# Example code analysis
code = """
def calculate_fibonacci(n: int) -> int:
if n <= 1:
return n
return calculate_fibonacci(n-1) + calculate_fibonacci(n-2)
"""
print("\nAnalyzing code...")
result = await analyze_code(code)
print(json.dumps(result, indent=2))
# Example knowledge search
print("\nSearching knowledge base...")
result = await search_knowledge(
query="What are the best practices for error handling in Python?",
pattern_type="code"
)
print(json.dumps(result, indent=2))
# Example ADR creation
print("\nCreating ADR...")
result = await create_adr(
title="Use FastAPI for REST API",
context={
"problem": "Need a modern Python web framework",
"constraints": ["Must be async", "Must have good documentation"]
},
options=[
{
"title": "FastAPI",
"pros": ["Async by default", "Great docs", "Type hints"],
"cons": ["Newer framework"]
},
{
"title": "Flask",
"pros": ["Mature", "Simple"],
"cons": ["Not async by default"]
}
],
decision="We will use FastAPI for its async support and type hints"
)
print(json.dumps(result, indent=2))
# Example debugging
print("\nDebugging issue...")
result = await debug_issue(
description="Application crashes when processing large files",
issue_type="performance",
context={
"file_size": "2GB",
"memory_usage": "8GB",
"error": "MemoryError"
}
)
print(json.dumps(result, indent=2))
except Exception as e:
print(f"Error: {e}")
if __name__ == "__main__":
asyncio.run(main())
```
--------------------------------------------------------------------------------
/system-architecture.md:
--------------------------------------------------------------------------------
```markdown
# System Architecture - MCP Codebase Insight
This document outlines the system architecture of the MCP Codebase Insight project using various diagrams to illustrate different aspects of the system.
## High-Level System Architecture
```mermaid
graph TB
Client[Client Applications] --> API[FastAPI Server]
API --> Core[Core Services]
subgraph Core Services
CodeAnalysis[Code Analysis Service]
ADR[ADR Management]
Doc[Documentation Service]
Knowledge[Knowledge Base]
Debug[Debug System]
Metrics[Metrics & Health]
Cache[Caching System]
end
Core --> VectorDB[(Qdrant Vector DB)]
Core --> FileSystem[(File System)]
CodeAnalysis --> VectorDB
Knowledge --> VectorDB
ADR --> FileSystem
Doc --> FileSystem
```
## Component Relationships
```mermaid
graph LR
subgraph Core Components
Embeddings[Embeddings Service]
VectorStore[Vector Store Service]
Knowledge[Knowledge Service]
Tasks[Tasks Service]
Prompts[Prompts Service]
Debug[Debug Service]
Health[Health Service]
Config[Config Service]
Cache[Cache Service]
end
Embeddings --> VectorStore
Knowledge --> VectorStore
Knowledge --> Embeddings
Tasks --> Knowledge
Debug --> Knowledge
Prompts --> Tasks
Health --> Cache
%% Instead of linking to "Core Components", link to each node individually
Config --> Embeddings
Config --> VectorStore
Config --> Knowledge
Config --> Tasks
Config --> Prompts
Config --> Debug
Config --> Health
Config --> Cache
```
## Data Flow Architecture
```mermaid
sequenceDiagram
participant Client
participant API
participant Knowledge
participant Embeddings
participant VectorStore
participant Cache
Client->>API: Request Analysis
API->>Cache: Check Cache
alt Cache Hit
Cache-->>API: Return Cached Result
else Cache Miss
API->>Knowledge: Process Request
Knowledge->>Embeddings: Generate Embeddings
Embeddings->>VectorStore: Store/Query Vectors
VectorStore-->>Knowledge: Vector Results
Knowledge-->>API: Analysis Results
API->>Cache: Store Results
API-->>Client: Return Results
end
```
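The cache-aside behavior in the diagram reduces to a few lines of code. A minimal sketch with hypothetical `cache` and `knowledge` interfaces (not the project's actual API):
```python
async def analyze(request_key: str, cache, knowledge) -> dict:
    """Cache-aside lookup: serve from cache, else compute and store."""
    cached = await cache.get(request_key)
    if cached is not None:
        return cached          # cache hit: skip the analysis pipeline
    result = await knowledge.process(request_key)  # cache miss: full pipeline
    await cache.put(request_key, result)
    return result
```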
## Directory Structure
```mermaid
graph TD
Root[mcp-codebase-insight] --> Src[src/]
Root --> Tests[tests/]
Root --> Docs[docs/]
Root --> Scripts[scripts/]
Root --> Knowledge[knowledge/]
Src --> Core[core/]
Src --> Utils[utils/]
Core --> Components{Core Components}
Components --> ADR[adr.py]
Components --> Cache[cache.py]
Components --> Config[config.py]
Components --> Debug[debug.py]
Components --> Doc[documentation.py]
Components --> Embed[embeddings.py]
Components --> Know[knowledge.py]
Components --> Vector[vector_store.py]
Knowledge --> Patterns[patterns/]
Knowledge --> Tasks[tasks/]
Knowledge --> Prompts[prompts/]
```
## Security and Authentication Flow
```mermaid
graph TD
Request[Client Request] --> Auth[Authentication Layer]
Auth --> Validation[Request Validation]
Validation --> RateLimit[Rate Limiting]
RateLimit --> Processing[Request Processing]
subgraph Security Measures
Auth
Validation
RateLimit
Logging[Audit Logging]
end
Processing --> Logging
Processing --> Response[API Response]
```
This architecture documentation illustrates the main components and their interactions within the MCP Codebase Insight system. The system is designed to be modular, scalable, and maintainable, with clear separation of concerns between different components.
Key architectural decisions:
1. Use of FastAPI for high-performance API endpoints
2. Vector database (Qdrant) for efficient similarity search
3. Modular core services for different functionalities
4. Caching layer for improved performance
5. Clear separation between data storage and business logic
6. Comprehensive security measures
7. Structured knowledge management system
```
--------------------------------------------------------------------------------
/docs/documentation_summary.md:
--------------------------------------------------------------------------------
```markdown
# MCP Codebase Insight Documentation Structure
## Architecture Decision Records (ADRs)
### Testing Strategy (ADR-0001)
Core decisions about testing infrastructure, focusing on:
- Server management and startup
- Test client configuration
- SSE testing approach
Implemented by:
- `tests.integration.test_sse.test_server_instance`
- `tests.integration.test_sse.test_client`
- `src.mcp_codebase_insight.server.lifespan`
### SSE Testing Strategy (ADR-0002)
Detailed approach to testing Server-Sent Events, covering:
- Connection management
- Event handling
- Test patterns
Implemented by:
- `tests.framework.sse.SSETestManager`
- `tests.integration.test_sse.test_sse_message_flow`
### Comprehensive Testing Strategy (ADR-0003)
Framework for testing all components:
- Server testing framework
- SSE test management
- Test client configuration
- Integration patterns
Implemented by:
- `tests.framework.server.ServerTestFramework`
- `tests.framework.sse.SSETestManager`
- `tests.conftest.configured_test_client`
### Documentation Linking Strategy (ADR-0004)
System for maintaining documentation-code relationships:
- Documentation node management
- Code element tracking
- Link validation
Implemented by:
- `src.mcp_codebase_insight.documentation.models.DocNode`
- `src.mcp_codebase_insight.documentation.models.DocumentationMap`
- `src.mcp_codebase_insight.documentation.loader.DocLoader`
## Feature Documentation
### Code Analysis
Overview of code analysis capabilities:
- Pattern detection
- Quality analysis
- Dependency tracking
Implemented by:
- `src.mcp_codebase_insight.analysis`
### ADR Management
Tools for managing Architecture Decision Records:
- ADR creation
- Status tracking
- Implementation linking
Implemented by:
- `src.mcp_codebase_insight.adr`
### Documentation Management
Documentation tooling and processes:
- Documentation-code linking
- Validation tools
- Generation utilities
Implemented by:
- `src.mcp_codebase_insight.documentation`
- `src.mcp_codebase_insight.documentation.annotations`
## Testing Documentation
### Server Testing
Framework and patterns for server testing:
- Server lifecycle management
- Health checking
- Configuration testing
Implemented by:
- `tests.framework.server.ServerTestFramework`
- `tests.conftest.configured_test_client`
### SSE Testing
Patterns and tools for SSE testing:
- Connection management
- Event verification
- Integration testing
Implemented by:
- `tests.framework.sse.SSETestManager`
- `tests.integration.test_sse.test_sse_connection`
- `tests.integration.test_sse.test_sse_message_flow`
## Key Components
### Server Framework
- Server configuration and lifecycle management
- Health check endpoints
- SSE infrastructure
Key files:
- `src.mcp_codebase_insight.server.ServerConfig`
- `src.mcp_codebase_insight.server.lifespan`
### Testing Framework
- Test client configuration
- Server test fixtures
- SSE test utilities
Key files:
- `tests.framework.server.ServerTestFramework`
- `tests.framework.sse.SSETestManager`
- `tests.conftest.configured_test_client`
### Documentation Tools
- Documentation-code linking
- Validation utilities
- Generation tools
Key files:
- `src.mcp_codebase_insight.documentation.models`
- `src.mcp_codebase_insight.documentation.loader`
- `src.mcp_codebase_insight.documentation.annotations`
## Documentation Coverage
### Well-Documented Areas
1. Testing infrastructure
   - Server testing framework
   - SSE testing components
   - Test client configuration
2. Documentation management
   - Documentation models
   - Loading and validation
   - Code annotations
### Areas Needing More Documentation
1. Code analysis features
   - Implementation details
   - Usage patterns
   - Configuration options
2. ADR management tools
   - CLI interface
   - Template system
   - Integration features
## Next Steps
1. **Documentation Improvements**
   - Add more code examples
   - Create API reference docs
   - Expand configuration guides
2. **Testing Enhancements**
   - Add performance test docs
   - Document error scenarios
   - Create debugging guides
3. **Feature Documentation**
   - Complete code analysis docs
   - Expand ADR management docs
   - Add integration guides
```
--------------------------------------------------------------------------------
/docs/troubleshooting/faq.md:
--------------------------------------------------------------------------------
```markdown
# Frequently Asked Questions
> 🚧 **Documentation In Progress**
>
> This documentation is being actively developed. More details will be added soon.
## General Questions
### What is MCP Codebase Insight?
MCP Codebase Insight is a tool for analyzing and understanding codebases through semantic analysis, pattern detection, and documentation management.
### What are the system requirements?
- Python 3.11 or higher
- 4GB RAM minimum (8GB recommended)
- 2GB free disk space
- Docker (optional, for containerized deployment)
### Which operating systems are supported?
- Linux (Ubuntu 20.04+, CentOS 8+)
- macOS (10.15+)
- Windows 10/11 with WSL2
## Installation
### Do I need to install Qdrant separately?
Yes, Qdrant is required for vector storage. You can install it via Docker (recommended) or from source. See the [Qdrant Setup Guide](../getting-started/qdrant_setup.md).
### Can I use a different vector database?
Currently, only Qdrant is supported. Support for other vector databases may be added in future releases.
### Why am I getting permission errors during installation?
This usually happens when trying to install in system directories. Try:
1. Using a virtual environment
2. Installing with `--user` flag
3. Fixing permissions on the target directories
## Usage
### How do I start analyzing my codebase?
1. Install MCP Codebase Insight
2. Set up Qdrant
3. Configure your environment
4. Run the server
5. Use the API or CLI to analyze your code
### Can I analyze multiple repositories at once?
Yes, you can analyze multiple repositories by:
1. Using batch analysis
2. Creating separate collections
3. Merging results afterward
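For the separate-collections approach, a minimal sketch (the collection names and vector size here are assumptions for illustration):

```python
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams

client = QdrantClient(url="http://localhost:6333")

# One collection per repository keeps each codebase's vectors separable
for repo in ["repo-a", "repo-b"]:
    client.recreate_collection(
        collection_name=f"{repo}_vectors",
        vectors_config=VectorParams(size=384, distance=Distance.COSINE),
    )
```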
### How do I customize the analysis?
You can customize:
- Analysis patterns
- Vector search parameters
- Documentation generation
- Output formats
See the [Configuration Guide](../getting-started/configuration.md).
## Performance
### Why is vector search slow?
Common reasons:
1. Large vector collection
2. Limited memory
3. Network latency
4. Insufficient CPU resources
Solutions:
1. Enable disk storage
2. Adjust batch size
3. Optimize search parameters
4. Scale hardware resources
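For solution 3, search parameters can be relaxed per query. This sketch lowers Qdrant's `hnsw_ef` to trade a little recall for speed (the values and collection name are illustrative starting points, not tuned defaults):

```python
from qdrant_client import QdrantClient
from qdrant_client.models import SearchParams

client = QdrantClient(url="http://localhost:6333")

hits = client.search(
    collection_name="code_vectors",
    query_vector=[0.0] * 384,  # placeholder query vector
    limit=10,
    search_params=SearchParams(hnsw_ef=64),
)
```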
### How much memory do I need?
Memory requirements depend on:
- Codebase size
- Vector collection size
- Batch processing size
- Concurrent operations
Minimum: 4GB RAM
Recommended: 8GB+ RAM
### Can I run it in production?
Yes, but consider:
1. Setting up authentication
2. Configuring CORS
3. Using SSL/TLS
4. Implementing monitoring
5. Setting up backups
## Features
### Does it support my programming language?
Currently supported:
- Python
- JavaScript/TypeScript
- Java
- Go
- Ruby
More languages planned for future releases.
### Can it generate documentation?
Yes, it can:
1. Generate API documentation
2. Create architecture diagrams
3. Maintain ADRs
4. Build knowledge bases
### How does pattern detection work?
Pattern detection uses:
1. Vector embeddings
2. AST analysis
3. Semantic search
4. Machine learning models
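At a high level, steps 1 and 3 combine like this (a sketch; the model and collection names are assumptions):

```python
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient

# Embed a code snippet, then search for semantically similar stored patterns
model = SentenceTransformer("all-MiniLM-L6-v2")
vector = model.encode("def retry(fn, attempts=3): ...").tolist()

client = QdrantClient(url="http://localhost:6333")
for match in client.search(collection_name="code_vectors", query_vector=vector, limit=5):
    print(match.score, match.payload)
```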
## Integration
### Can I integrate with my IDE?
Yes, through:
1. REST API
2. Language Server Protocol
3. Custom extensions
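As a starting point for the REST API route, a hedged sketch (the endpoint path and payload shape follow the project's integration tests, but verify them against your running server):

```python
import requests

response = requests.post(
    "http://localhost:3000/analyze",
    json={"code": "def hello(): return 'world'"},
)
response.raise_for_status()
print(response.json())
```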
### Does it work with CI/CD pipelines?
Yes, you can:
1. Run analysis in CI
2. Generate reports
3. Enforce patterns
4. Update documentation
### Can I use it with existing tools?
Integrates with:
1. Git
2. Documentation generators
3. Code quality tools
4. Issue trackers
## Troubleshooting
### Where are the log files?
Default locations:
- Server logs: `./logs/server.log`
- Access logs: `./logs/access.log`
- Debug logs: `./logs/debug.log`
### How do I report bugs?
1. Check [existing issues](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues)
2. Create new issue with:
   - Clear description
   - Steps to reproduce
   - System information
   - Log files
### How do I get support?
Support options:
1. [Documentation](../README.md)
2. [GitHub Issues](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues)
3. [Discussion Forum](https://github.com/modelcontextprotocol/mcp-codebase-insight/discussions)
4. [Discord Community](https://discord.gg/mcp-codebase-insight)
## Next Steps
- [Common Issues](common-issues.md)
- [Installation Guide](../getting-started/installation.md)
- [Configuration Guide](../getting-started/configuration.md)
```
--------------------------------------------------------------------------------
/docs/getting-started/docker-setup.md:
--------------------------------------------------------------------------------
```markdown
# Docker Setup Guide
This guide covers how to set up and run MCP Codebase Insight using Docker.
## Prerequisites
- Docker installed on your system
- Basic knowledge of Docker commands
- Qdrant vector database accessible from your Docker network (required)
## Running Qdrant Container
MCP Codebase Insight requires a running Qdrant instance. Make sure to start Qdrant before running the MCP container:
```bash
# Create a directory for Qdrant data
mkdir -p qdrant_data
# Pull and run Qdrant container
docker pull qdrant/qdrant
docker run -d --name qdrant \
    -p 6333:6333 -p 6334:6334 \
    -v $(pwd)/qdrant_data:/qdrant/storage \
    qdrant/qdrant
```
You can access the Qdrant web UI at http://localhost:6333/dashboard to verify it's running correctly (6333 is the HTTP/REST port; 6334 is the gRPC port).
## Quick Start with Docker
```bash
# Pull the image
docker pull tosin2013/mcp-codebase-insight

# Run the container
docker run -p 3000:3000 \
    --env-file .env \
    -v $(pwd)/docs:/app/docs \
    -v $(pwd)/knowledge:/app/knowledge \
    tosin2013/mcp-codebase-insight
```
## Creating a .env File for Docker
Create a `.env` file in your project directory with the following content:
```
MCP_HOST=0.0.0.0
MCP_PORT=3000
MCP_LOG_LEVEL=INFO
QDRANT_URL=http://host.docker.internal:6333
MCP_DOCS_CACHE_DIR=/app/docs
MCP_ADR_DIR=/app/docs/adrs
MCP_KB_STORAGE_DIR=/app/knowledge
MCP_DISK_CACHE_DIR=/app/cache
```
> **Note:** When using Docker, the host is set to `0.0.0.0` to allow connections from outside the container. If your Qdrant instance is running on the host machine, use `host.docker.internal` instead of `localhost`.
## Volume Mounts
The Docker command mounts several directories from your host system into the container:
- `$(pwd)/docs:/app/docs`: Maps your local docs directory to the container's docs directory
- `$(pwd)/knowledge:/app/knowledge`: Maps your local knowledge directory to the container's knowledge directory
Make sure these directories exist on your host system before running the container:
```bash
mkdir -p docs/adrs knowledge
```
## Using Docker Compose
For a more manageable setup, you can use Docker Compose. Create a `docker-compose.yml` file in your project directory:
```yaml
version: '3'

services:
  mcp-codebase-insight:
    image: tosin2013/mcp-codebase-insight
    ports:
      - "3000:3000"
    volumes:
      - ./docs:/app/docs
      - ./knowledge:/app/knowledge
      - ./cache:/app/cache
    env_file:
      - .env
    networks:
      - mcp-network

  qdrant:
    image: qdrant/qdrant
    ports:
      - "6333:6333"
    volumes:
      - ./qdrant_data:/qdrant/storage
    networks:
      - mcp-network

networks:
  mcp-network:
    driver: bridge
```
Then start the services (when using Compose, point `QDRANT_URL` at the service name, e.g. `http://qdrant:6333`, in your `.env`):
```bash
docker-compose up -d
```
## Advanced Docker Configuration
### Using Custom Embedding Models
To use a custom embedding model, add the model path to your volume mounts and update the environment configuration:
```bash
docker run -p 3000:3000 \
    --env-file .env \
    -v $(pwd)/docs:/app/docs \
    -v $(pwd)/knowledge:/app/knowledge \
    -v $(pwd)/models:/app/models \
    -e MCP_EMBEDDING_MODEL=/app/models/custom-model \
    tosin2013/mcp-codebase-insight
```
### Securing Your Docker Deployment
For production environments:
1. Use Docker networks to isolate the MCP and Qdrant services
2. Don't expose the Qdrant port to the public internet
3. Set up proper authentication for both services
4. Use Docker secrets for sensitive information
5. Consider using a reverse proxy with HTTPS for the API
## Troubleshooting Docker Issues
### Connection Refused to Qdrant
If you're getting connection errors to Qdrant, check:
- Is Qdrant running? (`docker ps | grep qdrant`)
- Is the URL correct in the `.env` file?
- Are both services on the same Docker network?
- Try using the service name instead of `host.docker.internal` if using Docker Compose
### Container Exits Immediately
If the container exits immediately:
- Check the Docker logs: `docker logs <container_id>`
- Ensure all required environment variables are set
- Verify that the mounted directories have correct permissions
### Out of Memory Errors
If you encounter out of memory errors:
- Increase the memory limit for the container
- Reduce the vector dimension or batch size in your configuration
- Consider using a more efficient embedding model
```
--------------------------------------------------------------------------------
/docs/troubleshooting/common-issues.md:
--------------------------------------------------------------------------------
```markdown
# Troubleshooting Guide
> 🚧 **Documentation In Progress**
>
> This documentation is being actively developed. More details will be added soon.
## Common Issues
### Installation Issues
#### 1. Dependencies Installation Fails
```bash
Error: Failed building wheel for sentence-transformers
```
**Solution:**
```bash
# Update pip and install wheel
pip install --upgrade pip
pip install wheel
# Try installing with specific version
pip install sentence-transformers==2.2.2
# If still failing, install system dependencies
# Ubuntu/Debian:
sudo apt-get install python3-dev build-essential
# CentOS/RHEL:
sudo yum install python3-devel gcc
```
#### 2. Permission Denied
```bash
PermissionError: [Errno 13] Permission denied: '/usr/local/lib/python3.11/site-packages'
```
**Solution:**
```bash
# Install in user space
pip install --user mcp-codebase-insight
# Or fix directory permissions
sudo chown -R $USER:$USER venv/
```
### Server Issues
#### 1. Port Already in Use
```bash
[Errno 48] Address already in use
```
**Solution:**
```bash
# Find process using the port
lsof -i :3000 # On Linux/macOS
netstat -ano | findstr :3000 # On Windows
# Kill the process
kill -9 <PID>
# Or use a different port
mcp-codebase-insight --port 3001
```
#### 2. Server Won't Start
```bash
ERROR: [Errno 2] No such file or directory: './docs'
```
**Solution:**
```bash
# Create required directories
mkdir -p docs/adrs knowledge cache
# Fix permissions
chmod -R 755 docs knowledge cache
```
### Vector Store Issues
#### 1. Qdrant Connection Failed
```bash
ConnectionError: Failed to connect to Qdrant server
```
**Solution:**
```bash
# Check if Qdrant is running
curl http://localhost:6333/healthz
# Start Qdrant if not running
docker start qdrant
# Verify environment variable
echo $QDRANT_URL
# Should be: http://localhost:6333
```
#### 2. Collection Creation Failed
```bash
Error: Collection 'code_vectors' already exists
```
**Solution:**
```bash
# List existing collections
curl http://localhost:6333/collections
# Delete existing collection if needed
curl -X DELETE http://localhost:6333/collections/code_vectors
# Create new collection with correct parameters
python -c "
from qdrant_client import QdrantClient
client = QdrantClient('localhost', port=6333)
client.recreate_collection(
collection_name='code_vectors',
vectors_config={'size': 384, 'distance': 'Cosine'}
)
"
```
### Memory Issues
#### 1. Out of Memory
```bash
MemoryError: Unable to allocate array with shape (1000000, 384)
```
**Solution:**
```yaml
# Adjust batch size in config.yaml
vector_store:
  batch_size: 100  # Reduce from default

# Or set environment variable
export MCP_BATCH_SIZE=100
```
#### 2. Slow Performance
```bash
WARNING: Vector search taking longer than expected
```
**Solution:**
```yaml
# Enable disk storage in config.yaml
vector_store:
  on_disk: true

# Adjust cache size
performance:
  cache_size: 1000
```
### Documentation Issues
#### 1. Documentation Map Failed
```bash
Error: Unable to create documentation map: Invalid directory structure
```
**Solution:**
```bash
# Verify directory structure
tree docs/
# Create required structure
mkdir -p docs/{adrs,api,components}
touch docs/index.md
```
#### 2. Search Not Working
```bash
Error: Search index not found
```
**Solution:**
```bash
# Rebuild search index
curl -X POST http://localhost:3000/api/docs/rebuild-index
# Verify index exists
ls -l docs/.search_index
```
## Debugging Tips
### 1. Enable Debug Logging
```bash
# Set environment variable
export MCP_LOG_LEVEL=DEBUG
# Or use command line flag
mcp-codebase-insight --debug
```
### 2. Check System Resources
```bash
# Check memory usage
free -h
# Check disk space
df -h
# Check CPU usage
top
```
### 3. Verify Configuration
```bash
# Print current config
mcp-codebase-insight show-config
# Validate config file
mcp-codebase-insight validate-config --config config.yaml
```
## Getting Help
If you're still experiencing issues:
1. Check the [GitHub Issues](https://github.com/modelcontextprotocol/mcp-codebase-insight/issues)
2. Join our [Discussion Forum](https://github.com/modelcontextprotocol/mcp-codebase-insight/discussions)
3. Review the [FAQ](faq.md)
4. Contact Support:
   - Discord: [Join Server](https://discord.gg/mcp-codebase-insight)
   - Email: [email protected]
## Next Steps
- [Installation Guide](../getting-started/installation.md)
- [Configuration Guide](../getting-started/configuration.md)
- [Development Guide](../development/README.md)
```
--------------------------------------------------------------------------------
/docs/getting-started/configuration.md:
--------------------------------------------------------------------------------
```markdown
# Configuration Guide
> 🚧 **Documentation In Progress**
>
> This documentation is being actively developed. More details will be added soon.
## Configuration Methods
MCP Codebase Insight can be configured through:
1. Environment variables
2. Configuration file
3. Command-line arguments
Priority order (highest to lowest):
1. Command-line arguments
2. Environment variables
3. Configuration file
4. Default values
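In code, this chain behaves like the sketch below (option and variable names are illustrative, not the project's actual loader):

```python
import os

def resolve(option, cli_args, file_config, defaults):
    """Return the first value found, walking the precedence chain."""
    if option in cli_args:                    # 1. command-line arguments
        return cli_args[option]
    env_key = f"MCP_{option.upper()}"
    if env_key in os.environ:                 # 2. environment variables
        return os.environ[env_key]
    if option in file_config:                 # 3. configuration file
        return file_config[option]
    return defaults[option]                   # 4. default values

# e.g. resolve("port", cli_args={}, file_config={}, defaults={"port": 3000}) -> 3000
```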
## Environment Variables
### Required Variables
```bash
# Server Configuration
MCP_HOST=127.0.0.1
MCP_PORT=3000
# Vector Store
QDRANT_URL=http://localhost:6333
# Storage Paths
MCP_DOCS_CACHE_DIR=./docs
MCP_ADR_DIR=./docs/adrs
MCP_KB_STORAGE_DIR=./knowledge
MCP_DISK_CACHE_DIR=./cache
```
### Optional Variables
```bash
# Logging
MCP_LOG_LEVEL=INFO
MCP_LOG_FORMAT=json
# Performance
MCP_CACHE_SIZE=1000
MCP_WORKER_COUNT=4
```
## Configuration File
Create `config.yaml` in your project root:
```yaml
server:
  host: 127.0.0.1
  port: 3000
  workers: 4

vector_store:
  url: http://localhost:6333
  collection: code_vectors

storage:
  docs_cache: ./docs
  adr_dir: ./docs/adrs
  kb_storage: ./knowledge
  disk_cache: ./cache

logging:
  level: INFO
  format: json
```
## Command-line Arguments
```bash
mcp-codebase-insight --help
Options:
  --host TEXT           Server host address
  --port INTEGER        Server port
  --log-level TEXT      Logging level
  --debug               Enable debug mode
  --config PATH         Path to config file
  --qdrant-url TEXT     Qdrant server URL
  --docs-dir PATH       Documentation directory
  --adr-dir PATH        ADR directory
  --kb-dir PATH         Knowledge base directory
  --cache-dir PATH      Cache directory
  --workers INTEGER     Number of workers
  --batch-size INTEGER  Batch size
  --timeout INTEGER     Request timeout
  --auth                Enable authentication
  --metrics             Enable metrics
  --help                Show this message and exit
```
## Feature-specific Configuration
### 1. Vector Store Configuration
```yaml
vector_store:
  # Embedding model settings
  model:
    name: all-MiniLM-L6-v2
    dimension: 384
    normalize: true

  # Collection settings
  collection:
    name: mcp_vectors
    distance: Cosine
    on_disk: false

  # Search settings
  search:
    limit: 10
    threshold: 0.75
```
### 2. Documentation Management
```yaml
documentation:
  # Auto-generation settings
  auto_generate: true
  min_confidence: 0.8

  # Crawling settings
  crawl:
    max_depth: 3
    timeout: 30
    exclude_patterns: ["*.git*", "node_modules"]

  # Storage settings
  storage:
    format: markdown
    index_file: _index.md
```
### 3. ADR Management
```yaml
adr:
  # Template settings
  template_dir: templates/adr
  default_template: default.md

  # Workflow settings
  require_approval: true
  auto_number: true

  # Storage settings
  storage:
    format: markdown
    naming: date-title
```
## Environment-specific Configurations
### Development
```yaml
debug: true
log_level: DEBUG

metrics:
  enabled: false

vector_store:
  on_disk: false
```
### Production
```yaml
debug: false
log_level: INFO

security:
  auth_enabled: true
  allowed_origins: ["https://your-domain.com"]

metrics:
  enabled: true

vector_store:
  on_disk: true
```
### Testing
```yaml
debug: true
log_level: DEBUG

vector_store:
  collection_name: test_vectors

storage:
  docs_cache_dir: ./test/docs
```
## Best Practices
1. **Security**
   - Always enable authentication in production
   - Use environment variables for sensitive values
   - Restrict CORS origins in production
2. **Performance**
   - Adjust worker count based on CPU cores
   - Enable disk storage for large vector collections
   - Configure appropriate batch sizes
3. **Monitoring**
   - Enable metrics in production
   - Set appropriate log levels
   - Configure health check endpoints
4. **Storage**
   - Use absolute paths in production
   - Implement backup strategies
   - Monitor disk usage
## Validation
To validate your configuration:
```bash
mcp-codebase-insight validate-config --config config.yaml
```
## Troubleshooting
Common configuration issues and solutions:
1. **Permission Denied**
   ```bash
   # Fix directory permissions
   chmod -R 755 docs knowledge cache
   ```
2. **Port Already in Use**
   ```bash
   # Use different port
   export MCP_PORT=3001
   ```
3. **Memory Issues**
   ```yaml
   # Adjust batch size
   performance:
     batch_size: 50
   ```
## Next Steps
- [Quick Start Guide](quickstart.md)
- [API Reference](../api.md)
- [Development Guide](../development/README.md)
```
--------------------------------------------------------------------------------
/.github/agents/TestAgent.agent.md:
--------------------------------------------------------------------------------
```markdown
# Test Agent
You are a specialized testing agent for the MCP Codebase Insight project. Your expertise is in writing, running, and debugging tests for this async Python codebase.
## Your Responsibilities
1. **Write Tests**: Create comprehensive test cases for new features and bug fixes
2. **Run Tests**: Execute tests using the custom test runner with proper isolation
3. **Debug Test Failures**: Analyze and fix failing tests, especially async/event loop issues
4. **Test Coverage**: Ensure new code has adequate test coverage
## Critical Knowledge
### Test Runner Usage
**ALWAYS use `./run_tests.py`** - Never use plain `pytest` directly.
```bash
# Run all tests with isolation and coverage
./run_tests.py --all --clean --isolated --coverage
# Run specific categories
./run_tests.py --component --isolated # Component tests
./run_tests.py --integration --isolated # Integration tests
./run_tests.py --api --isolated # API endpoint tests
# Run specific test
./run_tests.py --test test_vector_store_initialization
./run_tests.py --file tests/components/test_cache.py
```
**Why custom runner?**: Event loop conflicts between test modules require special isolation handling.
### Test Structure
- **Component tests** (`tests/components/`): Single service unit tests
- **Integration tests** (`tests/integration/`): Multi-component workflow tests
- **Config tests** (`tests/config/`): Configuration and environment tests
- **API tests** (`tests/integration/test_api_endpoints.py`): FastAPI endpoint tests
### Test Fixtures (from `conftest.py`)
Key fixtures available:
- `event_loop`: Session-scoped event loop (process-specific)
- `test_config`: ServerConfig with test defaults
- `vector_store`: Initialized VectorStore instance
- `cache_manager`: CacheManager with test config
- `embedder`: SentenceTransformer embedding provider
### Async Test Patterns
```python
import pytest
import pytest_asyncio

# Async fixture
@pytest_asyncio.fixture
async def my_service():
    service = MyService()
    await service.initialize()
    yield service
    await service.cleanup()

# Async test
@pytest.mark.asyncio
async def test_my_feature(my_service):
    result = await my_service.do_something()
    assert result.status == "success"
```
### Common Test Issues & Solutions
**Event Loop Errors**:
```bash
# Use isolation flags
./run_tests.py --isolated --sequential
```
**Async Fixture Issues**:
- Use `@pytest_asyncio.fixture` for async fixtures
- Use `@pytest.mark.asyncio` for async tests
- Check `conftest.py` for process-specific event loop setup
**Component Initialization**:
```python
# Always check component status before use
assert component.status == ComponentStatus.INITIALIZED
```
**Cleanup Issues**:
```python
# Always clean up in fixtures
try:
    yield component
finally:
    await component.cleanup()
```
## Test Writing Guidelines
1. **Isolation**: Each test should be independent and cleanup after itself
2. **Mocking**: Mock external dependencies (Qdrant, file system when appropriate)
3. **Assertions**: Use descriptive assertions with error messages
4. **Coverage**: Aim for >80% coverage on new code
5. **Performance**: Tests should complete in <5 minutes total
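For guideline 2, external services like Qdrant can be stubbed with `unittest.mock.AsyncMock`. The sketch below uses a hypothetical `SnippetIndexer` defined inline, not a real project class:

```python
from unittest.mock import AsyncMock

import pytest

class SnippetIndexer:
    """Hypothetical component that writes embeddings to a vector store."""
    def __init__(self, vector_store):
        self.vector_store = vector_store

    async def index_snippet(self, code: str):
        return await self.vector_store.add_vector(code, {"type": "code"})

@pytest.mark.asyncio
async def test_indexing_without_real_qdrant():
    """Exercise indexing logic against a mocked vector store."""
    fake_store = AsyncMock()
    fake_store.add_vector.return_value = "vector-id-1"

    result = await SnippetIndexer(fake_store).index_snippet("def hello(): ...")

    assert result == "vector-id-1"
    fake_store.add_vector.assert_awaited_once()
```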
## Example Test Templates
### Component Test
```python
@pytest.mark.asyncio
async def test_cache_stores_and_retrieves(cache_manager):
    """Test cache can store and retrieve values."""
    # Arrange
    key = "test_key"
    value = {"data": "test_value"}

    # Act
    await cache_manager.set(key, value)
    result = await cache_manager.get(key)

    # Assert
    assert result is not None
    assert result["data"] == "test_value"
```
### Integration Test
```python
@pytest.mark.asyncio
async def test_full_analysis_workflow(client, test_config):
    """Test complete analysis workflow from request to response."""
    # Arrange
    code_sample = "def hello(): return 'world'"

    # Act
    response = await client.post("/analyze", json={"code": code_sample})

    # Assert
    assert response.status_code == 200
    data = response.json()
    assert "patterns_found" in data
    assert data["patterns_found"] >= 0
```
## Running Tests in Your Workflow
1. **Before starting**: Run related tests to understand current state
2. **While coding**: Run specific test file to validate changes
3. **Before committing**: Run full test suite with coverage
4. **If tests fail**: Use `--verbose` and check logs for async issues
## Key Files to Reference
- `run_tests.py`: Custom test runner implementation
- `tests/conftest.py`: Test fixtures and event loop management
- `tests/README.test.md`: Testing documentation
- `docs/testing_guide.md`: Comprehensive testing guide
## When to Escalate
- Consistent event loop errors despite isolation flags
- Test failures that only occur in CI/CD but not locally
- Memory leaks or resource warnings during tests
- Tests that require architectural changes to fixtures
```
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/embeddings.py:
--------------------------------------------------------------------------------
```python
"""Text embedding using sentence-transformers."""
from typing import List, Union
import asyncio
import logging
from sentence_transformers import SentenceTransformer
logger = logging.getLogger(__name__)
class SentenceTransformerEmbedding:
"""Text embedding using sentence-transformers."""
def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
"""Initialize embedding model."""
self.model_name = model_name
self.model = None
self.vector_size = None
self.initialized = False
async def initialize(self):
"""Initialize the embedding model."""
if self.initialized:
return
max_retries = 3
retry_delay = 2.0
for attempt in range(max_retries):
try:
# Define the model loading function
def load_model():
logger.debug(f"Loading model {self.model_name}")
model = SentenceTransformer(self.model_name)
vector_size = model.get_sentence_embedding_dimension()
return model, vector_size
# Load the model with a timeout
logger.debug(f"Starting model loading attempt {attempt + 1}/{max_retries}")
model, vector_size = await asyncio.to_thread(load_model)
self.model = model
self.vector_size = vector_size
self.initialized = True
logger.debug(f"Model loaded successfully with vector size {self.vector_size}")
return
except asyncio.TimeoutError:
if attempt < max_retries - 1:
logger.warning(f"Timeout loading model on attempt {attempt + 1}, retrying in {retry_delay}s")
await asyncio.sleep(retry_delay)
retry_delay *= 2
else:
logger.error(f"Failed to load model after {max_retries} attempts")
raise RuntimeError(f"Failed to load embedding model {self.model_name}: Timeout after {max_retries} attempts")
except Exception as e:
logger.error(f"Failed to load embedding model {self.model_name}: {str(e)}")
raise RuntimeError(f"Failed to load embedding model {self.model_name}: {str(e)}")
async def embed(self, text: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
"""Generate embeddings for text."""
if not self.initialized:
await self.initialize()
try:
# Convert single string to list for consistent handling
texts = [text] if isinstance(text, str) else text
# Generate embeddings
embeddings = self.model.encode(
texts,
convert_to_tensor=False, # Return numpy array
normalize_embeddings=True # L2 normalize embeddings
)
# Convert numpy arrays to lists for JSON serialization
if isinstance(text, str):
return embeddings[0].tolist()
return [embedding.tolist() for embedding in embeddings]
except Exception as e:
logger.error(f"Failed to generate embeddings: {str(e)}")
raise RuntimeError(f"Failed to generate embeddings: {str(e)}")
async def embed_batch(self, texts: List[str], batch_size: int = 32) -> List[List[float]]:
"""Generate embeddings for a batch of texts."""
if not self.initialized:
await self.initialize()
try:
# Generate embeddings in batches
all_embeddings = []
for i in range(0, len(texts), batch_size):
batch = texts[i:i + batch_size]
embeddings = self.model.encode(
batch,
convert_to_tensor=False,
normalize_embeddings=True,
batch_size=batch_size
)
all_embeddings.extend(embeddings.tolist())
return all_embeddings
except Exception as e:
logger.error(f"Failed to generate batch embeddings: {str(e)}")
raise RuntimeError(f"Failed to generate batch embeddings: {str(e)}")
async def embed_with_cache(
self,
text: str,
cache_manager = None
) -> List[float]:
"""Generate embeddings with caching."""
if not cache_manager:
return await self.embed(text)
# Try to get from cache
cache_key = f"embedding:{hash(text)}"
cached = cache_manager.get_from_memory(cache_key)
if cached:
return cached
# Generate new embedding
embedding = await self.embed(text)
# Cache the result
cache_manager.put_in_memory(cache_key, embedding)
return embedding
def get_vector_size(self) -> int:
"""Get the size of embedding vectors."""
return self.vector_size
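
# Minimal usage sketch (illustrative; not part of the module's public surface):
if __name__ == "__main__":
    async def _demo():
        embedder = SentenceTransformerEmbedding()
        vector = await embedder.embed("def hello(): return 'world'")
        # all-MiniLM-L6-v2 produces 384-dimensional vectors
        print(len(vector), embedder.get_vector_size())

    asyncio.run(_demo())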
```