This is page 3 of 8. Use http://codebase.md/tosin2013/mcp-codebase-insight?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .bumpversion.cfg
├── .codecov.yml
├── .compile-venv-py3.11
│ ├── bin
│ │ ├── activate
│ │ ├── activate.csh
│ │ ├── activate.fish
│ │ ├── Activate.ps1
│ │ ├── coverage
│ │ ├── coverage-3.11
│ │ ├── coverage3
│ │ ├── pip
│ │ ├── pip-compile
│ │ ├── pip-sync
│ │ ├── pip3
│ │ ├── pip3.11
│ │ ├── py.test
│ │ ├── pyproject-build
│ │ ├── pytest
│ │ ├── python
│ │ ├── python3
│ │ ├── python3.11
│ │ └── wheel
│ └── pyvenv.cfg
├── .env.example
├── .github
│ ├── agents
│ │ ├── DebugAgent.agent.md
│ │ ├── DocAgent.agent.md
│ │ ├── README.md
│ │ ├── TestAgent.agent.md
│ │ └── VectorStoreAgent.agent.md
│ ├── copilot-instructions.md
│ └── workflows
│ ├── build-verification.yml
│ ├── publish.yml
│ └── tdd-verification.yml
├── .gitignore
├── async_fixture_wrapper.py
├── CHANGELOG.md
├── CLAUDE.md
├── codebase_structure.txt
├── component_test_runner.py
├── CONTRIBUTING.md
├── core_workflows.txt
├── create_release_issues.sh
├── debug_tests.md
├── Dockerfile
├── docs
│ ├── adrs
│ │ └── 001_use_docker_for_qdrant.md
│ ├── api.md
│ ├── components
│ │ └── README.md
│ ├── cookbook.md
│ ├── development
│ │ ├── CODE_OF_CONDUCT.md
│ │ ├── CONTRIBUTING.md
│ │ └── README.md
│ ├── documentation_map.md
│ ├── documentation_summary.md
│ ├── features
│ │ ├── adr-management.md
│ │ ├── code-analysis.md
│ │ └── documentation.md
│ ├── getting-started
│ │ ├── configuration.md
│ │ ├── docker-setup.md
│ │ ├── installation.md
│ │ ├── qdrant_setup.md
│ │ └── quickstart.md
│ ├── qdrant_setup.md
│ ├── README.md
│ ├── SSE_INTEGRATION.md
│ ├── system_architecture
│ │ └── README.md
│ ├── templates
│ │ └── adr.md
│ ├── testing_guide.md
│ ├── troubleshooting
│ │ ├── common-issues.md
│ │ └── faq.md
│ ├── vector_store_best_practices.md
│ └── workflows
│ └── README.md
├── error_logs.txt
├── examples
│ └── use_with_claude.py
├── github-actions-documentation.md
├── Makefile
├── module_summaries
│ ├── backend_summary.txt
│ ├── database_summary.txt
│ └── frontend_summary.txt
├── output.txt
├── package-lock.json
├── package.json
├── PLAN.md
├── prepare_codebase.sh
├── PULL_REQUEST.md
├── pyproject.toml
├── pytest.ini
├── README.md
├── requirements-3.11.txt
├── requirements-3.11.txt.backup
├── requirements-dev.txt
├── requirements.in
├── requirements.txt
├── run_build_verification.sh
├── run_fixed_tests.sh
├── run_test_with_path_fix.sh
├── run_tests.py
├── scripts
│ ├── check_qdrant_health.sh
│ ├── compile_requirements.sh
│ ├── load_example_patterns.py
│ ├── macos_install.sh
│ ├── README.md
│ ├── setup_qdrant.sh
│ ├── start_mcp_server.sh
│ ├── store_code_relationships.py
│ ├── store_report_in_mcp.py
│ ├── validate_knowledge_base.py
│ ├── validate_poc.py
│ ├── validate_vector_store.py
│ └── verify_build.py
├── server.py
├── setup_qdrant_collection.py
├── setup.py
├── src
│ └── mcp_codebase_insight
│ ├── __init__.py
│ ├── __main__.py
│ ├── asgi.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── adr.py
│ │ ├── cache.py
│ │ ├── component_status.py
│ │ ├── config.py
│ │ ├── debug.py
│ │ ├── di.py
│ │ ├── documentation.py
│ │ ├── embeddings.py
│ │ ├── errors.py
│ │ ├── health.py
│ │ ├── knowledge.py
│ │ ├── metrics.py
│ │ ├── prompts.py
│ │ ├── sse.py
│ │ ├── state.py
│ │ ├── task_tracker.py
│ │ ├── tasks.py
│ │ └── vector_store.py
│ ├── models.py
│ ├── server_test_isolation.py
│ ├── server.py
│ ├── utils
│ │ ├── __init__.py
│ │ └── logger.py
│ └── version.py
├── start-mcpserver.sh
├── summary_document.txt
├── system-architecture.md
├── system-card.yml
├── test_fix_helper.py
├── test_fixes.md
├── test_function.txt
├── test_imports.py
├── tests
│ ├── components
│ │ ├── conftest.py
│ │ ├── test_core_components.py
│ │ ├── test_embeddings.py
│ │ ├── test_knowledge_base.py
│ │ ├── test_sse_components.py
│ │ ├── test_stdio_components.py
│ │ ├── test_task_manager.py
│ │ └── test_vector_store.py
│ ├── config
│ │ └── test_config_and_env.py
│ ├── conftest.py
│ ├── integration
│ │ ├── fixed_test2.py
│ │ ├── test_api_endpoints.py
│ │ ├── test_api_endpoints.py-e
│ │ ├── test_communication_integration.py
│ │ └── test_server.py
│ ├── README.md
│ ├── README.test.md
│ ├── test_build_verifier.py
│ └── test_file_relationships.py
└── trajectories
└── tosinakinosho
├── anthropic_filemap__claude-3-sonnet-20240229__t-0.00__p-1.00__c-3.00___db62b9
│ └── db62b9
│ └── config.yaml
├── default__claude-3-5-sonnet-20240620__t-0.00__p-1.00__c-3.00___03565e
│ └── 03565e
│ ├── 03565e.traj
│ └── config.yaml
└── default__openrouter
└── anthropic
└── claude-3.5-sonnet-20240620:beta__t-0.00__p-1.00__c-3.00___03565e
└── 03565e
├── 03565e.pred
├── 03565e.traj
└── config.yaml
```
# Files
--------------------------------------------------------------------------------
/tests/components/test_core_components.py:
--------------------------------------------------------------------------------
```python
1 | """Test core server components."""
2 |
3 | import sys
4 | import os
5 |
6 | # Ensure the src directory is in the Python path
7 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
8 |
9 | import pytest
10 | from datetime import datetime
11 | from uuid import uuid4
12 |
13 | from src.mcp_codebase_insight.core.config import ServerConfig
14 | from src.mcp_codebase_insight.core.adr import ADRManager, ADRStatus
15 | from src.mcp_codebase_insight.core.debug import DebugSystem, IssueType, IssueStatus
16 | from src.mcp_codebase_insight.core.documentation import DocumentationManager, DocumentationType
17 | from src.mcp_codebase_insight.core.knowledge import KnowledgeBase, Pattern, PatternType, PatternConfidence
18 | from src.mcp_codebase_insight.core.tasks import TaskManager, TaskType, TaskStatus, TaskPriority
19 | from src.mcp_codebase_insight.core.metrics import MetricsManager, MetricType
20 | from src.mcp_codebase_insight.core.health import HealthManager, HealthStatus
21 | from src.mcp_codebase_insight.core.cache import CacheManager
22 | from src.mcp_codebase_insight.core.vector_store import VectorStore
23 | from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding
24 |
@pytest.mark.asyncio
async def test_adr_manager(test_config: ServerConfig, test_adr: dict):
    """Exercise the ADR manager lifecycle: create, fetch, and update."""
    adr_manager = ADRManager(test_config)

    # Create a new ADR from the fixture data.
    created = await adr_manager.create_adr(
        title=test_adr["title"],
        context=test_adr["context"],
        options=test_adr["options"],
        decision=test_adr["decision"],
    )
    assert created.title == test_adr["title"]
    # New ADRs always start in the PROPOSED state.
    assert created.status == ADRStatus.PROPOSED

    # The ADR should round-trip through get_adr by id.
    fetched = await adr_manager.get_adr(created.id)
    assert fetched is not None
    assert fetched.id == created.id

    # Move it to ACCEPTED and confirm the status change took effect.
    accepted = await adr_manager.update_adr(created.id, status=ADRStatus.ACCEPTED)
    assert accepted.status == ADRStatus.ACCEPTED
52 |
@pytest.mark.asyncio
async def test_knowledge_base(test_config: ServerConfig, qdrant_client):
    """Test knowledge base pattern storage round-trip.

    Note: the previous version constructed a throwaway ``Pattern`` model
    (with a fresh uuid and deprecated ``datetime.utcnow()`` timestamps)
    only to read literal field values back out of it; the literals are
    used directly here instead.
    """
    # Initialize vector store with embedder; KnowledgeBase stores through it.
    embedder = SentenceTransformerEmbedding()
    vector_store = VectorStore(
        url=test_config.qdrant_url,
        embedder=embedder
    )
    kb = KnowledgeBase(test_config, vector_store=vector_store)

    # Expected field values for the stored pattern.
    name = "Test Pattern"
    description = "A test pattern"
    content = "def test(): pass"

    # Test pattern storage.
    stored_pattern = await kb.add_pattern(
        name=name,
        type=PatternType.CODE,
        description=description,
        content=content,
        confidence=PatternConfidence.HIGH
    )

    # Verify the stored pattern echoes back every field.
    assert stored_pattern.name == name
    assert stored_pattern.type == PatternType.CODE
    assert stored_pattern.description == description
    assert stored_pattern.content == content
    assert stored_pattern.confidence == PatternConfidence.HIGH
92 |
@pytest.mark.asyncio
async def test_task_manager(test_config: ServerConfig, test_code: str):
    """Exercise task creation and retrieval through the TaskManager."""
    task_manager = TaskManager(
        config=test_config,
        adr_manager=ADRManager(test_config),
        debug_system=DebugSystem(test_config),
        doc_manager=DocumentationManager(test_config),
        knowledge_base=KnowledgeBase(test_config, None),
        prompt_manager=None
    )

    # Create a code-analysis task for the sample code fixture.
    created = await task_manager.create_task(
        type=TaskType.CODE_ANALYSIS,
        title="Test Task",
        description="Analyze test code",
        priority=TaskPriority.MEDIUM,
        context={"code": test_code}
    )
    assert created.title == "Test Task"
    # Newly created tasks start out PENDING.
    assert created.status == TaskStatus.PENDING

    # The task should be retrievable by its id.
    found = await task_manager.get_task(created.id)
    assert found is not None
    assert found.id == created.id
121 |
@pytest.mark.asyncio
async def test_metrics_manager(test_config: ServerConfig):
    """Record and read back a metric through the MetricsManager."""
    # Metrics may be disabled in the test config; force them on here.
    test_config.metrics_enabled = True

    metrics_manager = MetricsManager(test_config)
    await metrics_manager.initialize()

    try:
        # Record a single counter sample with one label.
        await metrics_manager.record_metric(
            "test_metric",
            MetricType.COUNTER,
            1.0,
            {"label": "test"}
        )

        # The recorded metric should come back when queried by name.
        recorded = await metrics_manager.get_metrics(["test_metric"])
        assert "test_metric" in recorded
        assert len(recorded) == 1
    finally:
        # Always release manager resources.
        await metrics_manager.cleanup()
147 |
@pytest.mark.asyncio
async def test_health_manager(test_config: ServerConfig):
    """Run a health check and validate the shape of the report."""
    health_manager = HealthManager(test_config)

    report = await health_manager.check_health()
    # A report always carries a status, per-component details, and a timestamp.
    assert report.status is not None
    assert isinstance(report.components, dict)
    assert isinstance(report.timestamp, datetime)
158 |
@pytest.mark.asyncio
async def test_cache_manager(test_config: ServerConfig):
    """Exercise memory, disk, and combined cache operations."""
    cache = CacheManager(test_config)
    await cache.initialize()

    try:
        # In-memory layer round-trip.
        cache.put_in_memory("test_key", "test_value")
        assert cache.get_from_memory("test_key") == "test_value"

        # On-disk layer round-trip.
        cache.put_in_disk("test_key", "test_value")
        assert cache.get_from_disk("test_key") == "test_value"

        # Combined put/get goes through both layers.
        cache.put("combined_key", "combined_value")
        assert cache.get("combined_key") == "combined_value"

        # Removal should make the key unavailable again.
        cache.remove("test_key")
        assert cache.get("test_key") is None
    finally:
        # Always release cache resources.
        await cache.cleanup()
186 |
@pytest.mark.asyncio
async def test_documentation_manager(test_config: ServerConfig):
    """Create a document and fetch it back by id."""
    doc_manager = DocumentationManager(test_config)

    # Create a reference document.
    created = await doc_manager.add_document(
        title="Test Doc",
        content="Test content",
        type=DocumentationType.REFERENCE
    )
    assert created.title == "Test Doc"

    # The document should round-trip through get_document.
    fetched = await doc_manager.get_document(created.id)
    assert fetched is not None
    assert fetched.id == created.id
205 |
@pytest.mark.asyncio
async def test_debug_system(test_config: ServerConfig):
    """Create a bug issue and validate its initial fields."""
    debug_system = DebugSystem(test_config)

    # File a bug with a structured description payload.
    new_issue = await debug_system.create_issue(
        title="Test issue",
        type=IssueType.BUG,
        description={"message": "Test description", "steps": ["Step 1", "Step 2"]}
    )

    assert new_issue.title == "Test issue"
    assert new_issue.type == IssueType.BUG
    # New issues always open in the OPEN state.
    assert new_issue.status == IssueStatus.OPEN
    # The structured description keeps both the message and the repro steps.
    assert "message" in new_issue.description
    assert "steps" in new_issue.description
223 |
```
--------------------------------------------------------------------------------
/.github/agents/VectorStoreAgent.agent.md:
--------------------------------------------------------------------------------
```markdown
1 | # Vector Store Agent
2 |
3 | You are a specialized agent for working with the Qdrant vector store and embedding systems in the MCP Codebase Insight project.
4 |
5 | ## Your Responsibilities
6 |
7 | 1. **Vector Store Operations**: Add, search, update, and delete patterns in Qdrant
8 | 2. **Embedding Management**: Handle embedding generation and caching
9 | 3. **Collection Management**: Initialize and maintain Qdrant collections
10 | 4. **Performance Optimization**: Optimize vector search queries and batch operations
11 |
12 | ## Critical Knowledge
13 |
14 | ### Vector Store Architecture
15 |
16 | **VectorStore** (`src/mcp_codebase_insight/core/vector_store.py`):
17 | - Qdrant client wrapper with retry logic
18 | - Collection initialization and management
19 | - Vector search with filtering
20 | - Batch operations support
21 |
22 | **EmbeddingProvider** (`src/mcp_codebase_insight/core/embeddings.py`):
23 | - Lazy-loading sentence transformers
24 | - Default model: `all-MiniLM-L6-v2` (384 dimensions)
25 | - Caching of embeddings for performance
26 |
27 | ### Initialization Pattern
28 |
29 | ```python
30 | from src.mcp_codebase_insight.core import VectorStore, EmbeddingProvider
31 | from sentence_transformers import SentenceTransformer
32 |
33 | # Create embedding provider
34 | model = SentenceTransformer("all-MiniLM-L6-v2")
35 | embedder = EmbeddingProvider(model)
36 | await embedder.initialize()
37 |
38 | # Create vector store
39 | vector_store = VectorStore(
40 | url=config.qdrant_url,
41 | embedder=embedder,
42 | collection_name="codebase_patterns",
43 | vector_size=384
44 | )
45 | await vector_store.initialize()
46 |
47 | # Always cleanup
48 | try:
49 | # Use vector store
50 | pass
51 | finally:
52 | await vector_store.cleanup()
53 | ```
54 |
55 | ### Common Operations
56 |
57 | **Store Pattern**:
58 | ```python
59 | pattern_id = str(uuid.uuid4())
60 | await vector_store.add(
61 | id=pattern_id,
62 | text="Code pattern description",
63 | metadata={
64 | "pattern_name": "Repository Pattern",
65 | "type": "pattern",
66 | "language": "python",
67 | "examples": ["example1.py", "example2.py"]
68 | }
69 | )
70 | ```
71 |
72 | **Search Patterns**:
73 | ```python
74 | # Basic search
75 | results = await vector_store.search(
76 | text="database access pattern",
77 | limit=5
78 | )
79 |
80 | # Search with filters
81 | results = await vector_store.search(
82 | text="async error handling",
83 | filter_params={
84 | "must": [
85 | {"key": "type", "match": {"value": "pattern"}},
86 | {"key": "language", "match": {"value": "python"}}
87 | ]
88 | },
89 | limit=10
90 | )
91 |
92 | # Process results
93 | for result in results:
94 | print(f"Pattern: {result.payload.get('pattern_name')}")
95 | print(f"Score: {result.score}")
96 | print(f"Metadata: {result.payload}")
97 | ```
98 |
99 | **Update Pattern**:
100 | ```python
101 | await vector_store.update(
102 | id=pattern_id,
103 | text="Updated pattern description",
104 | metadata={"pattern_name": "Updated Name", "version": 2}
105 | )
106 | ```
107 |
108 | **Delete Pattern**:
109 | ```python
110 | await vector_store.delete(id=pattern_id)
111 | ```
112 |
113 | ### Version Compatibility (IMPORTANT!)
114 |
115 | Qdrant client versions have parameter changes:
116 | - **v1.13.3+**: Uses `query` parameter
117 | - **Older versions**: Uses `query_vector` parameter
118 |
119 | The VectorStore code supports both for compatibility. When updating, check comments in `vector_store.py` around line 16.
120 |
121 | ### Configuration
122 |
123 | **Environment Variables**:
124 | ```bash
125 | QDRANT_URL=http://localhost:6333 # Qdrant server URL
126 | QDRANT_API_KEY=your-key # Optional API key
127 | MCP_EMBEDDING_MODEL=all-MiniLM-L6-v2 # Model name
128 | MCP_COLLECTION_NAME=codebase_patterns # Collection name
129 | ```
130 |
131 | **Starting Qdrant**:
132 | ```bash
133 | # Docker
134 | docker run -p 6333:6333 qdrant/qdrant
135 |
136 | # Or via docker-compose (if available)
137 | docker-compose up -d qdrant
138 |
139 | # Check health
140 | curl http://localhost:6333/collections
141 | ```
142 |
143 | ### Common Issues & Solutions
144 |
145 | **Qdrant Connection Failure**:
146 | ```python
147 | # VectorStore gracefully handles initialization failure
148 | # Server continues with reduced functionality
149 | # Check logs for: "Vector store not available"
150 |
151 | # Verify Qdrant is running
152 | curl http://localhost:6333/collections
153 |
154 | # Check environment variable
155 | echo $QDRANT_URL
156 | ```
157 |
158 | **Embedding Dimension Mismatch**:
159 | ```python
160 | # Ensure vector_size matches model output
161 | embedder = EmbeddingProvider(model)
162 | await embedder.initialize()
163 | vector_size = embedder.vector_size # Use this!
164 |
165 | vector_store = VectorStore(
166 | url=url,
167 | embedder=embedder,
168 | vector_size=vector_size # Match embedder
169 | )
170 | ```
171 |
172 | **Collection Already Exists**:
173 | ```python
174 | # VectorStore handles this automatically
175 | # Checks if collection exists before creating
176 | # Safe to call initialize() multiple times
177 | ```
178 |
179 | **Slow Search Queries**:
180 | ```python
181 | # Use filters to narrow search space
182 | filter_params = {
183 | "must": [{"key": "type", "match": {"value": "pattern"}}]
184 | }
185 |
186 | # Limit results appropriately
187 | results = await vector_store.search(text, filter_params=filter_params, limit=10)
188 |
189 | # Consider caching frequent queries
190 | ```
191 |
192 | ### Batch Operations
193 |
194 | ```python
195 | # Store multiple patterns efficiently
196 | patterns = [
197 | {
198 | "id": str(uuid.uuid4()),
199 | "text": "Pattern 1",
200 | "metadata": {"type": "pattern"}
201 | },
202 | {
203 | "id": str(uuid.uuid4()),
204 | "text": "Pattern 2",
205 | "metadata": {"type": "pattern"}
206 | }
207 | ]
208 |
209 | # Use batch add (if implemented) or loop with small delays
210 | for pattern in patterns:
211 | await vector_store.add(**pattern)
212 | await asyncio.sleep(0.01) # Avoid rate limiting
213 | ```
214 |
215 | ### Testing Vector Store
216 |
217 | ```python
218 | @pytest.mark.asyncio
219 | async def test_vector_store_search(vector_store):
220 | """Test vector search returns relevant results."""
221 | # Arrange - add test pattern
222 | test_id = str(uuid.uuid4())
223 | await vector_store.add(
224 | id=test_id,
225 | text="Test pattern for async operations",
226 | metadata={"type": "test", "language": "python"}
227 | )
228 |
229 | # Act - search for similar patterns
230 | results = await vector_store.search(
231 | text="asynchronous programming patterns",
232 | limit=5
233 | )
234 |
235 | # Assert
236 | assert len(results) > 0
237 | assert any(r.id == test_id for r in results)
238 |
239 | # Cleanup
240 | await vector_store.delete(id=test_id)
241 | ```
242 |
243 | ### Performance Best Practices
244 |
245 | 1. **Cache embeddings**: EmbeddingProvider caches automatically
246 | 2. **Batch operations**: Group similar operations when possible
247 | 3. **Use filters**: Narrow search space with metadata filters
248 | 4. **Limit results**: Don't fetch more than needed
249 | 5. **Connection pooling**: Reuse Qdrant client connections
250 | 6. **Retry logic**: VectorStore has built-in retry for transient failures
251 |
252 | ### Key Files to Reference
253 |
254 | - `src/mcp_codebase_insight/core/vector_store.py`: Main implementation
255 | - `src/mcp_codebase_insight/core/embeddings.py`: Embedding provider
256 | - `tests/components/test_vector_store.py`: Test examples
257 | - `docs/vector_store_best_practices.md`: Best practices guide
258 | - `docs/qdrant_setup.md`: Qdrant setup instructions
259 |
260 | ### Integration with Other Components
261 |
262 | **KnowledgeBase**: Uses VectorStore for semantic search
263 | ```python
264 | kb = KnowledgeBase(config, vector_store=vector_store)
265 | await kb.initialize()
266 | results = await kb.search_patterns(query="error handling")
267 | ```
268 |
269 | **CacheManager**: Caches embeddings and search results
270 | ```python
271 | # Embeddings are automatically cached
272 | # Search results can be cached at application level
273 | ```
274 |
275 | ### When to Escalate
276 |
277 | - Qdrant version incompatibility issues
278 | - Performance degradation with large datasets (>100k patterns)
279 | - Collection corruption or data loss
280 | - Embedding model changes requiring re-indexing
281 | - Advanced Qdrant features (quantization, sharding, etc.)
282 |
```
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/debug.py:
--------------------------------------------------------------------------------
```python
1 | """Debug system for issue tracking and analysis."""
2 |
3 | import json
4 | from datetime import datetime
5 | from enum import Enum
6 | from pathlib import Path
7 | from typing import Dict, List, Optional
8 | from uuid import UUID, uuid4
9 |
10 | from pydantic import BaseModel
11 |
class IssueType(str, Enum):
    """Issue type enumeration.

    Inherits from ``str`` so members serialize naturally to JSON and
    compare equal to their plain string values (e.g. ``IssueType.BUG == "bug"``).
    """

    BUG = "bug"                      # functional defect
    PERFORMANCE = "performance"      # speed or resource-usage problem
    SECURITY = "security"            # vulnerability or hardening concern
    DESIGN = "design"                # architectural / design issue
    DOCUMENTATION = "documentation"  # missing or incorrect docs
    OTHER = "other"                  # anything not covered above
21 |
class IssueStatus(str, Enum):
    """Issue status enumeration.

    Inherits from ``str`` so members serialize naturally to JSON and
    compare equal to their plain string values.
    """

    OPEN = "open"                # newly created, not yet worked on
    IN_PROGRESS = "in_progress"  # actively being investigated
    RESOLVED = "resolved"        # fixed; resolved_at is set on the issue
    CLOSED = "closed"            # finished, no further action
    WONT_FIX = "wont_fix"        # acknowledged but intentionally not fixed
30 |
class Issue(BaseModel):
    """Issue model.

    Persisted by ``DebugSystem`` as one pretty-printed JSON file per issue
    (see ``DebugSystem._save_issue``).
    """

    id: UUID
    title: str
    type: IssueType
    status: IssueStatus
    # Structured description payload (e.g. {"message": ..., "steps": [...]}).
    description: Dict
    # Analysis/debug steps generated by DebugSystem.analyze_issue.
    steps: Optional[List[Dict]] = None
    created_at: datetime
    updated_at: datetime
    # Set by DebugSystem.update_issue when status transitions to RESOLVED.
    resolved_at: Optional[datetime] = None
    metadata: Optional[Dict[str, str]] = None
44 |
class DebugSystem:
    """System for debugging and issue management.

    Issues live in an in-memory registry (``self.issues``) and are persisted
    as one JSON file per issue under ``<docs_cache_dir>/debug``.  Every
    create/update writes through to both the registry and disk, and
    ``cleanup()`` flushes the registry before clearing it.
    """

    def __init__(self, config):
        """Initialize debug system.

        Args:
            config: Server configuration; only ``docs_cache_dir`` is read here.
        """
        self.config = config
        self.debug_dir = Path(config.docs_cache_dir) / "debug"
        self.debug_dir.mkdir(parents=True, exist_ok=True)
        self.issues: Dict[UUID, Issue] = {}
        self.initialized = False

    async def initialize(self) -> None:
        """Load persisted issues from disk into the in-memory registry."""
        if self.initialized:
            return

        try:
            if self.debug_dir.exists():
                for issue_file in self.debug_dir.glob("*.json"):
                    try:
                        with open(issue_file) as f:
                            data = json.load(f)
                        issue = Issue(**data)
                        self.issues[issue.id] = issue
                    except Exception as e:
                        # Log the error but keep loading the remaining issues.
                        print(f"Error loading issue {issue_file}: {e}")

            self.initialized = True
        except Exception as e:
            print(f"Error initializing debug system: {e}")
            await self.cleanup()
            raise RuntimeError(f"Failed to initialize debug system: {str(e)}")

    async def cleanup(self) -> None:
        """Flush in-memory issues to disk, then clear the registry."""
        if not self.initialized:
            return

        try:
            # Save any pending issues.
            for issue in self.issues.values():
                try:
                    await self._save_issue(issue)
                except Exception as e:
                    print(f"Error saving issue {issue.id}: {e}")
            # Clear in-memory issues.
            self.issues.clear()
        except Exception as e:
            print(f"Error cleaning up debug system: {e}")
        finally:
            self.initialized = False

    async def create_issue(
        self,
        title: str,
        type: str,
        description: Dict
    ) -> Issue:
        """Create, register, and persist a new issue.

        Args:
            title: Short human-readable summary.
            type: Issue type value; must be convertible to ``IssueType``.
            description: Structured description payload.

        Returns:
            The newly created issue, with status ``OPEN``.
        """
        # NOTE: naive UTC timestamps, consistent with previously stored data.
        now = datetime.utcnow()
        issue = Issue(
            id=uuid4(),
            title=title,
            type=IssueType(type),
            status=IssueStatus.OPEN,
            description=description,
            created_at=now,
            updated_at=now
        )

        # BUGFIX: also register the issue in the in-memory map.  Previously
        # only the file was written, so cleanup() would re-save just the
        # issues loaded at initialize() time, clobbering anything written
        # to disk after initialization.
        self.issues[issue.id] = issue
        await self._save_issue(issue)
        return issue

    async def get_issue(self, issue_id: UUID) -> Optional[Issue]:
        """Get issue by ID, preferring the in-memory registry over disk."""
        cached = self.issues.get(issue_id)
        if cached is not None:
            return cached

        issue_path = self.debug_dir / f"{issue_id}.json"
        if not issue_path.exists():
            return None

        with open(issue_path) as f:
            data = json.load(f)
        issue = Issue(**data)
        # Cache the loaded issue so later updates and cleanup stay consistent.
        self.issues[issue.id] = issue
        return issue

    async def update_issue(
        self,
        issue_id: UUID,
        status: Optional[IssueStatus] = None,
        steps: Optional[List[Dict]] = None,
        metadata: Optional[Dict[str, str]] = None
    ) -> Optional[Issue]:
        """Update issue status and details, persisting the result.

        Returns:
            The updated issue, or ``None`` when no such issue exists.
        """
        issue = await self.get_issue(issue_id)
        if not issue:
            return None

        if status:
            issue.status = status
            if status == IssueStatus.RESOLVED:
                issue.resolved_at = datetime.utcnow()
        if steps:
            issue.steps = steps
        if metadata:
            # Merge metadata; new keys override existing ones.
            issue.metadata = {**(issue.metadata or {}), **metadata}

        issue.updated_at = datetime.utcnow()
        # Keep the registry entry in sync with what is written to disk.
        self.issues[issue.id] = issue
        await self._save_issue(issue)
        return issue

    async def list_issues(
        self,
        type: Optional[IssueType] = None,
        status: Optional[IssueStatus] = None
    ) -> List[Issue]:
        """List all persisted issues, optionally filtered by type and status.

        Returns:
            Matching issues sorted by creation time (oldest first).
        """
        issues = []
        for path in self.debug_dir.glob("*.json"):
            try:
                with open(path) as f:
                    data = json.load(f)
                issue = Issue(**data)
            except Exception as e:
                # Skip unreadable/corrupt files instead of failing the whole
                # listing, mirroring the error tolerance in initialize().
                print(f"Error loading issue {path}: {e}")
                continue
            if (not type or issue.type == type) and (not status or issue.status == status):
                issues.append(issue)
        return sorted(issues, key=lambda x: x.created_at)

    async def analyze_issue(self, issue_id: UUID) -> List[Dict]:
        """Analyze issue and generate type-specific debug steps.

        The generated steps are stored back onto the issue via
        ``update_issue``.  Returns an empty list for unknown issue ids and
        for types without a checklist (design/documentation/other).
        """
        issue = await self.get_issue(issue_id)
        if not issue:
            return []

        # Generate analysis steps based on issue type.
        steps = []

        if issue.type == IssueType.BUG:
            steps.extend([
                {"type": "check", "name": "Reproduce Issue", "description": "Steps to reproduce the issue"},
                {"type": "check", "name": "Error Logs", "description": "Check relevant error logs"},
                {"type": "check", "name": "Stack Trace", "description": "Analyze stack trace if available"},
                {"type": "check", "name": "Code Review", "description": "Review related code sections"}
            ])

        elif issue.type == IssueType.PERFORMANCE:
            steps.extend([
                {"type": "check", "name": "Profiling", "description": "Run performance profiling"},
                {"type": "check", "name": "Resource Usage", "description": "Monitor CPU, memory, I/O"},
                {"type": "check", "name": "Query Analysis", "description": "Review database queries"},
                {"type": "check", "name": "Bottlenecks", "description": "Identify performance bottlenecks"}
            ])

        elif issue.type == IssueType.SECURITY:
            steps.extend([
                {"type": "check", "name": "Vulnerability Scan", "description": "Run security scanners"},
                {"type": "check", "name": "Access Control", "description": "Review permissions"},
                {"type": "check", "name": "Input Validation", "description": "Check input handling"},
                {"type": "check", "name": "Dependencies", "description": "Audit dependencies"}
            ])

        # Update issue with analysis steps.
        await self.update_issue(issue_id, steps=steps)
        return steps

    async def _save_issue(self, issue: Issue) -> None:
        """Persist a single issue as pretty-printed JSON in the debug dir."""
        issue_path = self.debug_dir / f"{issue.id}.json"
        with open(issue_path, "w") as f:
            # default=str handles UUID and datetime values during serialization.
            json.dump(issue.model_dump(), f, indent=2, default=str)
212 |
```
--------------------------------------------------------------------------------
/.github/copilot-instructions.md:
--------------------------------------------------------------------------------
```markdown
1 | # MCP Codebase Insight - AI Agent Instructions
2 |
3 | ## Architecture Overview
4 |
5 | **MCP Server with Vector-Backed Knowledge Base**: FastAPI-based Model Context Protocol server providing codebase analysis through Qdrant vector store, semantic search with `sentence-transformers`, and pattern detection.
6 |
7 | ### Core Service Components (`src/mcp_codebase_insight/core/`)
8 | - **VectorStore** (`vector_store.py`): Qdrant client wrapper with retry logic, collection initialization
9 | - **EmbeddingProvider** (`embeddings.py`): Sentence transformers (`all-MiniLM-L6-v2` default), lazy initialization
10 | - **CacheManager** (`cache.py`): Dual-layer (memory + disk) caching for embeddings and API results
11 | - **KnowledgeBase** (`knowledge.py`): Semantic search over stored patterns with vector similarity
12 | - **ServerState** (`state.py`): Component lifecycle via DIContainer, async initialization/cleanup tracking
13 | - **ADRManager** (`adr.py`): Markdown frontmatter-based Architecture Decision Records in `docs/adrs/`
14 |
15 | ### Service Initialization Pattern
16 | All core services follow async init/cleanup: `await service.initialize()` → use service → `await service.cleanup()`. ServerState manages component lifecycle through DIContainer, tracking status with `ComponentStatus` enum. See `server_lifespan()` in `server.py` for orchestration example.
17 |
18 | ### Configuration & Environment
19 | - **ServerConfig** (`core/config.py`): Uses `@dataclass`, loads from env with `ServerConfig.from_env()`
20 | - **Key env vars**: `QDRANT_URL`, `MCP_EMBEDDING_MODEL`, `MCP_COLLECTION_NAME`, `MCP_CACHE_ENABLED`, `MCP_DISK_CACHE_DIR`
21 | - **Directory structure**: Config auto-creates `docs/`, `docs/adrs/`, `knowledge/`, `cache/` on init via `config.create_directories()`
22 |
23 | ## Development Workflows
24 |
25 | ### Running Tests
26 | **Custom test runner**: `./run_tests.py` (NOT plain pytest) - handles asyncio isolation, event loop cleanup
27 | ```bash
28 | # Run all tests with isolation and coverage
29 | ./run_tests.py --all --clean --isolated --coverage
30 |
31 | # Run specific test categories
32 | ./run_tests.py --component --isolated # Component tests
33 | ./run_tests.py --integration --isolated # Integration tests
34 | ./run_tests.py --api --isolated # API endpoint tests
35 |
36 | # Run specific test file or function
37 | ./run_tests.py --file tests/components/test_cache.py
38 | ./run_tests.py --test test_vector_store_initialization
39 | ```
40 |
41 | **Why custom runner?**: Event loop conflicts between test modules. Runner provides `--isolated` (PYTHONPATH isolation), `--sequential` (no parallelism), `--fully-isolated` (separate processes per module).
42 |
43 | ### Makefile Commands
44 | ```bash
45 | make install # Install dependencies from requirements.txt
46 | make test # Runs ./run_tests.py with recommended flags
47 | make lint # flake8 + mypy + black --check + isort --check
48 | make format # black + isort code formatting
49 | make run # python -m mcp_codebase_insight
50 | make docker-build # Build container with Qdrant integration
51 | ```
52 |
53 | ### Docker & Qdrant Setup
54 | - **Dockerfile**: Python 3.11-slim, Rust toolchain (pydantic build), multi-stage cache optimization
55 | - **Qdrant**: External vector DB (port 6333), not bundled. Start via `docker-compose` or local install
56 | - **Container mounts**: Mount `docs/`, `knowledge/`, `cache/`, `logs/` for persistence
57 |
58 | ## Code Conventions & Patterns
59 |
60 | ### Async/Await Discipline
61 | - **All I/O operations are async**: File system via `aiofiles`, Qdrant via async client, cache operations
62 | - **Test isolation**: `conftest.py` manages session-scoped event loops with `_event_loops` dict, mutex locks (`_loops_lock`, `_tests_lock`)
63 | - **Fixtures**: Use `@pytest_asyncio.fixture` for async fixtures, `@pytest.mark.asyncio` for async tests
64 |
65 | ### Error Handling & Logging
66 | - **Structured logging**: `from ..utils.logger import get_logger` → `logger = get_logger(__name__)`
67 | - **Component-level error tracking**: ServerState stores errors in ComponentState, retry counts tracked
68 | - **Graceful degradation**: VectorStore initialization can fail (Qdrant unavailable), server continues with reduced functionality
69 |
70 | ### Testing Patterns
71 | - **Test fixtures in conftest.py**: `event_loop`, `test_config`, `vector_store`, `cache_manager` (session/function scoped)
72 | - **Isolation via server_test_isolation.py**: `get_isolated_server_state()` provides per-test server instances
73 | - **Component tests**: Focus on single service unit (e.g., `test_vector_store.py` → VectorStore CRUD operations)
74 | - **Integration tests**: Multi-component workflows (e.g., `test_api_endpoints.py` → FastAPI routes with live services)
75 |
76 | ### Dependency Injection Pattern
77 | DIContainer (`core/di.py`) manages component initialization order:
78 | 1. ServerConfig from env
79 | 2. Embedding model (SentenceTransformer)
80 | 3. VectorStore (needs embedder + Qdrant client)
81 | 4. CacheManager, MetricsManager, HealthManager
82 | 5. KnowledgeBase (needs VectorStore)
83 | 6. TaskManager, ADRManager
84 |
85 | **Usage**: Create DIContainer, call `await container.initialize()`, access via `container.get_component("vector_store")`
86 |
87 | ### Type Hints & Dataclasses
88 | - **Strict typing**: All functions have type hints (params + return types), mypy enforced in lint
89 | - **@dataclass for config/models**: ServerConfig, ComponentState, ADR, SearchResult use dataclasses
90 | - **Optional vs None**: Use `Optional[Type]` for potentially None values, explicit None checks
91 |
92 | ## Key File Relationships
93 |
94 | - **server.py** → imports core services, defines `server_lifespan` context manager
95 | - **core/state.py** → imports DIContainer, manages component registry
96 | - **core/di.py** → imports all service classes, orchestrates initialization
97 | - **tests/conftest.py** → imports ServerState, server_test_isolation for fixture setup
98 | - **run_tests.py** → spawns pytest subprocess with custom args, handles event loop cleanup
99 |
100 | ## Project-Specific Quirks
101 |
102 | 1. **Qdrant client version sensitivity**: Comments in `vector_store.py` note parameter name changes (`query_vector` → `query` in v1.13.3+). Code supports both for compatibility.
103 |
104 | 2. **Cache directory creation**: `disk_cache_dir` defaults to `"cache"` if `MCP_CACHE_ENABLED=true` but path not specified. Set to `None` if cache disabled (see `ServerConfig.__post_init__`).
105 |
106 | 3. **ADR numbering**: ADRManager auto-increments `next_adr_number` by scanning `docs/adrs/` for `NNN-*.md` patterns on init.
107 |
108 | 4. **Test runner event loop management**: `conftest.py` maintains process-specific event loop dict to avoid "different loop" errors across test modules.
109 |
110 | 5. **Component status tracking**: Don't assume component is ready after creation. Check `component.status == ComponentStatus.INITIALIZED` before use.
111 |
112 | ## Common Debugging Patterns
113 |
114 | - **Qdrant connection issues**: Check `QDRANT_URL` env var, verify Qdrant is running (`curl http://localhost:6333/collections`)
115 | - **Event loop errors in tests**: Use `--isolated` and `--sequential` flags with `run_tests.py`, check `conftest.py` fixtures are async
116 | - **Missing embeddings**: EmbeddingProvider lazy-loads model on first use, check `initialized` flag
117 | - **Cache not persisting**: Verify `MCP_DISK_CACHE_DIR` is writable, check `cache_enabled` in config
118 |
119 | ## References
120 |
121 | - **System architecture diagrams**: `system-architecture.md` (Mermaid diagrams for components, data flow)
122 | - **Detailed setup guides**: `docs/getting-started/` for installation, Qdrant setup, Docker
123 | - **Testing philosophy**: Follows TDD, see `docs/tdd/workflow.md` and Agans' 9 Rules in `docs/debuggers/`
124 | - **Existing AI context**: `CLAUDE.md` has legacy build/test commands (superseded by Makefile + run_tests.py)
125 |
```
--------------------------------------------------------------------------------
/docs/features/documentation.md:
--------------------------------------------------------------------------------
```markdown
1 | # Documentation Management
2 |
3 | MCP Codebase Insight provides powerful tools for managing technical documentation, ensuring it stays up-to-date with your codebase and is easily accessible.
4 |
5 | ## Overview
6 |
7 | The documentation management feature:
8 | - Auto-generates documentation from code
9 | - Maintains documentation-code links
10 | - Provides semantic search capabilities
11 | - Supports multiple documentation formats
12 | - Enables documentation validation
13 | - Tracks documentation coverage
14 |
15 | ## Features
16 |
17 | ### 1. Documentation Generation
18 |
19 | Automatically generate documentation from code:
20 |
21 | ```python
22 | # Example: Generate documentation for a module
23 | response = await client.post(
24 | "http://localhost:3000/api/docs/generate",
25 | json={
26 | "source": "src/auth/",
27 | "output_format": "markdown",
28 | "include_private": False,
29 | "template": "api-docs"
30 | }
31 | )
32 |
33 | docs = response.json()
34 | print(f"Generated {len(docs['files'])} documentation files")
35 | ```
36 |
37 | ### 2. Documentation Search
38 |
39 | Search through documentation using semantic understanding:
40 |
41 | ```python
42 | # Example: Search documentation
43 | response = await client.get(
44 | "http://localhost:3000/api/docs/search",
45 | params={
46 | "query": "how to implement authentication",
47 | "doc_types": ["guide", "api", "tutorial"],
48 | "limit": 5
49 | }
50 | )
51 |
52 | results = response.json()
53 | for doc in results["matches"]:
54 | print(f"- {doc['title']} (Score: {doc['score']})")
55 | ```
56 |
57 | ### 3. Documentation Validation
58 |
59 | Validate documentation completeness and accuracy:
60 |
61 | ```python
62 | # Example: Validate documentation
63 | response = await client.post(
64 | "http://localhost:3000/api/docs/validate",
65 | json={
66 | "paths": ["docs/api/", "docs/guides/"],
67 | "rules": ["broken-links", "code-coverage", "freshness"]
68 | }
69 | )
70 |
71 | validation = response.json()
72 | print(f"Found {len(validation['issues'])} issues")
73 | ```
74 |
75 | ### 4. Documentation Crawling
76 |
77 | Crawl and index external documentation:
78 |
79 | ```python
80 | # Example: Crawl documentation
81 | response = await client.post(
82 | "http://localhost:3000/api/docs/crawl",
83 | json={
84 | "urls": [
85 | "https://api.example.com/docs",
86 | "https://wiki.example.com/technical-docs"
87 | ],
88 | "depth": 2,
89 | "include_patterns": ["*.md", "*.html"],
90 | "exclude_patterns": ["*draft*", "*private*"]
91 | }
92 | )
93 | ```
94 |
95 | ## Usage
96 |
97 | ### Basic Documentation Workflow
98 |
99 | 1. **Generate Documentation**
100 | ```bash
101 | # Using CLI
102 | mcp-codebase-insight docs generate \
103 | --source src/ \
104 | --output docs/api \
105 | --template api-reference
106 | ```
107 |
108 | 2. **Validate Documentation**
109 | ```bash
110 | # Check documentation quality
111 | mcp-codebase-insight docs validate \
112 | --path docs/ \
113 | --rules all
114 | ```
115 |
116 | 3. **Update Documentation**
117 | ```bash
118 | # Update existing documentation
119 | mcp-codebase-insight docs update \
120 | --path docs/api \
121 | --sync-with-code
122 | ```
123 |
124 | 4. **Search Documentation**
125 | ```bash
126 | # Search in documentation
127 | mcp-codebase-insight docs search \
128 | "authentication implementation" \
129 | --type guide \
130 | --limit 5
131 | ```
132 |
133 | ### Documentation Templates
134 |
135 | Create custom documentation templates:
136 |
137 | ```yaml
138 | # templates/docs/api-reference.yaml
139 | name: "API Reference Template"
140 | sections:
141 | - title: "Overview"
142 | required: true
143 | content:
144 | - "Brief description"
145 | - "Key features"
146 | - "Requirements"
147 |
148 | - title: "Installation"
149 | required: true
150 | content:
151 | - "Step-by-step instructions"
152 | - "Configuration options"
153 |
154 | - title: "API Methods"
155 | required: true
156 | for_each: "method"
157 | content:
158 | - "Method signature"
159 | - "Parameters"
160 | - "Return values"
161 | - "Examples"
162 | ```
163 |
164 | ## Configuration
165 |
166 | ### Documentation Settings
167 |
168 | ```yaml
169 | documentation:
170 | # Generation settings
171 | generation:
172 | templates_dir: "./templates/docs"
173 | output_dir: "./docs"
174 | default_format: "markdown"
175 | include_private: false
176 |
177 | # Validation settings
178 | validation:
179 | rules:
180 | broken_links: true
181 | code_coverage: true
182 | freshness: true
183 | max_age_days: 90
184 |
185 | # Search settings
186 | search:
187 | index_update_interval: "1h"
188 | min_score: 0.5
189 | max_results: 10
190 |
191 | # Crawling settings
192 | crawling:
193 | max_depth: 3
194 | timeout: 30
195 | concurrent_requests: 5
196 | respect_robots_txt: true
197 | ```
198 |
199 | ### Storage Settings
200 |
201 | ```yaml
202 | storage:
203 | # File storage
204 | files:
205 | path: "./docs"
206 | backup_path: "./docs/backup"
207 |
208 | # Vector storage
209 | vectors:
210 | collection: "documentation"
211 | dimension: 384
212 |
213 | # Cache settings
214 | cache:
215 | enabled: true
216 | ttl: 3600
217 | max_size: "1GB"
218 | ```
219 |
220 | ## Best Practices
221 |
222 | 1. **Documentation Structure**
223 | - Use consistent formatting
224 | - Follow a clear hierarchy
225 | - Include examples
226 | - Keep sections focused
227 |
228 | 2. **Maintenance**
229 | - Update regularly
230 | - Remove outdated content
231 | - Track changes with code
232 | - Validate links
233 |
234 | 3. **Organization**
235 | - Use clear categories
236 | - Maintain an index
237 | - Cross-reference related docs
238 | - Version appropriately
239 |
240 | 4. **Quality**
241 | - Include code examples
242 | - Add diagrams where helpful
243 | - Proofread content
244 | - Test code samples
245 |
246 | ## API Reference
247 |
248 | ### Documentation Endpoints
249 |
250 | | Endpoint | Method | Description |
251 | |----------|--------|-------------|
252 | | `/api/docs/generate` | POST | Generate documentation |
253 | | `/api/docs/validate` | POST | Validate documentation |
254 | | `/api/docs/search` | GET | Search documentation |
255 | | `/api/docs/crawl` | POST | Crawl external docs |
256 | | `/api/docs/update` | POST | Update documentation |
257 | | `/api/docs/stats` | GET | Get documentation stats |
258 |
259 | ### Response Format
260 |
261 | ```json
262 | {
263 | "documentation": {
264 | "id": "uuid",
265 | "title": "string",
266 | "content": "string",
267 | "format": "string",
268 | "metadata": {
269 | "author": "string",
270 | "created_at": "datetime",
271 | "updated_at": "datetime",
272 | "version": "string"
273 | },
274 | "related_code": [{
275 | "file": "string",
276 | "lines": [int, int],
277 | "type": "string"
278 | }],
279 | "validation": {
280 | "status": "string",
281 | "issues": [{
282 | "type": "string",
283 | "severity": "string",
284 | "message": "string"
285 | }]
286 | }
287 | }
288 | }
289 | ```
290 |
291 | ## Integration
292 |
293 | ### IDE Integration
294 |
295 | ```python
296 | # VS Code Extension Example
297 | from mcp.client import Client
298 |
299 | client = Client.connect()
300 |
301 | # Document current file
302 | async def document_current_file(file_path: str):
303 | response = await client.post(
304 | "/api/docs/generate",
305 | json={
306 | "source": file_path,
307 | "template": "code-reference"
308 | }
309 | )
310 | return response.json()
311 | ```
312 |
313 | ### CI/CD Integration
314 |
315 | ```yaml
316 | # GitHub Actions Example
317 | name: Documentation Check
318 |
319 | on: [push, pull_request]
320 |
321 | jobs:
322 | validate-docs:
323 | runs-on: ubuntu-latest
324 | steps:
325 | - uses: actions/checkout@v2
326 | - name: Validate Documentation
327 | run: |
328 | curl -X POST http://localhost:3000/api/docs/validate \
329 | -H "Content-Type: application/json" \
330 | -d '{
331 | "paths": ["docs/"],
332 | "rules": ["all"]
333 | }'
334 | ```
335 |
336 | ## Troubleshooting
337 |
338 | ### Common Issues
339 |
340 | 1. **Generation Fails**
341 | ```bash
342 | # Check template validity
343 | mcp-codebase-insight docs validate-template \
344 | --template api-reference
345 | ```
346 |
347 | 2. **Search Not Working**
348 | ```bash
349 | # Rebuild search index
350 | mcp-codebase-insight docs rebuild-index
351 | ```
352 |
353 | 3. **Validation Errors**
354 | ```bash
355 | # Get detailed validation report
356 | mcp-codebase-insight docs validate \
357 | --path docs/ \
358 | --verbose
359 | ```
360 |
361 | ## Next Steps
362 |
363 | - [Documentation Templates](docs/templates.md)
364 | - [Style Guide](docs/style-guide.md)
365 | - [Advanced Search](docs/search.md)
366 | - [Automation Guide](docs/automation.md)
```
--------------------------------------------------------------------------------
/docs/features/adr-management.md:
--------------------------------------------------------------------------------
```markdown
1 | # ADR Management
2 |
3 | Architecture Decision Records (ADRs) are documents that capture important architectural decisions made along with their context and consequences. MCP Codebase Insight provides comprehensive tools for managing ADRs.
4 |
5 | ## Overview
6 |
7 | The ADR management feature:
8 | - Creates and maintains ADR documents
9 | - Tracks decision history and status
10 | - Links ADRs to code implementations
11 | - Provides templates and workflows
12 | - Enables searching and analysis of past decisions
13 |
14 | ## Features
15 |
16 | ### 1. ADR Creation
17 |
18 | Create new ADRs with structured templates:
19 |
20 | ```python
21 | # Example: Creating a new ADR
22 | response = await client.post(
23 | "http://localhost:3000/api/adrs",
24 | json={
25 | "title": "Use GraphQL for API",
26 | "status": "PROPOSED",
27 | "context": {
28 | "problem": "Need efficient data fetching",
29 | "constraints": [
30 | "Multiple client applications",
31 | "Complex data relationships"
32 | ]
33 | },
34 | "options": [
35 | {
36 | "title": "GraphQL",
37 | "pros": [
38 | "Flexible data fetching",
39 | "Strong typing",
40 | "Built-in documentation"
41 | ],
42 | "cons": [
43 | "Learning curve",
44 | "Complex server setup"
45 | ]
46 | },
47 | {
48 | "title": "REST",
49 | "pros": [
50 | "Simple and familiar",
51 | "Mature ecosystem"
52 | ],
53 | "cons": [
54 | "Over/under fetching",
55 | "Multiple endpoints"
56 | ]
57 | }
58 | ],
59 | "decision": "We will use GraphQL",
60 | "consequences": [
61 | "Need to train team on GraphQL",
62 | "Better client performance",
63 | "Simplified API evolution"
64 | ]
65 | }
66 | )
67 |
68 | adr = response.json()
69 | print(f"Created ADR: {adr['id']}")
70 | ```
71 |
72 | ### 2. ADR Lifecycle Management
73 |
74 | Track and update ADR status:
75 |
76 | ```python
77 | # Update ADR status
78 | response = await client.patch(
79 | f"http://localhost:3000/api/adrs/{adr_id}",
80 | json={
81 | "status": "ACCEPTED",
82 | "metadata": {
83 | "approved_by": "Architecture Board",
84 | "approved_date": "2024-03-26"
85 | }
86 | }
87 | )
88 | ```
89 |
90 | ### 3. ADR Search and Analysis
91 |
92 | Search through existing ADRs:
93 |
94 | ```python
95 | # Search ADRs
96 | response = await client.get(
97 | "http://localhost:3000/api/adrs/search",
98 | params={
99 | "query": "authentication",
100 | "status": "ACCEPTED",
101 | "date_from": "2023-01-01"
102 | }
103 | )
104 |
105 | results = response.json()
106 | for adr in results["adrs"]:
107 | print(f"- {adr['title']} ({adr['status']})")
108 | ```
109 |
110 | ### 4. Code Implementation Tracking
111 |
112 | Link ADRs to code implementations:
113 |
114 | ```python
115 | # Link ADR to code
116 | response = await client.post(
117 | f"http://localhost:3000/api/adrs/{adr_id}/implementations",
118 | json={
119 | "files": ["src/graphql/schema.ts", "src/graphql/resolvers/"],
120 | "pull_request": "https://github.com/org/repo/pull/123",
121 | "status": "IN_PROGRESS"
122 | }
123 | )
124 | ```
125 |
126 | ## Usage
127 |
128 | ### Basic ADR Workflow
129 |
130 | 1. **Create ADR**
131 | ```bash
132 | # Using CLI
133 | mcp-codebase-insight adr new \
134 | --title "Use GraphQL for API" \
135 | --template graphql-decision
136 | ```
137 |
138 | 2. **Review and Collaborate**
139 | ```bash
140 | # Get ADR details
141 | curl http://localhost:3000/api/adrs/{adr_id}
142 |
143 | # Add comments
144 | curl -X POST http://localhost:3000/api/adrs/{adr_id}/comments \
145 | -d '{"text": "Consider Apollo Federation for microservices"}'
146 | ```
147 |
148 | 3. **Update Status**
149 | ```bash
150 | # Update status
151 | curl -X PATCH http://localhost:3000/api/adrs/{adr_id} \
152 | -d '{"status": "ACCEPTED"}'
153 | ```
154 |
155 | 4. **Track Implementation**
156 | ```bash
157 | # Add implementation details
158 | curl -X POST http://localhost:3000/api/adrs/{adr_id}/implementations \
159 | -d '{
160 | "files": ["src/graphql/"],
161 | "status": "COMPLETED",
162 | "metrics": {
163 | "coverage": 95,
164 | "performance_impact": "+12%"
165 | }
166 | }'
167 | ```
168 |
169 | ### ADR Templates
170 |
171 | Create custom ADR templates:
172 |
173 | ```yaml
174 | # templates/adr/microservice-decision.yaml
175 | name: "Microservice Decision Template"
176 | sections:
177 | - title: "Service Boundaries"
178 | required: true
179 | prompts:
180 | - "What domain does this service handle?"
181 | - "What are the integration points?"
182 |
183 | - title: "Data Ownership"
184 | required: true
185 | prompts:
186 | - "What data does this service own?"
187 | - "How is data shared with other services?"
188 |
189 | - title: "Technical Stack"
190 | required: true
191 | subsections:
192 | - "Language & Framework"
193 | - "Database"
194 | - "Message Queue"
195 | - "Deployment Platform"
196 | ```
197 |
198 | ## Configuration
199 |
200 | ### ADR Settings
201 |
202 | ```yaml
203 | adr:
204 | # Storage settings
205 | storage:
206 | path: "./docs/adrs"
207 | format: "markdown"
208 | naming_convention: "YYYY-MM-DD-title"
209 |
210 | # Workflow settings
211 | workflow:
212 | require_approval: true
213 | approvers: ["arch-board"]
214 | auto_number: true
215 |
216 | # Templates
217 | templates:
218 | path: "./templates/adr"
219 | default: "basic-decision"
220 |
221 | # Implementation tracking
222 | implementation:
223 | require_evidence: true
224 | track_metrics: true
225 | ```
226 |
227 | ### Integration Settings
228 |
229 | ```yaml
230 | integrations:
231 | github:
232 | enabled: true
233 | repo: "org/repo"
234 | pr_template: "adr-implementation"
235 | labels: ["architecture", "adr"]
236 |
237 | jira:
238 | enabled: true
239 | project: "ARCH"
240 | issue_type: "Architecture Decision"
241 | ```
242 |
243 | ## Best Practices
244 |
245 | 1. **ADR Creation**
246 | - Use clear, descriptive titles
247 | - Include sufficient context
248 | - Document all considered options
249 | - Be explicit about consequences
250 |
251 | 2. **Review Process**
252 | - Involve stakeholders early
253 | - Document discussions
254 | - Consider technical and business impact
255 | - Set clear acceptance criteria
256 |
257 | 3. **Implementation**
258 | - Link to concrete evidence
259 | - Track metrics and impact
260 | - Update status regularly
261 | - Document deviations
262 |
263 | 4. **Maintenance**
264 | - Review periodically
265 | - Update affected ADRs
266 | - Archive superseded decisions
267 | - Maintain traceability
268 |
269 | ## API Reference
270 |
271 | ### ADR Endpoints
272 |
273 | | Endpoint | Method | Description |
274 | |----------|--------|-------------|
275 | | `/api/adrs` | GET | List all ADRs |
276 | | `/api/adrs` | POST | Create new ADR |
277 | | `/api/adrs/{id}` | GET | Get ADR details |
278 | | `/api/adrs/{id}` | PATCH | Update ADR |
279 | | `/api/adrs/search` | GET | Search ADRs |
280 | | `/api/adrs/{id}/implementations` | POST | Add implementation |
281 | | `/api/adrs/{id}/comments` | POST | Add comment |
282 |
283 | ### Response Format
284 |
285 | ```json
286 | {
287 | "id": "uuid",
288 | "title": "string",
289 | "status": "string",
290 | "context": {
291 | "problem": "string",
292 | "constraints": ["string"]
293 | },
294 | "options": [{
295 | "title": "string",
296 | "pros": ["string"],
297 | "cons": ["string"]
298 | }],
299 | "decision": "string",
300 | "consequences": ["string"],
301 | "metadata": {
302 | "created_at": "datetime",
303 | "updated_at": "datetime",
304 | "created_by": "string",
305 | "approved_by": "string"
306 | },
307 | "implementations": [{
308 | "files": ["string"],
309 | "status": "string",
310 | "metrics": {}
311 | }]
312 | }
313 | ```
314 |
315 | ## Troubleshooting
316 |
317 | ### Common Issues
318 |
319 | 1. **Template Not Found**
320 | ```bash
321 | # Check template directory
322 | ls -l templates/adr/
323 |
324 | # Verify template path in config
325 | cat config.yaml | grep template
326 | ```
327 |
328 | 2. **Permission Issues**
329 | ```bash
330 | # Fix ADR directory permissions
331 | chmod -R 755 docs/adrs/
332 | ```
333 |
334 | 3. **Integration Errors**
335 | ```bash
336 | # Check integration status
337 | curl http://localhost:3000/api/status/integrations
338 | ```
339 |
340 | ## Next Steps
341 |
342 | - [ADR Templates Guide](adr/templates.md)
343 | - [Integration Setup](../integration/index.md)
344 | - [Workflow Customization](adr/workflow.md)
345 | - [Metrics and Reporting](adr/metrics.md)
```
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/errors.py:
--------------------------------------------------------------------------------
```python
1 | """Error handling module."""
2 |
3 | from enum import Enum
4 | from typing import Any, Dict, Optional
5 |
class ErrorCode(str, Enum):
    """Machine-readable error codes used in API error responses."""

    # Generic failures
    INTERNAL_ERROR = "internal_error"
    INVALID_REQUEST = "invalid_request"
    NOT_FOUND = "not_found"
    ALREADY_EXISTS = "already_exists"
    VALIDATION_ERROR = "validation_error"

    # Failures scoped to a single server component
    VECTOR_STORE_ERROR = "vector_store_error"
    EMBEDDING_ERROR = "embedding_error"
    CACHE_ERROR = "cache_error"
    TASK_ERROR = "task_error"
    ADR_ERROR = "adr_error"
    DOCUMENTATION_ERROR = "documentation_error"
    DEBUG_ERROR = "debug_error"
    PROMPT_ERROR = "prompt_error"

    # Resource availability
    RESOURCE_NOT_FOUND = "resource_not_found"
    RESOURCE_UNAVAILABLE = "resource_unavailable"
    RESOURCE_EXHAUSTED = "resource_exhausted"

    # Authentication / authorization
    UNAUTHORIZED = "unauthorized"
    FORBIDDEN = "forbidden"
    TOKEN_EXPIRED = "token_expired"

    # Throttling
    RATE_LIMITED = "rate_limited"
    QUOTA_EXCEEDED = "quota_exceeded"

    # Configuration problems
    CONFIG_ERROR = "config_error"
    MISSING_CONFIG = "missing_config"
    INVALID_CONFIG = "invalid_config"

class BaseError(Exception):
    """Root of the package's exception hierarchy.

    Pairs a structured ErrorCode with a human-readable message and an
    optional details payload so every error can be serialized into the
    standard API response shape via to_dict().
    """

    def __init__(
        self,
        code: ErrorCode,
        message: str,
        details: Optional[Dict[str, Any]] = None
    ):
        """Record code/message/details and initialize Exception with the message."""
        super().__init__(message)
        self.code = code
        self.message = message
        # Falsy details (None or empty) normalize to a fresh empty dict.
        self.details = details if details else {}

    def to_dict(self) -> Dict[str, Any]:
        """Serialize this error into the API error-response dictionary."""
        return {"code": self.code, "message": self.message, "details": self.details}
67 |
class InternalError(BaseError):
    """Unexpected server-side failure."""

    def __init__(self, message: str = "Internal server error",
                 details: Optional[Dict[str, Any]] = None):
        super().__init__(ErrorCode.INTERNAL_ERROR, message, details)

class InvalidRequestError(BaseError):
    """Request was malformed or semantically invalid."""

    def __init__(self, message: str = "Invalid request",
                 details: Optional[Dict[str, Any]] = None):
        super().__init__(ErrorCode.INVALID_REQUEST, message, details)

class NotFoundError(BaseError):
    """Requested entity does not exist."""

    def __init__(self, message: str = "Resource not found",
                 details: Optional[Dict[str, Any]] = None):
        super().__init__(ErrorCode.NOT_FOUND, message, details)

class AlreadyExistsError(BaseError):
    """Entity being created already exists."""

    def __init__(self, message: str = "Resource already exists",
                 details: Optional[Dict[str, Any]] = None):
        super().__init__(ErrorCode.ALREADY_EXISTS, message, details)

class ValidationError(BaseError):
    """Input failed validation rules."""

    def __init__(self, message: str = "Validation error",
                 details: Optional[Dict[str, Any]] = None):
        super().__init__(ErrorCode.VALIDATION_ERROR, message, details)

class VectorStoreError(BaseError):
    """Vector store (Qdrant) operation failed."""

    def __init__(self, message: str = "Vector store error",
                 details: Optional[Dict[str, Any]] = None):
        super().__init__(ErrorCode.VECTOR_STORE_ERROR, message, details)

class EmbeddingError(BaseError):
    """Embedding generation failed."""

    def __init__(self, message: str = "Embedding error",
                 details: Optional[Dict[str, Any]] = None):
        super().__init__(ErrorCode.EMBEDDING_ERROR, message, details)

class CacheError(BaseError):
    """Cache operation failed."""

    def __init__(self, message: str = "Cache error",
                 details: Optional[Dict[str, Any]] = None):
        super().__init__(ErrorCode.CACHE_ERROR, message, details)

class TaskError(BaseError):
    """Task management operation failed."""

    def __init__(self, message: str = "Task error",
                 details: Optional[Dict[str, Any]] = None):
        super().__init__(ErrorCode.TASK_ERROR, message, details)

class ADRError(BaseError):
    """ADR management operation failed."""

    def __init__(self, message: str = "ADR error",
                 details: Optional[Dict[str, Any]] = None):
        super().__init__(ErrorCode.ADR_ERROR, message, details)

class DocumentationError(BaseError):
    """Documentation operation failed."""

    def __init__(self, message: str = "Documentation error",
                 details: Optional[Dict[str, Any]] = None):
        super().__init__(ErrorCode.DOCUMENTATION_ERROR, message, details)

class DebugError(BaseError):
    """Debug system operation failed."""

    def __init__(self, message: str = "Debug error",
                 details: Optional[Dict[str, Any]] = None):
        super().__init__(ErrorCode.DEBUG_ERROR, message, details)

class PromptError(BaseError):
    """Prompt handling failed."""

    def __init__(self, message: str = "Prompt error",
                 details: Optional[Dict[str, Any]] = None):
        super().__init__(ErrorCode.PROMPT_ERROR, message, details)

class ConfigError(BaseError):
    """Configuration is invalid or unusable."""

    def __init__(self, message: str = "Configuration error",
                 details: Optional[Dict[str, Any]] = None):
        super().__init__(ErrorCode.CONFIG_ERROR, message, details)

class UnauthorizedError(BaseError):
    """Caller is not authenticated."""

    def __init__(self, message: str = "Unauthorized",
                 details: Optional[Dict[str, Any]] = None):
        super().__init__(ErrorCode.UNAUTHORIZED, message, details)

class ForbiddenError(BaseError):
    """Caller is authenticated but not allowed."""

    def __init__(self, message: str = "Forbidden",
                 details: Optional[Dict[str, Any]] = None):
        super().__init__(ErrorCode.FORBIDDEN, message, details)

class RateLimitedError(BaseError):
    """Caller exceeded the request rate limit."""

    def __init__(self, message: str = "Rate limited",
                 details: Optional[Dict[str, Any]] = None):
        super().__init__(ErrorCode.RATE_LIMITED, message, details)

class ResourceNotFoundError(BaseError):
    """A required resource is missing."""

    def __init__(self, message: str = "Resource not found",
                 details: Optional[Dict[str, Any]] = None):
        super().__init__(ErrorCode.RESOURCE_NOT_FOUND, message, details)

class ProcessingError(BaseError):
    """Generic processing failure."""

    def __init__(self, message: str = "Processing error",
                 details: Optional[Dict[str, Any]] = None):
        # ErrorCode has no dedicated processing code, so this is
        # reported to clients as internal_error.
        super().__init__(ErrorCode.INTERNAL_ERROR, message, details)
276 |
def handle_error(error: Exception) -> Dict[str, Any]:
    """Convert any exception into the standard API error-response dict."""
    if isinstance(error, BaseError):
        # Domain errors know their own serialization.
        return error.to_dict()
    # Anything else is wrapped as a generic internal_error with the
    # exception text as the message.
    fallback: Dict[str, Any] = {
        "code": ErrorCode.INTERNAL_ERROR,
        "message": str(error),
        "details": {},
    }
    return fallback
287 |
```
--------------------------------------------------------------------------------
/tests/components/test_stdio_components.py:
--------------------------------------------------------------------------------
```python
1 | import asyncio
2 | import json
3 | import pytest
4 | from unittest.mock import MagicMock, AsyncMock, patch
5 | from io import StringIO
6 |
class MockStdinReader:
    """Async-compatible stand-in for stdin, backed by a scripted string."""

    def __init__(self, input_data):
        # Buffer the scripted input so readline() can consume it line by line.
        self.input_stream = StringIO(input_data)

    async def readline(self):
        """Return the next line (newline included), or '' at end of input."""
        return self.input_stream.readline()
13 |
class MockStdoutWriter:
    """Async-compatible stand-in for stdout that captures everything written."""

    def __init__(self):
        self.output = StringIO()

    async def write(self, data):
        """Append data to the captured output."""
        self.output.write(data)

    async def drain(self):
        """No-op; a real stream writer would flush buffered data here."""
        pass

    def get_output(self):
        """Return everything written so far as a single string."""
        return self.output.getvalue()
26 |
@pytest.fixture
async def mock_stdio():
    """Provide a (reader, writer) pair with a scripted registration message."""
    # Scripted input consumed by test_stdio_registration.
    input_data = '{"type": "register", "tool_id": "test_tool"}\n'
    reader = MockStdinReader(input_data)
    writer = MockStdoutWriter()
    # NOTE(review): a plain @pytest.fixture on an async def hands consumers the
    # coroutine object, so tests here must `await mock_stdio` to get the tuple.
    # Project convention elsewhere is @pytest_asyncio.fixture — confirm before
    # changing, since the awaiting tests depend on the current behavior.
    return reader, writer
33 |
@pytest.mark.asyncio
async def test_stdio_registration(mock_stdio):
    """Tool registration handshake: parse the register message and ack it."""
    # The fixture yields a coroutine; awaiting it produces (reader, writer).
    reader, writer = await mock_stdio

    raw = await reader.readline()
    request = json.loads(raw)

    # The scripted message must be a registration for the expected tool.
    assert request["type"] == "register"
    assert request["tool_id"] == "test_tool"

    # Acknowledge the registration back over the mock stdout.
    ack = {
        "type": "registration_success",
        "tool_id": request["tool_id"]
    }
    await writer.write(json.dumps(ack) + "\n")

    captured = writer.get_output()
    assert "registration_success" in captured
    assert request["tool_id"] in captured
57 |
@pytest.mark.asyncio
async def test_stdio_message_streaming():
    """Multiple newline-delimited JSON messages are all received intact."""
    input_messages = [
        {"type": "request", "id": "1", "method": "test", "params": {}},
        {"type": "request", "id": "2", "method": "test", "params": {}},
    ]
    payload = "\n".join(json.dumps(msg) for msg in input_messages) + "\n"

    reader = MockStdinReader(payload)
    writer = MockStdoutWriter()

    # Drain the stream until readline() signals EOF with an empty string.
    received = []
    while line := await reader.readline():
        received.append(json.loads(line))

    assert len(received) == len(input_messages)
    assert all(msg["type"] == "request" for msg in received)
82 |
@pytest.mark.asyncio
async def test_stdio_error_handling():
    """Test error handling in stdio communication.

    Feeds a non-JSON line through the mock stdin and verifies the decode
    failure is reported as a structured error message on the mock stdout.
    """
    reader = MockStdinReader("invalid json\n")
    writer = MockStdoutWriter()

    line = await reader.readline()
    try:
        # FIX: the result and the exception were previously bound to unused
        # variables; a successful parse also silently skipped the error path.
        json.loads(line)
        pytest.fail("Expected invalid JSON to raise JSONDecodeError")
    except json.JSONDecodeError:
        # Report the malformed input as a structured error response.
        error_response = {
            "type": "error",
            "error": "Invalid JSON format"
        }
        await writer.write(json.dumps(error_response) + "\n")

    output = writer.get_output()
    assert "error" in output
    assert "Invalid JSON format" in output
102 |
@pytest.mark.asyncio
async def test_stdio_message_ordering():
    """Test message ordering and response correlation."""
    # Three requests carrying explicit sequence numbers.
    requests = [
        {"type": "request", "id": "1", "sequence": 1},
        {"type": "request", "id": "2", "sequence": 2},
        {"type": "request", "id": "3", "sequence": 3}
    ]
    stream = "".join(json.dumps(msg) + "\n" for msg in requests)

    reader = MockStdinReader(stream)
    writer = MockStdoutWriter()

    # Echo one response per request, asserting requests arrive in order.
    expected_seq = 1
    while line := await reader.readline():
        incoming = json.loads(line)
        assert incoming["sequence"] == expected_seq

        reply = {
            "type": "response",
            "id": incoming["id"],
            "sequence": expected_seq
        }
        await writer.write(json.dumps(reply) + "\n")
        expected_seq += 1

    # Responses must have been emitted in the same sequence order.
    emitted = [json.loads(chunk) for chunk in writer.get_output().strip().split("\n")]
    assert all(reply["sequence"] == pos + 1 for pos, reply in enumerate(emitted))
139 |
@pytest.mark.asyncio
async def test_stdio_large_message():
    """Test handling of large messages via stdio."""
    # 1MB payload to exercise large single-line messages.
    payload = "x" * (1024 * 1024)
    request = {
        "type": "request",
        "id": "large",
        "data": payload
    }

    reader = MockStdinReader(json.dumps(request) + "\n")
    writer = MockStdoutWriter()

    # The whole message must round-trip through readline + JSON intact.
    decoded = json.loads(await reader.readline())
    assert len(decoded["data"]) == len(payload)
    assert decoded["data"] == payload

    # Echo an equally large response back out.
    reply = {
        "type": "response",
        "id": decoded["id"],
        "data": payload
    }
    await writer.write(json.dumps(reply) + "\n")

    # The written response must parse back with the full payload.
    echoed = json.loads(writer.get_output())
    assert len(echoed["data"]) == len(payload)
174 |
@pytest.mark.asyncio
async def test_stdio_buffer_overflow_handling():
    """Test handling of buffer overflow in stdio communication."""
    # 10MB payload — large enough to stress buffers without exhausting memory.
    payload = "x" * (10 * 1024 * 1024)
    request = {
        "type": "request",
        "id": "overflow_test",
        "data": payload
    }
    reader = MockStdinReader(json.dumps(request) + "\n")
    writer = MockStdoutWriter()

    line = await reader.readline()
    try:
        decoded = json.loads(line)
        assert len(decoded["data"]) == len(payload)

        # Acknowledge receipt, reporting the observed payload size.
        reply = {
            "type": "response",
            "id": decoded["id"],
            "status": "received",
            "data_size": len(decoded["data"])
        }
        await writer.write(json.dumps(reply) + "\n")

        captured = writer.get_output()
        assert "received" in captured
        assert str(len(payload)) in captured
    except json.JSONDecodeError:
        pytest.fail("Failed to parse large JSON message")
    except MemoryError:
        pytest.fail("Memory error when processing large message")
203 |
@pytest.mark.asyncio
async def test_stdio_component_unavailability():
    """Test stdio behavior when a required component is unavailable."""
    reader = MockStdinReader(
        '{"type": "request", "id": "test", "method": "unavailable_component", "params": {}}\n'
    )
    writer = MockStdoutWriter()

    request = json.loads(await reader.readline())

    # Simulate the requested component being down.
    component_available = False
    if component_available:
        reply = {
            "type": "response",
            "id": request["id"],
            "result": "success"
        }
    else:
        reply = {
            "type": "error",
            "id": request["id"],
            "error": "Component unavailable",
            "code": "COMPONENT_UNAVAILABLE"
        }
    await writer.write(json.dumps(reply) + "\n")

    # The client must see a structured, machine-readable error.
    captured = writer.get_output()
    assert "error" in captured
    assert "Component unavailable" in captured
    assert "COMPONENT_UNAVAILABLE" in captured
230 |
@pytest.mark.asyncio
async def test_stdio_protocol_version_check():
    """Test handling of protocol version mismatches in stdio communication."""
    reader = MockStdinReader(
        '{"type": "init", "protocol_version": "1.0", "client_id": "test_client"}\n'
    )
    writer = MockStdoutWriter()
    supported_versions = ["2.0", "2.1"]

    init_msg = json.loads(await reader.readline())
    client_version = init_msg.get("protocol_version", "unknown")

    # Accept only clients speaking one of the supported protocol versions.
    if client_version in supported_versions:
        reply = {
            "type": "init_success",
            "server_version": supported_versions[-1]
        }
    else:
        reply = {
            "type": "init_error",
            "error": "Incompatible protocol version",
            "supported_versions": supported_versions
        }
    await writer.write(json.dumps(reply) + "\n")

    # A 1.0 client must be rejected and told which versions are supported.
    captured = writer.get_output()
    assert "init_error" in captured
    assert "Incompatible protocol version" in captured
    assert all(version in captured for version in supported_versions)
```
--------------------------------------------------------------------------------
/tests/components/test_knowledge_base.py:
--------------------------------------------------------------------------------
```python
1 | import sys
2 | import os
3 |
4 | # Ensure the src directory is in the Python path
5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
6 |
7 | import pytest
8 | import pytest_asyncio
9 | from pathlib import Path
10 | from typing import AsyncGenerator
11 | from src.mcp_codebase_insight.core.knowledge import KnowledgeBase, PatternType, PatternConfidence
12 | from src.mcp_codebase_insight.core.config import ServerConfig
13 | from src.mcp_codebase_insight.core.vector_store import VectorStore
14 |
@pytest_asyncio.fixture
async def knowledge_base(test_config: ServerConfig, vector_store: VectorStore):
    """Yield an initialized KnowledgeBase backed by the shared test vector store.

    ``cleanup()`` runs during fixture teardown, after the consuming test
    finishes.
    """
    kb = KnowledgeBase(test_config, vector_store)
    await kb.initialize()
    yield kb
    await kb.cleanup()
21 |
@pytest.mark.asyncio
async def test_knowledge_base_initialization(knowledge_base: KnowledgeBase):
    """Test that knowledge base initializes correctly."""
    # The fixture must deliver a knowledge base wired to its dependencies.
    assert knowledge_base is not None
    assert knowledge_base.vector_store is not None
    assert knowledge_base.config is not None
28 |
@pytest.mark.asyncio
async def test_add_and_get_pattern(knowledge_base: KnowledgeBase):
    """Test adding and retrieving patterns."""
    name = "Test Pattern"
    description = "A test pattern"
    content = "def test(): pass"  # stored as 'content' to match the implementation
    tags = ["test", "example"]

    stored = await knowledge_base.add_pattern(
        name=name,
        type=PatternType.CODE,
        description=description,
        content=content,
        confidence=PatternConfidence.MEDIUM,
        tags=tags
    )

    # A persisted pattern must receive an identifier.
    assert stored.id is not None

    # Fetching by id must return the same metadata.
    fetched = await knowledge_base.get_pattern(stored.id)
    assert fetched.name == name
    assert fetched.description == description
55 |
@pytest.mark.asyncio
async def test_find_similar_patterns(knowledge_base: KnowledgeBase):
    """Test finding similar patterns."""
    # Seed the knowledge base with two related code patterns.
    seeds = [
        ("Test Pattern 1", "First test pattern", "def test1(): pass"),
        ("Test Pattern 2", "Second test pattern", "def test2(): pass")
    ]
    for name, description, content in seeds:
        await knowledge_base.add_pattern(
            name=name,
            type=PatternType.CODE,
            description=description,
            content=content,
            confidence=PatternConfidence.MEDIUM,
            tags=["test"]
        )

    # A semantic query about test patterns should surface at least one match.
    matches = await knowledge_base.find_similar_patterns("test pattern")
    assert len(matches) > 0
94 |
@pytest.mark.asyncio
async def test_update_pattern(knowledge_base: KnowledgeBase):
    """Test updating patterns."""
    original_name = "Original Pattern"

    # Create the pattern that will be modified.
    created = await knowledge_base.add_pattern(
        name=original_name,
        type=PatternType.CODE,
        description="Original description",
        content="def original(): pass",
        confidence=PatternConfidence.MEDIUM,
        tags=["original"]
    )

    # Apply new description/content/tags (update_pattern takes no name).
    new_description = "Updated description"
    await knowledge_base.update_pattern(
        pattern_id=created.id,
        description=new_description,
        content="def updated(): pass",
        tags=["updated"]
    )

    # The name is not touched by update_pattern; the description is.
    fetched = await knowledge_base.get_pattern(created.id)
    assert fetched.name == original_name
    assert fetched.description == new_description
135 |
@pytest.mark.asyncio
async def test_delete_pattern(knowledge_base: KnowledgeBase):
    """Test deleting patterns.

    Verifies the pattern exists before deletion and is gone afterwards.
    ``get_pattern`` may either return None or raise for a missing pattern;
    both outcomes are treated as "deleted".
    """
    pattern = await knowledge_base.add_pattern(
        name="Pattern to Delete",
        type=PatternType.CODE,
        description="This pattern will be deleted",
        content="def to_be_deleted(): pass",
        confidence=PatternConfidence.MEDIUM,
        tags=["delete", "test"]
    )

    # Sanity check: the pattern is retrievable before deletion.
    retrieved_before = await knowledge_base.get_pattern(pattern.id)
    assert retrieved_before is not None

    # Delete the pattern.
    await knowledge_base.delete_pattern(pattern.id)

    # BUG FIX: the previous version asserted inside a try/except that swallowed
    # AssertionError (`except Exception: pass`), so this test could never fail.
    # Normalize "get_pattern raises" to None first, then assert OUTSIDE the
    # exception handler.
    try:
        retrieved_after = await knowledge_base.get_pattern(pattern.id)
    except Exception:
        retrieved_after = None
    assert retrieved_after is None, "Pattern should have been deleted"
170 |
@pytest.mark.asyncio
async def test_search_patterns_by_tag(knowledge_base: KnowledgeBase):
    """Test searching patterns by tag."""
    # One pattern per exclusive tag, both sharing the "common" tag.
    first = await knowledge_base.add_pattern(
        name="Tag1 Pattern",
        type=PatternType.CODE,
        description="Pattern with tag1",
        content="def tag1_function(): pass",
        confidence=PatternConfidence.HIGH,
        tags=["tag1", "common"]
    )
    second = await knowledge_base.add_pattern(
        name="Tag2 Pattern",
        type=PatternType.CODE,
        description="Pattern with tag2",
        content="def tag2_function(): pass",
        confidence=PatternConfidence.HIGH,
        tags=["tag2", "common"]
    )

    def result_ids(results):
        # Collect result ids for cheap membership checks.
        return {p.id for p in results}

    # Each exclusive tag must match only its own pattern.
    found = result_ids(await knowledge_base.search_patterns(tags=["tag1"]))
    assert first.id in found
    assert second.id not in found

    found = result_ids(await knowledge_base.search_patterns(tags=["tag2"]))
    assert second.id in found
    assert first.id not in found

    # The shared tag must match both patterns.
    found = result_ids(await knowledge_base.search_patterns(tags=["common"]))
    assert first.id in found
    assert second.id in found
207 |
@pytest.mark.asyncio
async def test_pattern_versioning(knowledge_base: KnowledgeBase):
    """Test pattern versioning functionality."""
    # Start from version 1 and apply two updates on top of it.
    pattern = await knowledge_base.add_pattern(
        name="Versioned Pattern",
        type=PatternType.CODE,
        description="Initial version",
        content="def version1(): pass",
        confidence=PatternConfidence.MEDIUM,
        tags=["versioned"]
    )
    for revision in (2, 3):
        await knowledge_base.update_pattern(
            pattern_id=pattern.id,
            description=f"Version {revision}",
            content=f"def version{revision}(): pass"
        )

    # Reads must always observe the most recent revision.
    current = await knowledge_base.get_pattern(pattern.id)
    assert current.description == "Version 3"
    assert "version3" in current.content

    # Version history is an optional API; only verify it when present.
    try:
        versions = await knowledge_base.get_pattern_versions(pattern.id)
        if versions and len(versions) > 1:
            assert len(versions) >= 3, "Should have at least 3 versions"
            assert any("Version 2" in v.description for v in versions)
            assert any("Initial version" in v.description for v in versions)
    except (AttributeError, NotImplementedError):
        # Versioning might not be implemented, which is fine.
        pass
```
--------------------------------------------------------------------------------
/test_fix_helper.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | """
3 | A utility script to help fix common test issues in the MCP Codebase Insight project.
4 | This script can:
5 | 1. Update import paths in all test files
6 | 2. Check for proper dependencies
7 | 3. Set up proper Python path in conftest.py files
8 | """
9 |
10 | import os
11 | import re
12 | import sys
13 | import importlib
14 | import subprocess
15 | from pathlib import Path
16 | from typing import List, Tuple, Dict, Optional
17 |
18 |
def add_python_path_to_conftest(conftest_path: str) -> bool:
    """Ensure *conftest_path* puts the project root on ``sys.path``.

    Inserts a ``sys.path.insert`` snippet (plus ``import sys``/``import os``
    when the file lacks them) right after the last top-level import.

    Returns True on success or when the path is already configured,
    False when the file does not exist.
    """
    path_line = "sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))"

    if not os.path.exists(conftest_path):
        print(f"Error: {conftest_path} does not exist")
        return False

    with open(conftest_path, 'r') as f:
        content = f.read()

    # Nothing to do when the path tweak is already present.
    if path_line in content:
        print(f"Python path already set in {conftest_path}")
        return True

    # Collect any import statements the snippet relies on but the file lacks.
    missing_imports = [stmt for stmt in ("import sys", "import os") if stmt not in content]

    # Insert immediately after the last top-level import (or at the top).
    lines = content.split('\n')
    insert_at = 0
    for index, line in enumerate(lines):
        if line.startswith('import ') or line.startswith('from '):
            insert_at = index + 1

    snippet = f"\n# Ensure the src directory is in the Python path\n{path_line}\n"
    if missing_imports:
        snippet = "\n" + "\n".join(missing_imports) + snippet

    updated = '\n'.join(lines[:insert_at]) + snippet + '\n'.join(lines[insert_at:])

    with open(conftest_path, 'w') as f:
        f.write(updated)

    print(f"Added Python path setting to {conftest_path}")
    return True
65 |
66 |
def fix_imports_in_file(file_path: str) -> Tuple[int, int]:
    """Rewrite ``from mcp_codebase_insight....`` imports to use the ``src.`` prefix.

    Also injects a ``sys.path`` bootstrap block before the first import when
    the file lacks one. Returns ``(num_replacements, num_files_modified)``,
    where the second element is 1 when the file was rewritten and 0 otherwise.
    """
    # Read the file, preferring UTF-8 but tolerating legacy encodings.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        try:
            with open(file_path, 'r', encoding='latin-1') as f:
                content = f.read()
        except Exception as e:
            print(f"Error reading {file_path}: {e}")
            return 0, 0

    # Count offending imports; bail out early when there are none.
    bad_import = r'from\s+mcp_codebase_insight\.'
    hits = re.findall(bad_import, content)
    if not hits:
        return 0, 0

    fixed = re.sub(bad_import, 'from src.mcp_codebase_insight.', content)

    # Inject the sys.path bootstrap ahead of the first import if absent.
    if 'sys.path.insert' not in fixed:
        bootstrap = (
            "import sys\n"
            "import os\n\n"
            "# Ensure the src directory is in the Python path\n"
            "sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))\n\n"
        )

        lines = fixed.split('\n')
        first_import = 0
        for idx, line in enumerate(lines):
            if line.startswith('import ') or line.startswith('from '):
                first_import = idx
                break

        fixed = '\n'.join(lines[:first_import]) + '\n' + bootstrap + '\n'.join(lines[first_import:])

    # Write back, falling back to latin-1 when UTF-8 cannot encode the text.
    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(fixed)
    except UnicodeEncodeError:
        with open(file_path, 'w', encoding='latin-1') as f:
            f.write(fixed)

    return len(hits), 1
121 |
122 |
def find_and_fix_test_files(root_dir: str = '.') -> Tuple[int, int]:
    """Walk *root_dir*, fixing imports in test files and updating conftests.

    Returns ``(total_replacements, total_files_modified)``.
    """
    test_files = []
    conftest_files = []

    # Separate ordinary test modules from conftest.py files while walking.
    for dirpath, _, filenames in os.walk(root_dir):
        for filename in filenames:
            full_path = os.path.join(dirpath, filename)
            if filename == 'conftest.py':
                conftest_files.append(full_path)
            elif filename.startswith('test_') and filename.endswith('.py'):
                test_files.append(full_path)

    total_replacements = 0
    total_files_modified = 0

    # Rewrite imports in each test module, reporting per-file fixes.
    for path in test_files:
        replaced, modified = fix_imports_in_file(path)
        total_replacements += replaced
        total_files_modified += modified
        if replaced > 0:
            print(f"Fixed {replaced} imports in {path}")

    # Ensure every conftest bootstraps the Python path.
    for path in conftest_files:
        if add_python_path_to_conftest(path):
            total_files_modified += 1

    return total_replacements, total_files_modified
153 |
154 |
def check_dependencies() -> bool:
    """Report which required packages are importable.

    Prints a status line per package and a summary of anything missing.
    Returns True when every requirement is installed.
    """
    required_packages = [
        'sentence-transformers',
        'torch',
        'fastapi',
        'qdrant-client',
        'pytest',
        'pytest-asyncio'
    ]

    missing_packages = []
    for package in required_packages:
        # Distribution names use '-', module names use '_'.
        module_name = package.replace('-', '_')
        try:
            importlib.import_module(module_name)
        except ImportError:
            missing_packages.append(package)
            print(f"❌ {package} is NOT installed")
        else:
            print(f"✅ {package} is installed")

    if not missing_packages:
        return True

    print("\nMissing packages:")
    for package in missing_packages:
        print(f"- {package}")
    return False
183 |
184 |
def install_dependencies() -> bool:
    """Install project requirements via pip; return True on success."""
    command = [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]
    try:
        # check=True raises CalledProcessError on a non-zero pip exit code.
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError:
        print("Failed to install dependencies from requirements.txt")
        return False
    return True
193 |
194 |
def create_path_fix_script() -> bool:
    """Write an executable helper script that runs tests with proper setup."""
    script_content = """#!/bin/bash
# This script runs tests with proper path and environment setup

set -e

# Activate the virtual environment (or create it if it doesn't exist)
if [ ! -d ".venv" ]; then
    echo "Creating virtual environment..."
    python3 -m venv .venv
fi

source .venv/bin/activate

# Install required dependencies
echo "Installing required dependencies..."
pip install -e .
pip install pytest pytest-asyncio

# Set environment variables
export MCP_TEST_MODE=1
export QDRANT_URL="http://localhost:6333"
export MCP_COLLECTION_NAME="test_collection_$(date +%s)"
export PYTHONPATH="$PYTHONPATH:$(pwd)"

# Run the tests
echo "Running tests..."
python -m pytest "$@"
"""

    script_path = 'run_fixed_tests.sh'

    # Write the helper and mark it executable (rwxr-xr-x).
    with open(script_path, 'w') as f:
        f.write(script_content)
    os.chmod(script_path, 0o755)

    print(f"Created {script_path} - use it to run tests with proper path setup")
    return True
235 |
236 |
def main():
    """Run the full fix workflow: imports, dependency check, helper script."""
    print("=== MCP Codebase Insight Test Fix Helper ===\n")

    # Step 1: repair import paths across the test suite.
    print("Fixing import paths in test files...")
    replacements, files_modified = find_and_fix_test_files()
    print(f"Fixed {replacements} imports in {files_modified} files\n")

    # Step 2: verify dependencies, offering to install anything missing.
    print("Checking dependencies...")
    if not check_dependencies():
        print("\nWould you like to install missing dependencies? (y/n)")
        if input().strip().lower() == 'y':
            install_dependencies()

    # Step 3: drop the helper script used to run tests with the correct env.
    print("\nCreating test runner script...")
    create_path_fix_script()

    print("\n=== Fixes Complete ===")
    print("""
Next steps:
1. Run the tests using: ./run_fixed_tests.sh [test_options]
   e.g., ./run_fixed_tests.sh tests/components/test_vector_store.py -v

2. If Qdrant collection creation fails, check the Docker container:
   docker run -d -p 6333:6333 -p 6334:6334 -v $(pwd)/qdrant_data:/qdrant/storage qdrant/qdrant

3. If specific tests still fail, check their requirements individually
""")
270 |
271 | if __name__ == "__main__":
272 | main()
273 |
```
--------------------------------------------------------------------------------
/.compile-venv-py3.11/bin/Activate.ps1:
--------------------------------------------------------------------------------
```
1 | <#
2 | .Synopsis
3 | Activate a Python virtual environment for the current PowerShell session.
4 |
5 | .Description
6 | Pushes the python executable for a virtual environment to the front of the
7 | $Env:PATH environment variable and sets the prompt to signify that you are
8 | in a Python virtual environment. Makes use of the command line switches as
9 | well as the `pyvenv.cfg` file values present in the virtual environment.
10 |
11 | .Parameter VenvDir
12 | Path to the directory that contains the virtual environment to activate. The
13 | default value for this is the parent of the directory that the Activate.ps1
14 | script is located within.
15 |
16 | .Parameter Prompt
17 | The prompt prefix to display when this virtual environment is activated. By
18 | default, this prompt is the name of the virtual environment folder (VenvDir)
19 | surrounded by parentheses and followed by a single space (ie. '(.venv) ').
20 |
21 | .Example
22 | Activate.ps1
23 | Activates the Python virtual environment that contains the Activate.ps1 script.
24 |
25 | .Example
26 | Activate.ps1 -Verbose
27 | Activates the Python virtual environment that contains the Activate.ps1 script,
28 | and shows extra information about the activation as it executes.
29 |
30 | .Example
31 | Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
32 | Activates the Python virtual environment located in the specified location.
33 |
34 | .Example
35 | Activate.ps1 -Prompt "MyPython"
36 | Activates the Python virtual environment that contains the Activate.ps1 script,
37 | and prefixes the current prompt with the specified string (surrounded in
38 | parentheses) while the virtual environment is active.
39 |
40 | .Notes
41 | On Windows, it may be required to enable this Activate.ps1 script by setting the
42 | execution policy for the user. You can do this by issuing the following PowerShell
43 | command:
44 |
45 | PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
46 |
47 | For more information on Execution Policies:
48 | https://go.microsoft.com/fwlink/?LinkID=135170
49 |
50 | #>
Param(
    # Optional: explicit venv directory; defaults to this script's parent folder.
    [Parameter(Mandatory = $false)]
    [String]
    $VenvDir,
    # Optional: prompt prefix; defaults to pyvenv.cfg's 'prompt' or the folder name.
    [Parameter(Mandatory = $false)]
    [String]
    $Prompt
)
59 |
60 | <# Function declarations --------------------------------------------------- #>
61 |
62 | <#
63 | .Synopsis
64 | Remove all shell session elements added by the Activate script, including the
65 | addition of the virtual environment's Python executable from the beginning of
66 | the PATH variable.
67 |
68 | .Parameter NonDestructive
69 | If present, do not remove this function from the global namespace for the
70 | session.
71 |
72 | #>
function global:deactivate ([switch]$NonDestructive) {
    # Revert to original values
    # (each _OLD_VIRTUAL_* backup was saved by the activation code below
    # before being overwritten; restore only what actually exists)

    # The prior prompt:
    if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
        Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
        Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
    }

    # The prior PYTHONHOME:
    if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
        Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
        Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
    }

    # The prior PATH:
    if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
        Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
        Remove-Item -Path Env:_OLD_VIRTUAL_PATH
    }

    # Just remove the VIRTUAL_ENV altogether:
    if (Test-Path -Path Env:VIRTUAL_ENV) {
        Remove-Item -Path env:VIRTUAL_ENV
    }

    # Just remove VIRTUAL_ENV_PROMPT altogether.
    if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
        Remove-Item -Path env:VIRTUAL_ENV_PROMPT
    }

    # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
    if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
        Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
    }

    # Leave deactivate function in the global namespace if requested:
    # (activation calls `deactivate -nondestructive` and needs it to survive)
    if (-not $NonDestructive) {
        Remove-Item -Path function:deactivate
    }
}
114 |
115 | <#
116 | .Description
117 | Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
118 | given folder, and returns them in a map.
119 |
120 | For each line in the pyvenv.cfg file, if that line can be parsed into exactly
121 | two strings separated by `=` (with any amount of whitespace surrounding the =)
122 | then it is considered a `key = value` line. The left hand string is the key,
123 | the right hand is the value.
124 |
125 | If the value starts with a `'` or a `"` then the first and last character is
126 | stripped from the value before being captured.
127 |
128 | .Parameter ConfigDir
129 | Path to the directory that contains the `pyvenv.cfg` file.
130 | #>
function Get-PyVenvConfig(
    [String]
    $ConfigDir
) {
    Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"

    # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
    # -Resolve with -ErrorAction Continue leaves $pyvenvConfigPath unset when the file is missing.
    $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue

    # An empty map will be returned if no config file is found.
    $pyvenvConfig = @{ }

    if ($pyvenvConfigPath) {

        Write-Verbose "File exists, parse `key = value` lines"
        $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath

        $pyvenvConfigContent | ForEach-Object {
            # Split on the FIRST '=' only (limit 2), trimming surrounding whitespace.
            $keyval = $PSItem -split "\s*=\s*", 2
            if ($keyval[0] -and $keyval[1]) {
                $val = $keyval[1]

                # Remove extraneous quotations around a string value.
                if ("'""".Contains($val.Substring(0, 1))) {
                    $val = $val.Substring(1, $val.Length - 2)
                }

                $pyvenvConfig[$keyval[0]] = $val
                Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
            }
        }
    }
    return $pyvenvConfig
}
165 |
166 |
167 | <# Begin Activate script --------------------------------------------------- #>
168 |
# Determine the containing directory of this script
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
$VenvExecDir = Get-Item -Path $VenvExecPath

Write-Verbose "Activation script is located in path: '$VenvExecPath'"
# NOTE(review): the two verbose strings below are missing their closing quote
# upstream — harmless, log-only output.
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"

# Set values required in priority: CmdLine, ConfigFile, Default
# First, get the location of the virtual environment, it might not be
# VenvExecDir if specified on the command line.
if ($VenvDir) {
    Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
}
else {
    Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
    $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
    Write-Verbose "VenvDir=$VenvDir"
}

# Next, read the `pyvenv.cfg` file to determine any required value such
# as `prompt`.
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir

# Next, set the prompt from the command line, or the config file, or
# just use the name of the virtual environment folder.
if ($Prompt) {
    Write-Verbose "Prompt specified as argument, using '$Prompt'"
}
else {
    Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
    if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
        Write-Verbose "  Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
        $Prompt = $pyvenvCfg['prompt'];
    }
    else {
        Write-Verbose "  Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
        Write-Verbose "  Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
        $Prompt = Split-Path -Path $venvDir -Leaf
    }
}

Write-Verbose "Prompt = '$Prompt'"
Write-Verbose "VenvDir='$VenvDir'"

# Deactivate any currently active virtual environment, but leave the
# deactivate function in place.
deactivate -nondestructive

# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
# that there is an activated venv.
$env:VIRTUAL_ENV = $VenvDir
222 | if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
223 |
224 | Write-Verbose "Setting prompt to '$Prompt'"
225 |
226 | # Set the prompt to include the env name
227 | # Make sure _OLD_VIRTUAL_PROMPT is global
228 | function global:_OLD_VIRTUAL_PROMPT { "" }
229 | Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
230 | New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
231 |
232 | function global:prompt {
233 | Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
234 | _OLD_VIRTUAL_PROMPT
235 | }
236 | $env:VIRTUAL_ENV_PROMPT = $Prompt
237 | }
238 |
239 | # Clear PYTHONHOME
240 | if (Test-Path -Path Env:PYTHONHOME) {
241 | Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
242 | Remove-Item -Path Env:PYTHONHOME
243 | }
244 |
245 | # Add the venv to the PATH
246 | Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
247 | $Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
248 |
```
--------------------------------------------------------------------------------
/server.py:
--------------------------------------------------------------------------------
```python
1 | import json
2 | import logging
3 | from contextlib import asynccontextmanager
4 | from typing import AsyncIterator, Dict, Any, Optional, List
5 | from datetime import datetime
6 | import time
7 |
8 | from mcp.server import Server
9 | from mcp.server.fastmcp import Context, FastMCP
10 | from qdrant_client import QdrantClient
11 | from sentence_transformers import SentenceTransformer
12 |
13 | from .core import (
14 | ServerConfig,
15 | EmbeddingProvider,
16 | VectorStore,
17 | CacheManager,
18 | HealthMonitor,
19 | MetricsCollector,
20 | ErrorContext,
21 | handle_error
22 | )
23 | from .utils.logger import get_logger
24 |
25 | logger = get_logger(__name__)
26 |
class CodebaseAnalyzer:
    """Analyzes code patterns and architecture.

    Combines three collaborators: a vector store (similarity search over
    stored patterns), a result cache, and a metrics collector. Every
    analysis call consults the cache first, records cache/vector metrics,
    and reports the request outcome (duration, success/failure) to the
    collector before returning or re-raising.
    """

    def __init__(
        self,
        vector_store: VectorStore,
        cache_manager: CacheManager,
        metrics_collector: MetricsCollector
    ):
        self.vector_store = vector_store
        self.cache_manager = cache_manager
        self.metrics_collector = metrics_collector

    async def analyze_patterns(self, code_text: str) -> Dict[str, Any]:
        """Analyze code patterns in the given text.

        Returns a dict with ``patterns_found`` (count) and ``matches``,
        where each match carries the pattern name, description,
        similarity score and examples. Results are cached per input text.
        """
        t0 = time.time()

        try:
            # Serve from the result cache whenever possible.
            cached = await self.cache_manager.result_cache.get_result(
                "analyze_patterns", code_text
            )
            if cached:
                await self.metrics_collector.record_cache_access(hit=True)
                return cached

            await self.metrics_collector.record_cache_access(hit=False)

            # Vector search restricted to entries tagged as patterns.
            hits = await self.vector_store.search(
                text=code_text,
                filter_params={"must": [{"key": "type", "match": {"value": "pattern"}}]},
                limit=5
            )
            await self.metrics_collector.record_vector_query()

            matches = []
            for hit in hits:
                matches.append({
                    "pattern": hit.payload.get("pattern_name", "Unknown"),
                    "description": hit.payload.get("description", ""),
                    "similarity": hit.score,
                    "examples": hit.payload.get("examples", [])
                })
            result = {
                "patterns_found": len(hits),
                "matches": matches
            }

            # Persist for subsequent identical queries.
            await self.cache_manager.result_cache.store_result(
                "analyze_patterns",
                result,
                code_text
            )

            # Report the successful request with its wall-clock duration.
            await self.metrics_collector.record_request(
                tool_name="analyze_patterns",
                duration=time.time() - t0,
                success=True,
                metadata={
                    "patterns_found": len(hits)
                }
            )

            return result

        except Exception as exc:
            # Report the failed request, then propagate unchanged.
            await self.metrics_collector.record_request(
                tool_name="analyze_patterns",
                duration=time.time() - t0,
                success=False,
                error=str(exc)
            )
            raise

    async def detect_architecture(self, codebase_path: str) -> Dict[str, Any]:
        """Detect architectural patterns in a codebase.

        Currently returns a placeholder summary; a full implementation
        would analyze the directory structure at ``codebase_path``.
        Results are cached per path.
        """
        t0 = time.time()

        try:
            # Serve from the result cache whenever possible.
            cached = await self.cache_manager.result_cache.get_result(
                "detect_architecture", codebase_path
            )
            if cached:
                await self.metrics_collector.record_cache_access(hit=True)
                return cached

            await self.metrics_collector.record_cache_access(hit=False)

            # Placeholder analysis — a real implementation would inspect
            # the entire codebase structure.
            result = {
                "architecture": "layered",
                "patterns": ["MVC", "Repository"],
                "components": ["controllers", "models", "views"]
            }

            # Persist for subsequent identical queries.
            await self.cache_manager.result_cache.store_result(
                "detect_architecture",
                result,
                codebase_path
            )

            # Report the successful request with its wall-clock duration.
            await self.metrics_collector.record_request(
                tool_name="detect_architecture",
                duration=time.time() - t0,
                success=True
            )

            return result

        except Exception as exc:
            # Report the failed request, then propagate unchanged.
            await self.metrics_collector.record_request(
                tool_name="detect_architecture",
                duration=time.time() - t0,
                success=False,
                error=str(exc)
            )
            raise
157 |
@asynccontextmanager
async def server_lifespan(server: Server) -> AsyncIterator[Dict]:
    """Initialize server components and manage their lifecycle.

    Builds the embedding model, Qdrant-backed vector store, cache,
    health monitor, metrics collector and analyzer, then yields them as
    a context dict for request handlers. On exit (normal or error), the
    vector store is closed and caches/metrics are cleared.

    Raises:
        Whatever component construction/initialization raises; partial
        setup is still cleaned up by the finally block.
    """
    config = ServerConfig.from_env()
    # Pre-bind every name referenced in the cleanup path so that a
    # failure during construction (e.g. model download or Qdrant
    # connection) cannot raise NameError in the finally block and mask
    # the original exception. `vector_store` was previously missing here.
    vector_store = None
    cache_manager = None
    health_monitor = None
    metrics_collector = None

    try:
        # Initialize embedding model and provider
        embedding_model = SentenceTransformer(config.embedding_model)
        embedder = EmbeddingProvider(embedding_model)

        # Initialize Qdrant client and the vector store on top of it
        qdrant_client = QdrantClient(
            url=config.qdrant_url,
            timeout=config.qdrant_timeout
        )
        vector_store = VectorStore(qdrant_client, embedder, config.collection_name)
        await vector_store.initialize()

        # Initialize supporting components
        cache_manager = CacheManager(config.to_dict())
        health_monitor = HealthMonitor(config)
        metrics_collector = MetricsCollector()

        # Analyzer ties the components together for the tool handlers
        analyzer = CodebaseAnalyzer(
            vector_store=vector_store,
            cache_manager=cache_manager,
            metrics_collector=metrics_collector
        )

        yield {
            "config": config,
            "vector_store": vector_store,
            "cache_manager": cache_manager,
            "health_monitor": health_monitor,
            "metrics_collector": metrics_collector,
            "analyzer": analyzer
        }

    finally:
        # Tear down in reverse dependency order; each guard tolerates
        # partially-completed startup.
        if vector_store:
            await vector_store.close()
        if cache_manager:
            await cache_manager.clear_all()
        if metrics_collector:
            await metrics_collector.reset()
207 |
# Create FastMCP instance with lifespan management
mcp = FastMCP(lifespan=server_lifespan)

# Tool Schemas
# JSON Schema objects describing each tool's input payload.

# "analyze-patterns": requires a single code string to analyze.
analyze_patterns_schema = {
    "type": "object",
    "properties": {
        "code": {
            "type": "string",
            "description": "Code text to analyze for patterns",
        }
    },
    "required": ["code"],
}

# "detect-architecture": requires a filesystem path to the codebase.
detect_architecture_schema = {
    "type": "object",
    "properties": {
        "path": {
            "type": "string",
            "description": "Path to the codebase to analyze",
        }
    },
    "required": ["path"],
}

# "health-check": optional flag to request a fresh (non-cached) check.
health_check_schema = {
    "type": "object",
    "properties": {
        "force": {
            "type": "boolean",
            "description": "Force a new health check",
            "default": False
        }
    }
}

# "get-metrics": takes no parameters.
metrics_schema = {
    "type": "object",
    "properties": {}
}
249 |
250 | # Tool Implementations
@mcp.tool(name="analyze-patterns", description="Analyze code for common patterns")
async def analyze_patterns(ctx: Context, code: str) -> Dict[str, Any]:
    """Analyze code text for common patterns.

    Thin MCP wrapper: delegates to the CodebaseAnalyzer stored in the
    request's lifespan context under the "analyzer" key.
    """
    analyzer: CodebaseAnalyzer = ctx.request_context.lifespan_context["analyzer"]
    return await analyzer.analyze_patterns(code)
256 |
@mcp.tool(name="detect-architecture", description="Detect architectural patterns in a codebase")
async def detect_architecture(ctx: Context, path: str) -> Dict[str, Any]:
    """Detect architectural patterns in a codebase.

    Thin MCP wrapper: delegates to CodebaseAnalyzer.detect_architecture
    for the given filesystem path.
    """
    analyzer: CodebaseAnalyzer = ctx.request_context.lifespan_context["analyzer"]
    return await analyzer.detect_architecture(path)
262 |
@mcp.tool(name="health-check", description="Check server health status")
async def health_check(ctx: Context, force: bool = False) -> Dict[str, Any]:
    """Check the health status of server components.

    Args:
        force: forwarded to the health monitor; presumably requests a
            fresh check instead of a cached result — confirm against
            the HealthMonitor implementation.
    """
    health_monitor: HealthMonitor = ctx.request_context.lifespan_context["health_monitor"]
    return await health_monitor.check_health(force)
268 |
@mcp.tool(name="get-metrics", description="Get server performance metrics")
async def get_metrics(ctx: Context) -> Dict[str, Any]:
    """Get server performance metrics from the shared MetricsCollector."""
    metrics_collector: MetricsCollector = ctx.request_context.lifespan_context["metrics_collector"]
    return await metrics_collector.get_all_metrics()
274 |
```
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/cache.py:
--------------------------------------------------------------------------------
```python
1 | """Cache management module."""
2 |
import hashlib
import json
import logging
import os
from collections import OrderedDict
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, Optional, Union
10 |
class MemoryCache:
    """In-memory LRU cache.

    Backed by an OrderedDict whose insertion order tracks recency:
    the front holds the least-recently-used entry, the back the most
    recently used one.
    """

    def __init__(self, max_size: int = 1000):
        """Create a cache holding at most ``max_size`` entries."""
        self.max_size = max_size
        self.cache: OrderedDict = OrderedDict()

    def get(self, key: str) -> Optional[Any]:
        """Return the value for ``key`` (marking it most recently used),
        or None when absent."""
        try:
            self.cache.move_to_end(key)
        except KeyError:
            return None
        return self.cache[key]

    def put(self, key: str, value: Any) -> None:
        """Insert or update ``key``, evicting the LRU entry when full."""
        if key in self.cache:
            # Refresh recency before overwriting the value.
            self.cache.move_to_end(key)
        elif len(self.cache) >= self.max_size:
            # Evict the least-recently-used entry (front of the dict).
            self.cache.popitem(last=False)
        self.cache[key] = value

    def remove(self, key: str) -> None:
        """Drop ``key`` from the cache if present; no-op otherwise."""
        self.cache.pop(key, None)

    def clear(self) -> None:
        """Discard every cached entry."""
        self.cache.clear()
48 |
class DiskCache:
    """Disk-based cache with time-based expiry.

    Each entry is a JSON file ``{"value": ..., "timestamp": ...}`` under
    ``cache_dir``, discarded once older than ``max_age_days``. Values
    must therefore be JSON-serializable.
    """

    def __init__(
        self,
        cache_dir: Union[str, Path],
        max_age_days: int = 7
    ):
        """Initialize disk cache, creating ``cache_dir`` if needed."""
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.max_age = timedelta(days=max_age_days)

    def get(self, key: str) -> Optional[Any]:
        """Return the cached value for ``key``.

        Returns None when the entry is absent, expired, or unreadable.
        Expired entries are deleted eagerly on read.
        """
        cache_path = self._get_cache_path(key)
        if not cache_path.exists():
            return None

        if self._is_expired(cache_path):
            cache_path.unlink()
            return None

        try:
            with open(cache_path) as f:
                data = json.load(f)
            return data["value"]
        except Exception:
            # Corrupt or concurrently-removed file: treat as a miss.
            return None

    def put(self, key: str, value: Any) -> None:
        """Store ``value`` under ``key`` with the current timestamp.

        Best-effort: write failures (e.g. disk full, permissions) are
        silently ignored so caching never breaks the caller.
        """
        cache_path = self._get_cache_path(key)

        try:
            with open(cache_path, "w") as f:
                json.dump({
                    "value": value,
                    "timestamp": datetime.utcnow().isoformat()
                }, f)
        except Exception:
            # Ignore write errors — the cache is an optimization only.
            pass

    def remove(self, key: str) -> None:
        """Delete the cache entry for ``key`` if it exists."""
        cache_path = self._get_cache_path(key)
        if cache_path.exists():
            cache_path.unlink()

    def clear(self) -> None:
        """Delete every cache file in the cache directory."""
        for path in self.cache_dir.glob("*.json"):
            path.unlink()

    def cleanup_expired(self) -> None:
        """Delete cache entries older than ``max_age``."""
        for path in self.cache_dir.glob("*.json"):
            if self._is_expired(path):
                path.unlink()

    def _get_cache_path(self, key: str) -> Path:
        """Map ``key`` to its cache file path.

        Uses a SHA-256 digest of the key rather than the builtin
        ``hash()``: string hashes are randomized per process
        (PYTHONHASHSEED), so hash()-derived filenames made the on-disk
        cache unreadable by any later process run.
        """
        filename = f"{hashlib.sha256(key.encode('utf-8')).hexdigest()}.json"
        return self.cache_dir / filename

    def _is_expired(self, path: Path) -> bool:
        """Check if cache entry is expired.

        Unreadable or malformed entries are reported as expired so they
        get cleaned up.
        """
        try:
            with open(path) as f:
                data = json.load(f)
            timestamp = datetime.fromisoformat(data["timestamp"])
            # Naive UTC timestamps throughout, matching put().
            return datetime.utcnow() - timestamp > self.max_age
        except Exception:
            return True
126 |
class CacheManager:
    """Manager for memory and disk caching.

    Coordinates an in-memory LRU cache with an optional disk cache.
    When ``config.cache_enabled`` is false, every operation is a no-op.
    Expected config attributes: ``cache_enabled``, ``memory_cache_size``,
    ``disk_cache_dir`` (a Path or None).
    """

    def __init__(self, config):
        """Initialize cache manager; no caches are created until initialize()."""
        self.config = config
        self.enabled = config.cache_enabled
        self.memory_cache = None
        self.disk_cache = None
        self.initialized = False
        self.logger = logging.getLogger(__name__)

    async def initialize(self) -> None:
        """Create the memory cache and (if configured) the disk cache.

        Idempotent. On failure, partially-created state is cleaned up and
        a RuntimeError is raised.
        """
        if self.initialized:
            self.logger.debug("Cache manager already initialized")
            return

        try:
            self.logger.debug(f"Initializing cache manager (enabled: {self.enabled})")

            if self.enabled:
                self.logger.debug(f"Creating memory cache with size: {self.config.memory_cache_size}")
                self.memory_cache = MemoryCache(
                    max_size=self.config.memory_cache_size
                )

                # Disk cache is optional — only built when a directory is configured.
                if self.config.disk_cache_dir is not None:
                    self.logger.debug(f"Creating disk cache at: {self.config.disk_cache_dir}")

                    # Ensure directory exists (normally created by ServerConfig.create_directories)
                    if not self.config.disk_cache_dir.exists():
                        self.logger.debug(f"Creating disk cache directory: {self.config.disk_cache_dir}")
                        self.config.disk_cache_dir.mkdir(parents=True, exist_ok=True)

                    self.disk_cache = DiskCache(
                        cache_dir=self.config.disk_cache_dir
                    )
                else:
                    self.logger.debug("Disk cache directory not configured, skipping disk cache")
            else:
                self.logger.debug("Cache is disabled, not initializing memory or disk cache")

            self.initialized = True
            self.logger.debug("Cache manager initialized successfully")
        except Exception as e:
            self.logger.error(f"Error initializing cache manager: {e}")
            await self.cleanup()
            raise RuntimeError(f"Failed to initialize cache manager: {str(e)}")

    def get_from_memory(self, key: str) -> Optional[Any]:
        """Get value from the memory cache only (None when disabled/missing)."""
        if not self.enabled or not self.memory_cache:
            return None
        return self.memory_cache.get(key)

    def put_in_memory(self, key: str, value: Any) -> None:
        """Put value in the memory cache only (no-op when disabled)."""
        if not self.enabled or not self.memory_cache:
            return
        self.memory_cache.put(key, value)

    def get_from_disk(self, key: str) -> Optional[Any]:
        """Get value from the disk cache only (None when disabled/missing)."""
        if not self.enabled or not self.disk_cache:
            return None
        return self.disk_cache.get(key)

    def put_in_disk(self, key: str, value: Any) -> None:
        """Put value in the disk cache only (no-op when disabled)."""
        if not self.enabled or not self.disk_cache:
            return
        self.disk_cache.put(key, value)

    def get(self, key: str) -> Optional[Any]:
        """Get value from cache, memory first, then disk.

        A disk hit is promoted into the memory cache for faster reads
        next time.
        """
        if not self.enabled:
            return None

        value = self.get_from_memory(key)
        if value is not None:
            return value

        if self.disk_cache:
            value = self.get_from_disk(key)
            if value is not None:
                # Promote to memory for next time.
                self.put_in_memory(key, value)
                return value

        return None

    def put(self, key: str, value: Any) -> None:
        """Put value in cache (memory, plus disk when configured)."""
        if not self.enabled:
            return

        self.put_in_memory(key, value)
        if self.disk_cache:
            self.put_in_disk(key, value)

    def remove(self, key: str) -> None:
        """Remove value from both cache layers."""
        if not self.enabled:
            return

        if self.memory_cache:
            self.memory_cache.remove(key)
        if self.disk_cache:
            self.disk_cache.remove(key)

    def clear(self) -> None:
        """Clear all values from both cache layers."""
        if not self.enabled:
            return

        if self.memory_cache:
            self.memory_cache.clear()
        if self.disk_cache:
            self.disk_cache.clear()

    async def cleanup(self) -> None:
        """Clear the memory cache, prune expired disk entries, and mark
        the manager uninitialized (always, even on error)."""
        if not self.initialized:
            return

        try:
            if not self.enabled:
                return

            if self.memory_cache:
                self.memory_cache.clear()

            if self.disk_cache:
                self.disk_cache.cleanup_expired()
        except Exception as e:
            # Use the instance logger (previously print()) so cleanup
            # failures land in the normal log stream like every other
            # message from this class.
            self.logger.error(f"Error cleaning up cache manager: {e}")
        finally:
            self.initialized = False

    async def clear_all(self) -> None:
        """Async facade over clear() for lifecycle hooks that await it."""
        self.clear()
275 |
```
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/health.py:
--------------------------------------------------------------------------------
```python
1 | """Health monitoring module."""
2 |
3 | import asyncio
4 | import os
5 | import psutil
6 | import time
7 | from datetime import datetime
8 | from enum import Enum
9 | from typing import Dict, List, Optional
10 |
11 | from pydantic import BaseModel
12 | import aiohttp
13 |
class HealthStatus(str, Enum):
    """Health status enumeration.

    Str-valued so it serializes naturally in pydantic models / JSON.
    """

    HEALTHY = "healthy"      # all checks passing
    DEGRADED = "degraded"    # functional, but above a warning threshold
    UNHEALTHY = "unhealthy"  # a check failed or a critical threshold was hit
20 |
class ComponentHealth(BaseModel):
    """Health snapshot for a single monitored component."""

    # Component identifier, e.g. "qdrant", "disk", "memory".
    name: str
    status: HealthStatus
    # Human-readable detail; populated mainly for degraded/unhealthy states.
    message: Optional[str] = None
    # Timestamp of the check that produced this snapshot.
    last_check: datetime
    # Component-specific numeric metrics (e.g. response_time, percent_used).
    metrics: Optional[Dict[str, float]] = None
29 |
class SystemHealth(BaseModel):
    """Aggregated health snapshot for the whole system."""

    # Worst status across all components.
    status: HealthStatus
    # Per-component snapshots keyed by component name.
    components: Dict[str, ComponentHealth]
    # Whole-system metrics (CPU, memory, disk, load averages).
    system_metrics: Dict[str, float]
    # When this snapshot was assembled.
    timestamp: datetime
37 |
class HealthManager:
    """Manager for system health monitoring.

    Registers core components ("qdrant", "disk", "memory"), re-checks
    them from a periodic background task, and exposes an aggregated
    SystemHealth snapshot via check_health().
    """

    def __init__(self, config):
        """Initialize health manager; monitoring starts in initialize()."""
        self.config = config
        self.components: Dict[str, ComponentHealth] = {}
        self.check_interval = 60  # seconds between background checks
        self.running = False
        self._monitor_task = None
        self.initialized = False

    async def initialize(self):
        """Start the monitor task, register core components, run one check.

        Raises:
            RuntimeError: when any part of startup fails (after cleanup).
        """
        if self.initialized:
            return

        try:
            self.running = True
            self._monitor_task = asyncio.create_task(self._monitor_health())

            # Register core components
            await self.register_component("qdrant")
            await self.register_component("disk")
            await self.register_component("memory")

            # Initial health check so status is populated immediately
            await self.check_health()

            self.initialized = True
        except Exception as e:
            # NOTE(review): this module has no logger; print kept to
            # preserve observable behavior.
            print(f"Error initializing health manager: {e}")
            await self.cleanup()
            raise RuntimeError(f"Failed to initialize health manager: {str(e)}")

    async def cleanup(self):
        """Stop the monitor task and drop all component state.

        Always marks the manager uninitialized, even on error.
        """
        if not self.initialized:
            return

        try:
            if self.running:
                self.running = False
                if self._monitor_task:
                    try:
                        # Give the loop a chance to exit on its own first.
                        await asyncio.wait_for(self._monitor_task, timeout=5.0)
                    except asyncio.TimeoutError:
                        # Usually parked in the long sleep — cancel it.
                        self._monitor_task.cancel()
                        try:
                            await self._monitor_task
                        except asyncio.CancelledError:
                            pass
                    finally:
                        self._monitor_task = None
            self.components.clear()
        except Exception as e:
            print(f"Error cleaning up health manager: {e}")
        finally:
            self.initialized = False

    async def check_health(self) -> SystemHealth:
        """Run all component checks and return an aggregated snapshot.

        Overall status is the worst component status: any UNHEALTHY
        component makes the system unhealthy; otherwise any DEGRADED
        component makes it degraded.
        """
        # Update component health
        await self._check_components()

        # Get system metrics
        system_metrics = await self._get_system_metrics()

        # Determine overall status from the worst component state
        status = HealthStatus.HEALTHY
        if any(c.status == HealthStatus.UNHEALTHY for c in self.components.values()):
            status = HealthStatus.UNHEALTHY
        elif any(c.status == HealthStatus.DEGRADED for c in self.components.values()):
            status = HealthStatus.DEGRADED

        return SystemHealth(
            status=status,
            components=self.components,
            system_metrics=system_metrics,
            timestamp=datetime.utcnow()
        )

    async def register_component(
        self,
        name: str,
        check_fn=None
    ) -> None:
        """Register a component for health monitoring.

        Args:
            name: component identifier.
            check_fn: accepted for API compatibility but not currently
                invoked — checks are hard-wired in _check_components().

        The component starts as HEALTHY until its first real check.
        """
        self.components[name] = ComponentHealth(
            name=name,
            status=HealthStatus.HEALTHY,
            last_check=datetime.utcnow(),
            metrics={}
        )

    async def update_component_health(
        self,
        name: str,
        status: HealthStatus,
        message: Optional[str] = None,
        metrics: Optional[Dict[str, float]] = None
    ) -> None:
        """Replace the stored health record for a registered component.

        Unknown component names are silently ignored.
        """
        if name not in self.components:
            return

        self.components[name] = ComponentHealth(
            name=name,
            status=status,
            message=message,
            last_check=datetime.utcnow(),
            metrics=metrics
        )

    async def _monitor_health(self):
        """Background loop: re-check health every check_interval seconds."""
        while self.running:
            try:
                await self.check_health()
            except Exception as e:
                # Keep the loop alive even if a single check fails.
                print(f"Error monitoring health: {e}")

            await asyncio.sleep(self.check_interval)

    async def _check_components(self):
        """Check all registered components, recording failures per component."""
        # Check Qdrant connection (only when a URL is configured)
        try:
            if hasattr(self.config, "qdrant_url"):
                await self._check_qdrant()
        except Exception as e:
            await self.update_component_health(
                "qdrant",
                HealthStatus.UNHEALTHY,
                str(e)
            )

        # Check disk space
        try:
            await self._check_disk_space()
        except Exception as e:
            await self.update_component_health(
                "disk",
                HealthStatus.UNHEALTHY,
                str(e)
            )

        # Check memory usage
        try:
            await self._check_memory()
        except Exception as e:
            await self.update_component_health(
                "memory",
                HealthStatus.UNHEALTHY,
                str(e)
            )

    async def _check_qdrant(self):
        """Check Qdrant health via its REST /healthz endpoint."""
        try:
            start_time = time.perf_counter()
            # Use the configured Qdrant URL instead of assuming a local
            # instance (previous behavior hard-coded localhost:6333, which
            # broke health checks for remote deployments). The old value
            # remains the fallback.
            base_url = getattr(self.config, "qdrant_url", None) or "http://localhost:6333"
            async with aiohttp.ClientSession() as session:
                async with session.get(f"{base_url.rstrip('/')}/healthz") as response:
                    response.raise_for_status()
                    response_time = time.perf_counter() - start_time

            await self.update_component_health(
                "qdrant",
                HealthStatus.HEALTHY,
                message="Qdrant is responding",
                metrics={
                    "response_time": response_time
                }
            )
        except Exception as e:
            await self.update_component_health(
                "qdrant",
                HealthStatus.UNHEALTHY,
                message=f"Qdrant health check failed: {str(e)}"
            )

    async def _check_disk_space(self):
        """Check free space on the volume holding the docs cache."""
        disk_path = self.config.docs_cache_dir
        usage = psutil.disk_usage(disk_path)

        status = HealthStatus.HEALTHY
        message = None

        # Thresholds: >=90% critical, >=80% warning
        if usage.percent >= 90:
            status = HealthStatus.UNHEALTHY
            message = "Disk usage critical"
        elif usage.percent >= 80:
            status = HealthStatus.DEGRADED
            message = "Disk usage high"

        await self.update_component_health(
            "disk",
            status,
            message,
            metrics={
                "total_gb": usage.total / (1024 ** 3),
                "used_gb": usage.used / (1024 ** 3),
                "free_gb": usage.free / (1024 ** 3),
                "percent_used": usage.percent
            }
        )

    async def _check_memory(self):
        """Check system virtual-memory usage."""
        memory = psutil.virtual_memory()

        status = HealthStatus.HEALTHY
        message = None

        # Thresholds mirror the disk check: >=90% critical, >=80% warning
        if memory.percent >= 90:
            status = HealthStatus.UNHEALTHY
            message = "Memory usage critical"
        elif memory.percent >= 80:
            status = HealthStatus.DEGRADED
            message = "Memory usage high"

        await self.update_component_health(
            "memory",
            status,
            message,
            metrics={
                "total_gb": memory.total / (1024 ** 3),
                "used_gb": memory.used / (1024 ** 3),
                "free_gb": memory.available / (1024 ** 3),
                "percent_used": memory.percent
            }
        )

    async def _get_system_metrics(self) -> Dict[str, float]:
        """Sample whole-system CPU, memory, disk and load metrics.

        Note: cpu_percent blocks for its 1-second sampling interval, and
        os.getloadavg is POSIX-only — this assumes a Unix-like host.
        """
        cpu_percent = psutil.cpu_percent(interval=1)
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage("/")  # root volume, unlike _check_disk_space

        return {
            "cpu_percent": cpu_percent,
            "memory_percent": memory.percent,
            "disk_percent": disk.percent,
            "load_avg_1min": os.getloadavg()[0],
            "load_avg_5min": os.getloadavg()[1],
            "load_avg_15min": os.getloadavg()[2]
        }
291 |
```
--------------------------------------------------------------------------------
/PLAN.md:
--------------------------------------------------------------------------------
```markdown
1 | # Universal Codebase Analysis System Implementation Plan
2 |
3 | ## System Overview
4 |
5 | ```mermaid
6 | graph TD
7 | subgraph Core Infrastructure
8 | A[MCP Server] --> B[Vector Store]
9 | A --> C[Cache System]
10 | A --> D[Metrics Collector]
11 | A --> E[Health Monitor]
12 | end
13 |
14 | subgraph LLM Integration
15 | F[Meta Prompt System] --> G[Context Builder]
16 | G --> H[Prompt Generator]
17 | H --> I[Result Processor]
18 | I --> J[Knowledge Updater]
19 | end
20 |
21 | subgraph Analysis Engine
22 | K[Code Analyzer] --> L[Pattern Detector]
23 | K --> M[Architecture Analyzer]
24 | K --> N[Security Analyzer]
25 | K --> O[Performance Analyzer]
26 | end
27 |
28 | subgraph Documentation System
29 | U[FireCrawl Integration] --> V[Doc Manager]
30 | V --> W[Reference Cache]
31 | V --> X[Git Integration]
32 | end
33 |
34 | subgraph Task Management
35 | P[Task Tracker] --> Q[Debug System]
36 | P --> R[Test Manager]
37 | P --> S[Doc Generator]
38 | P --> T[ADR Creator]
39 | end
40 |
41 | subgraph ADR System
42 | AA[Impact Analysis] --> AB[Context Gathering]
43 | AB --> AC[Decision Analysis]
44 | AC --> AD[ADR Generation]
45 | AD --> AE[Implementation Tracking]
46 | AE --> AF[Evolution Management]
47 | end
48 | ```
49 |
50 | ## Implementation Phases
51 |
52 | ### Phase 1: Core Infrastructure (Week 1-2)
53 |
54 | ```mermaid
55 | graph LR
56 | A[Setup Project] --> B[Vector Store]
57 | B --> C[Cache Layer]
58 | C --> D[Health Monitoring]
59 | D --> E[Metrics System]
60 | ```
61 |
62 | #### Components:
63 | 1. **Vector Store**
64 | - Qdrant integration
65 | - Embedding system
66 | - Pattern storage
67 | - Search functionality
68 |
69 | 2. **Cache System**
70 | - LRU implementation
71 | - Result caching
72 | - Embedding cache
73 | - Performance optimization
74 |
75 | 3. **Health Monitor**
76 | - Component status
77 | - Performance metrics
78 | - Error tracking
79 | - System diagnostics
80 |
81 | 4. **Metrics Collector**
82 | - Usage statistics
83 | - Performance data
84 | - Error rates
85 | - Cache effectiveness
86 |
87 | ### Phase 2: Documentation & Knowledge Management (Week 3-4)
88 |
89 | ```mermaid
90 | graph TD
91 | A[FireCrawl Integration] --> B[Doc Crawler]
92 | B --> C[Reference Manager]
93 | C --> D[Local Cache]
94 | D --> E[Git Management]
95 |
96 | F[External Sources] --> B
97 | F --> G[API Docs]
98 | F --> H[Framework Docs]
99 | F --> I[Best Practices]
100 | ```
101 |
102 | #### Components:
103 | 1. **Documentation Manager**
104 | ```python
105 | class DocumentationManager:
106 | async def crawl_docs(self, sources: List[str]) -> None:
107 | """Crawl and store documentation."""
108 |
109 | async def update_references(self) -> None:
110 | """Update local documentation cache."""
111 |
112 | async def manage_gitignore(self) -> None:
113 | """Handle version control for docs."""
114 | ```
115 |
116 | 2. **Reference System**
117 | ```python
118 | class ReferenceSystem:
119 | async def index_docs(self) -> None:
120 | """Index documentation for search."""
121 |
122 | async def find_relevant_docs(self, context: str) -> List[Doc]:
123 | """Find relevant documentation."""
124 | ```
125 |
126 | ### Phase 3: LLM Integration & ADR System (Week 5-6)
127 |
128 | ```mermaid
129 | graph TD
130 | A[Meta Prompt System] --> B[Context Builder]
131 | B --> C[Prompt Generator]
132 | C --> D[Result Processor]
133 | D --> E[Knowledge Base]
134 | E --> A
135 |
136 | F[ADR System] --> G[Impact Analysis]
137 | G --> H[Context Gathering]
138 | H --> I[Decision Analysis]
139 | I --> J[Pattern Learning]
140 | ```
141 |
142 | #### Components:
143 | 1. **Meta Prompt System**
144 | ```python
145 | class MetaPromptSystem:
146 | async def generate_prompt(self, task_type: str) -> str:
147 | """Generate task-specific prompts."""
148 |
149 | async def evolve_prompts(self, feedback: Dict[str, Any]) -> None:
150 | """Evolve prompts based on effectiveness."""
151 | ```
152 |
153 | 2. **ADR System**
154 | ```python
155 | class ADRSystem:
156 | async def analyze_impact(self, changes: CodeChanges) -> ImpactAnalysis:
157 | """Analyze architectural impact of changes."""
158 |
159 | async def gather_context(self) -> DecisionContext:
160 | """Gather relevant context for decision."""
161 |
162 | async def analyze_options(self, options: List[Option]) -> OptionsAnalysis:
163 | """Analyze and compare options."""
164 |
165 | async def generate_adr(self, context: DecisionContext) -> ADR:
166 | """Generate ADR document."""
167 |
168 | async def track_implementation(self, adr: ADR) -> Implementation:
169 | """Track ADR implementation."""
170 |
171 | async def evolve_adr(self, adr: ADR, feedback: Feedback) -> ADR:
172 | """Evolve ADR based on implementation feedback."""
173 |
174 | async def learn_patterns(self, adr: ADR) -> List[Pattern]:
175 | """Extract reusable patterns from ADR."""
176 | ```
177 |
178 | ### Phase 4: Debug & Analysis System (Week 7-8)
179 |
180 | ```mermaid
181 | graph TD
182 | A[Debug System] --> B[Issue Analysis]
183 | B --> C[Pattern Matching]
184 | C --> D[Solution Generation]
185 |
186 |     E["Agans' 9 Rules"] --> F[System Understanding]
187 | F --> G[Failure Analysis]
188 | G --> H[Solution Verification]
189 | ```
190 |
191 | #### Components:
192 | 1. **Debug System**
193 | ```python
194 | class DebugSystem:
195 | async def analyze_issue(self, issue: Issue) -> Analysis:
196 | """Analyze using Agans' 9 Rules."""
197 |
198 | async def suggest_solution(self, analysis: Analysis) -> Solution:
199 | """Suggest solution approach."""
200 | ```
201 |
202 | ## Prompt Templates
203 |
204 | ### 1. Meta Prompts
205 | ```python
206 | META_PROMPTS = {
207 | "task_analysis": """
208 | Given task: {task_description}
209 | Generate optimal analysis prompt considering:
210 | 1. Required context
211 | 2. Analysis steps
212 | 3. Validation criteria
213 | 4. Expected outcomes
214 | """,
215 |
216 | "prompt_evolution": """
217 | Original prompt: {original_prompt}
218 | Results: {results}
219 | Effectiveness: {metrics}
220 |
221 | Suggest improvements for:
222 | 1. Context gathering
223 | 2. Analysis depth
224 | 3. Result quality
225 | 4. Validation accuracy
226 | """
227 | }
228 | ```
229 |
230 | ### 2. ADR Analysis Prompts
231 | ```python
232 | ADR_PROMPTS = {
233 | "impact_analysis": """
234 | Code Changes:
235 | {code_changes}
236 |
237 | Current Architecture:
238 | {architecture_context}
239 |
240 | Historical Decisions:
241 | {related_adrs}
242 |
243 | Analyze:
244 | 1. Architectural Impact
245 | - Component changes
246 | - Interface modifications
247 | - Dependency updates
248 |
249 | 2. Technical Debt Impact
250 | - Existing debt affected
251 | - Potential new debt
252 | - Mitigation strategies
253 |
254 | 3. Cross-cutting Concerns
255 | - Security implications
256 | - Performance impact
257 | - Scalability considerations
258 | """,
259 |
260 | "decision_analysis": """
261 | Decision Context:
262 | {decision_context}
263 |
264 | Options Considered:
265 | {options_analysis}
266 |
267 | Similar Decisions:
268 | {historical_decisions}
269 |
270 | Analyze each option for:
271 | 1. Technical Alignment
272 | - Architecture fit
273 | - Technology stack
274 | - Development practices
275 |
276 | 2. Business Impact
277 | - Development effort
278 | - Maintenance cost
279 | - Time to market
280 |
281 | 3. Risk Assessment
282 | - Technical risks
283 | - Implementation risks
284 | - Operational risks
285 | """
286 | }
287 | ```
288 |
289 | ### 3. Debug Analysis Prompts
290 | ```python
291 | DEBUG_PROMPTS = {
292 | "debug_analysis": """
293 | Issue context: {issue_details}
294 | System state: {system_state}
295 |
296 | Following Agans' 9 Rules:
297 | 1. System Understanding:
298 | - Current architecture
299 | - Component relationships
300 | - Expected behavior
301 |
302 | 2. Failure Analysis:
303 | - Reproduction steps
304 | - Failure conditions
305 | - Pattern matching
306 |
307 | 3. Observable Data:
308 | - Error logs
309 | - Stack traces
310 | - System metrics
311 |
312 | 4. Component Isolation:
313 | - Affected components
314 | - Working components
315 | - Interface boundaries
316 | """
317 | }
318 | ```
319 |
320 | ## Implementation Strategy
321 |
322 | 1. **Infrastructure First**
323 | - Set up core components
324 | - Establish monitoring
325 | - Implement caching
326 | - Configure vector store
327 |
328 | 2. **Documentation System**
329 | - Integrate FireCrawl
330 | - Set up reference management
331 | - Configure Git integration
332 | - Implement caching
333 |
334 | 3. **LLM & ADR Integration**
335 | - Implement meta prompt system
336 | - Build ADR analysis system
337 | - Create knowledge management
338 | - Set up learning loop
339 |
340 | 4. **Debug & Analysis**
341 | - Implement Agans' rules system
342 | - Add pattern detection
343 | - Create solution generation
344 | - Set up verification system
345 |
346 | ## Success Criteria
347 |
348 | 1. **System Performance**
349 | - Response time < 2s
350 | - Cache hit rate > 80%
351 | - Pattern match accuracy > 90%
352 | - System uptime > 99.9%
353 |
354 | 2. **Documentation Quality**
355 | - Reference freshness < 24h
356 | - Documentation coverage > 95%
357 | - ADR completeness > 90%
358 | - Test coverage > 85%
359 |
360 | 3. **Analysis Quality**
361 | - Pattern detection accuracy > 90%
362 | - Architecture analysis precision > 85%
363 | - Security issue detection > 95%
364 | - Performance insight accuracy > 85%
365 |
366 | 4. **ADR Quality**
367 | - Context completeness > 95%
368 | - Decision clarity > 90%
369 | - Implementation tracking > 85%
370 | - Evolution management > 90%
371 |
372 | 5. **Debug Effectiveness**
373 | - Issue resolution time < 24h
374 | - Solution accuracy > 90%
375 | - Pattern learning rate > 85%
376 | - Knowledge reuse > 80%
377 |
378 | ## Next Steps
379 |
380 | 1. Toggle to Act mode to begin implementation
381 | 2. Start with core infrastructure
382 | 3. Implement documentation system
383 | 4. Add LLM & ADR integration
384 | 5. Build debug & analysis components
385 | 6. Test and refine each component
386 | 7. Gather feedback and improve
387 |
```
--------------------------------------------------------------------------------
/docs/api.md:
--------------------------------------------------------------------------------
```markdown
1 | ## Task Management API
2 |
3 | The Task Management API provides endpoints for creating, listing, and retrieving information about asynchronous tasks.
4 |
5 | ### Create Task
6 |
7 | **Endpoint:** `POST /api/tasks/create`
8 |
9 | Create a new asynchronous task for processing.
10 |
11 | **Request Body:**
12 |
13 | ```json
14 | {
15 | "type": "code_analysis",
16 | "title": "Analyze Repository",
17 | "description": "Full code analysis of the repository",
18 | "context": {
19 | "repository_path": "/path/to/repo"
20 | },
21 | "priority": "medium",
22 | "metadata": {
23 | "requested_by": "user123"
24 | }
25 | }
26 | ```
27 |
28 | **Parameters:**
29 |
30 | - `type` (string, required): Type of task to create (e.g., `code_analysis`, `pattern_extraction`, `documentation`)
31 | - `title` (string, required): Title of the task
32 | - `description` (string, required): Description of what the task will do
33 | - `context` (object, required): Context data for the task, varies based on task type
34 | - `priority` (string, optional): Task priority (`low`, `medium`, `high`, `critical`), defaults to `medium`
35 | - `metadata` (object, optional): Additional metadata for the task
36 |
37 | **Response:**
38 |
39 | ```json
40 | {
41 | "id": "123e4567-e89b-12d3-a456-426614174000",
42 | "type": "code_analysis",
43 | "title": "Analyze Repository",
44 | "description": "Full code analysis of the repository",
45 | "status": "pending",
46 | "priority": "medium",
47 | "context": {
48 | "repository_path": "/path/to/repo"
49 | },
50 | "result": null,
51 | "error": null,
52 | "created_at": "2023-07-10T14:30:00.123456",
53 | "updated_at": "2023-07-10T14:30:00.123456",
54 | "completed_at": null,
55 | "metadata": {
56 | "requested_by": "user123"
57 | }
58 | }
59 | ```
60 |
61 | ### List Tasks
62 |
63 | **Endpoint:** `GET /api/tasks`
64 |
65 | List all tasks with optional filtering.
66 |
67 | **Query Parameters:**
68 |
69 | - `type` (string, optional): Filter tasks by type
70 | - `status` (string, optional): Filter tasks by status (`pending`, `in_progress`, `completed`, `failed`, `cancelled`)
71 | - `priority` (string, optional): Filter tasks by priority
72 | - `limit` (integer, optional): Maximum number of tasks to return, defaults to 20
73 |
74 | **Response:**
75 |
76 | ```json
77 | [
78 | {
79 | "id": "123e4567-e89b-12d3-a456-426614174000",
80 | "type": "code_analysis",
81 | "title": "Analyze Repository",
82 | "description": "Full code analysis of the repository",
83 | "status": "completed",
84 | "priority": "medium",
85 | "context": {
86 | "repository_path": "/path/to/repo"
87 | },
88 | "result": {
89 | "files_analyzed": 150,
90 | "patterns_identified": 5,
91 | "complexity_score": 78
92 | },
93 | "error": null,
94 | "created_at": "2023-07-10T14:30:00.123456",
95 | "updated_at": "2023-07-10T14:35:20.123456",
96 | "completed_at": "2023-07-10T14:35:20.123456",
97 | "metadata": {
98 | "requested_by": "user123"
99 | }
100 | },
101 | {
102 | "id": "223e4567-e89b-12d3-a456-426614174000",
103 | "type": "pattern_extraction",
104 | "title": "Extract Design Patterns",
105 | "description": "Identify design patterns in codebase",
106 | "status": "in_progress",
107 | "priority": "high",
108 | "context": {
109 | "repository_path": "/path/to/repo"
110 | },
111 | "result": null,
112 | "error": null,
113 | "created_at": "2023-07-10T14:40:00.123456",
114 | "updated_at": "2023-07-10T14:40:30.123456",
115 | "completed_at": null,
116 | "metadata": {
117 | "requested_by": "user456"
118 | }
119 | }
120 | ]
121 | ```
122 |
123 | ### Get Task by ID
124 |
125 | **Endpoint:** `GET /api/tasks/{task_id}`
126 |
127 | Get detailed information about a specific task.
128 |
129 | **Path Parameters:**
130 |
131 | - `task_id` (string, required): The unique identifier of the task
132 |
133 | **Response:**
134 |
135 | ```json
136 | {
137 | "id": "123e4567-e89b-12d3-a456-426614174000",
138 | "type": "code_analysis",
139 | "title": "Analyze Repository",
140 | "description": "Full code analysis of the repository",
141 | "status": "completed",
142 | "priority": "medium",
143 | "context": {
144 | "repository_path": "/path/to/repo"
145 | },
146 | "result": {
147 | "files_analyzed": 150,
148 | "patterns_identified": 5,
149 | "complexity_score": 78
150 | },
151 | "error": null,
152 | "created_at": "2023-07-10T14:30:00.123456",
153 | "updated_at": "2023-07-10T14:35:20.123456",
154 | "completed_at": "2023-07-10T14:35:20.123456",
155 | "metadata": {
156 | "requested_by": "user123"
157 | }
158 | }
159 | ```
160 |
161 | **Error Responses:**
162 |
163 | - `400 Bad Request`: Invalid task ID format
164 | - `404 Not Found`: Task not found
165 | - `500 Internal Server Error`: Server error while retrieving task
166 |
167 | ## Debug System API
168 |
169 | The Debug System API provides endpoints for creating, listing, and managing issues for debugging and tracking purposes.
170 |
171 | ### Create Debug Issue
172 |
173 | **Endpoint:** `POST /api/debug/issues`
174 |
175 | Create a new debug issue for tracking and analysis.
176 |
177 | **Request Body:**
178 |
179 | ```json
180 | {
181 | "title": "Memory Leak in Data Processing",
182 | "type": "performance",
183 | "description": {
184 | "severity": "high",
185 | "steps_to_reproduce": ["Load large dataset", "Run processing function", "Wait 10 minutes"],
186 | "expected_behavior": "Memory usage should remain stable",
187 | "actual_behavior": "Memory usage increases continuously"
188 | }
189 | }
190 | ```
191 |
192 | **Parameters:**
193 |
194 | - `title` (string, required): Title of the issue
195 | - `type` (string, required): Type of the issue - one of: `bug`, `performance`, `security`, `design`, `documentation`, `other`
196 | - `description` (object, required): Detailed description of the issue, structure depends on issue type
197 |
198 | **Response:**
199 |
200 | ```json
201 | {
202 | "id": "123e4567-e89b-12d3-a456-426614174000",
203 | "title": "Memory Leak in Data Processing",
204 | "type": "performance",
205 | "status": "open",
206 | "description": {
207 | "severity": "high",
208 | "steps_to_reproduce": ["Load large dataset", "Run processing function", "Wait 10 minutes"],
209 | "expected_behavior": "Memory usage should remain stable",
210 | "actual_behavior": "Memory usage increases continuously"
211 | },
212 | "steps": null,
213 | "created_at": "2023-07-10T14:30:00.123456",
214 | "updated_at": "2023-07-10T14:30:00.123456",
215 | "resolved_at": null,
216 | "metadata": null
217 | }
218 | ```
219 |
220 | ### List Debug Issues
221 |
222 | **Endpoint:** `GET /api/debug/issues`
223 |
224 | List all debug issues with optional filtering.
225 |
226 | **Query Parameters:**
227 |
228 | - `type` (string, optional): Filter issues by type
229 | - `status` (string, optional): Filter issues by status (`open`, `in_progress`, `resolved`, `closed`, `wont_fix`)
230 |
231 | **Response:**
232 |
233 | ```json
234 | [
235 | {
236 | "id": "123e4567-e89b-12d3-a456-426614174000",
237 | "title": "Memory Leak in Data Processing",
238 | "type": "performance",
239 | "status": "open",
240 | "description": {
241 | "severity": "high",
242 | "steps_to_reproduce": ["Load large dataset", "Run processing function", "Wait 10 minutes"],
243 | "expected_behavior": "Memory usage should remain stable",
244 | "actual_behavior": "Memory usage increases continuously"
245 | },
246 | "steps": [
247 | {
248 | "type": "check",
249 | "name": "Profiling",
250 | "description": "Run performance profiling"
251 | },
252 | {
253 | "type": "check",
254 | "name": "Resource Usage",
255 | "description": "Monitor CPU, memory, I/O"
256 | }
257 | ],
258 | "created_at": "2023-07-10T14:30:00.123456",
259 | "updated_at": "2023-07-10T14:35:00.123456",
260 | "resolved_at": null,
261 | "metadata": {
262 | "assigned_to": "developer1"
263 | }
264 | }
265 | ]
266 | ```
267 |
268 | ### Get Debug Issue
269 |
270 | **Endpoint:** `GET /api/debug/issues/{issue_id}`
271 |
272 | Get detailed information about a specific debug issue.
273 |
274 | **Path Parameters:**
275 |
276 | - `issue_id` (string, required): The unique identifier of the issue
277 |
278 | **Response:**
279 |
280 | ```json
281 | {
282 | "id": "123e4567-e89b-12d3-a456-426614174000",
283 | "title": "Memory Leak in Data Processing",
284 | "type": "performance",
285 | "status": "open",
286 | "description": {
287 | "severity": "high",
288 | "steps_to_reproduce": ["Load large dataset", "Run processing function", "Wait 10 minutes"],
289 | "expected_behavior": "Memory usage should remain stable",
290 | "actual_behavior": "Memory usage increases continuously"
291 | },
292 | "steps": [
293 | {
294 | "type": "check",
295 | "name": "Profiling",
296 | "description": "Run performance profiling"
297 | },
298 | {
299 | "type": "check",
300 | "name": "Resource Usage",
301 | "description": "Monitor CPU, memory, I/O"
302 | }
303 | ],
304 | "created_at": "2023-07-10T14:30:00.123456",
305 | "updated_at": "2023-07-10T14:35:00.123456",
306 | "resolved_at": null,
307 | "metadata": {
308 | "assigned_to": "developer1"
309 | }
310 | }
311 | ```
312 |
313 | ### Update Debug Issue
314 |
315 | **Endpoint:** `PUT /api/debug/issues/{issue_id}`
316 |
317 | Update the status and metadata of a debug issue.
318 |
319 | **Path Parameters:**
320 |
321 | - `issue_id` (string, required): The unique identifier of the issue
322 |
323 | **Request Body:**
324 |
325 | ```json
326 | {
327 | "status": "in_progress",
328 | "metadata": {
329 | "assigned_to": "developer1",
330 | "priority": "high"
331 | }
332 | }
333 | ```
334 |
335 | **Parameters:**
336 |
337 | - `status` (string, optional): New status for the issue - one of: `open`, `in_progress`, `resolved`, `closed`, `wont_fix`
338 | - `metadata` (object, optional): Updated metadata for the issue
339 |
340 | **Response:**
341 |
342 | Same as the Get Debug Issue response, with updated values.
343 |
344 | ### Analyze Debug Issue
345 |
346 | **Endpoint:** `POST /api/debug/issues/{issue_id}/analyze`
347 |
348 | Analyze a debug issue to generate recommended debugging steps based on the issue type.
349 |
350 | **Path Parameters:**
351 |
352 | - `issue_id` (string, required): The unique identifier of the issue
353 |
354 | **Response:**
355 |
356 | ```json
357 | [
358 | {
359 | "type": "check",
360 | "name": "Profiling",
361 | "description": "Run performance profiling"
362 | },
363 | {
364 | "type": "check",
365 | "name": "Resource Usage",
366 | "description": "Monitor CPU, memory, I/O"
367 | },
368 | {
369 | "type": "check",
370 | "name": "Query Analysis",
371 | "description": "Review database queries"
372 | },
373 | {
374 | "type": "check",
375 | "name": "Bottlenecks",
376 | "description": "Identify performance bottlenecks"
377 | }
378 | ]
379 | ```
380 |
381 | **Error Responses:**
382 |
383 | - `400 Bad Request`: Invalid issue ID format
384 | - `404 Not Found`: Issue not found
385 | - `500 Internal Server Error`: Server error during analysis
```
--------------------------------------------------------------------------------
/.github/agents/DocAgent.agent.md:
--------------------------------------------------------------------------------
```markdown
1 | # Documentation Agent
2 |
3 | You are a specialized documentation agent for the MCP Codebase Insight project. Your expertise is in creating, maintaining, and improving project documentation.
4 |
5 | ## Your Responsibilities
6 |
7 | 1. **API Documentation**: Document endpoints, tools, and methods
8 | 2. **Architecture Docs**: Explain system design and component relationships
9 | 3. **User Guides**: Create tutorials, quickstarts, and how-to guides
10 | 4. **Code Documentation**: Write clear docstrings and inline comments
11 | 5. **ADR Management**: Help create Architecture Decision Records
12 |
13 | ## Documentation Structure
14 |
15 | ```
16 | docs/
17 | ├── api.md # API reference
18 | ├── cookbook.md # Code examples and recipes
19 | ├── testing_guide.md # Testing documentation
20 | ├── vector_store_best_practices.md
21 | ├── getting-started/
22 | │ ├── installation.md
23 | │ ├── quickstart.md
24 | │ ├── configuration.md
25 | │ └── docker-setup.md
26 | ├── features/
27 | │ ├── code-analysis.md
28 | │ ├── adr-management.md
29 | │ └── documentation.md
30 | ├── development/
31 | │ ├── CONTRIBUTING.md
32 | │ └── CODE_OF_CONDUCT.md
33 | ├── troubleshooting/
34 | │ ├── common-issues.md
35 | │ └── faq.md
36 | └── adrs/ # Architecture Decision Records
37 | └── 001_use_docker_for_qdrant.md
38 | ```
39 |
40 | ## ADR Management
41 |
42 | ### Creating an ADR
43 |
44 | ```python
45 | from src.mcp_codebase_insight.core.adr import ADRManager
46 |
47 | adr_manager = ADRManager(config)
48 | await adr_manager.initialize()
49 |
50 | # Create new ADR
51 | adr = await adr_manager.create_adr(
52 | title="Use PostgreSQL for Persistent Storage",
53 | context="Need to store analysis results persistently...",
54 | decision="We will use PostgreSQL as our primary data store...",
55 | consequences="Positive: ACID compliance, mature ecosystem...",
56 | status="proposed",
57 | tags=["storage", "database"]
58 | )
59 |
60 | print(f"Created ADR: {adr.adr_number:03d}-{adr.slug}.md")
61 | ```
62 |
63 | ### ADR Format (Markdown with Frontmatter)
64 |
65 | ```markdown
66 | ---
67 | id: <uuid>
68 | title: Use PostgreSQL for Persistent Storage
69 | status: proposed
70 | date: 2025-11-19
71 | tags: [storage, database]
72 | ---
73 |
74 | # Context
75 |
76 | We need to store code analysis results persistently...
77 |
78 | # Decision
79 |
80 | We will use PostgreSQL as our primary data store...
81 |
82 | # Consequences
83 |
84 | ## Positive
85 | - ACID compliance
86 | - Mature ecosystem
87 |
88 | ## Negative
89 | - Additional infrastructure dependency
90 | - Learning curve for team
91 |
92 | ## Neutral
93 | - Standard SQL interface
94 | ```
95 |
96 | ### ADR Lifecycle
97 |
98 | 1. **proposed** → Decision under consideration
99 | 2. **accepted** → Decision approved and being implemented
100 | 3. **implemented** → Decision fully implemented
101 | 4. **deprecated** → Decision no longer relevant
102 | 5. **superseded** → Replaced by another ADR
103 |
104 | ### Update ADR Status
105 |
106 | ```python
107 | await adr_manager.update_adr(
108 | adr_id=adr.id,
109 | status="accepted",
110 | context="Additional context after discussion..."
111 | )
112 | ```
113 |
114 | ## Documentation Best Practices
115 |
116 | ### Docstring Format (Google Style)
117 |
118 | ```python
119 | async def search_patterns(
120 | self,
121 | query: str,
122 | filters: Optional[Dict] = None,
123 | limit: int = 10
124 | ) -> List[SearchResult]:
125 | """Search for code patterns using semantic search.
126 |
127 | This method searches the vector store for patterns that match
128 | the semantic meaning of the query text.
129 |
130 | Args:
131 | query: The search query text
132 | filters: Optional metadata filters to narrow results
133 | limit: Maximum number of results to return
134 |
135 | Returns:
136 | List of SearchResult objects ordered by relevance
137 |
138 | Raises:
139 | VectorStoreError: If search operation fails
140 | ValueError: If limit is negative or zero
141 |
142 | Example:
143 | >>> results = await kb.search_patterns(
144 | ... query="error handling patterns",
145 | ... filters={"language": "python"},
146 | ... limit=5
147 | ... )
148 | >>> for result in results:
149 | ... print(f"{result.pattern_name}: {result.score}")
150 | """
151 | ```
152 |
153 | ### Markdown Documentation Template
154 |
155 | ```markdown
156 | # Feature Name
157 |
158 | > Brief one-line description of the feature
159 |
160 | ## Overview
161 |
162 | Longer description explaining what the feature does and why it's useful.
163 |
164 | ## Quick Start
165 |
166 | ```python
167 | # Minimal working example
168 | from mcp_codebase_insight import Feature
169 |
170 | feature = Feature()
171 | result = feature.do_something()
172 | ```
173 |
174 | ## Configuration
175 |
176 | | Option | Type | Default | Description |
177 | |--------|------|---------|-------------|
178 | | `option1` | str | "default" | Description |
179 | | `option2` | int | 100 | Description |
180 |
181 | ## Usage Examples
182 |
183 | ### Basic Usage
184 |
185 | ```python
186 | # Example code
187 | ```
188 |
189 | ### Advanced Usage
190 |
191 | ```python
192 | # More complex example
193 | ```
194 |
195 | ## API Reference
196 |
197 | ### `method_name(param1, param2)`
198 |
199 | Description of the method.
200 |
201 | **Parameters:**
202 | - `param1` (str): Description
203 | - `param2` (int): Description
204 |
205 | **Returns:**
206 | - `ResultType`: Description
207 |
208 | **Raises:**
209 | - `ErrorType`: When condition occurs
210 |
211 | ## Troubleshooting
212 |
213 | ### Common Issue 1
214 |
215 | **Problem:** Description of the issue
216 |
217 | **Solution:** How to fix it
218 |
219 | ```bash
220 | # Commands or code to solve
221 | ```
222 |
223 | ## Related
224 |
225 | - [Related Doc 1](./related1.md)
226 | - [Related Doc 2](./related2.md)
227 | ```
228 |
229 | ## System Architecture Documentation
230 |
231 | ### Mermaid Diagrams
232 |
233 | ```markdown
234 | ## Component Architecture
235 |
236 | ```mermaid
237 | graph TB
238 | Client[Client] --> API[FastAPI Server]
239 | API --> Core[Core Services]
240 |
241 | subgraph Core Services
242 | VectorStore[Vector Store]
243 | Cache[Cache Manager]
244 | KB[Knowledge Base]
245 | end
246 |
247 | Core --> Qdrant[(Qdrant)]
248 | ```
249 | \```
250 |
251 | Use Mermaid for:
252 | - Architecture diagrams
253 | - Data flow diagrams
254 | - Sequence diagrams
255 | - Component relationships
256 |
257 | ## API Documentation
258 |
259 | ### Endpoint Documentation Template
260 |
261 | ```markdown
262 | ## POST /api/analyze
263 |
264 | Analyze code for patterns and architectural insights.
265 |
266 | **Request Body:**
267 | ```json
268 | {
269 | "code": "string",
270 | "language": "python",
271 | "options": {
272 | "detect_patterns": true,
273 | "analyze_architecture": true
274 | }
275 | }
276 | ```
277 |
278 | **Response (200 OK):**
279 | ```json
280 | {
281 | "patterns_found": 5,
282 | "patterns": [
283 | {
284 | "name": "Repository Pattern",
285 | "confidence": 0.95,
286 | "description": "...",
287 | "examples": ["file1.py:10-25"]
288 | }
289 | ],
290 | "architecture": {
291 | "style": "layered",
292 | "components": ["controllers", "services", "repositories"]
293 | }
294 | }
295 | ```
296 |
297 | **Error Responses:**
298 | - `400 Bad Request`: Invalid input
299 | - `500 Internal Server Error`: Analysis failed
300 |
301 | **Example:**
302 | ```python
303 | import requests
304 |
305 | response = requests.post(
306 | "http://localhost:3000/api/analyze",
307 | json={"code": "def hello(): pass", "language": "python"}
308 | )
309 | print(response.json())
310 | ```
311 | ```
312 |
313 | ## Code Examples (Cookbook)
314 |
315 | ### Recipe Format
316 |
317 | ```markdown
318 | ## Recipe: Analyze a Codebase
319 |
320 | **Goal:** Analyze an entire codebase and generate a summary report.
321 |
322 | **Prerequisites:**
323 | - MCP server running
324 | - Qdrant available
325 | - Codebase path accessible
326 |
327 | **Steps:**
328 |
329 | 1. **Initialize the analyzer**
330 | ```python
331 | from mcp_codebase_insight import CodebaseAnalyzer
332 |
333 | analyzer = CodebaseAnalyzer(config)
334 | await analyzer.initialize()
335 | ```
336 |
337 | 2. **Process files**
338 | ```python
339 | results = []
340 | for file_path in codebase_files:
341 | result = await analyzer.analyze_file(file_path)
342 | results.append(result)
343 | ```
344 |
345 | 3. **Generate report**
346 | ```python
347 | report = analyzer.generate_report(results)
348 | print(report)
349 | ```
350 |
351 | **Complete Example:**
352 | ```python
353 | # Full working code
354 | ```
355 |
356 | **Expected Output:**
357 | ```
358 | Patterns found: 23
359 | Architecture: Microservices
360 | ...
361 | ```
362 |
363 | **Troubleshooting:**
364 | - If X happens, do Y
365 | - Common error Z means ABC
366 | ```
367 |
368 | ## Updating Documentation
369 |
370 | ### When Code Changes
371 |
372 | 1. **Update docstrings** immediately with code changes
373 | 2. **Update API docs** when endpoints/signatures change
374 | 3. **Create ADR** for significant architectural decisions
375 | 4. **Update examples** to reflect new APIs
376 | 5. **Update troubleshooting** when fixing common issues
377 |
378 | ### Documentation Checklist
379 |
380 | - [ ] Docstrings updated for all modified functions
381 | - [ ] API reference updated if signatures changed
382 | - [ ] Examples tested and working
383 | - [ ] Architecture diagrams updated if structure changed
384 | - [ ] ADR created for architectural decisions
385 | - [ ] Changelog updated with user-facing changes
386 | - [ ] README updated if getting started process changed
387 |
388 | ## Tools and Validation
389 |
390 | ### Check Documentation Links
391 |
392 | ```bash
393 | # Find broken markdown links
394 | grep -r "](\./" docs/ | while read line; do
395 | file=$(echo $line | cut -d: -f1)
396 | link=$(echo $line | grep -o "](\.\/[^)]*)" | sed 's/](\.\///' | sed 's/).*//')
397 | if [ ! -f "$(dirname $file)/$link" ]; then
398 | echo "Broken link in $file: $link"
399 | fi
400 | done
401 | ```
402 |
403 | ### Generate API Documentation
404 |
405 | ```bash
406 | # Use pdoc or similar
407 | pdoc --html --output-dir docs/api src/mcp_codebase_insight
408 | ```
409 |
410 | ### Spell Check
411 |
412 | ```bash
413 | # Use aspell or codespell
414 | codespell docs/
415 | ```
416 |
417 | ## Key Files to Maintain
418 |
419 | - `README.md`: Main project overview
420 | - `CHANGELOG.md`: Version history and changes
421 | - `CONTRIBUTING.md`: How to contribute
422 | - `docs/api.md`: API reference
423 | - `docs/cookbook.md`: Code examples
424 | - `.github/copilot-instructions.md`: AI agent guidance
425 |
426 | ## Documentation Style Guide
427 |
428 | 1. **Clarity**: Write for users unfamiliar with the codebase
429 | 2. **Completeness**: Include all necessary context
430 | 3. **Conciseness**: Be brief but complete
431 | 4. **Examples**: Always include working code examples
432 | 5. **Updates**: Keep docs in sync with code
433 | 6. **Structure**: Use consistent heading hierarchy
434 | 7. **Links**: Reference related documentation
435 | 8. **Code blocks**: Always specify language for syntax highlighting
436 |
437 | ## When to Escalate
438 |
439 | - Large documentation restructuring needs
440 | - Documentation translation requirements
441 | - Complex technical writing beyond coding scope
442 | - Legal/licensing documentation questions
443 |
```
--------------------------------------------------------------------------------
/.github/workflows/tdd-verification.yml:
--------------------------------------------------------------------------------
```yaml
1 | name: TDD Workflow Verification
2 |
3 | on:
4 | push:
5 | branches: [ dev, main ]
6 | pull_request:
7 | branches: [ dev, main ]
8 | workflow_dispatch:
9 | inputs:
10 | python_version:
11 | description: 'Python version to use for verification'
12 | required: false
13 | default: '3.11'
14 |
15 | jobs:
16 | tdd-verify:
17 | runs-on: ubuntu-latest
18 | strategy:
19 | matrix:
20 | python-version: ["${{ github.event.inputs.python_version || '3.11' }}"]
21 | fail-fast: false
22 |
23 | name: TDD Verification with Python ${{ matrix.python-version }}
24 | environment:
25 | name: development
26 | url: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
27 |
28 | services:
29 | qdrant:
30 | image: qdrant/qdrant:v1.13.6
31 | ports:
32 | - 6333:6333
33 | - 6334:6334
34 |
35 | steps:
36 | - name: Checkout code
37 | uses: actions/checkout@v4
38 | with:
39 | fetch-depth: 0
40 |
41 | - name: Set up Python ${{ matrix.python-version }}
42 | uses: actions/[email protected]
43 | with:
44 | python-version: ${{ matrix.python-version }}
45 | cache: 'pip'
46 |
47 | - name: Wait for Qdrant and verify connection
48 | run: |
49 | echo "Waiting for Qdrant to start..."
50 | chmod +x scripts/check_qdrant_health.sh
51 | ./scripts/check_qdrant_health.sh "http://localhost:6333" 20 5
52 |
53 | - name: Install dependencies
54 | run: |
55 | python -m pip install --upgrade pip setuptools wheel \
56 | && pip install -r requirements.txt -r requirements-dev.txt \
57 | && pip install pytest-cov pytest-mock pytest-asyncio factory_boy \
58 | && pip install -e .
59 |
60 | - name: Set up environment
61 | run: |
62 | # Create required directories
63 | mkdir -p logs knowledge cache
64 |
65 | {
66 | echo "QDRANT_URL=http://localhost:6333"
67 | echo "MCP_QDRANT_URL=http://localhost:6333"
68 | echo "COLLECTION_NAME=mcp-codebase-insight-tdd-${{ github.run_id }}"
69 | echo "MCP_COLLECTION_NAME=mcp-codebase-insight-tdd-${{ github.run_id }}"
70 | echo "EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2"
71 | echo "PYTHON_VERSION=${{ matrix.python-version }}"
72 | } >> "$GITHUB_ENV"
73 |
74 | - name: Initialize Qdrant collection
75 | run: |
76 | echo "Creating Qdrant collection for testing..."
77 | python - <<-'EOF'
78 | import os
79 | from qdrant_client import QdrantClient
80 | from qdrant_client.http import models
81 |
82 | # Connect to Qdrant
83 | client = QdrantClient(url="http://localhost:6333")
84 | collection_name = os.environ.get("COLLECTION_NAME", "mcp-codebase-insight-tdd-${{ github.run_id }}")
85 |
86 | # Check if collection exists
87 | collections = client.get_collections().collections
88 | collection_names = [c.name for c in collections]
89 |
90 | if collection_name in collection_names:
91 | print(f"Collection {collection_name} already exists, recreating it...")
92 | client.delete_collection(collection_name=collection_name)
93 |
94 | # Create collection with vector size 384 (for all-MiniLM-L6-v2)
95 | client.create_collection(
96 | collection_name=collection_name,
97 | vectors_config=models.VectorParams(
98 | size=384, # Dimension for all-MiniLM-L6-v2
99 | distance=models.Distance.COSINE,
100 | ),
101 | )
102 |
103 | print(f"Successfully created collection {collection_name}")
104 | EOF
105 |
106 | - name: Run unit tests
107 | run: |
108 | echo "Running unit tests with coverage..."
109 | python -m pytest tests/components -v -p pytest_asyncio --cov=src --cov-report=xml:coverage-unit.xml --cov-report=term
110 |
111 |       - name: Run integration tests
112 |         run: |
113 |           echo "Running integration tests with coverage..."
114 |           python -m pytest tests/integration -v -p pytest_asyncio --cov=src --cov-append --cov-report=xml:coverage-integration.xml --cov-report=term
115 |
116 |       - name: Generate full coverage report
117 |         run: |
118 |           echo "Generating combined coverage report..."
119 |           # --cov-append above merged both runs into .coverage (XML reports cannot be combined)
120 |           python -m coverage report
121 |           python -m coverage xml
122 |
123 | - name: TDD Verification
124 | run: |
125 | echo "Performing TDD verification checks..."
126 |
127 | # Check if tests exist for all modules
128 | python - <<-'EOF'
129 | import os
130 | import sys
131 | from pathlib import Path
132 |
133 | src_dir = Path("src/mcp_codebase_insight")
134 | test_dir = Path("tests")
135 |
136 | # Get all Python modules in src
137 | modules = [f for f in src_dir.glob("**/*.py") if "__pycache__" not in str(f)]
138 | modules = [str(m.relative_to("src")).replace(".py", "").replace("/", ".") for m in modules]
139 | modules = [m for m in modules if not m.endswith("__init__")]
140 |
141 | # Check for corresponding test files
142 | missing_tests = []
143 | for module in modules:
144 | module_parts = module.split(".")
145 | if len(module_parts) > 2: # Skip __init__ files
146 | module_path = "/".join(module_parts[1:])
147 | test_file = test_dir / f"test_{module_path}.py"
148 | component_test = test_dir / "components" / f"test_{module_parts[-1]}.py"
149 |
150 | if not test_file.exists() and not component_test.exists():
151 | missing_tests.append(module)
152 |
153 | if missing_tests:
154 | print("Warning: The following modules don't have corresponding test files:")
155 | for m in missing_tests:
156 | print(f" - {m}")
157 | else:
158 | print("All modules have corresponding test files.")
159 | EOF
160 |
161 | # Check test coverage threshold
162 | coverage_threshold=40
163 | coverage_result=$(python -m coverage report | grep TOTAL | awk '{print $4}' | sed 's/%//')
164 |
165 | echo "Current test coverage: ${coverage_result}%"
166 | echo "Required minimum coverage: ${coverage_threshold}%"
167 |
168 | if (( $(echo "$coverage_result < $coverage_threshold" | bc -l) )); then
169 | echo "Error: Test coverage is below the required threshold of ${coverage_threshold}%"
170 | exit 1
171 | else
172 | echo "Test coverage meets the required threshold."
173 | fi
174 |
175 | - name: Upload coverage to Codecov
176 |         uses: codecov/codecov-action@v4
177 | with:
178 | files: ./coverage.xml
179 | name: codecov-tdd
180 | fail_ci_if_error: false
181 |
182 | - name: Check test structure
183 | run: |
184 | echo "Validating test structure..."
185 |
186 | # Check for arrange-act-assert pattern in tests
187 | python - <<-'EOF'
188 | import os
189 | import re
190 | from pathlib import Path
191 |
192 | test_files = list(Path("tests").glob("**/*.py"))
193 | violations = []
194 |
195 | for test_file in test_files:
196 | if test_file.name.startswith("test_") and not test_file.name.startswith("conftest"):
197 | with open(test_file, "r") as f:
198 | content = f.read()
199 |
200 | # Check for test functions
201 | test_funcs = re.findall(r"def (test_[a-zA-Z0-9_]+)", content)
202 |
203 | for func in test_funcs:
204 | # Extract function body
205 | pattern = rf"def {func}.*?:(.*?)(?=\n\S|\Z)"
206 | matches = re.search(pattern, content, re.DOTALL)
207 |
208 | if matches:
209 | func_body = matches.group(1)
210 |
211 | # Simple heuristic for arrange-act-assert
212 | if not (
213 | # Look for arranging variables and mocks
214 | re.search(r"= [^=]+", func_body) and
215 | # Look for function calls (actions)
216 | re.search(r"\w+\([^)]*\)", func_body) and
217 | # Look for assertions
218 | ("assert" in func_body)
219 | ):
220 | violations.append(f"{test_file}::{func}")
221 |
222 | if violations:
223 | print("Warning: The following tests might not follow the arrange-act-assert pattern:")
224 | for v in violations[:10]: # Show first 10 violations
225 | print(f" - {v}")
226 | if len(violations) > 10:
227 | print(f" ... and {len(violations) - 10} more")
228 | else:
229 | print("All tests appear to follow the arrange-act-assert pattern.")
230 | EOF
231 |
232 | - name: TDD Workflow Summary
233 | run: |
234 | echo "## TDD Workflow Summary" >> "$GITHUB_STEP_SUMMARY"
235 | echo "✅ TDD verification completed" >> "$GITHUB_STEP_SUMMARY"
236 |
237 | # Add coverage information
238 | coverage_result=$(python -m coverage report | grep TOTAL | awk '{print $4}')
239 | echo "- Test coverage: ${coverage_result}" >> "$GITHUB_STEP_SUMMARY"
240 |
241 | # Add test counts
242 |           unit_tests=$(python -m pytest tests/components --collect-only -q | grep -c "::")
243 |           integration_tests=$(python -m pytest tests/integration --collect-only -q | grep -c "::")
244 | echo "- Unit tests: ${unit_tests}" >> "$GITHUB_STEP_SUMMARY"
245 | echo "- Integration tests: ${integration_tests}" >> "$GITHUB_STEP_SUMMARY"
246 |
247 |
```