#
tokens: 47880/50000 13/146 files (page 4/8)
lines: on (toggle) GitHub
raw markdown copy reset
This is page 4 of 8. Use http://codebase.md/tosin2013/mcp-codebase-insight?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .bumpversion.cfg
├── .codecov.yml
├── .compile-venv-py3.11
│   ├── bin
│   │   ├── activate
│   │   ├── activate.csh
│   │   ├── activate.fish
│   │   ├── Activate.ps1
│   │   ├── coverage
│   │   ├── coverage-3.11
│   │   ├── coverage3
│   │   ├── pip
│   │   ├── pip-compile
│   │   ├── pip-sync
│   │   ├── pip3
│   │   ├── pip3.11
│   │   ├── py.test
│   │   ├── pyproject-build
│   │   ├── pytest
│   │   ├── python
│   │   ├── python3
│   │   ├── python3.11
│   │   └── wheel
│   └── pyvenv.cfg
├── .env.example
├── .github
│   ├── agents
│   │   ├── DebugAgent.agent.md
│   │   ├── DocAgent.agent.md
│   │   ├── README.md
│   │   ├── TestAgent.agent.md
│   │   └── VectorStoreAgent.agent.md
│   ├── copilot-instructions.md
│   └── workflows
│       ├── build-verification.yml
│       ├── publish.yml
│       └── tdd-verification.yml
├── .gitignore
├── async_fixture_wrapper.py
├── CHANGELOG.md
├── CLAUDE.md
├── codebase_structure.txt
├── component_test_runner.py
├── CONTRIBUTING.md
├── core_workflows.txt
├── create_release_issues.sh
├── debug_tests.md
├── Dockerfile
├── docs
│   ├── adrs
│   │   └── 001_use_docker_for_qdrant.md
│   ├── api.md
│   ├── components
│   │   └── README.md
│   ├── cookbook.md
│   ├── development
│   │   ├── CODE_OF_CONDUCT.md
│   │   ├── CONTRIBUTING.md
│   │   └── README.md
│   ├── documentation_map.md
│   ├── documentation_summary.md
│   ├── features
│   │   ├── adr-management.md
│   │   ├── code-analysis.md
│   │   └── documentation.md
│   ├── getting-started
│   │   ├── configuration.md
│   │   ├── docker-setup.md
│   │   ├── installation.md
│   │   ├── qdrant_setup.md
│   │   └── quickstart.md
│   ├── qdrant_setup.md
│   ├── README.md
│   ├── SSE_INTEGRATION.md
│   ├── system_architecture
│   │   └── README.md
│   ├── templates
│   │   └── adr.md
│   ├── testing_guide.md
│   ├── troubleshooting
│   │   ├── common-issues.md
│   │   └── faq.md
│   ├── vector_store_best_practices.md
│   └── workflows
│       └── README.md
├── error_logs.txt
├── examples
│   └── use_with_claude.py
├── github-actions-documentation.md
├── Makefile
├── module_summaries
│   ├── backend_summary.txt
│   ├── database_summary.txt
│   └── frontend_summary.txt
├── output.txt
├── package-lock.json
├── package.json
├── PLAN.md
├── prepare_codebase.sh
├── PULL_REQUEST.md
├── pyproject.toml
├── pytest.ini
├── README.md
├── requirements-3.11.txt
├── requirements-3.11.txt.backup
├── requirements-dev.txt
├── requirements.in
├── requirements.txt
├── run_build_verification.sh
├── run_fixed_tests.sh
├── run_test_with_path_fix.sh
├── run_tests.py
├── scripts
│   ├── check_qdrant_health.sh
│   ├── compile_requirements.sh
│   ├── load_example_patterns.py
│   ├── macos_install.sh
│   ├── README.md
│   ├── setup_qdrant.sh
│   ├── start_mcp_server.sh
│   ├── store_code_relationships.py
│   ├── store_report_in_mcp.py
│   ├── validate_knowledge_base.py
│   ├── validate_poc.py
│   ├── validate_vector_store.py
│   └── verify_build.py
├── server.py
├── setup_qdrant_collection.py
├── setup.py
├── src
│   └── mcp_codebase_insight
│       ├── __init__.py
│       ├── __main__.py
│       ├── asgi.py
│       ├── core
│       │   ├── __init__.py
│       │   ├── adr.py
│       │   ├── cache.py
│       │   ├── component_status.py
│       │   ├── config.py
│       │   ├── debug.py
│       │   ├── di.py
│       │   ├── documentation.py
│       │   ├── embeddings.py
│       │   ├── errors.py
│       │   ├── health.py
│       │   ├── knowledge.py
│       │   ├── metrics.py
│       │   ├── prompts.py
│       │   ├── sse.py
│       │   ├── state.py
│       │   ├── task_tracker.py
│       │   ├── tasks.py
│       │   └── vector_store.py
│       ├── models.py
│       ├── server_test_isolation.py
│       ├── server.py
│       ├── utils
│       │   ├── __init__.py
│       │   └── logger.py
│       └── version.py
├── start-mcpserver.sh
├── summary_document.txt
├── system-architecture.md
├── system-card.yml
├── test_fix_helper.py
├── test_fixes.md
├── test_function.txt
├── test_imports.py
├── tests
│   ├── components
│   │   ├── conftest.py
│   │   ├── test_core_components.py
│   │   ├── test_embeddings.py
│   │   ├── test_knowledge_base.py
│   │   ├── test_sse_components.py
│   │   ├── test_stdio_components.py
│   │   ├── test_task_manager.py
│   │   └── test_vector_store.py
│   ├── config
│   │   └── test_config_and_env.py
│   ├── conftest.py
│   ├── integration
│   │   ├── fixed_test2.py
│   │   ├── test_api_endpoints.py
│   │   ├── test_api_endpoints.py-e
│   │   ├── test_communication_integration.py
│   │   └── test_server.py
│   ├── README.md
│   ├── README.test.md
│   ├── test_build_verifier.py
│   └── test_file_relationships.py
└── trajectories
    └── tosinakinosho
        ├── anthropic_filemap__claude-3-sonnet-20240229__t-0.00__p-1.00__c-3.00___db62b9
        │   └── db62b9
        │       └── config.yaml
        ├── default__claude-3-5-sonnet-20240620__t-0.00__p-1.00__c-3.00___03565e
        │   └── 03565e
        │       ├── 03565e.traj
        │       └── config.yaml
        └── default__openrouter
            └── anthropic
                └── claude-3.5-sonnet-20240620:beta__t-0.00__p-1.00__c-3.00___03565e
                    └── 03565e
                        ├── 03565e.pred
                        ├── 03565e.traj
                        └── config.yaml
```

# Files

--------------------------------------------------------------------------------
/docs/cookbook.md:
--------------------------------------------------------------------------------

```markdown
  1 | # MCP Codebase Insight Cookbook
  2 | 
  3 | This cookbook provides practical examples, common use cases, and solutions for working with the MCP Codebase Insight system. Each recipe includes step-by-step instructions, code examples, and explanations.
  4 | 
  5 | ## Table of Contents
  6 | 
  7 | - [Setup and Configuration](#setup-and-configuration)
  8 | - [Vector Store Operations](#vector-store-operations)
  9 | - [Code Analysis](#code-analysis)
 10 | - [Knowledge Base Integration](#knowledge-base-integration)
 11 | - [Task Management](#task-management)
 12 | - [Transport Protocol Usage](#transport-protocol-usage)
 13 | - [Troubleshooting](#troubleshooting)
 14 | 
 15 | ## Setup and Configuration
 16 | 
 17 | ### Recipe: Quick Start Setup
 18 | 
 19 | ```bash
 20 | # 1. Clone the repository
 21 | git clone https://github.com/tosin2013/mcp-codebase-insight.git
 22 | cd mcp-codebase-insight
 23 | 
 24 | # 2. Create and activate virtual environment
 25 | python -m venv .venv
 26 | source .venv/bin/activate  # On Windows: .venv\Scripts\activate
 27 | 
 28 | # 3. Install dependencies
 29 | pip install -r requirements.txt
 30 | 
 31 | # 4. Set up environment variables
 32 | cp .env.example .env
 33 | # Edit .env with your configuration
 34 | ```
 35 | 
 36 | ### Recipe: Configure Vector Store
 37 | 
 38 | ```python
 39 | from mcp_codebase_insight.core.vector_store import VectorStore
 40 | from mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding
 41 | 
 42 | async def setup_vector_store():
 43 |     # Initialize embedder
 44 |     embedder = SentenceTransformerEmbedding(
 45 |         model_name="sentence-transformers/all-MiniLM-L6-v2"
 46 |     )
 47 |     await embedder.initialize()
 48 |     
 49 |     # Initialize vector store
 50 |     vector_store = VectorStore(
 51 |         url="http://localhost:6333",
 52 |         embedder=embedder,
 53 |         collection_name="mcp-codebase-insight",
 54 |         api_key="your-api-key",  # Optional
 55 |         vector_name="default"
 56 |     )
 57 |     await vector_store.initialize()
 58 |     return vector_store
 59 | ```
 60 | 
 61 | ## Vector Store Operations
 62 | 
 63 | ### Recipe: Store and Search Code Snippets
 64 | 
 65 | ```python
 66 | async def store_code_snippet(vector_store, code: str, metadata: dict):
 67 |     await vector_store.add_vector(
 68 |         text=code,
 69 |         metadata={
 70 |             "type": "code",
 71 |             "content": code,
 72 |             **metadata
 73 |         }
 74 |     )
 75 | 
 76 | async def search_similar_code(vector_store, query: str, limit: int = 5):
 77 |     results = await vector_store.search_similar(
 78 |         query=query,
 79 |         limit=limit
 80 |     )
 81 |     return results
 82 | 
 83 | # Usage example
 84 | code_snippet = """
 85 | def calculate_sum(a: int, b: int) -> int:
 86 |     return a + b
 87 | """
 88 | 
 89 | metadata = {
 90 |     "filename": "math_utils.py",
 91 |     "function_name": "calculate_sum",
 92 |     "language": "python"
 93 | }
 94 | 
 95 | await store_code_snippet(vector_store, code_snippet, metadata)
 96 | similar_snippets = await search_similar_code(vector_store, "function to add two numbers")
 97 | ```
 98 | 
 99 | ### Recipe: Batch Processing Code Files
100 | 
101 | ```python
102 | import asyncio
103 | from pathlib import Path
104 | 
105 | async def process_codebase(vector_store, root_dir: str):
106 |     async def process_file(file_path: Path):
107 |         if not file_path.suffix == '.py':  # Adjust for your needs
108 |             return
109 |             
110 |         code = file_path.read_text()
111 |         await store_code_snippet(vector_store, code, {
112 |             "filename": file_path.name,
113 |             "path": str(file_path),
114 |             "language": "python"
115 |         })
116 | 
117 |     root = Path(root_dir)
118 |     tasks = [
119 |         process_file(f) 
120 |         for f in root.rglob('*') 
121 |         if f.is_file()
122 |     ]
123 |     await asyncio.gather(*tasks)
124 | ```
125 | 
126 | ## Code Analysis
127 | 
128 | ### Recipe: Detect Architectural Patterns
129 | 
130 | ```python
131 | from mcp_codebase_insight.analysis.patterns import PatternDetector
132 | 
133 | async def analyze_architecture(code_path: str):
134 |     detector = PatternDetector()
135 |     patterns = await detector.detect_patterns(code_path)
136 |     
137 |     for pattern in patterns:
138 |         print(f"Pattern: {pattern.name}")
139 |         print(f"Location: {pattern.location}")
140 |         print(f"Confidence: {pattern.confidence}")
141 |         print("---")
142 | ```
143 | 
144 | ### Recipe: Generate Code Insights
145 | 
146 | ```python
147 | from mcp_codebase_insight.analysis.insights import InsightGenerator
148 | 
149 | async def generate_insights(vector_store, codebase_path: str):
150 |     generator = InsightGenerator(vector_store)
151 |     insights = await generator.analyze_codebase(codebase_path)
152 |     
153 |     return {
154 |         "complexity_metrics": insights.complexity,
155 |         "dependency_graph": insights.dependencies,
156 |         "architectural_patterns": insights.patterns,
157 |         "recommendations": insights.recommendations
158 |     }
159 | ```
160 | 
161 | ## Knowledge Base Integration
162 | 
163 | ### Recipe: Store and Query Documentation
164 | 
165 | ```python
166 | from mcp_codebase_insight.core.knowledge import KnowledgeBase
167 | 
168 | async def manage_documentation(kb: KnowledgeBase):
169 |     # Store documentation
170 |     await kb.store_document(
171 |         content="API documentation content...",
172 |         metadata={
173 |             "type": "api_doc",
174 |             "version": "1.0",
175 |             "category": "reference"
176 |         }
177 |     )
178 |     
179 |     # Query documentation
180 |     results = await kb.search(
181 |         query="How to configure authentication",
182 |         filters={
183 |             "type": "api_doc",
184 |             "category": "reference"
185 |         }
186 |     )
187 | ```
188 | 
189 | ## Task Management
190 | 
191 | ### Recipe: Create and Track Tasks
192 | 
193 | ```python
194 | from mcp_codebase_insight.core.tasks import TaskManager
195 | 
196 | async def manage_tasks(task_manager: TaskManager):
197 |     # Create a new task
198 |     task = await task_manager.create_task(
199 |         title="Implement authentication",
200 |         description="Add OAuth2 authentication to API endpoints",
201 |         priority="high",
202 |         tags=["security", "api"]
203 |     )
204 |     
205 |     # Update task status
206 |     await task_manager.update_task(
207 |         task_id=task.id,
208 |         status="in_progress",
209 |         progress=0.5
210 |     )
211 |     
212 |     # Query tasks
213 |     active_tasks = await task_manager.get_tasks(
214 |         filters={
215 |             "status": "in_progress",
216 |             "tags": ["security"]
217 |         }
218 |     )
219 | ```
220 | 
221 | ## Transport Protocol Usage
222 | 
223 | ### Recipe: Using SSE Transport
224 | 
225 | ```python
226 | from mcp_codebase_insight.core.sse import SSETransport
227 | 
228 | async def setup_sse():
229 |     transport = SSETransport(
230 |         url="http://localhost:8000/events",
231 |         headers={"Authorization": "Bearer your-token"}
232 |     )
233 |     
234 |     async with transport:
235 |         await transport.subscribe("codebase_updates")
236 |         async for event in transport.events():
237 |             print(f"Received update: {event.data}")
238 | ```
239 | 
240 | ### Recipe: Using StdIO Transport
241 | 
242 | ```python
243 | from mcp_codebase_insight.transport.stdio import StdIOTransport
244 | 
245 | async def use_stdio():
246 |     transport = StdIOTransport()
247 |     
248 |     async with transport:
249 |         # Send command
250 |         await transport.send_command({
251 |             "type": "analyze",
252 |             "payload": {"path": "src/main.py"}
253 |         })
254 |         
255 |         # Receive response
256 |         response = await transport.receive_response()
257 |         print(f"Analysis result: {response}")
258 | ```
259 | 
260 | ## Troubleshooting
261 | 
262 | ### Recipe: Validate Vector Store Health
263 | 
264 | ```python
265 | async def check_vector_store_health(config: dict) -> bool:
266 |     try:
267 |         # Initialize components
268 |         embedder = SentenceTransformerEmbedding(
269 |             model_name="sentence-transformers/all-MiniLM-L6-v2"
270 |         )
271 |         await embedder.initialize()
272 |         
273 |         vector_store = VectorStore(
274 |             url=config["QDRANT_URL"],
275 |             embedder=embedder,
276 |             collection_name=config["COLLECTION_NAME"]
277 |         )
278 |         await vector_store.initialize()
279 |         
280 |         # Test basic operations
281 |         test_text = "def test_function():\n    pass"
282 |         await vector_store.add_vector(
283 |             text=test_text,
284 |             metadata={"type": "test"}
285 |         )
286 |         
287 |         results = await vector_store.search_similar(
288 |             query=test_text,
289 |             limit=1
290 |         )
291 |         
292 |         return len(results) > 0
293 |         
294 |     except Exception as e:
295 |         print(f"Health check failed: {e}")
296 |         return False
297 | ```
298 | 
299 | ### Recipe: Debug Transport Issues
300 | 
301 | ```python
302 | import logging
303 | from mcp_codebase_insight.core.debug import TransportDebugger
304 | 
305 | async def debug_transport_issues():
306 |     # Enable detailed logging
307 |     logging.basicConfig(level=logging.DEBUG)
308 |     
309 |     debugger = TransportDebugger()
310 |     
311 |     # Test SSE connection
312 |     sse_status = await debugger.check_sse_connection(
313 |         url="http://localhost:8000/events"
314 |     )
315 |     print(f"SSE Status: {sse_status}")
316 |     
317 |     # Test StdIO communication
318 |     stdio_status = await debugger.check_stdio_communication()
319 |     print(f"StdIO Status: {stdio_status}")
320 |     
321 |     # Generate diagnostic report
322 |     report = await debugger.generate_diagnostic_report()
323 |     print(report)
324 | ```
325 | 
326 | ## Best Practices
327 | 
328 | 1. Always use async/await when working with the system's async functions
329 | 2. Initialize components in a context manager or properly handle cleanup
330 | 3. Use structured error handling for vector store operations
331 | 4. Implement retry logic for network-dependent operations
332 | 5. Cache frequently accessed vector embeddings
333 | 6. Use batch operations when processing multiple items
334 | 7. Implement proper logging for debugging
335 | 8. Regular health checks for system components
336 | 
337 | ## Common Issues and Solutions
338 | 
339 | 1. **Vector Store Connection Issues**
340 |    - Check if Qdrant is running and accessible
341 |    - Verify API key if authentication is enabled
342 |    - Ensure proper network connectivity
343 | 
344 | 2. **Embedding Generation Failures**
345 |    - Verify model availability and access
346 |    - Check input text formatting
347 |    - Monitor memory usage for large inputs
348 | 
349 | 3. **Transport Protocol Errors**
350 |    - Verify endpoint URLs and authentication
351 |    - Check for firewall or proxy issues
352 |    - Monitor connection timeouts
353 | 
354 | 4. **Performance Issues**
355 |    - Use batch operations for multiple items
356 |    - Implement caching where appropriate
357 |    - Monitor and optimize vector store queries
358 | 
359 | For more detailed information, refer to the [official documentation](docs/README.md) and [API reference](docs/api.md). 
```

--------------------------------------------------------------------------------
/.github/agents/DebugAgent.agent.md:
--------------------------------------------------------------------------------

```markdown
  1 | # Debug Agent
  2 | 
  3 | You are a specialized debugging agent for the MCP Codebase Insight project. You follow Agans' 9 Rules of Debugging and help diagnose and fix issues systematically.
  4 | 
  5 | ## Agans' 9 Rules of Debugging
  6 | 
  7 | 1. **Understand the System**: Know how components work before debugging
  8 | 2. **Make It Fail**: Reproduce the bug consistently
  9 | 3. **Quit Thinking and Look**: Observe actual behavior, don't assume
 10 | 4. **Divide and Conquer**: Isolate the problem systematically
 11 | 5. **Change One Thing at a Time**: Test hypotheses individually
 12 | 6. **Keep an Audit Trail**: Document what you've tried
 13 | 7. **Check the Plug**: Verify basic assumptions first
 14 | 8. **Get a Fresh View**: Sometimes you need a different perspective
 15 | 9. **If You Didn't Fix It, It Isn't Fixed**: Verify the fix works
 16 | 
 17 | ## Your Responsibilities
 18 | 
 19 | 1. **Diagnose Issues**: Systematically identify root causes
 20 | 2. **Fix Bugs**: Implement proper fixes, not workarounds
 21 | 3. **Prevent Recurrence**: Add tests and improve error handling
 22 | 4. **Document Findings**: Update troubleshooting docs
 23 | 
 24 | ## Common Issue Categories
 25 | 
 26 | ### 1. Async/Event Loop Issues
 27 | 
 28 | **Symptoms:**
 29 | - "RuntimeError: Event loop is closed"
 30 | - "Task was destroyed but it is pending"
 31 | - "coroutine was never awaited"
 32 | 
 33 | **Check the Plug:**
 34 | ```python
 35 | # Are you using await?
 36 | result = await async_function()  # ✓ Correct
 37 | result = async_function()         # ✗ Wrong
 38 | 
 39 | # Are you in an async context?
 40 | async def my_function():  # ✓ Correct
 41 |     await something()
 42 | 
 43 | def my_function():       # ✗ Wrong - can't await here
 44 |     await something()
 45 | ```
 46 | 
 47 | **Common Causes:**
 48 | 1. Missing `await` keyword
 49 | 2. Calling async functions from sync context
 50 | 3. Event loop closed before cleanup
 51 | 4. Multiple event loops in tests
 52 | 
 53 | **Solutions:**
 54 | 
 55 | ```python
 56 | # For tests: use the custom runner (run from a shell):
 57 | #   ./run_tests.py --isolated --sequential
 58 | 
 59 | # For code: Proper async/await
 60 | async def process_data(data):
 61 |     result = await async_operation(data)  # Always await
 62 |     return result
 63 | 
 64 | # For cleanup: Use context managers
 65 | async with component:
 66 |     await component.do_work()
 67 | # Cleanup automatic
 68 | 
 69 | # Or explicit cleanup
 70 | try:
 71 |     await component.initialize()
 72 |     await component.do_work()
 73 | finally:
 74 |     await component.cleanup()  # Always cleanup
 75 | ```
 76 | 
 77 | ### 2. Qdrant Connection Issues
 78 | 
 79 | **Symptoms:**
 80 | - "Connection refused" on port 6333
 81 | - "Vector store not available"
 82 | - Timeout errors during initialization
 83 | 
 84 | **Check the Plug:**
 85 | ```bash
 86 | # Is Qdrant running?
 87 | curl http://localhost:6333/collections
 88 | 
 89 | # Is the URL correct?
 90 | echo $QDRANT_URL
 91 | 
 92 | # Can you reach the host?
 93 | ping localhost
 94 | ```
 95 | 
 96 | **Common Causes:**
 97 | 1. Qdrant not started
 98 | 2. Wrong URL in environment
 99 | 3. Network/firewall issues
100 | 4. Qdrant container crashed
101 | 
102 | **Solutions:**
103 | 
104 | ```bash
105 | # Start Qdrant
106 | docker run -p 6333:6333 qdrant/qdrant
107 | 
108 | # Check container status
109 | docker ps | grep qdrant
110 | 
111 | # Check logs
112 | docker logs <qdrant-container-id>
113 | 
114 | # Test connection
115 | curl http://localhost:6333/collections
116 | ```
117 | 
118 | **Code-level handling:**
119 | ```python
120 | # VectorStore handles gracefully
121 | try:
122 |     vector_store = VectorStore(url, embedder)
123 |     await vector_store.initialize()
124 | except Exception as e:
125 |     logger.warning(f"Vector store unavailable: {e}")
126 |     # Server continues with reduced functionality
127 | ```
128 | 
129 | ### 3. Cache Issues
130 | 
131 | **Symptoms:**
132 | - Stale data returned
133 | - Cache misses when hits expected
134 | - Cache size growing unbounded
135 | 
136 | **Check the Plug:**
137 | ```bash
138 | # Is cache enabled?
139 | echo $MCP_CACHE_ENABLED
140 | 
141 | # Is disk cache dir writable?
142 | ls -la cache/
143 | touch cache/test.txt
144 | ```
145 | 
146 | **Common Causes:**
147 | 1. Cache not properly initialized
148 | 2. Cache key collisions
149 | 3. Cache invalidation not working
150 | 4. Disk cache permissions
151 | 
152 | **Solutions:**
153 | 
154 | ```python
155 | # Proper cache initialization
156 | cache_manager = CacheManager(config)
157 | await cache_manager.initialize()
158 | 
159 | # Clear cache if stale
160 | await cache_manager.clear_all()
161 | 
162 | # Check cache statistics
163 | stats = cache_manager.get_stats()
164 | print(f"Hit rate: {stats.hit_rate}%")
165 | 
166 | # Manual invalidation
167 | await cache_manager.invalidate(key)
168 | ```
169 | 
170 | ### 4. Memory/Resource Leaks
171 | 
172 | **Symptoms:**
173 | - Memory usage grows over time
174 | - "Too many open files" errors
175 | - Resource warnings in tests
176 | 
177 | **Check the Plug:**
178 | ```python
179 | # Are you cleaning up resources?
180 | try:
181 |     file = open("data.txt")
182 |     # Use file
183 | finally:
184 |     file.close()  # Or use context manager
185 | 
186 | # Are async resources cleaned up?
187 | try:
188 |     await component.initialize()
189 |     # Use component
190 | finally:
191 |     await component.cleanup()  # Critical!
192 | ```
193 | 
194 | **Common Causes:**
195 | 1. Missing cleanup calls
196 | 2. Circular references
197 | 3. Tasks not cancelled
198 | 4. File handles not closed
199 | 
200 | **Solutions:**
201 | 
202 | ```python
203 | # Use context managers
204 | async with aiofiles.open('file.txt') as f:
205 |     data = await f.read()
206 | 
207 | # Cancel background tasks
208 | try:
209 |     task = asyncio.create_task(background_work())
210 |     # Main work
211 | finally:
212 |     task.cancel()
213 |     try:
214 |         await task
215 |     except asyncio.CancelledError:
216 |         pass
217 | 
218 | # Track component status
219 | assert component.status == ComponentStatus.INITIALIZED
220 | # Use component
221 | await component.cleanup()
222 | assert component.status == ComponentStatus.CLEANED_UP
223 | ```
224 | 
225 | ### 5. Configuration Issues
226 | 
227 | **Symptoms:**
228 | - "Environment variable not set"
229 | - Wrong defaults being used
230 | - Configuration not loading
231 | 
232 | **Check the Plug:**
233 | ```bash
234 | # Are env vars set?
235 | env | grep MCP_
236 | env | grep QDRANT_
237 | 
238 | # Is .env file present?
239 | ls -la .env
240 | 
241 | # Are you in the right directory?
242 | pwd
243 | ```
244 | 
245 | **Common Causes:**
246 | 1. Missing .env file
247 | 2. Wrong environment variables
248 | 3. Config not reloaded after changes
249 | 4. Type conversion errors
250 | 
251 | **Solutions:**
252 | 
253 | ```python
254 | # Use ServerConfig.from_env()
255 | config = ServerConfig.from_env()
256 | 
257 | # Validate config
258 | assert config.qdrant_url, "QDRANT_URL must be set"
259 | assert config.embedding_model, "MCP_EMBEDDING_MODEL must be set"
260 | 
261 | # Create directories
262 | config.create_directories()
263 | 
264 | # Debug config
265 | print(f"Config: {config.to_dict()}")
266 | ```
267 | 
268 | ## Debugging Workflow
269 | 
270 | ### Step 1: Reproduce the Issue
271 | 
272 | ```python
273 | # Create minimal reproduction
274 | async def test_bug_reproduction():
275 |     """Minimal test case that reproduces the bug."""
276 |     # Setup
277 |     component = BuggyComponent()
278 |     await component.initialize()
279 |     
280 |     # Trigger bug
281 |     result = await component.buggy_method()
282 |     
283 |     # Bug manifests here
284 |     assert result is not None, "Bug: result is None!"
285 |     
286 |     # Cleanup
287 |     await component.cleanup()
288 | ```
289 | 
290 | ### Step 2: Add Logging
291 | 
292 | ```python
293 | from src.mcp_codebase_insight.utils.logger import get_logger
294 | logger = get_logger(__name__)
295 | 
296 | async def buggy_method(self):
297 |     logger.debug(f"Entering buggy_method with state: {self.state}")
298 |     
299 |     try:
300 |         result = await self.do_something()
301 |         logger.debug(f"Result: {result}")
302 |         return result
303 |     except Exception as e:
304 |         logger.error(f"Error in buggy_method: {e}", exc_info=True)
305 |         raise
306 | ```
307 | 
308 | ### Step 3: Isolate the Problem
309 | 
310 | ```python
311 | # Binary search approach
312 | async def test_isolation():
313 |     # Test each component individually
314 |     
315 |     # Step 1 works?
316 |     await step1()
317 |     assert check_step1(), "Step 1 failed"
318 |     
319 |     # Step 2 works?
320 |     await step2()
321 |     assert check_step2(), "Step 2 failed"  # Bug is here!
322 |     
323 |     # Step 3...
324 | ```
325 | 
326 | ### Step 4: Form Hypothesis
327 | 
328 | ```python
329 | # Hypothesis: Component not initialized before use
330 | async def test_hypothesis():
331 |     component = MyComponent()
332 |     # DON'T initialize - test hypothesis
333 |     
334 |     # This should fail if hypothesis is correct
335 |     try:
336 |         await component.method()
337 |         assert False, "Should have failed!"
338 |     except ComponentNotInitializedError:
339 |         # Hypothesis confirmed!
340 |         pass
341 | ```
342 | 
343 | ### Step 5: Fix and Verify
344 | 
345 | ```python
346 | # Original buggy code
347 | async def buggy_version(self):
348 |     result = await self.operation()  # Bug: might not be initialized
349 |     return result
350 | 
351 | # Fixed code
352 | async def fixed_version(self):
353 |     if not self.initialized:
354 |         await self.initialize()  # Fix: ensure initialized
355 |     result = await self.operation()
356 |     return result
357 | 
358 | # Verify fix
359 | async def test_fix():
360 |     component = MyComponent()
361 |     # Don't initialize manually
362 |     result = await component.fixed_version()  # Should work now
363 |     assert result is not None
364 | ```
365 | 
366 | ### Step 6: Add Test
367 | 
368 | ```python
369 | @pytest.mark.asyncio
370 | async def test_prevents_future_bug():
371 |     """Regression test for bug XYZ."""
372 |     # Setup that triggers the original bug
373 |     component = MyComponent()
374 |     
375 |     # Should work without manual initialization
376 |     result = await component.method()
377 |     
378 |     # Verify fix
379 |     assert result is not None
380 |     assert component.initialized  # Automatically initialized
381 | ```
382 | 
383 | ## Debug Tools
384 | 
385 | ### Enable Debug Mode
386 | 
387 | ```bash
388 | # Set debug mode
389 | export MCP_DEBUG=true
390 | export MCP_LOG_LEVEL=DEBUG
391 | 
392 | # Run with verbose logging
393 | python -m mcp_codebase_insight
394 | ```
395 | 
396 | ### Async Debug Mode
397 | 
398 | ```python
399 | import asyncio
400 | import logging
401 | 
402 | # Enable asyncio debug mode
403 | asyncio.get_event_loop().set_debug(True)
404 | logging.getLogger('asyncio').setLevel(logging.DEBUG)
405 | ```
406 | 
407 | ### Component Health Check
408 | 
409 | ```python
410 | from src.mcp_codebase_insight.core.health import HealthMonitor
411 | 
412 | health = HealthMonitor(config)
413 | await health.initialize()
414 | 
415 | status = await health.check_health()
416 | print(f"System health: {status}")
417 | 
418 | for component, state in status.components.items():
419 |     print(f"  {component}: {state.status}")
420 | ```
421 | 
422 | ### Memory Profiling
423 | 
424 | ```python
425 | import tracemalloc
426 | 
427 | tracemalloc.start()
428 | 
429 | # Run code
430 | await problematic_function()
431 | 
432 | # Get memory snapshot
433 | snapshot = tracemalloc.take_snapshot()
434 | top_stats = snapshot.statistics('lineno')
435 | 
436 | for stat in top_stats[:10]:
437 |     print(stat)
438 | ```
439 | 
440 | ## Key Files for Debugging
441 | 
442 | - `src/mcp_codebase_insight/utils/logger.py`: Logging configuration
443 | - `src/mcp_codebase_insight/core/debug.py`: Debug utilities
444 | - `src/mcp_codebase_insight/core/health.py`: Health monitoring
445 | - `src/mcp_codebase_insight/core/errors.py`: Error handling
446 | - `docs/troubleshooting/common-issues.md`: Known issues
447 | - `tests/conftest.py`: Test configuration and fixtures
448 | 
449 | ## Debugging Checklist
450 | 
451 | When debugging, systematically check:
452 | 
453 | - [ ] Can you reproduce the issue consistently?
454 | - [ ] Have you checked the logs?
455 | - [ ] Are all environment variables set correctly?
456 | - [ ] Are all services (Qdrant) running?
457 | - [ ] Is the component properly initialized?
458 | - [ ] Are you using `await` for async calls?
459 | - [ ] Are resources being cleaned up?
460 | - [ ] Have you checked the "Check the Plug" items?
461 | - [ ] Is this a known issue in troubleshooting docs?
462 | - [ ] Have you tried in a clean environment?
463 | 
464 | ## When to Escalate
465 | 
466 | - Issue persists after systematic debugging
467 | - Requires deep knowledge of external dependencies (Qdrant internals)
468 | - Performance issues needing profiling tools
469 | - Suspected bugs in Python or libraries
470 | - Security vulnerabilities discovered
471 | - Architectural issues requiring system redesign
472 | 
```

--------------------------------------------------------------------------------
/trajectories/tosinakinosho/anthropic_filemap__claude-3-sonnet-20240229__t-0.00__p-1.00__c-3.00___db62b9/db62b9/config.yaml:
--------------------------------------------------------------------------------

```yaml
 1 | '{"env":{"deployment":{"image":"python:3.11","port":null,"docker_args":[],"startup_timeout":180.0,"pull":"missing","remove_images":false,"python_standalone_dir":"/root","platform":null,"type":"docker"},"repo":{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight","base_commit":"HEAD","type":"local"},"post_startup_commands":[],"post_startup_command_timeout":500,"name":"main"},"agent":{"name":"main","templates":{"system_template":"You
 2 |   are a helpful assistant that can interact with a computer to solve tasks.","instance_template":"<uploaded_files>\n{{working_dir}}\n</uploaded_files>\nI''ve
 3 |   uploaded a python code repository in the directory {{working_dir}}. Consider the
 4 |   following PR description:\n\n<pr_description>\n{{problem_statement}}\n</pr_description>\n\nCan
 5 |   you help me implement the necessary changes to the repository so that the requirements
 6 |   specified in the <pr_description> are met?\nI''ve already taken care of all changes
 7 |   to any of the test files described in the <pr_description>. This means you DON''T
 8 |   have to modify the testing logic or any of the tests in any way!\nYour task is to
 9 |   make the minimal changes to non-tests files in the {{working_dir}} directory to
10 |   ensure the <pr_description> is satisfied.\nFollow these steps to resolve the issue:\n1.
11 |   As a first step, it might be a good idea to find and read code relevant to the <pr_description>\n2.
12 |   Create a script to reproduce the error and execute it with `python <filename.py>`
13 |   using the bash tool, to confirm the error\n3. Edit the sourcecode of the repo to
14 |   resolve the issue\n4. Rerun your reproduce script and confirm that the error is
15 |   fixed!\n5. Think about edgecases and make sure your fix handles them as well\nYour
16 |   thinking should be thorough and so it''s fine if it''s very long.","next_step_template":"OBSERVATION:\n{{observation}}","next_step_truncated_observation_template":"Observation:
17 |   {{observation}}<response clipped><NOTE>Observations should not exceeded {{max_observation_length}}
18 |   characters. {{elided_chars}} characters were elided. Please try a different command
19 |   that produces less output or use head/tail/grep/redirect the output to a file. Do
20 |   not use interactive pagers.</NOTE>","max_observation_length":100000,"next_step_no_output_template":"Your
21 |   command ran successfully and did not produce any output.","strategy_template":null,"demonstration_template":null,"demonstrations":[],"put_demos_in_history":false,"shell_check_error_template":"Your
22 |   bash command contained syntax errors and was NOT executed. Please fix the syntax
23 |   errors and try again. This can be the result of not adhering to the syntax for multi-line
24 |   commands. Here is the output of `bash -n`:\n{{bash_stdout}}\n{{bash_stderr}}","command_cancelled_timeout_template":"The
25 |   command ''{{command}}'' was cancelled because it took more than {{timeout}} seconds.
26 |   Please try a different command that completes more quickly."},"tools":{"filter":{"blocklist_error_template":"Operation
27 |   ''{{action}}'' is not supported by this environment.","blocklist":["vim","vi","emacs","nano","nohup","gdb","less","tail
28 |   -f","python -m venv","make"],"blocklist_standalone":["python","python3","ipython","bash","sh","/bin/bash","/bin/sh","nohup","vi","vim","emacs","nano","su"],"block_unless_regex":{"radare2":"\\b(?:radare2)\\b.*\\s+-c\\s+.*","r2":"\\b(?:radare2)\\b.*\\s+-c\\s+.*"}},"bundles":[{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/registry","hidden_tools":[]},{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/edit_anthropic","hidden_tools":[]},{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/review_on_submit_m","hidden_tools":[]}],"env_variables":{},"registry_variables":{"USE_FILEMAP":"true","SUBMIT_REVIEW_MESSAGES":["Thank
29 |   you for your work on this issue. Please carefully follow the steps below to help
30 |   review your changes.\n\n1. If you made any changes to your code after running the
31 |   reproduction script, please run the reproduction script again.\n  If the reproduction
32 |   script is failing, please revisit your changes and make sure they are correct.\n  If
33 |   you have already removed your reproduction script, please ignore this step.\n2.
34 |   Remove your reproduction script (if you haven''t done so already).\n3. If you have
35 |   modified any TEST files, please revert them to the state they had before you started
36 |   fixing the issue.\n  You can do this with `git checkout -- /path/to/test/file.py`.
37 |   Use below <diff> to find the files you need to revert.\n4. Run the submit command
38 |   again to confirm.\n\nHere is a list of all of your changes:\n\n<diff>\n{{diff}}\n</diff>\n"]},"submit_command":"submit","parse_function":{"error_message":"{%-
39 |   if error_code == \"missing\" -%}\nYour last output did not use any tool calls!\nPlease
40 |   make sure your output includes exactly _ONE_ function call!\nYou must invoke the
41 |   function directly using the function call format.\nYou cannot invoke commands with
42 |   ```, you have to use the function call format.\nIf you think you have already resolved
43 |   the issue, please submit your changes by running the `submit` command.\nIf you think
44 |   you cannot solve the problem, please run `exit_forfeit` (if available) or `submit`.\nElse,
45 |   please continue with a new tool call!\n{%- elif error_code == \"multiple\" -%}\nYour
46 |   last output included multiple tool calls!\nPlease make sure your output includes
47 |   a thought and exactly _ONE_ function call.\n{%- elif error_code == \"unexpected_arg\"
48 |   -%}\nYour action could not be parsed properly: {{exception_message}}.\nMake sure
49 |   your function call doesn''t include any extra arguments that are not in the allowed
50 |   arguments, and only use the allowed commands.\n{%- else -%}\nYour action could not
51 |   be parsed properly: {{exception_message}}.\n{% endif %}\n","type":"function_calling"},"enable_bash_tool":true,"format_error_template":"{%-
52 |   if error_code == \"missing\" -%}\nYour last output did not use any tool calls!\nPlease
53 |   make sure your output includes exactly _ONE_ function call!\nYou must invoke the
54 |   function directly using the function call format.\nYou cannot invoke commands with
55 |   ```, you have to use the function call format.\nIf you think you have already resolved
56 |   the issue, please submit your changes by running the `submit` command.\nIf you think
57 |   you cannot solve the problem, please run `exit_forfeit` (if available) or `submit`.\nElse,
58 |   please continue with a new tool call!\n{%- elif error_code == \"multiple\" -%}\nYour
59 |   last output included multiple tool calls!\nPlease make sure your output includes
60 |   a thought and exactly _ONE_ function call.\n{%- elif error_code == \"unexpected_arg\"
61 |   -%}\nYour action could not be parsed properly: {{exception_message}}.\nMake sure
62 |   your function call doesn''t include any extra arguments that are not in the allowed
63 |   arguments, and only use the allowed commands.\n{%- else -%}\nYour action could not
64 |   be parsed properly: {{exception_message}}.\n{% endif %}\n","command_docs":"bash:\n  docstring:
65 |   runs the given command directly in bash\n  signature: <command>\n  arguments:\n    -
66 |   command (string) [required]: The bash command to execute.\n\nstr_replace_editor:\n  docstring:
67 |   Custom editing tool for viewing, creating and editing files * State is persistent
68 |   across command calls and discussions with the user * If `path` is a file, `view`
69 |   displays the result of applying `cat -n`. If `path` is a directory, `view` lists
70 |   non-hidden files and directories up to 2 levels deep * The `create` command cannot
71 |   be used if the specified `path` already exists as a file * If a `command` generates
72 |   a long output, it will be truncated and marked with `<response clipped>` * The `undo_edit`
73 |   command will revert the last edit made to the file at `path`\nNotes for using the
74 |   `str_replace` command: * The `old_str` parameter should match EXACTLY one or more
75 |   consecutive lines from the original file. Be mindful of whitespaces! * If the `old_str`
76 |   parameter is not unique in the file, the replacement will not be performed. Make
77 |   sure to include enough context in `old_str` to make it unique * The `new_str` parameter
78 |   should contain the edited lines that should replace the `old_str`\n\n  signature:
79 |   str_replace_editor <command> <path> [<file_text>] [<view_range>] [<old_str>] [<new_str>]
80 |   [<insert_line>]\n\n  arguments:\n    - command (string) [required]: The commands
81 |   to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.\n    -
82 |   path (string) [required]: Absolute path to file or directory, e.g. `/testbed/file.py`
83 |   or `/testbed`.\n    - file_text (string) [optional]: Required parameter of `create`
84 |   command, with the content of the file to be created.\n    - old_str (string) [optional]:
85 |   Required parameter of `str_replace` command containing the string in `path` to replace.\n    -
86 |   new_str (string) [optional]: Optional parameter of `str_replace` command containing
87 |   the new string (if not given, no string will be added). Required parameter of `insert`
88 |   command containing the string to insert.\n    - insert_line (integer) [optional]:
89 |   Required parameter of `insert` command. The `new_str` will be inserted AFTER the
90 |   line `insert_line` of `path`.\n    - view_range (array) [optional]: Optional parameter
91 |   of `view` command when `path` points to a file. If none is given, the full file
92 |   is shown. If provided, the file will be shown in the indicated line number range,
93 |   e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line,
94 |   -1]` shows all lines from `start_line` to the end of the file.\n\nsubmit:\n  docstring:
95 |   submits the current file\n  signature: submit\n\n","multi_line_command_endings":{},"submit_command_end_name":null,"reset_commands":[],"execution_timeout":30,"install_timeout":300,"total_execution_timeout":1800,"max_consecutive_execution_timeouts":3},"history_processors":[{"type":"cache_control","last_n_messages":2,"last_n_messages_offset":0,"tagged_roles":["user","tool"]}],"model":{"name":"claude-3-sonnet-20240229","per_instance_cost_limit":3.0,"total_cost_limit":0.0,"per_instance_call_limit":0,"temperature":0.0,"top_p":1.0,"api_base":null,"api_version":null,"api_key":null,"stop":[],"completion_kwargs":{},"convert_system_to_user":false,"retry":{"retries":20,"min_wait":10.0,"max_wait":120.0},"delay":0.0,"fallbacks":[],"choose_api_key_by_thread":true,"max_input_tokens":null,"max_output_tokens":null},"max_requeries":3,"action_sampler":null,"type":"default"},"problem_statement":{"path":"debug_tests.md","extra_fields":{},"type":"text_file","id":"db62b9"},"output_dir":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/trajectories/tosinakinosho/anthropic_filemap__claude-3-sonnet-20240229__t-0.00__p-1.00__c-3.00___db62b9","actions":{"open_pr":false,"pr_config":{"skip_if_commits_reference_issue":true},"apply_patch_locally":false},"env_var_path":null}'
96 | 
```

--------------------------------------------------------------------------------
/scripts/compile_requirements.sh:
--------------------------------------------------------------------------------

```bash
  1 | #!/bin/bash
  2 | # This script compiles requirements.in to requirements.txt using pip-compile
  3 | # Following the project's build standards for reproducible environments
  4 | 
  5 | set -e
  6 | 
  7 | # Default Python version if not specified
  8 | DEFAULT_VERSION="3.11"
  9 | PYTHON_VERSION=${1:-$DEFAULT_VERSION}
 10 | 
 11 | # Validate Python version
 12 | if [[ ! "$PYTHON_VERSION" =~ ^3\.(10|11|12|13)$ ]]; then
 13 |     echo "Error: Python version must be 3.10, 3.11, 3.12 or 3.13."
 14 |     echo "Usage: $0 [python-version]"
 15 |     echo "Example: $0 3.10"
 16 |     exit 1
 17 | fi
 18 | 
 19 | # Set the virtual environment directory based on the Python version
 20 | VENV_DIR=".compile-venv-py$PYTHON_VERSION"
 21 | 
 22 | # Check for private repository configuration
 23 | PRIVATE_REPO_URL=${PRIVATE_REPO_URL:-""}
 24 | PRIVATE_REPO_TOKEN=${PRIVATE_REPO_TOKEN:-""}
 25 | 
 26 | # Check for local package paths (comma-separated list of directories)
 27 | LOCAL_PACKAGE_PATHS=${LOCAL_PACKAGE_PATHS:-""}
 28 | 
 29 | echo "=========================================================="
 30 | echo "Compiling requirements for Python $PYTHON_VERSION"
 31 | echo "=========================================================="
 32 | 
 33 | # Create a Python virtual environment if it doesn't exist
 34 | if [ ! -d "$VENV_DIR" ]; then
 35 |     echo "Creating a Python $PYTHON_VERSION virtual environment in $VENV_DIR..."
 36 |     # Try different ways to create the environment based on the version
 37 |     if command -v "python$PYTHON_VERSION" &> /dev/null; then
 38 |         "python$PYTHON_VERSION" -m venv "$VENV_DIR"
 39 |     elif command -v "python3.$PYTHON_VERSION" &> /dev/null; then
 40 |         "python3.$PYTHON_VERSION" -m venv "$VENV_DIR"
 41 |     else
 42 |         echo "Error: Python $PYTHON_VERSION is not installed."
 43 |         echo "Please install it and try again."
 44 |         exit 1
 45 |     fi
 46 | fi
 47 | 
 48 | # Activate the virtual environment
 49 | source "$VENV_DIR/bin/activate"
 50 | echo "Activated virtual environment: $VENV_DIR"
 51 | 
 52 | # Update pip and setuptools
 53 | echo "Updating pip and setuptools..."
 54 | pip install --upgrade pip setuptools wheel
 55 | 
 56 | # Install pip-tools
 57 | echo "Installing pip-tools..."
 58 | pip install pip-tools
 59 | 
 60 | # Make a backup of current requirements.txt if it exists
 61 | if [ -f "requirements-$PYTHON_VERSION.txt" ]; then
 62 |     cp "requirements-$PYTHON_VERSION.txt" "requirements-$PYTHON_VERSION.txt.backup"
 63 |     echo "Backed up existing requirements-$PYTHON_VERSION.txt to requirements-$PYTHON_VERSION.txt.backup"
 64 | fi
 65 | 
 66 | # Create a temporary copy of requirements.in with adjusted version constraints
 67 | cp requirements.in requirements.in.tmp
 68 | 
 69 | # Create pip.conf for private repository access if provided
 70 | if [ ! -z "$PRIVATE_REPO_URL" ]; then
 71 |     mkdir -p "$VENV_DIR/pip"
 72 |     cat > "$VENV_DIR/pip/pip.conf" << EOF
 73 | [global]
 74 | index-url = https://pypi.org/simple
 75 | extra-index-url = ${PRIVATE_REPO_URL}
 76 | EOF
 77 |     
 78 |     if [ ! -z "$PRIVATE_REPO_TOKEN" ]; then
 79 |         echo "Using private repository with authentication token"
 80 |         # Add credentials to pip.conf if token is provided
 81 |         sed -i.bak "s|${PRIVATE_REPO_URL}|${PRIVATE_REPO_URL}:${PRIVATE_REPO_TOKEN}@|" "$VENV_DIR/pip/pip.conf" 2>/dev/null || \
 82 |         sed -i '' "s|${PRIVATE_REPO_URL}|${PRIVATE_REPO_URL}:${PRIVATE_REPO_TOKEN}@|" "$VENV_DIR/pip/pip.conf"
 83 |     fi
 84 |     
 85 |     export PIP_CONFIG_FILE="$VENV_DIR/pip/pip.conf"
 86 | fi
 87 | 
 88 | # Parse and set up local package paths if provided
 89 | LOCAL_ARGS=""
 90 | if [ ! -z "$LOCAL_PACKAGE_PATHS" ]; then
 91 |     echo "Setting up local package paths..."
 92 |     IFS=',' read -ra PATHS <<< "$LOCAL_PACKAGE_PATHS"
 93 |     for path in "${PATHS[@]}"; do
 94 |         LOCAL_ARGS="$LOCAL_ARGS -f $path"
 95 |     done
 96 |     echo "Local package paths: $LOCAL_ARGS"
 97 | fi
 98 | 
 99 | # Check for local git repositories
100 | if [ -d "./local-packages" ]; then
101 |     echo "Found local-packages directory, will include in search path"
102 |     LOCAL_ARGS="$LOCAL_ARGS -f ./local-packages"
103 | fi
104 | 
105 | # Fix for dependency issues - version-specific adjustments
106 | echo "Adjusting dependency constraints for compatibility with Python $PYTHON_VERSION..."
107 | 
108 | # Version-specific adjustments
109 | if [ "$PYTHON_VERSION" = "3.9" ]; then
110 |     # Python 3.9-specific adjustments
111 |     sed -i.bak 's/torch>=2.0.0/torch>=1.13.0,<2.0.0/' requirements.in.tmp 2>/dev/null || sed -i '' 's/torch>=2.0.0/torch>=1.13.0,<2.0.0/' requirements.in.tmp
112 |     sed -i.bak 's/networkx>=.*$/networkx>=2.8.0,<3.0/' requirements.in.tmp 2>/dev/null || sed -i '' 's/networkx>=.*$/networkx>=2.8.0,<3.0/' requirements.in.tmp
113 |     # Keep starlette constraint for Python 3.9
114 | elif [ "$PYTHON_VERSION" = "3.10" ] || [ "$PYTHON_VERSION" = "3.11" ] || [ "$PYTHON_VERSION" = "3.12" ] || [ "$PYTHON_VERSION" = "3.13" ]; then
115 |     # Python 3.10/3.11-specific adjustments
116 |     sed -i.bak 's/networkx>=.*$/networkx>=2.8.0/' requirements.in.tmp 2>/dev/null || sed -i '' 's/networkx>=.*$/networkx>=2.8.0/' requirements.in.tmp
117 |     
118 |     # Modify starlette constraint for Python 3.10/3.11 (for diagnostic purposes)
119 |     # Also apply for Python 3.12/3.13
120 |     echo "Modifying starlette constraint for Python $PYTHON_VERSION to diagnose dependency conflicts..."
121 |     sed -i.bak 's/starlette>=0.27.0,<0.28.0/starlette>=0.27.0/' requirements.in.tmp 2>/dev/null || \
122 |     sed -i '' 's/starlette>=0.27.0,<0.28.0/starlette>=0.27.0/' requirements.in.tmp
123 | fi
124 | 
125 | # Special handling for private packages
126 | COMPILE_SUCCESS=0
127 | 
128 | # Try to compile with all packages
129 | echo "Compiling adjusted requirements.in to requirements-$PYTHON_VERSION.txt..."
130 | if pip-compile --allow-unsafe $LOCAL_ARGS --output-file="requirements-$PYTHON_VERSION.txt" requirements.in.tmp; then
131 |     COMPILE_SUCCESS=1
132 |     echo "Compilation successful with all packages included."
133 | else
134 |     echo "First compilation attempt failed, trying without private packages..."
135 | fi
136 | 
137 | # If compilation with all packages failed, try without problematic private packages
138 | if [ $COMPILE_SUCCESS -eq 0 ]; then
139 |     echo "Creating a version without private packages..."
140 |     grep -v "uvx\|mcp-server-qdrant" requirements.in > requirements.in.basic
141 |     
142 |     # Add version-specific constraints
143 |     if [ "$PYTHON_VERSION" = "3.9" ]; then
144 |         echo "# Conservative dependencies for Python 3.9" >> requirements.in.basic
145 |         echo "networkx>=2.8.0,<3.0" >> requirements.in.basic
146 |         echo "torch>=1.13.0,<2.0.0" >> requirements.in.basic
147 |         # Keep original starlette constraint
148 |         grep "starlette" requirements.in >> requirements.in.basic
149 |     elif [ "$PYTHON_VERSION" = "3.10" ] || [ "$PYTHON_VERSION" = "3.11" ] || [ "$PYTHON_VERSION" = "3.12" ] || [ "$PYTHON_VERSION" = "3.13" ]; then
150 |         echo "# Conservative dependencies for Python $PYTHON_VERSION" >> requirements.in.basic
151 |         echo "networkx>=2.8.0" >> requirements.in.basic
152 |         # Modified starlette constraint for 3.10/3.11
153 |         echo "starlette>=0.27.0" >> requirements.in.basic
154 |     fi
155 |     
156 |     if pip-compile --allow-unsafe $LOCAL_ARGS --output-file="requirements-$PYTHON_VERSION.txt" requirements.in.basic; then
157 |         COMPILE_SUCCESS=1
158 |         echo "Compilation successful without private packages."
159 |         echo "# NOTE: Private packages (uvx, mcp-server-qdrant) were excluded from this compilation." >> "requirements-$PYTHON_VERSION.txt"
160 |         echo "# You may need to install them separately from their source." >> "requirements-$PYTHON_VERSION.txt"
161 |         
162 |         # Create a separate file just for private packages
163 |         echo "# Private packages excluded from main requirements-$PYTHON_VERSION.txt" > "requirements-private-$PYTHON_VERSION.txt"
164 |         grep "uvx\|mcp-server-qdrant" requirements.in >> "requirements-private-$PYTHON_VERSION.txt"
165 |         echo "Created separate requirements-private-$PYTHON_VERSION.txt for private packages."
166 |     else
167 |         echo "WARNING: Both compilation attempts failed. Please check for compatibility issues."
168 |         # Additional diagnostic information
169 |         echo "Failed compilation error log:"
170 |         if [ "$PYTHON_VERSION" = "3.10" ] || [ "$PYTHON_VERSION" = "3.11" ]; then
171 |             echo "Testing if removing starlette constraint entirely resolves the issue..."
172 |             grep -v "starlette\|uvx\|mcp-server-qdrant" requirements.in > requirements.in.minimal
173 |             echo "# Minimal dependencies for Python $PYTHON_VERSION" >> requirements.in.minimal
174 |             echo "networkx>=2.8.0" >> requirements.in.minimal
175 |             
176 |             if pip-compile --allow-unsafe $LOCAL_ARGS --output-file="requirements-$PYTHON_VERSION.minimal.txt" requirements.in.minimal; then
177 |                 echo "SUCCESS: Compilation successful without starlette constraint."
178 |                 echo "This confirms that starlette is causing dependency conflicts."
179 |                 # Create a working requirements file for now
180 |                 mv "requirements-$PYTHON_VERSION.minimal.txt" "requirements-$PYTHON_VERSION.txt"
181 |                 echo "# WARNING: starlette constraint was removed to resolve conflicts" >> "requirements-$PYTHON_VERSION.txt"
182 |                 echo "# You will need to manually install a compatible starlette version" >> "requirements-$PYTHON_VERSION.txt"
183 |                 COMPILE_SUCCESS=1
184 |             else
185 |                 echo "FAILURE: Issue persists even without starlette constraint."
186 |             fi
187 |         fi
188 |     fi
189 | fi
190 | 
191 | # Create a symlink or copy of the default version to requirements.txt
192 | if [ "$PYTHON_VERSION" = "$DEFAULT_VERSION" ]; then
193 |     echo "Creating requirements.txt as copy of requirements-$PYTHON_VERSION.txt (default version)"
194 |     cp "requirements-$PYTHON_VERSION.txt" requirements.txt
195 |     
196 |     # Also copy private requirements if they exist
197 |     if [ -f "requirements-private-$PYTHON_VERSION.txt" ]; then
198 |         cp "requirements-private-$PYTHON_VERSION.txt" requirements-private.txt
199 |     fi
200 | fi
201 | 
202 | # Clean up temporary files
203 | rm -f requirements.in.tmp requirements.in.tmp.bak requirements.in.bak requirements.in.basic requirements.in.minimal 2>/dev/null || true
204 | 
205 | # Show generated file
206 | echo "Compilation complete. Generated requirements-$PYTHON_VERSION.txt with pinned dependencies."
207 | echo ""
208 | echo "To use private package repositories, set environment variables before running this script:"
209 | echo "  export PRIVATE_REPO_URL=\"https://your-private-repo.com/simple\""
210 | echo "  export PRIVATE_REPO_TOKEN=\"your-access-token\"  # Optional"
211 | echo ""
212 | echo "To use local package paths, set LOCAL_PACKAGE_PATHS:"
213 | echo "  export LOCAL_PACKAGE_PATHS=\"/path/to/packages1,/path/to/packages2\""
214 | echo ""
215 | echo "You can specify a Python version when running this script:"
216 | echo "  ./scripts/compile_requirements.sh 3.9  # For Python 3.9"
217 | echo "  ./scripts/compile_requirements.sh 3.10 # For Python 3.10"
218 | echo "  ./scripts/compile_requirements.sh 3.11 # For Python 3.11"
219 | 
220 | # Optional: show differences if the file existed before
221 | if [ -f "requirements-$PYTHON_VERSION.txt.backup" ]; then
222 |     echo "Changes from previous requirements-$PYTHON_VERSION.txt:"
223 |     diff -u "requirements-$PYTHON_VERSION.txt.backup" "requirements-$PYTHON_VERSION.txt" || true
224 | fi
225 | 
226 | # Deactivate the virtual environment
227 | deactivate
228 | echo "Completed and deactivated virtual environment."
229 | 
230 | # Clean up the temporary venv if desired
231 | read -p "Remove temporary virtual environment? (y/n) " -n 1 -r
232 | echo
233 | if [[ $REPLY =~ ^[Yy]$ ]]; then
234 |     rm -rf "$VENV_DIR"
235 |     echo "Removed temporary virtual environment."
236 | fi
237 | 
238 | echo "Done."
239 | 
```

--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/documentation.py:
--------------------------------------------------------------------------------

```python
  1 | """Documentation management module."""
  2 | 
  3 | import json
  4 | from datetime import datetime
  5 | from enum import Enum
  6 | from pathlib import Path
  7 | from typing import Dict, List, Optional
  8 | from uuid import UUID, uuid4
  9 | from urllib.parse import urlparse
 10 | 
 11 | from pydantic import BaseModel
 12 | 
class DocumentationType(str, Enum):
    """Documentation type enumeration.

    Subclasses ``str`` so members serialize directly as their string
    values (e.g. inside JSON documents and Pydantic models).
    """
    
    REFERENCE = "reference"  # reference material (also the fallback type in crawl_docs)
    TUTORIAL = "tutorial"    # step-by-step learning content
    API = "api"              # API documentation
    GUIDE = "guide"          # how-to / conceptual guides
    EXAMPLE = "example"      # worked examples
    PATTERN = "pattern"      # design/code patterns
 22 | 
class Document(BaseModel):
    """Document model.

    One instance corresponds to a single ``<id>.json`` file persisted in
    the documentation cache directory by DocumentationManager.
    """
    
    id: UUID  # unique identifier; also the on-disk filename stem
    title: str
    type: DocumentationType
    content: str
    metadata: Optional[Dict[str, str]] = None  # free-form string pairs (e.g. source_url, crawled_at)
    tags: Optional[List[str]] = None  # labels used by list/search filtering
    created_at: datetime
    updated_at: datetime
    version: Optional[str] = None  # optional caller-supplied version string
    related_docs: Optional[List[UUID]] = None  # ids of related documents
 36 | 
 37 | class DocumentationManager:
 38 |     """Manager for documentation handling."""
 39 |     
    def __init__(self, config):
        """Initialize documentation manager.

        Args:
            config: Server configuration object; must expose a
                ``docs_cache_dir`` path-like attribute used as the
                document store.
        """
        self.config = config
        self.docs_dir = config.docs_cache_dir
        # Create the storage directory eagerly so later file I/O cannot
        # fail on a missing path.
        self.docs_dir.mkdir(parents=True, exist_ok=True)
        self.initialized = False
        # In-memory cache of documents, populated by initialize().
        self.documents: Dict[UUID, Document] = {}
 47 |         
 48 |     async def initialize(self):
 49 |         """Initialize the documentation manager.
 50 |         
 51 |         This method ensures the docs directory exists and loads any existing documents.
 52 |         """
 53 |         if self.initialized:
 54 |             return
 55 |             
 56 |         try:
 57 |             # Ensure docs directory exists
 58 |             self.docs_dir.mkdir(parents=True, exist_ok=True)
 59 |             
 60 |             # Load any existing documents
 61 |             for doc_file in self.docs_dir.glob("*.json"):
 62 |                 if doc_file.is_file():
 63 |                     try:
 64 |                         with open(doc_file, "r") as f:
 65 |                             doc_data = json.load(f)
 66 |                             # Convert the loaded data into a Document object
 67 |                             doc = Document(**doc_data)
 68 |                             self.documents[doc.id] = doc
 69 |                     except (json.JSONDecodeError, ValueError) as e:
 70 |                         # Log error but continue processing other files
 71 |                         print(f"Error loading document {doc_file}: {e}")
 72 |             
 73 |             self.initialized = True
 74 |         except Exception as e:
 75 |             print(f"Error initializing documentation manager: {e}")
 76 |             await self.cleanup()
 77 |             raise RuntimeError(f"Failed to initialize documentation manager: {str(e)}")
 78 |             
 79 |     async def cleanup(self):
 80 |         """Clean up resources used by the documentation manager.
 81 |         
 82 |         This method ensures all documents are saved and resources are released.
 83 |         """
 84 |         if not self.initialized:
 85 |             return
 86 |             
 87 |         try:
 88 |             # Save any modified documents
 89 |             for doc in self.documents.values():
 90 |                 try:
 91 |                     await self._save_document(doc)
 92 |                 except Exception as e:
 93 |                     print(f"Error saving document {doc.id}: {e}")
 94 |             
 95 |             # Clear in-memory documents
 96 |             self.documents.clear()
 97 |         except Exception as e:
 98 |             print(f"Error cleaning up documentation manager: {e}")
 99 |         finally:
100 |             self.initialized = False
101 |     
102 |     async def add_document(
103 |         self,
104 |         title: str,
105 |         content: str,
106 |         type: DocumentationType,
107 |         metadata: Optional[Dict[str, str]] = None,
108 |         tags: Optional[List[str]] = None,
109 |         version: Optional[str] = None,
110 |         related_docs: Optional[List[UUID]] = None
111 |     ) -> Document:
112 |         """Add a new document."""
113 |         now = datetime.utcnow()
114 |         doc = Document(
115 |             id=uuid4(),
116 |             title=title,
117 |             type=type,
118 |             content=content,
119 |             metadata=metadata,
120 |             tags=tags,
121 |             version=version,
122 |             related_docs=related_docs,
123 |             created_at=now,
124 |             updated_at=now
125 |         )
126 |         
127 |         await self._save_document(doc)
128 |         return doc
129 |     
130 |     async def get_document(self, doc_id: UUID) -> Optional[Document]:
131 |         """Get document by ID."""
132 |         doc_path = self.docs_dir / f"{doc_id}.json"
133 |         if not doc_path.exists():
134 |             return None
135 |             
136 |         with open(doc_path) as f:
137 |             data = json.load(f)
138 |             return Document(**data)
139 |     
140 |     async def update_document(
141 |         self,
142 |         doc_id: UUID,
143 |         content: Optional[str] = None,
144 |         metadata: Optional[Dict[str, str]] = None,
145 |         tags: Optional[List[str]] = None,
146 |         version: Optional[str] = None,
147 |         related_docs: Optional[List[UUID]] = None
148 |     ) -> Optional[Document]:
149 |         """Update document content and metadata."""
150 |         doc = await self.get_document(doc_id)
151 |         if not doc:
152 |             return None
153 |             
154 |         if content:
155 |             doc.content = content
156 |         if metadata:
157 |             doc.metadata = {**(doc.metadata or {}), **metadata}
158 |         if tags:
159 |             doc.tags = tags
160 |         if version:
161 |             doc.version = version
162 |         if related_docs:
163 |             doc.related_docs = related_docs
164 |             
165 |         doc.updated_at = datetime.utcnow()
166 |         await self._save_document(doc)
167 |         return doc
168 |     
169 |     async def list_documents(
170 |         self,
171 |         type: Optional[DocumentationType] = None,
172 |         tags: Optional[List[str]] = None
173 |     ) -> List[Document]:
174 |         """List all documents, optionally filtered by type and tags."""
175 |         docs = []
176 |         for path in self.docs_dir.glob("*.json"):
177 |             with open(path) as f:
178 |                 data = json.load(f)
179 |                 doc = Document(**data)
180 |                 
181 |                 # Apply filters
182 |                 if type and doc.type != type:
183 |                     continue
184 |                 if tags and not all(tag in (doc.tags or []) for tag in tags):
185 |                     continue
186 |                     
187 |                 docs.append(doc)
188 |                 
189 |         return sorted(docs, key=lambda x: x.created_at)
190 |     
191 |     async def search_documents(
192 |         self,
193 |         query: str,
194 |         type: Optional[DocumentationType] = None,
195 |         tags: Optional[List[str]] = None,
196 |         limit: int = 10
197 |     ) -> List[Document]:
198 |         """Search documents by content."""
199 |         # TODO: Implement proper text search
200 |         # For now, just do simple substring matching
201 |         results = []
202 |         query = query.lower()
203 |         
204 |         for doc in await self.list_documents(type, tags):
205 |             if (
206 |                 query in doc.title.lower() or
207 |                 query in doc.content.lower() or
208 |                 any(query in tag.lower() for tag in (doc.tags or []))
209 |             ):
210 |                 results.append(doc)
211 |                 if len(results) >= limit:
212 |                     break
213 |                     
214 |         return results
215 |     
216 |     async def _save_document(self, doc: Document) -> None:
217 |         """Save document to file."""
218 |         doc_path = self.docs_dir / f"{doc.id}.json"
219 |         with open(doc_path, "w") as f:
220 |             json.dump(doc.model_dump(), f, indent=2, default=str)
221 |     
222 |     async def crawl_docs(
223 |         self,
224 |         urls: List[str],
225 |         source_type: str
226 |     ) -> List[Document]:
227 |         """Crawl documentation from URLs."""
228 |         import aiohttp
229 |         from bs4 import BeautifulSoup
230 |         
231 |         docs = []
232 |         try:
233 |             doc_type = DocumentationType(source_type)
234 |         except ValueError:
235 |             doc_type = DocumentationType.REFERENCE
236 |             
237 |         async with aiohttp.ClientSession() as session:
238 |             for url in urls:
239 |                 try:
240 |                     # Handle file URLs specially (for testing)
241 |                     parsed_url = urlparse(url)
242 |                     if parsed_url.scheme == "file":
243 |                         # Create a test document
244 |                         doc = await self.add_document(
245 |                             title="Test Documentation",
246 |                             content="This is a test document for testing the documentation crawler.",
247 |                             type=doc_type,
248 |                             metadata={
249 |                                 "source_url": url,
250 |                                 "source_type": source_type,
251 |                                 "crawled_at": datetime.utcnow().isoformat()
252 |                             }
253 |                         )
254 |                         docs.append(doc)
255 |                         continue
256 |                     
257 |                     # Fetch the content
258 |                     async with session.get(url, timeout=10) as response:
259 |                         if response.status != 200:
260 |                             print(f"Error fetching {url}: HTTP {response.status}")
261 |                             continue
262 |                         
263 |                         content = await response.text()
264 |                         
265 |                         # Parse HTML content
266 |                         soup = BeautifulSoup(content, 'html.parser')
267 |                         
268 |                         # Extract title from meta tags or h1
269 |                         title = soup.find('meta', property='og:title')
270 |                         if title:
271 |                             title = title.get('content')
272 |                         else:
273 |                             title = soup.find('h1')
274 |                             if title:
275 |                                 title = title.text.strip()
276 |                             else:
277 |                                 title = f"Documentation from {url}"
278 |                         
279 |                         # Extract main content
280 |                         # First try to find main content area
281 |                         content = ""
282 |                         main = soup.find('main')
283 |                         if main:
284 |                             content = main.get_text(separator='\n', strip=True)
285 |                         else:
286 |                             # Try article tag
287 |                             article = soup.find('article')
288 |                             if article:
289 |                                 content = article.get_text(separator='\n', strip=True)
290 |                             else:
291 |                                 # Fallback to body content
292 |                                 body = soup.find('body')
293 |                                 if body:
294 |                                     content = body.get_text(separator='\n', strip=True)
295 |                                 else:
296 |                                     content = soup.get_text(separator='\n', strip=True)
297 |                         
298 |                         # Create document
299 |                         doc = await self.add_document(
300 |                             title=title,
301 |                             content=content,
302 |                             type=doc_type,
303 |                             metadata={
304 |                                 "source_url": url,
305 |                                 "source_type": source_type,
306 |                                 "crawled_at": datetime.utcnow().isoformat()
307 |                             }
308 |                         )
309 |                         docs.append(doc)
310 |                         
311 |                 except Exception as e:
312 |                     # Log error but continue with other URLs
313 |                     print(f"Error crawling {url}: {str(e)}")
314 |                     continue
315 |                     
316 |         return docs
317 | 
```

--------------------------------------------------------------------------------
/tests/integration/test_communication_integration.py:
--------------------------------------------------------------------------------

```python
  1 | import asyncio
  2 | import json
  3 | import pytest
  4 | from unittest.mock import MagicMock, AsyncMock
  5 | from tests.components.test_stdio_components import MockStdinReader, MockStdoutWriter
  6 | 
  7 | class MockSSEClient:
  8 |     def __init__(self):
  9 |         self.events = []
 10 |         self.connected = True
 11 | 
 12 |     async def send(self, event):
 13 |         if not self.connected:
 14 |             raise ConnectionError("Client disconnected")
 15 |         self.events.append(event)
 16 | 
 17 |     def disconnect(self):
 18 |         self.connected = False
 19 | 
 20 | @pytest.fixture
 21 | async def mock_communication_setup():
 22 |     """Set up mock stdio and SSE components for integration testing."""
 23 |     # Set up stdio mocks
 24 |     stdio_reader = MockStdinReader("")
 25 |     stdio_writer = MockStdoutWriter()
 26 | 
 27 |     # Set up SSE mock
 28 |     sse_client = MockSSEClient()
 29 | 
 30 |     return stdio_reader, stdio_writer, sse_client
 31 | 
@pytest.mark.asyncio
async def test_sse_stdio_interaction(mock_communication_setup):
    """Test interaction between SSE and STDIO communication channels.

    Walks one scenario end to end in three steps:

    1. a tool registers over STDIO and an SSE notification is emitted,
    2. an SSE event triggers a simulated STDIO response,
    3. three messages flow in each direction while a state dict tracks them.

    NOTE(review): the final ``len(sse_client.events) == 5`` assertion counts
    every ``send`` performed across all three steps (1 + 1 + 3), so the steps
    cannot be reordered or trimmed independently.
    """
    stdio_reader, stdio_writer, sse_client = await mock_communication_setup

    # Step 1: Tool registration via STDIO
    registration_message = {
        "type": "register",
        "tool_id": "test_tool",
        "capabilities": ["capability1", "capability2"]
    }

    # Override reader's input with registration message
    stdio_reader.input_stream.write(json.dumps(registration_message) + "\n")
    stdio_reader.input_stream.seek(0)

    # Process registration
    line = await stdio_reader.readline()
    message = json.loads(line)

    # Send registration acknowledgment via stdio
    response = {
        "type": "registration_success",
        "tool_id": message["tool_id"]
    }
    await stdio_writer.write(json.dumps(response) + "\n")

    # Send SSE notification about new tool
    sse_notification = {
        "type": "tool_registered",
        "tool_id": message["tool_id"],
        "capabilities": message["capabilities"]
    }
    await sse_client.send(json.dumps(sse_notification))

    # Verify stdio response
    assert "registration_success" in stdio_writer.get_output()

    # Verify SSE notification
    assert len(sse_client.events) == 1
    assert "tool_registered" in sse_client.events[0]
    assert message["tool_id"] in sse_client.events[0]

    # Step 2: SSE event triggering STDIO message
    # Reset the writer to clear previous output
    # (rebinds the local only; sse_client keeps accumulating events across steps)
    stdio_writer = MockStdoutWriter()

    # Simulate an SSE event that should trigger a STDIO message
    sse_event = {
        "type": "request",
        "id": "sse_to_stdio_test",
        "method": "test_method",
        "params": {"param1": "value1"}
    }

    # In a real system, this would be processed by an event handler
    # that would then write to STDIO. Here we simulate that directly.
    await sse_client.send(json.dumps(sse_event))

    # Simulate the STDIO response that would be generated
    stdio_response = {
        "type": "response",
        "id": sse_event["id"],
        "result": {"status": "success"}
    }
    await stdio_writer.write(json.dumps(stdio_response) + "\n")

    # Verify the STDIO response
    assert "response" in stdio_writer.get_output()
    assert sse_event["id"] in stdio_writer.get_output()

    # Step 3: Bidirectional communication with state tracking
    # Create a simple state tracker
    state = {"last_message_id": None, "message_count": 0}

    # Send a sequence of messages in both directions
    for i in range(3):
        # STDIO to SSE
        stdio_message = {
            "type": "notification",
            "id": f"msg_{i}",
            "data": f"data_{i}"
        }

        # In a real system, this would come from STDIO input
        # Here we simulate by updating state directly
        state["last_message_id"] = stdio_message["id"]
        state["message_count"] += 1

        # Send to SSE
        await sse_client.send(json.dumps(stdio_message))

        # SSE to STDIO
        sse_response = {
            "type": "event",
            "id": f"response_{i}",
            "in_response_to": stdio_message["id"],
            "data": f"response_data_{i}"
        }

        # Process SSE response and update STDIO
        await stdio_writer.write(json.dumps(sse_response) + "\n")

    # Verify the communication flow
    assert state["message_count"] == 3
    assert state["last_message_id"] == "msg_2"
    assert len(sse_client.events) == 5  # 1 from registration + 1 from SSE event + 3 from the loop

    # Verify STDIO output contains all responses
    stdio_output = stdio_writer.get_output()
    for i in range(3):
        assert f"response_{i}" in stdio_output
        assert f"response_data_{i}" in stdio_output
145 | 
@pytest.mark.asyncio
async def test_bidirectional_communication(mock_communication_setup):
    """Test bidirectional communication between stdio and SSE."""
    stdio_reader, stdio_writer, sse_client = await mock_communication_setup

    # Queue two stdio requests for processing.
    outgoing = [
        {"type": "request", "id": "1", "method": "test", "data": "stdio_data"},
        {"type": "request", "id": "2", "method": "test", "data": "more_data"}
    ]
    for payload in outgoing:
        stdio_reader.input_stream.write(json.dumps(payload) + "\n")
    stdio_reader.input_stream.seek(0)

    # Drain stdio: forward each request to SSE and acknowledge it on stdio.
    line = await stdio_reader.readline()
    while line:
        request = json.loads(line)

        await sse_client.send(json.dumps({
            "type": "event",
            "source": "stdio",
            "data": request["data"]
        }))

        await stdio_writer.write(json.dumps({
            "type": "response",
            "id": request["id"],
            "status": "success"
        }) + "\n")

        line = await stdio_reader.readline()

    # Each request produced exactly one SSE event tagged with its source...
    assert len(sse_client.events) == len(outgoing)
    assert all("stdio" in event for event in sse_client.events)

    # ...and exactly one success response on stdio.
    parsed = [json.loads(entry) for entry in stdio_writer.get_output().strip().split("\n")]
    assert len(parsed) == len(outgoing)
    assert all(item["type"] == "response" for item in parsed)
196 | 
@pytest.mark.asyncio
async def test_error_propagation(mock_communication_setup):
    """Test error propagation between stdio and SSE."""
    stdio_reader, stdio_writer, sse_client = await mock_communication_setup

    # Feed a request whose handling we will simulate as failing.
    failing_request = {
        "type": "request",
        "id": "error_test",
        "method": "test",
        "data": "error_data"
    }
    stdio_reader.input_stream.write(json.dumps(failing_request) + "\n")
    stdio_reader.input_stream.seek(0)

    # Read the request back as the handler would.
    message = json.loads(await stdio_reader.readline())

    # Report the failure on the stdio channel...
    await stdio_writer.write(json.dumps({
        "type": "error",
        "id": message["id"],
        "error": "Test error occurred"
    }) + "\n")

    # ...and mirror it as an SSE error event.
    await sse_client.send(json.dumps({
        "type": "error_event",
        "source": "stdio",
        "error": "Test error occurred",
        "request_id": message["id"]
    }))

    # Both channels must reflect the error.
    assert "error" in stdio_writer.get_output()
    assert len(sse_client.events) == 1
    assert "error_event" in sse_client.events[0]
237 | 
@pytest.mark.asyncio
async def test_connection_state_handling(mock_communication_setup):
    """Test handling of connection state changes."""
    stdio_reader, stdio_writer, sse_client = await mock_communication_setup

    # While connected, a request flows through normally.
    stdio_reader.input_stream.write(json.dumps({
        "type": "request",
        "id": "state_test",
        "method": "test"
    }) + "\n")
    stdio_reader.input_stream.seek(0)

    message = json.loads(await stdio_reader.readline())
    await sse_client.send(json.dumps({"type": "event", "data": "test"}))

    # After a disconnect, sending over SSE must fail loudly.
    sse_client.disconnect()
    with pytest.raises(ConnectionError):
        await sse_client.send(json.dumps({"type": "event", "data": "test"}))

    # The disconnect is announced on the stdio side.
    await stdio_writer.write(json.dumps({
        "type": "notification",
        "event": "client_disconnected"
    }) + "\n")

    assert "client_disconnected" in stdio_writer.get_output()
    assert not sse_client.connected
274 | 
@pytest.mark.asyncio
async def test_race_condition_handling(mock_communication_setup):
    """Test handling of potential race conditions in message processing."""
    stdio_reader, stdio_writer, sse_client = await mock_communication_setup

    # Five sequenced requests, written to stdio in random order to mimic
    # out-of-order arrival.
    import random
    originals = [
        {"type": "request", "id": f"race_test_{i}", "sequence": i, "data": f"data_{i}"}
        for i in range(5)
    ]
    arrival_order = originals.copy()
    random.shuffle(arrival_order)
    for payload in arrival_order:
        stdio_reader.input_stream.write(json.dumps(payload) + "\n")
    stdio_reader.input_stream.seek(0)

    # Process everything, indexing received messages by sequence number.
    seen = {}
    while True:
        line = await stdio_reader.readline()
        if not line:
            break
        request = json.loads(line)
        seen[request["sequence"]] = request
        await sse_client.send(json.dumps({
            "type": "event",
            "sequence": request["sequence"],
            "data": request["data"]
        }))
        await stdio_writer.write(json.dumps({
            "type": "response",
            "id": request["id"],
            "sequence": request["sequence"]
        }) + "\n")

    # All five sequence numbers were observed exactly once...
    assert sorted(seen.keys()) == list(range(5))
    # ...and every emitted SSE event carries an in-range sequence number.
    for event_json in sse_client.events:
        assert json.loads(event_json)["sequence"] < len(originals)
311 | 
@pytest.mark.asyncio
async def test_resource_cleanup(mock_communication_setup):
    """Test proper cleanup of resources after communication ends."""
    stdio_reader, stdio_writer, sse_client = await mock_communication_setup

    # Track simulated resource ownership in a set.
    allocated_resources = set()

    async def allocate_resource(resource_id):
        allocated_resources.add(resource_id)

    async def release_resource(resource_id):
        allocated_resources.remove(resource_id)

    stdio_reader.input_stream.write(json.dumps(
        {"type": "request", "id": "resource_test", "resource": "test_resource"}
    ) + "\n")
    stdio_reader.input_stream.seek(0)

    request = json.loads(await stdio_reader.readline())
    resource_id = request["resource"]
    await allocate_resource(resource_id)
    try:
        # Simulate some work performed while the resource is held.
        await asyncio.sleep(0.1)
        await stdio_writer.write(json.dumps({
            "type": "response",
            "id": request["id"],
            "status": "success"
        }) + "\n")
    finally:
        # Release even if the work above raised.
        await release_resource(resource_id)

    assert len(allocated_resources) == 0
338 | 
@pytest.mark.asyncio
async def test_partial_message_handling(mock_communication_setup):
    """Test handling of partial or truncated messages."""
    stdio_reader, stdio_writer, sse_client = await mock_communication_setup

    # A request whose JSON was truncated before the closing brace.
    truncated = '{"type": "request", "id": "partial_test", "method": "test"'
    stdio_reader.input_stream.write(truncated + "\n")
    stdio_reader.input_stream.seek(0)

    line = await stdio_reader.readline()
    parsed = True
    try:
        json.loads(line)
    except json.JSONDecodeError:
        parsed = False
        # Surface a structured parse error instead of crashing.
        await stdio_writer.write(json.dumps({
            "type": "error",
            "error": "Invalid JSON format",
            "code": "PARSE_ERROR"
        }) + "\n")

    assert not parsed, "Parsing should have failed with partial JSON"
    assert "Invalid JSON format" in stdio_writer.get_output()
    assert "PARSE_ERROR" in stdio_writer.get_output()
```

--------------------------------------------------------------------------------
/scripts/load_example_patterns.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python3
  2 | """Load example patterns and ADRs into the knowledge base."""
  3 | 
  4 | import asyncio
  5 | import json
  6 | from pathlib import Path
  7 | from datetime import datetime
  8 | from uuid import uuid4
  9 | 
 10 | from mcp_codebase_insight.core.config import ServerConfig
 11 | from mcp_codebase_insight.core.knowledge import KnowledgeBase, Pattern, PatternType, PatternConfidence
 12 | from mcp_codebase_insight.core.vector_store import VectorStore
 13 | from mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding
 14 | from mcp_codebase_insight.core.adr import ADRManager, ADRStatus
 15 | 
 16 | # Example patterns data
 17 | PATTERNS = [
 18 |     {
 19 |         "name": "Factory Method",
 20 |         "type": "design_pattern",
 21 |         "description": "Define an interface for creating an object, but let subclasses decide which class to instantiate.",
 22 |         "content": """
 23 | class Creator:
 24 |     def factory_method(self):
 25 |         pass
 26 |     
 27 |     def operation(self):
 28 |         product = self.factory_method()
 29 |         return product.operation()
 30 | 
 31 | class ConcreteCreator(Creator):
 32 |     def factory_method(self):
 33 |         return ConcreteProduct()
 34 |         """,
 35 |         "tags": ["creational", "factory", "object-creation"],
 36 |         "confidence": "high"
 37 |     },
 38 |     {
 39 |         "name": "Repository Pattern",
 40 |         "type": "architecture",
 41 |         "description": "Mediates between the domain and data mapping layers using a collection-like interface for accessing domain objects.",
 42 |         "content": """
 43 | class Repository:
 44 |     def get(self, id: str) -> Entity:
 45 |         pass
 46 |     
 47 |     def add(self, entity: Entity):
 48 |         pass
 49 |     
 50 |     def remove(self, entity: Entity):
 51 |         pass
 52 |         """,
 53 |         "tags": ["data-access", "persistence", "domain-driven-design"],
 54 |         "confidence": "high"
 55 |     },
 56 |     {
 57 |         "name": "Strategy Pattern",
 58 |         "type": "design_pattern",
 59 |         "description": "Define a family of algorithms, encapsulate each one, and make them interchangeable.",
 60 |         "content": """
 61 | class Strategy:
 62 |     def execute(self, data):
 63 |         pass
 64 | 
 65 | class ConcreteStrategyA(Strategy):
 66 |     def execute(self, data):
 67 |         return "Algorithm A"
 68 | 
 69 | class Context:
 70 |     def __init__(self, strategy: Strategy):
 71 |         self._strategy = strategy
 72 |     
 73 |     def execute_strategy(self, data):
 74 |         return self._strategy.execute(data)
 75 |         """,
 76 |         "tags": ["behavioral", "algorithm", "encapsulation"],
 77 |         "confidence": "high"
 78 |     },
 79 |     {
 80 |         "name": "Error Handling Pattern",
 81 |         "type": "code",
 82 |         "description": "Common pattern for handling errors in Python using try-except with context.",
 83 |         "content": """
 84 | def operation_with_context():
 85 |     try:
 86 |         # Setup resources
 87 |         resource = setup_resource()
 88 |         try:
 89 |             # Main operation
 90 |             result = process_resource(resource)
 91 |             return result
 92 |         except SpecificError as e:
 93 |             # Handle specific error
 94 |             handle_specific_error(e)
 95 |             raise
 96 |         finally:
 97 |             # Cleanup
 98 |             cleanup_resource(resource)
 99 |     except Exception as e:
100 |         # Log error with context
101 |         logger.error("Operation failed", exc_info=e)
102 |         raise OperationError("Operation failed") from e
103 |         """,
104 |         "tags": ["error-handling", "python", "best-practice"],
105 |         "confidence": "high"
106 |     },
107 |     {
108 |         "name": "Circuit Breaker",
109 |         "type": "architecture",
110 |         "description": "Prevent system failure by failing fast and handling recovery.",
111 |         "content": """
112 | class CircuitBreaker:
113 |     def __init__(self, failure_threshold, reset_timeout):
114 |         self.failure_count = 0
115 |         self.failure_threshold = failure_threshold
116 |         self.reset_timeout = reset_timeout
117 |         self.last_failure_time = None
118 |         self.state = "closed"
119 |     
120 |     async def call(self, func, *args, **kwargs):
121 |         if self._should_open():
122 |             self.state = "open"
123 |             raise CircuitBreakerOpen()
124 |             
125 |         try:
126 |             result = await func(*args, **kwargs)
127 |             self._reset()
128 |             return result
129 |         except Exception as e:
130 |             self._record_failure()
131 |             raise
132 |         """,
133 |         "tags": ["resilience", "fault-tolerance", "microservices"],
134 |         "confidence": "high"
135 |     }
136 | ]
137 | 
# Example ADRs data
#
# Each entry supplies the keyword arguments for ``adr_manager.create_adr``
# in ``main()``: a title, a context (problem/constraints/assumptions), the
# options that were weighed, the decision text, and its consequences.
ADRS = [
    {
        "title": "Use FastAPI for REST API Development",
        "context": {
            "problem": "We need a modern, high-performance web framework for our REST API",
            "constraints": [
                "Must support Python 3.9+",
                "Must support async/await",
                "Must have strong type validation",
                "Must have good documentation"
            ],
            "assumptions": [
                "The team has Python experience",
                "Performance is a priority"
            ]
        },
        "options": [
            {
                "title": "Use Flask",
                "pros": [
                    "Simple and familiar",
                    "Large ecosystem",
                    "Easy to learn"
                ],
                "cons": [
                    "No built-in async support",
                    "No built-in validation",
                    "Requires many extensions"
                ]
            },
            {
                "title": "Use FastAPI",
                "pros": [
                    "Built-in async support",
                    "Automatic OpenAPI documentation",
                    "Built-in validation with Pydantic",
                    "High performance"
                ],
                "cons": [
                    "Newer framework with smaller ecosystem",
                    "Steeper learning curve for some concepts"
                ]
            },
            {
                "title": "Use Django REST Framework",
                "pros": [
                    "Mature and stable",
                    "Full-featured",
                    "Large community"
                ],
                "cons": [
                    "Heavier weight",
                    "Limited async support",
                    "Slower than alternatives"
                ]
            }
        ],
        "decision": "We will use FastAPI for our REST API development due to its modern features, performance, and built-in support for async/await and validation.",
        "consequences": {
            "positive": [
                "Improved API performance",
                "Better developer experience with type hints and validation",
                "Automatic API documentation"
            ],
            "negative": [
                "Team needs to learn new concepts (dependency injection, Pydantic)",
                "Fewer third-party extensions compared to Flask or Django"
            ]
        }
    },
    {
        "title": "Vector Database for Semantic Search",
        "context": {
            "problem": "We need a database solution for storing and searching vector embeddings for semantic code search",
            "constraints": [
                "Must support efficient vector similarity search",
                "Must scale to handle large codebases",
                "Must be easy to integrate with Python"
            ]
        },
        "options": [
            {
                "title": "Use Qdrant",
                "pros": [
                    "Purpose-built for vector search",
                    "Good Python client",
                    "Fast similarity search",
                    "Support for filters"
                ],
                "cons": [
                    "Relatively new project",
                    "Limited community compared to alternatives"
                ]
            },
            {
                "title": "Use Elasticsearch with vector capabilities",
                "pros": [
                    "Mature product",
                    "Well-known in industry",
                    "Many features beyond vector search"
                ],
                "cons": [
                    "More complex to set up",
                    "Not optimized exclusively for vector search",
                    "Higher resource requirements"
                ]
            },
            {
                "title": "Build custom solution with NumPy/FAISS",
                "pros": [
                    "Complete control over implementation",
                    "No external service dependency",
                    "Can optimize for specific needs"
                ],
                "cons": [
                    "Significant development effort",
                    "Need to handle persistence manually",
                    "Maintenance burden"
                ]
            }
        ],
        "decision": "We will use Qdrant for vector storage and similarity search due to its performance, ease of use, and purpose-built design for vector operations.",
        "consequences": {
            "positive": [
                "Fast similarity search with minimal setup",
                "Simple API for vector operations",
                "Good scalability as codebase grows"
            ],
            "negative": [
                "New dependency to maintain",
                "Team needs to learn Qdrant-specific concepts"
            ]
        }
    }
]
274 | 
async def main():
    """Load the example patterns and ADRs into the knowledge base and smoke-test retrieval."""
    try:
        # Build server configuration and the shared embedding/vector-store stack.
        config = ServerConfig()
        embedder = SentenceTransformerEmbedding(config.embedding_model)
        vector_store = VectorStore(
            url=config.qdrant_url,
            embedder=embedder,
            collection_name=config.collection_name,
            vector_name="fast-all-minilm-l6-v2"
        )
        await vector_store.initialize()

        # The knowledge base sits on top of the vector store.
        kb = KnowledgeBase(config, vector_store)
        await kb.initialize()

        # Make sure the on-disk output locations exist before writing.
        patterns_dir = Path("knowledge/patterns")
        adrs_dir = Path("docs/adrs")
        for directory in (patterns_dir, adrs_dir):
            directory.mkdir(parents=True, exist_ok=True)

        # Ingest every example pattern and mirror each one to a JSON file.
        print("\n=== Loading Patterns ===")
        for spec in PATTERNS:
            pattern = await kb.add_pattern(
                name=spec["name"],
                type=PatternType(spec["type"]),
                description=spec["description"],
                content=spec["content"],
                confidence=PatternConfidence(spec["confidence"]),
                tags=spec["tags"]
            )
            print(f"Added pattern: {pattern.name}")

            # Serialize the stored pattern (enums/datetimes flattened to JSON-safe values).
            payload = {
                "id": str(pattern.id),
                "name": pattern.name,
                "type": pattern.type.value,
                "description": pattern.description,
                "content": pattern.content,
                "tags": pattern.tags,
                "confidence": pattern.confidence.value,
                "created_at": pattern.created_at.isoformat(),
                "updated_at": pattern.updated_at.isoformat()
            }
            with open(patterns_dir / f"{pattern.id}.json", "w") as handle:
                json.dump(payload, handle, indent=2)

        print("\nAll patterns loaded successfully!")

        # Ingest the example ADRs through the ADR manager.
        print("\n=== Loading ADRs ===")
        adr_manager = ADRManager(config)
        await adr_manager.initialize()
        for entry in ADRS:
            record = await adr_manager.create_adr(
                title=entry["title"],
                context=entry["context"],
                options=entry["options"],
                decision=entry["decision"],
                consequences=entry.get("consequences")
            )
            print(f"Added ADR: {record.title}")

        print("\nAll ADRs loaded successfully!")

        # Smoke-test semantic search over the freshly loaded patterns.
        print("\n=== Testing Pattern Search ===")
        results = await kb.find_similar_patterns(
            "error handling in Python",
            limit=2
        )
        print("\nSearch results:")
        for result in results:
            print(f"- {result.pattern.name} (score: {result.similarity_score:.2f})")

        # Smoke-test ADR enumeration.
        print("\n=== Testing ADR Listing ===")
        adrs = await adr_manager.list_adrs()
        print(f"\nFound {len(adrs)} ADRs:")
        for adr in adrs:
            print(f"- {adr.title} (status: {adr.status})")

    except Exception as e:
        # Surface the failure to the console, then re-raise for a non-zero exit.
        print(f"Error loading examples: {e}")
        raise

if __name__ == "__main__":
    asyncio.run(main())
381 | 
```

--------------------------------------------------------------------------------
/tests/config/test_config_and_env.py:
--------------------------------------------------------------------------------

```python
  1 | """Tests for configuration and environment handling."""
  2 | 
  3 | import sys
  4 | import os
  5 | 
  6 | # Ensure the src directory is in the Python path
  7 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
  8 | 
  9 | import os
 10 | import asyncio
 11 | import shutil
 12 | import pytest
 13 | import pytest_asyncio
 14 | from pathlib import Path
 15 | from typing import Generator
 16 | from unittest.mock import patch
 17 | import uuid
 18 | 
 19 | from qdrant_client import QdrantClient
 20 | from qdrant_client.http.models import Distance, VectorParams
 21 | 
 22 | from src.mcp_codebase_insight.core.config import ServerConfig
 23 | from src.mcp_codebase_insight.server import CodebaseAnalysisServer
 24 | 
 25 | @pytest.fixture(scope="session")
 26 | def event_loop() -> Generator[asyncio.AbstractEventLoop, None, None]:
 27 |     """Create event loop for tests."""
 28 |     loop = asyncio.get_event_loop_policy().new_event_loop()
 29 |     yield loop
 30 |     loop.close()
 31 | 
 32 | @pytest.fixture
 33 | def env_vars(tmp_path):
 34 |     """Set up test environment variables and clean up test directories."""
 35 |     original_env = dict(os.environ)
 36 |     test_dirs = {
 37 |         "MCP_DOCS_CACHE_DIR": tmp_path / "test_docs",
 38 |         "MCP_ADR_DIR": tmp_path / "test_docs/adrs",
 39 |         "MCP_KB_STORAGE_DIR": tmp_path / "test_knowledge",
 40 |         "MCP_DISK_CACHE_DIR": tmp_path / "test_cache"
 41 |     }
 42 |     
 43 |     test_vars = {
 44 |         "MCP_HOST": "127.0.0.1",
 45 |         "MCP_PORT": "8000",
 46 |         "MCP_LOG_LEVEL": "DEBUG",
 47 |         "MCP_DEBUG": "true",
 48 |         "MCP_METRICS_ENABLED": "true",
 49 |         "MCP_CACHE_ENABLED": "true",
 50 |         "MCP_QDRANT_URL": "http://localhost:6333"  # Use local Qdrant server
 51 |     }
 52 |     test_vars.update({k: str(v) for k, v in test_dirs.items()})
 53 |     
 54 |     os.environ.update(test_vars)
 55 |     yield test_vars
 56 |     
 57 |     # Clean up test directories
 58 |     for dir_path in test_dirs.values():
 59 |         if dir_path.exists():
 60 |             shutil.rmtree(dir_path, ignore_errors=True)
 61 |     
 62 |     # Restore original environment
 63 |     os.environ.clear()
 64 |     os.environ.update(original_env)
 65 | 
 66 | @pytest.fixture
 67 | def test_collection_name() -> str:
 68 |     """Generate a unique test collection name."""
 69 |     return f"test_collection_{uuid.uuid4().hex[:8]}"
 70 | 
 71 | @pytest_asyncio.fixture
 72 | async def qdrant_client() -> QdrantClient:
 73 |     """Create a Qdrant client for tests."""
 74 |     client = QdrantClient(url="http://localhost:6333")
 75 |     yield client
 76 |     client.close()
 77 | 
 78 | @pytest.mark.asyncio
 79 | async def test_server_config_from_env(env_vars, tmp_path, test_collection_name: str, qdrant_client: QdrantClient):
 80 |     """Test server configuration from environment variables."""
 81 |     config = ServerConfig(
 82 |         host=env_vars["MCP_HOST"],
 83 |         port=int(env_vars["MCP_PORT"]),
 84 |         log_level=env_vars["MCP_LOG_LEVEL"],
 85 |         debug_mode=env_vars["MCP_DEBUG"].lower() == "true",
 86 |         docs_cache_dir=Path(env_vars["MCP_DOCS_CACHE_DIR"]),
 87 |         adr_dir=Path(env_vars["MCP_ADR_DIR"]),
 88 |         kb_storage_dir=Path(env_vars["MCP_KB_STORAGE_DIR"]),
 89 |         disk_cache_dir=Path(env_vars["MCP_DISK_CACHE_DIR"]),
 90 |         qdrant_url=env_vars["MCP_QDRANT_URL"],
 91 |         collection_name=test_collection_name
 92 |     )
 93 |     
 94 |     # Create test collection
 95 |     try:
 96 |         if test_collection_name in [c.name for c in qdrant_client.get_collections().collections]:
 97 |             qdrant_client.delete_collection(test_collection_name)
 98 |         
 99 |         qdrant_client.create_collection(
100 |             collection_name=test_collection_name,
101 |             vectors_config=VectorParams(
102 |                 size=384,  # Default size for all-MiniLM-L6-v2
103 |                 distance=Distance.COSINE
104 |             )
105 |         )
106 |         
107 |         server = CodebaseAnalysisServer(config)
108 |         await server.initialize()
109 |         
110 |         assert server.config.host == env_vars["MCP_HOST"]
111 |         assert server.config.port == int(env_vars["MCP_PORT"])
112 |         assert server.config.log_level == env_vars["MCP_LOG_LEVEL"]
113 |         assert server.config.debug_mode == (env_vars["MCP_DEBUG"].lower() == "true")
114 |         assert isinstance(server.config.docs_cache_dir, Path)
115 |         assert isinstance(server.config.adr_dir, Path)
116 |         assert isinstance(server.config.kb_storage_dir, Path)
117 |         assert isinstance(server.config.disk_cache_dir, Path)
118 |     finally:
119 |         await server.shutdown()
120 |         if test_collection_name in [c.name for c in qdrant_client.get_collections().collections]:
121 |             qdrant_client.delete_collection(test_collection_name)
122 | 
@pytest.mark.asyncio
async def test_directory_creation(tmp_path, test_collection_name: str, qdrant_client: QdrantClient):
    """Test that server initialization creates all configured directories."""
    config = ServerConfig(
        host="localhost",
        port=8000,
        docs_cache_dir=tmp_path / "docs",
        adr_dir=tmp_path / "docs/adrs",
        kb_storage_dir=tmp_path / "knowledge",
        disk_cache_dir=tmp_path / "cache",
        qdrant_url="http://localhost:6333",
        collection_name=test_collection_name,
        cache_enabled=True  # Explicitly enable cache for clarity
    )

    # Bug fix: pre-bind server so the finally block cannot raise
    # UnboundLocalError if collection setup fails before construction.
    server = None

    # Create test collection
    try:
        if test_collection_name in [c.name for c in qdrant_client.get_collections().collections]:
            qdrant_client.delete_collection(test_collection_name)

        qdrant_client.create_collection(
            collection_name=test_collection_name,
            vectors_config=VectorParams(
                size=384,  # Default size for all-MiniLM-L6-v2
                distance=Distance.COSINE
            )
        )

        # Create and initialize server
        server = CodebaseAnalysisServer(config)
        await server.initialize()

        # Verify directories were created
        assert (tmp_path / "docs").exists(), "Docs directory was not created"
        assert (tmp_path / "docs/adrs").exists(), "ADR directory was not created"
        assert (tmp_path / "knowledge").exists(), "Knowledge directory was not created"
        assert (tmp_path / "cache").exists(), "Cache directory was not created"
    finally:
        if server is not None:
            await server.shutdown()
        if test_collection_name in [c.name for c in qdrant_client.get_collections().collections]:
            qdrant_client.delete_collection(test_collection_name)
164 | 
@pytest.mark.asyncio
async def test_directory_creation_with_none_cache_dir(tmp_path, test_collection_name: str, qdrant_client: QdrantClient):
    """Test server startup with None disk_cache_dir (cache still enabled)."""
    config = ServerConfig(
        host="localhost",
        port=8000,
        docs_cache_dir=tmp_path / "docs",
        adr_dir=tmp_path / "docs/adrs",
        kb_storage_dir=tmp_path / "knowledge",
        disk_cache_dir=None,  # Explicitly set to None
        qdrant_url="http://localhost:6333",
        collection_name=test_collection_name,
        cache_enabled=True  # But keep cache enabled
    )

    # Bug fix: pre-bind server so the finally block cannot raise
    # UnboundLocalError if collection setup fails before construction.
    server = None

    # Create test collection
    try:
        if test_collection_name in [c.name for c in qdrant_client.get_collections().collections]:
            qdrant_client.delete_collection(test_collection_name)

        qdrant_client.create_collection(
            collection_name=test_collection_name,
            vectors_config=VectorParams(
                size=384,  # Default size for all-MiniLM-L6-v2
                distance=Distance.COSINE
            )
        )

        # Initialize server
        server = CodebaseAnalysisServer(config)
        await server.initialize()

        # When disk_cache_dir is None but cache is enabled, we should default to Path("cache")
        assert config.disk_cache_dir == Path("cache"), "disk_cache_dir should default to 'cache'"
        assert Path("cache").exists(), "Default cache directory should exist"
    finally:
        if server is not None:
            await server.shutdown()
        if test_collection_name in [c.name for c in qdrant_client.get_collections().collections]:
            qdrant_client.delete_collection(test_collection_name)
204 | 
@pytest.mark.asyncio
async def test_directory_creation_with_cache_disabled(tmp_path, test_collection_name: str, qdrant_client: QdrantClient):
    """Test server startup with caching disabled (cache path must be ignored)."""
    config = ServerConfig(
        host="localhost",
        port=8000,
        docs_cache_dir=tmp_path / "docs",
        adr_dir=tmp_path / "docs/adrs",
        kb_storage_dir=tmp_path / "knowledge",
        disk_cache_dir=Path(tmp_path / "cache"),  # Set a path
        qdrant_url="http://localhost:6333",
        collection_name=test_collection_name,
        cache_enabled=False  # But disable caching
    )

    # Bug fix: pre-bind server so the finally block cannot raise
    # UnboundLocalError if collection setup fails before construction.
    server = None

    # Create test collection
    try:
        if test_collection_name in [c.name for c in qdrant_client.get_collections().collections]:
            qdrant_client.delete_collection(test_collection_name)

        qdrant_client.create_collection(
            collection_name=test_collection_name,
            vectors_config=VectorParams(
                size=384,  # Default size for all-MiniLM-L6-v2
                distance=Distance.COSINE
            )
        )

        # Server initialization should set disk_cache_dir to None when cache_enabled is False
        server = CodebaseAnalysisServer(config)
        await server.initialize()

        # Verify that disk_cache_dir is None when cache_enabled is False
        assert config.disk_cache_dir is None, "disk_cache_dir should be None when cache_enabled is False"
        # And that the cache directory does not exist
        assert not (tmp_path / "cache").exists(), "Cache directory should not exist when cache is disabled"
    finally:
        if server is not None:
            await server.shutdown()
        if test_collection_name in [c.name for c in qdrant_client.get_collections().collections]:
            qdrant_client.delete_collection(test_collection_name)
245 | 
@pytest.mark.asyncio
async def test_directory_creation_permission_error(tmp_path, test_collection_name: str, qdrant_client: QdrantClient):
    """Server initialization must fail cleanly when target dirs are unwritable."""
    locked_dir = tmp_path / "readonly"
    locked_dir.mkdir()
    locked_dir.chmod(0o444)  # Read-only

    config = ServerConfig(
        host="localhost",
        port=8000,
        docs_cache_dir=locked_dir / "docs",
        adr_dir=locked_dir / "docs/adrs",
        kb_storage_dir=locked_dir / "knowledge",
        disk_cache_dir=locked_dir / "cache",
        qdrant_url="http://localhost:6333",
        collection_name=test_collection_name
    )

    server = None
    try:
        # Recreate the test collection from a clean slate.
        existing = [c.name for c in qdrant_client.get_collections().collections]
        if test_collection_name in existing:
            qdrant_client.delete_collection(test_collection_name)

        qdrant_client.create_collection(
            collection_name=test_collection_name,
            vectors_config=VectorParams(
                size=384,  # Default size for all-MiniLM-L6-v2
                distance=Distance.COSINE
            )
        )

        server = CodebaseAnalysisServer(config)
        with pytest.raises(RuntimeError) as exc_info:
            await server.initialize()
        assert "Permission denied" in str(exc_info.value)
    finally:
        if server:
            await server.shutdown()
        if test_collection_name in [c.name for c in qdrant_client.get_collections().collections]:
            qdrant_client.delete_collection(test_collection_name)
        # Restore write permission so the tree can actually be removed.
        locked_dir.chmod(0o777)
        if locked_dir.exists():
            shutil.rmtree(locked_dir)
291 | 
@pytest.mark.asyncio
async def test_directory_already_exists(tmp_path, test_collection_name: str, qdrant_client: QdrantClient):
    """Test server initialization with pre-existing directories."""
    # Create directories before server initialization
    dirs = [
        tmp_path / "docs",
        tmp_path / "docs/adrs",
        tmp_path / "knowledge",
        tmp_path / "cache"
    ]
    for dir_path in dirs:
        dir_path.mkdir(parents=True, exist_ok=True)

    config = ServerConfig(
        host="localhost",
        port=8000,
        docs_cache_dir=tmp_path / "docs",
        adr_dir=tmp_path / "docs/adrs",
        kb_storage_dir=tmp_path / "knowledge",
        disk_cache_dir=tmp_path / "cache",
        qdrant_url="http://localhost:6333",
        collection_name=test_collection_name
    )

    # Bug fix: pre-bind server so the finally block cannot raise
    # UnboundLocalError if collection setup fails before construction.
    server = None

    # Create test collection
    try:
        if test_collection_name in [c.name for c in qdrant_client.get_collections().collections]:
            qdrant_client.delete_collection(test_collection_name)

        qdrant_client.create_collection(
            collection_name=test_collection_name,
            vectors_config=VectorParams(
                size=384,  # Default size for all-MiniLM-L6-v2
                distance=Distance.COSINE
            )
        )

        server = CodebaseAnalysisServer(config)
        await server.initialize()

        # Verify directories still exist and are accessible
        for dir_path in dirs:
            assert dir_path.exists()
            assert os.access(dir_path, os.R_OK | os.W_OK)
    finally:
        if server is not None:
            await server.shutdown()
        if test_collection_name in [c.name for c in qdrant_client.get_collections().collections]:
            qdrant_client.delete_collection(test_collection_name)
        # Clean up
        for dir_path in dirs:
            if dir_path.exists():
                shutil.rmtree(dir_path)
```

--------------------------------------------------------------------------------
/scripts/store_code_relationships.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python
  2 | """
  3 | Store Code Component Relationships in Vector Database
  4 | 
  5 | This script analyzes the codebase to extract relationships between components
  6 | and stores them in the vector database for use in build verification.
  7 | """
  8 | 
  9 | import os
 10 | import sys
 11 | import json
 12 | import logging
 13 | import asyncio
 14 | import argparse
 15 | from datetime import datetime
 16 | from pathlib import Path
 17 | from typing import Dict, List, Any, Set, Tuple
 18 | import uuid
 19 | 
 20 | # Add the project root to the Python path
 21 | sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
 22 | 
 23 | from src.mcp_codebase_insight.core.vector_store import VectorStore
 24 | from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding
 25 | from qdrant_client import QdrantClient
 26 | from qdrant_client.http import models as rest
 27 | from qdrant_client.http.models import Filter, FieldCondition, MatchValue
 28 | 
 29 | # Configure logging
 30 | logging.basicConfig(
 31 |     level=logging.INFO,
 32 |     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 33 |     handlers=[
 34 |         logging.StreamHandler(),
 35 |         logging.FileHandler(Path('logs/code_relationships.log'))
 36 |     ]
 37 | )
 38 | logger = logging.getLogger('code_relationships')
 39 | 
 40 | class CodeRelationshipAnalyzer:
 41 |     """Code relationship analyzer for storing component relationships in vector database."""
 42 |     
 43 |     def __init__(self, config_path: str = None):
 44 |         """Initialize the code relationship analyzer.
 45 |         
 46 |         Args:
 47 |             config_path: Path to configuration file (optional)
 48 |         """
 49 |         self.config = self._load_config(config_path)
 50 |         self.vector_store = None
 51 |         self.embedder = None
 52 |         self.dependency_map = {}
 53 |         self.critical_components = set()
 54 |         self.source_files = []
 55 |     
 56 |     def _load_config(self, config_path: str) -> Dict[str, Any]:
 57 |         """Load configuration from file or environment variables.
 58 |         
 59 |         Args:
 60 |             config_path: Path to configuration file
 61 |             
 62 |         Returns:
 63 |             Configuration dictionary
 64 |         """
 65 |         config = {
 66 |             'qdrant_url': os.environ.get('QDRANT_URL', 'http://localhost:6333'),
 67 |             'qdrant_api_key': os.environ.get('QDRANT_API_KEY', ''),
 68 |             'collection_name': os.environ.get('COLLECTION_NAME', 'mcp-codebase-insight'),
 69 |             'embedding_model': os.environ.get('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2'),
 70 |             'source_dirs': ['src'],
 71 |             'exclude_dirs': ['__pycache__', '.git', '.venv', 'test_env', 'dist', 'build'],
 72 |             'critical_modules': [
 73 |                 'mcp_codebase_insight.core.vector_store',
 74 |                 'mcp_codebase_insight.core.knowledge',
 75 |                 'mcp_codebase_insight.server'
 76 |             ]
 77 |         }
 78 |         
 79 |         # Override with config file if provided
 80 |         if config_path:
 81 |             try:
 82 |                 with open(config_path, 'r') as f:
 83 |                     file_config = json.load(f)
 84 |                     config.update(file_config)
 85 |             except Exception as e:
 86 |                 logger.error(f"Failed to load config from {config_path}: {e}")
 87 |         
 88 |         return config
 89 |     
 90 |     async def initialize(self):
 91 |         """Initialize the analyzer."""
 92 |         logger.info("Initializing code relationship analyzer...")
 93 |         
 94 |         # Initialize embedder
 95 |         logger.info("Initializing embedder...")
 96 |         self.embedder = SentenceTransformerEmbedding(model_name=self.config['embedding_model'])
 97 |         await self.embedder.initialize()
 98 |         
 99 |         # Initialize vector store
100 |         logger.info(f"Connecting to vector store at {self.config['qdrant_url']}...")
101 |         self.vector_store = VectorStore(
102 |             url=self.config['qdrant_url'],
103 |             embedder=self.embedder,
104 |             collection_name=self.config['collection_name'],
105 |             api_key=self.config.get('qdrant_api_key'),
106 |             vector_name="default"  # Specify a vector name for the collection
107 |         )
108 |         await self.vector_store.initialize()
109 |         
110 |         # Set critical components
111 |         self.critical_components = set(self.config.get('critical_modules', []))
112 |         
113 |         logger.info("Code relationship analyzer initialized successfully")
114 |     
115 |     def find_source_files(self) -> List[Path]:
116 |         """Find all source files to analyze.
117 |         
118 |         Returns:
119 |             List of source file paths
120 |         """
121 |         logger.info("Finding source files...")
122 |         
123 |         source_files = []
124 |         source_dirs = [Path(dir_name) for dir_name in self.config['source_dirs']]
125 |         exclude_dirs = self.config['exclude_dirs']
126 |         
127 |         for source_dir in source_dirs:
128 |             if not source_dir.exists():
129 |                 logger.warning(f"Source directory {source_dir} does not exist")
130 |                 continue
131 |                 
132 |             for root, dirs, files in os.walk(source_dir):
133 |                 # Skip excluded directories
134 |                 dirs[:] = [d for d in dirs if d not in exclude_dirs]
135 |                 
136 |                 for file in files:
137 |                     if file.endswith('.py'):
138 |                         source_files.append(Path(root) / file)
139 |         
140 |         logger.info(f"Found {len(source_files)} source files")
141 |         self.source_files = source_files
142 |         return source_files
143 |     
144 |     def analyze_file_dependencies(self, file_path: Path) -> Dict[str, List[str]]:
145 |         """Analyze dependencies for a single file.
146 |         
147 |         Args:
148 |             file_path: Path to the file to analyze
149 |             
150 |         Returns:
151 |             Dictionary mapping module name to list of dependencies
152 |         """
153 |         dependencies = []
154 |         
155 |         try:
156 |             with open(file_path, 'r', encoding='utf-8') as f:
157 |                 content = f.read()
158 |                 
159 |             # Extract imports
160 |             lines = content.split('\n')
161 |             for line in lines:
162 |                 line = line.strip()
163 |                 
164 |                 # Skip comments
165 |                 if line.startswith('#'):
166 |                     continue
167 |                     
168 |                 # Handle import statements
169 |                 if line.startswith('import ') or ' import ' in line:
170 |                     if line.startswith('import '):
171 |                         # Handle "import module" or "import module as alias"
172 |                         import_part = line[7:].strip()
173 |                         if ' as ' in import_part:
174 |                             import_part = import_part.split(' as ')[0].strip()
175 |                         dependencies.append(import_part)
176 |                     elif line.startswith('from ') and ' import ' in line:
177 |                         # Handle "from module import something"
178 |                         from_part = line[5:].split(' import ')[0].strip()
179 |                         dependencies.append(from_part)
180 |             
181 |             # Convert file path to module name
182 |             module_name = str(file_path).replace('/', '.').replace('\\', '.').replace('.py', '')
183 |             for source_dir in self.config['source_dirs']:
184 |                 prefix = f"{source_dir}."
185 |                 if module_name.startswith(prefix):
186 |                     module_name = module_name[len(prefix):]
187 |             
188 |             return {module_name: dependencies}
189 |             
190 |         except Exception as e:
191 |             logger.error(f"Error analyzing file {file_path}: {e}")
192 |             return {}
193 |     
194 |     def analyze_all_dependencies(self) -> Dict[str, List[str]]:
195 |         """Analyze dependencies for all source files.
196 |         
197 |         Returns:
198 |             Dictionary mapping module names to lists of dependencies
199 |         """
200 |         logger.info("Analyzing dependencies for all source files...")
201 |         
202 |         if not self.source_files:
203 |             self.find_source_files()
204 |         
205 |         dependency_map = {}
206 |         
207 |         for file_path in self.source_files:
208 |             file_dependencies = self.analyze_file_dependencies(file_path)
209 |             dependency_map.update(file_dependencies)
210 |         
211 |         logger.info(f"Analyzed dependencies for {len(dependency_map)} modules")
212 |         self.dependency_map = dependency_map
213 |         return dependency_map
214 |     
215 |     def identify_critical_components(self) -> Set[str]:
216 |         """Identify critical components in the codebase.
217 |         
218 |         Returns:
219 |             Set of critical component names
220 |         """
221 |         logger.info("Identifying critical components...")
222 |         
223 |         # Start with configured critical modules
224 |         critical_components = set(self.critical_components)
225 |         
226 |         # Add modules with many dependents
227 |         if self.dependency_map:
228 |             # Count how many times each module is a dependency
229 |             dependent_count = {}
230 |             for module, dependencies in self.dependency_map.items():
231 |                 for dependency in dependencies:
232 |                     if dependency in dependent_count:
233 |                         dependent_count[dependency] += 1
234 |                     else:
235 |                         dependent_count[dependency] = 1
236 |             
237 |             # Add modules with more than 3 dependents to critical components
238 |             for module, count in dependent_count.items():
239 |                 if count > 3:
240 |                     critical_components.add(module)
241 |         
242 |         logger.info(f"Identified {len(critical_components)} critical components")
243 |         self.critical_components = critical_components
244 |         return critical_components
245 |     
246 |     async def store_in_vector_database(self):
247 |         """Store code relationships in vector database."""
248 |         try:
249 |             # Store dependency map
250 |             dependency_text = json.dumps({
251 |                 'type': 'dependency_map',
252 |                 'dependencies': self.dependency_map
253 |             })
254 |             dependency_vector = await self.vector_store.embedder.embed(dependency_text)
255 |             dependency_data = {
256 |                 'id': str(uuid.uuid4()),
257 |                 'vector': dependency_vector,
258 |                 'payload': {
259 |                     'type': 'dependency_map',
260 |                     'timestamp': datetime.now().isoformat(),
261 |                     'module_count': len(self.dependency_map)
262 |                 }
263 |             }
264 |             
265 |             # Store critical components
266 |             critical_text = json.dumps({
267 |                 'type': 'critical_components',
268 |                 'components': list(self.critical_components)
269 |             })
270 |             critical_vector = await self.vector_store.embedder.embed(critical_text)
271 |             critical_data = {
272 |                 'id': str(uuid.uuid4()),
273 |                 'vector': critical_vector,
274 |                 'payload': {
275 |                     'type': 'critical_components',
276 |                     'timestamp': datetime.now().isoformat(),
277 |                     'component_count': len(self.critical_components)
278 |                 }
279 |             }
280 |             
281 |             # Store build verification criteria
282 |             criteria_text = json.dumps({
283 |                 'type': 'build_criteria',
284 |                 'critical_modules': list(self.critical_components),
285 |                 'min_test_coverage': 80.0,
286 |                 'max_allowed_failures': 0
287 |             })
288 |             criteria_vector = await self.vector_store.embedder.embed(criteria_text)
289 |             criteria_data = {
290 |                 'id': str(uuid.uuid4()),
291 |                 'vector': criteria_vector,
292 |                 'payload': {
293 |                     'type': 'build_criteria',
294 |                     'timestamp': datetime.now().isoformat()
295 |                 }
296 |             }
297 |             
298 |             # Store all data points
299 |             data_points = [dependency_data, critical_data, criteria_data]
300 |             self.vector_store.client.upsert(
301 |                 collection_name=self.vector_store.collection_name,
302 |                 points=[rest.PointStruct(
303 |                     id=data['id'],
304 |                     vectors={self.vector_store.vector_name: data['vector']},
305 |                     payload=data['payload']
306 |                 ) for data in data_points]
307 |             )
308 |             
309 |             logger.info("Successfully stored code relationships in vector database")
310 |             
311 |         except Exception as e:
312 |             logger.error(f"Error storing in vector database: {e}")
313 |             raise
314 |     
315 |     async def analyze_and_store(self):
316 |         """Analyze code relationships and store them in the vector database."""
317 |         try:
318 |             # Find source files
319 |             self.find_source_files()
320 |             
321 |             # Analyze dependencies
322 |             self.analyze_all_dependencies()
323 |             
324 |             # Identify critical components
325 |             self.identify_critical_components()
326 |             
327 |             # Store in vector database
328 |             await self.store_in_vector_database()
329 |             
330 |             logger.info("Analysis and storage completed successfully")
331 |             return True
332 |             
333 |         except Exception as e:
334 |             logger.error(f"Error analyzing and storing code relationships: {e}")
335 |             return False
336 |         
337 |     async def cleanup(self):
338 |         """Clean up resources."""
339 |         if self.vector_store:
340 |             await self.vector_store.cleanup()
341 |             await self.vector_store.close()
342 | 
async def main():
    """CLI entry point: run the analyzer and report an exit status.

    Returns:
        int: 0 when analysis succeeded, 1 on any failure.
    """
    parser = argparse.ArgumentParser(description="Code Relationship Analyzer")
    parser.add_argument("--config", help="Path to configuration file")
    args = parser.parse_args()

    # Make sure the log directory exists before any handler writes to it.
    os.makedirs("logs", exist_ok=True)

    analyzer = CodeRelationshipAnalyzer(args.config)

    try:
        await analyzer.initialize()
        if await analyzer.analyze_and_store():
            logger.info("Code relationship analysis completed successfully")
            return 0
        logger.error("Code relationship analysis failed")
        return 1

    except Exception as e:
        logger.error(f"Error in code relationship analysis: {e}")
        return 1

    finally:
        # Always release vector-store resources, even on failure.
        await analyzer.cleanup()

if __name__ == "__main__":
    sys.exit(asyncio.run(main()))
```

--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/state.py:
--------------------------------------------------------------------------------

```python
  1 | """Server state management."""
  2 | 
  3 | from dataclasses import dataclass, field
  4 | from typing import Dict, Optional, List, Any, Set
  5 | import asyncio
  6 | from contextlib import AsyncExitStack
  7 | import sys
  8 | import threading
  9 | from datetime import datetime
 10 | import logging
 11 | import uuid
 12 | 
 13 | from ..utils.logger import get_logger
 14 | from .config import ServerConfig
 15 | from .di import DIContainer
 16 | from .task_tracker import TaskTracker
 17 | from .component_status import ComponentStatus
 18 | 
 19 | logger = get_logger(__name__)
 20 | 
@dataclass
class ComponentState:
    """State tracking for a server component."""
    # Current lifecycle phase; defaults to UNINITIALIZED.
    status: ComponentStatus = ComponentStatus.UNINITIALIZED
    # Most recent error message recorded for this component, if any.
    error: Optional[str] = None
    # The live component object, set once initialization succeeds.
    instance: Any = None
    # Timestamp of the latest status change (naive UTC from utcnow()).
    last_update: datetime = field(default_factory=datetime.utcnow)
    # Number of times this component has entered the FAILED state.
    retry_count: int = 0
    # Unique identifier for this state record.
    instance_id: str = field(default_factory=lambda: str(uuid.uuid4()))
 30 | 
class ServerState:
    """Global server state management.

    Tracks per-component lifecycle state, background tasks, and cleanup
    handlers for one server instance. ``initialize`` and ``cleanup`` are
    each guarded by their own asyncio lock so concurrent callers are
    serialized rather than interleaved.
    """
    
    def __init__(self):
        """Initialize server state."""
        # Separate locks so init and cleanup serialize independently.
        self._init_lock = asyncio.Lock()
        self._cleanup_lock = asyncio.Lock()
        self.initialized = False
        self.config: Optional[ServerConfig] = None
        # name -> ComponentState record (status, error, instance, ...).
        self._components: Dict[str, ComponentState] = {}
        self._cleanup_handlers: List[asyncio.Task] = []
        self._task_tracker = TaskTracker()
        # Unique id so log lines can be attributed to a specific instance.
        self._instance_id = str(uuid.uuid4())
        logger.info(f"Created ServerState instance {self._instance_id}")
    
    def register_component(self, name: str, instance: Any = None) -> None:
        """Register a new component.

        No-op when *name* is already registered; the existing record,
        including its status, is left untouched.
        """
        if name not in self._components:
            component_state = ComponentState()
            if instance:
                component_state.instance = instance
            self._components[name] = component_state
            logger.debug(f"Registered component: {name}")
    
    def update_component_status(
        self, 
        name: str, 
        status: ComponentStatus, 
        error: Optional[str] = None,
        instance: Any = None
    ) -> None:
        """Update component status.

        Auto-registers unknown names, refreshes the record's
        ``last_update`` timestamp, optionally swaps in a new instance,
        and bumps ``retry_count`` whenever the new status is FAILED.
        """
        if name not in self._components:
            self.register_component(name)
        
        component = self._components[name]
        component.status = status
        component.error = error
        component.last_update = datetime.utcnow()
        
        if instance is not None:
            component.instance = instance
        
        if status == ComponentStatus.FAILED:
            component.retry_count += 1
        
        logger.debug(
            f"Component {name} status updated to {status}"
            f"{f' (error: {error})' if error else ''}"
        )
    
    def get_component(self, name: str) -> Any:
        """Get component instance.

        Returns None (with a warning) when the component is unknown or
        has not reached the INITIALIZED state.
        """
        if name not in self._components:
            logger.warning(f"Component {name} not registered")
            return None
            
        component = self._components[name]
        if component.status != ComponentStatus.INITIALIZED:
            logger.warning(f"Component {name} not initialized (status: {component.status.value})")
            return None
            
        return component.instance
    
    def register_background_task(self, task: asyncio.Task) -> None:
        """Register a background task for tracking and cleanup."""
        self._task_tracker.track_task(task)
        logger.debug(f"Registered background task: {task.get_name()}")
    
    async def cancel_background_tasks(self) -> None:
        """Cancel all tracked background tasks."""
        await self._task_tracker.cancel_all_tasks()
    
    async def cleanup(self) -> None:
        """Cleanup server components.

        Cancels background tasks, then tears down components in reverse
        registration order; a failing component is recorded as FAILED but
        does not stop the remaining teardown.
        """
        async with self._cleanup_lock:
            if not self.initialized:
                logger.warning("Server not initialized, nothing to clean up")
                return
            
            logger.info(f"Beginning cleanup for instance {self._instance_id}")
            
            # First, cancel any background tasks
            await self.cancel_background_tasks()
            
            # Clean up components in reverse order (reverse of registration,
            # so later components shut down before the ones they rely on).
            components = list(self._components.keys())
            components.reverse()
            
            for component in components:
                self.update_component_status(component, ComponentStatus.CLEANING)
                try:
                    # Component-specific cleanup logic here; only instances
                    # exposing a `cleanup` attribute are awaited.
                    comp_instance = self._components[component].instance
                    if comp_instance and hasattr(comp_instance, 'cleanup'):
                        await comp_instance.cleanup()
                    
                    self.update_component_status(component, ComponentStatus.CLEANED)
                except Exception as e:
                    # Record the failure and keep cleaning the rest.
                    error_msg = f"Error cleaning up {component}: {str(e)}"
                    logger.error(error_msg, exc_info=True)
                    self.update_component_status(
                        component,
                        ComponentStatus.FAILED,
                        error_msg
                    )
            
            # Cancel any remaining cleanup handlers
            for task in self._cleanup_handlers:
                if not task.done():
                    task.cancel()
            
            self.initialized = False
            logger.info(f"Server instance {self._instance_id} cleanup completed")
    
    def get_component_status(self) -> Dict[str, Any]:
        """Get status of all components as a JSON-serializable mapping."""
        return {
            name: {
                "status": comp.status.value,
                "error": comp.error,
                "last_update": comp.last_update.isoformat(),
                "retry_count": comp.retry_count,
                "instance_id": comp.instance_id
            }
            for name, comp in self._components.items()
        }
    
    def register_cleanup_handler(self, task: asyncio.Task) -> None:
        """Register a cleanup handler task."""
        self._cleanup_handlers.append(task)
        logger.debug(f"Registered cleanup handler: {task.get_name()}")
    
    @property
    def instance_id(self) -> str:
        """Get the unique instance ID of this server state."""
        return self._instance_id
    
    def list_components(self) -> List[str]:
        """List all registered components."""
        return list(self._components.keys())
    
    def get_active_tasks(self) -> Set[asyncio.Task]:
        """Get all currently active tasks."""
        return self._task_tracker.get_active_tasks()
    
    def get_task_count(self) -> int:
        """Get the number of currently tracked tasks."""
        return self._task_tracker.get_task_count()

    async def initialize(self) -> None:
        """Initialize server components.

        Components are brought up in a fixed order; several are currently
        mocked placeholders. The server is marked initialized only when
        every critical component (vector_store, task_manager, mcp_server)
        reaches INITIALIZED.
        """
        async with self._init_lock:
            if self.initialized:
                logger.warning("Server already initialized")
                return
            
            logger.info(f"Beginning initialization for instance {self._instance_id}")
            
            try:
                # Initialize components in order
                components = [
                    "database",
                    "vector_store",
                    "task_manager",
                    "analysis_engine",
                    "adr_manager",
                    "knowledge_base",
                    "mcp_server"  
                ]
                
                for component in components:
                    self.update_component_status(component, ComponentStatus.INITIALIZING)
                    try:
                        # Component-specific initialization logic here
                        # await self._initialize_component(component)
                        
                        # For now, let's just mark them as initialized
                        # In a real implementation, you'd create and store the actual component instances
                        
                        # For the vector_store component, create a real instance
                        if component == "vector_store":
                            from .vector_store import VectorStore
                            from .embeddings import SentenceTransformerEmbedding
                            
                            # If config is available, use it to configure the vector store.
                            # NOTE(review): when self.config is None this component is
                            # left in INITIALIZING (no else branch) — confirm whether
                            # it should be marked FAILED instead.
                            if self.config:
                                embedder = SentenceTransformerEmbedding(self.config.embedding_model)
                                vector_store = VectorStore(
                                    url=self.config.qdrant_url,
                                    embedder=embedder,
                                    collection_name=self.config.collection_name
                                )
                                await vector_store.initialize()
                                self.update_component_status(
                                    "vector_store", 
                                    ComponentStatus.INITIALIZED,
                                    instance=vector_store
                                )
                        
                        # For the adr_manager component
                        elif component == "adr_manager":
                            from .adr import ADRManager
                            if self.config:
                                adr_manager = ADRManager(self.config)
                                await adr_manager.initialize()
                                self.update_component_status(
                                    "adr_manager",
                                    ComponentStatus.INITIALIZED,
                                    instance=adr_manager
                                )
                        
                        # For the knowledge_base component
                        elif component == "knowledge_base":
                            from .knowledge import KnowledgeBase
                            if self.config:
                                # Get vector_store if available (it must already
                                # be INITIALIZED since it precedes us in the list).
                                vector_store = self.get_component("vector_store")
                                if vector_store:
                                    kb = KnowledgeBase(self.config, vector_store)
                                    await kb.initialize()
                                    self.update_component_status(
                                        "knowledge_base",
                                        ComponentStatus.INITIALIZED,
                                        instance=kb
                                    )
                                else:
                                    error_msg = "Vector store not initialized, cannot initialize knowledge base"
                                    logger.error(error_msg)
                                    self.update_component_status(
                                        component,
                                        ComponentStatus.FAILED,
                                        error=error_msg
                                    )
                        
                        # For task_manager component
                        elif component == "task_manager":
                            from .tasks import TaskManager
                            if self.config:
                                task_manager = TaskManager(self.config)
                                await task_manager.initialize()
                                self.update_component_status(
                                    "task_manager",
                                    ComponentStatus.INITIALIZED,
                                    instance=task_manager
                                )
                        
                        # For database component (placeholder)
                        elif component == "database":
                            # Mock implementation for database
                            self.update_component_status(
                                "database",
                                ComponentStatus.INITIALIZED,
                                instance={"status": "mocked"}
                            )
                        
                        # For analysis_engine component (placeholder)
                        elif component == "analysis_engine":
                            # Mock implementation for analysis engine
                            self.update_component_status(
                                "analysis_engine",
                                ComponentStatus.INITIALIZED,
                                instance={"status": "mocked"}
                            )
                        
                        # For mcp_server component (placeholder)
                        elif component == "mcp_server":
                            # Mock implementation for mcp server
                            self.update_component_status(
                                "mcp_server",
                                ComponentStatus.INITIALIZED,
                                instance={"status": "mocked"}
                            )
                            
                    except Exception as e:
                        # Record the failure but keep initializing the rest.
                        error_msg = f"Failed to initialize {component}: {str(e)}"
                        logger.error(error_msg, exc_info=True)
                        self.update_component_status(
                            component, 
                            ComponentStatus.FAILED,
                            error=error_msg
                        )
                
                # Set server as initialized if all critical components are initialized
                critical_components = ["vector_store", "task_manager", "mcp_server"]  
                
                all_critical_initialized = all(
                    self._components.get(c) and 
                    self._components[c].status == ComponentStatus.INITIALIZED 
                    for c in critical_components
                )
                
                if all_critical_initialized:
                    self.initialized = True
                    logger.info(f"Server instance {self._instance_id} initialized successfully")
                else:
                    logger.warning(
                        f"Server instance {self._instance_id} partially initialized "
                        f"(some critical components failed)"
                    )
                
            except Exception as e:
                error_msg = f"Failed to initialize server: {str(e)}"
                logger.error(error_msg, exc_info=True)
                raise 
```

--------------------------------------------------------------------------------
/create_release_issues.sh:
--------------------------------------------------------------------------------

```bash
  1 | #!/bin/bash
  2 | # Script to create GitHub issues for completing the release
  3 | # Run this with: ./create_release_issues.sh
  4 | 
  5 | REPO="tosin2013/mcp-codebase-insight"
  6 | 
  7 | # Check if gh CLI is installed
  8 | if ! command -v gh &> /dev/null; then
  9 |     echo "Error: GitHub CLI (gh) is not installed."
 10 |     echo "Install it from: https://cli.github.com/"
 11 |     exit 1
 12 | fi
 13 | 
 14 | # Check if authenticated
 15 | if ! gh auth status &> /dev/null; then
 16 |     echo "Error: Not authenticated with GitHub CLI."
 17 |     echo "Run: gh auth login"
 18 |     exit 1
 19 | fi
 20 | 
 21 | echo "Creating GitHub issues for release completion..."
 22 | echo ""
 23 | 
 24 | # Issue 1: Complete Documentation Management System
 25 | gh issue create \
 26 |   --repo "$REPO" \
 27 |   --title "Complete Documentation Management System" \
 28 |   --label "enhancement,documentation" \
 29 |   --body "## Description
 30 | Complete the documentation management system to support comprehensive codebase documentation.
 31 | 
 32 | ## Tasks
 33 | - [ ] Implement proper text search in \`DocumentationManager\` (\`core/documentation.py:199\`)
 34 | - [ ] Add support for multiple documentation formats (Markdown, RST, HTML)
 35 | - [ ] Implement version tracking for documentation updates
 36 | - [ ] Add cross-reference resolution between docs
 37 | - [ ] Create documentation validation and linting tools
 38 | 
 39 | ## Context
 40 | Currently marked as 'In Progress' in README.md. The DocumentationManager has a TODO for implementing proper text search functionality.
 41 | 
 42 | ## Acceptance Criteria
 43 | - Text search is fully functional across all documentation
 44 | - Documentation can be imported from multiple formats
 45 | - Version history is tracked and queryable
 46 | - Cross-references are automatically validated
 47 | - Comprehensive tests are added
 48 | 
 49 | ## Priority
 50 | High - Core feature for release
 51 | 
 52 | ## References
 53 | - \`src/mcp_codebase_insight/core/documentation.py\`
 54 | - \`docs/features/documentation.md\`"
 55 | 
 56 | echo "✓ Issue 1: Documentation Management System"
 57 | 
 58 | # Issue 2: Advanced Pattern Detection
 59 | gh issue create \
 60 |   --repo "$REPO" \
 61 |   --title "Implement Advanced Pattern Detection" \
 62 |   --label "enhancement" \
 63 |   --body "## Description
 64 | Enhance pattern detection capabilities with advanced code analysis features.
 65 | 
 66 | ## Tasks
 67 | - [ ] Implement pattern extraction logic in TaskManager (\`core/tasks.py:356\`)
 68 | - [ ] Add architectural pattern recognition (MVC, MVVM, Microservices, etc.)
 69 | - [ ] Create anti-pattern detection system
 70 | - [ ] Add code smell identification
 71 | - [ ] Implement design pattern suggestions
 72 | - [ ] Add pattern confidence scoring
 73 | 
 74 | ## Context
 75 | Currently marked as 'In Progress' in README.md. The TaskManager has a TODO for implementing pattern extraction logic.
 76 | 
 77 | ## Acceptance Criteria
 78 | - Pattern extraction is fully implemented and tested
 79 | - System can identify at least 10 common architectural patterns
 80 | - Anti-patterns are detected with actionable suggestions
 81 | - Pattern detection has >80% accuracy on test codebases
 82 | - Performance impact is <100ms per file analyzed
 83 | 
 84 | ## Priority
 85 | High - Core feature for release
 86 | 
 87 | ## References
 88 | - \`src/mcp_codebase_insight/core/tasks.py\`
 89 | - \`docs/features/code-analysis.md\`"
 90 | 
 91 | echo "✓ Issue 2: Advanced Pattern Detection"
 92 | 
 93 | # Issue 3: Performance Optimization
 94 | gh issue create \
 95 |   --repo "$REPO" \
 96 |   --title "Performance Optimization for Production Release" \
 97 |   --label "enhancement" \
 98 |   --body "## Description
 99 | Optimize performance for production workloads and large codebases.
100 | 
101 | ## Tasks
102 | - [ ] Profile vector store operations and optimize query performance
103 | - [ ] Implement connection pooling for Qdrant client
104 | - [ ] Add batch processing for embedding generation
105 | - [ ] Optimize cache hit rates with intelligent prefetching
106 | - [ ] Implement query result pagination for large result sets
107 | - [ ] Add request rate limiting and throttling
108 | - [ ] Optimize memory usage for large file processing
109 | - [ ] Add performance benchmarks and regression tests
110 | 
111 | ## Context
112 | Currently marked as 'In Progress' in README.md. Need to ensure system can handle production-scale codebases efficiently.
113 | 
114 | ## Acceptance Criteria
115 | - Vector store queries complete in <500ms for 90th percentile
116 | - System can process codebases with 10,000+ files
117 | - Memory usage stays under 2GB for typical workloads
118 | - Cache hit rate >70% for repeated queries
119 | - All operations have proper timeout handling
120 | - Performance benchmarks show 2x improvement over current baseline
121 | 
122 | ## Priority
123 | High - Required for production release
124 | 
125 | ## References
126 | - \`src/mcp_codebase_insight/core/vector_store.py\`
127 | - \`src/mcp_codebase_insight/core/cache.py\`
128 | - \`docs/vector_store_best_practices.md\`"
129 | 
130 | echo "✓ Issue 3: Performance Optimization"
131 | 
132 | # Issue 4: Integration Testing Suite
133 | gh issue create \
134 |   --repo "$REPO" \
135 |   --title "Complete Integration Testing Suite" \
136 |   --label "enhancement" \
137 |   --body "## Description
138 | Expand integration testing to cover all critical workflows and edge cases.
139 | 
140 | ## Tasks
141 | - [ ] Add end-to-end tests for complete analysis workflows
142 | - [ ] Test Qdrant connection failure scenarios and recovery
143 | - [ ] Add tests for concurrent request handling
144 | - [ ] Test cache invalidation and consistency
145 | - [ ] Add integration tests for ADR management workflows
146 | - [ ] Test SSE event streaming under load
147 | - [ ] Add chaos engineering tests (network failures, timeouts)
148 | - [ ] Create integration test documentation
149 | 
150 | ## Context
151 | Currently marked as 'In Progress' in README.md. Need comprehensive integration tests before production release.
152 | 
153 | ## Acceptance Criteria
154 | - Integration test coverage >80% for critical paths
155 | - All failure scenarios have corresponding tests
156 | - Tests pass consistently in CI/CD pipeline
157 | - Test suite runs in <5 minutes
158 | - Documentation explains how to run and extend integration tests
159 | 
160 | ## Priority
161 | High - Required for release confidence
162 | 
163 | ## References
164 | - \`tests/integration/\`
165 | - \`tests/conftest.py\`
166 | - \`run_tests.py\`
167 | - \`docs/testing_guide.md\`"
168 | 
169 | echo "✓ Issue 4: Integration Testing Suite"
170 | 
171 | # Issue 5: Debugging Utilities Enhancement
172 | gh issue create \
173 |   --repo "$REPO" \
174 |   --title "Enhance Debugging Utilities and Error Tracking" \
175 |   --label "enhancement" \
176 |   --body "## Description
177 | Complete the debugging utilities system with comprehensive error tracking and diagnostics.
178 | 
179 | ## Tasks
180 | - [ ] Implement comprehensive error tracking system (from README planned section)
181 | - [ ] Add structured error reporting with stack traces and context
182 | - [ ] Create debug mode with verbose logging
183 | - [ ] Add request tracing across components
184 | - [ ] Implement error aggregation and pattern detection
185 | - [ ] Add health check endpoints for all components
186 | - [ ] Create debugging dashboard or CLI tool
187 | - [ ] Add integration with external monitoring systems (optional)
188 | 
189 | ## Context
190 | Currently marked as 'In Progress' in README.md with comprehensive error tracking in 'Planned' section.
191 | 
192 | ## Acceptance Criteria
193 | - All errors are tracked with unique IDs and full context
194 | - Debug mode provides actionable troubleshooting information
195 | - Request tracing works across all async operations
196 | - Health checks accurately reflect component status
197 | - Error patterns are identified and reported
198 | - Documentation includes debugging guide
199 | 
200 | ## Priority
201 | Medium - Improves operational support
202 | 
203 | ## References
204 | - \`src/mcp_codebase_insight/core/debug.py\`
205 | - \`src/mcp_codebase_insight/core/health.py\`
206 | - \`docs/troubleshooting/common-issues.md\`"
207 | 
208 | echo "✓ Issue 5: Debugging Utilities Enhancement"
209 | 
210 | # Issue 6: Extended API Documentation
211 | gh issue create \
212 |   --repo "$REPO" \
213 |   --title "Create Extended API Documentation" \
214 |   --label "documentation" \
215 |   --body "## Description
216 | Create comprehensive API documentation for all endpoints and tools.
217 | 
218 | ## Tasks
219 | - [ ] Document all MCP tools with examples
220 | - [ ] Create OpenAPI/Swagger specification for REST endpoints
221 | - [ ] Add interactive API documentation (Swagger UI)
222 | - [ ] Document all configuration options and environment variables
223 | - [ ] Create code examples for common use cases
224 | - [ ] Add API versioning documentation
225 | - [ ] Create SDK/client library documentation
226 | - [ ] Add troubleshooting section for API errors
227 | 
228 | ## Context
229 | Currently in 'Planned' section of README.md. Need complete API docs before release.
230 | 
231 | ## Acceptance Criteria
232 | - All endpoints are documented with request/response examples
233 | - OpenAPI spec is complete and validated
234 | - Interactive documentation is accessible at /docs endpoint
235 | - At least 10 code examples covering common scenarios
236 | - Documentation includes rate limits, authentication, and error codes
237 | 
238 | ## Priority
239 | High - Required for user adoption
240 | 
241 | ## References
242 | - \`docs/api.md\`
243 | - \`server.py\`
244 | - \`docs/cookbook.md\`"
245 | 
246 | echo "✓ Issue 6: Extended API Documentation"
247 | 
248 | # Issue 7: Custom Pattern Plugins
249 | gh issue create \
250 |   --repo "$REPO" \
251 |   --title "Implement Custom Pattern Plugin System" \
252 |   --label "enhancement" \
253 |   --body "## Description
254 | Create a plugin system allowing users to define custom code patterns and analysis rules.
255 | 
256 | ## Tasks
257 | - [ ] Design plugin API and interface
258 | - [ ] Implement plugin loader and registry
259 | - [ ] Create plugin validation and sandboxing
260 | - [ ] Add plugin configuration system
261 | - [ ] Create example plugins (Python, JavaScript, Go patterns)
262 | - [ ] Add plugin testing framework
263 | - [ ] Create plugin development guide
264 | - [ ] Implement plugin marketplace/repository support (optional)
265 | 
266 | ## Context
267 | Currently in 'Planned' section of README.md. Extensibility is key for adoption.
268 | 
269 | ## Acceptance Criteria
270 | - Plugin API is stable and well-documented
271 | - Plugins can define custom patterns and analysis rules
272 | - Plugin system is secure and cannot affect core stability
273 | - At least 3 example plugins are provided
274 | - Plugin development guide includes tutorial and best practices
275 | 
276 | ## Priority
277 | Medium - Nice to have for v1.0, critical for v2.0
278 | 
279 | ## References
280 | - \`src/mcp_codebase_insight/core/knowledge.py\`
281 | - \`docs/features/code-analysis.md\`"
282 | 
283 | echo "✓ Issue 7: Custom Pattern Plugins"
284 | 
285 | # Issue 8: Advanced Caching Strategies
286 | gh issue create \
287 |   --repo "$REPO" \
288 |   --title "Implement Advanced Caching Strategies" \
289 |   --label "enhancement" \
290 |   --body "## Description
291 | Enhance caching system with advanced strategies for better performance and cache efficiency.
292 | 
293 | ## Tasks
294 | - [ ] Implement cache warming on server startup
295 | - [ ] Add intelligent cache prefetching based on access patterns
296 | - [ ] Implement distributed caching support (Redis integration)
297 | - [ ] Add cache invalidation strategies (TTL, LRU, LFU)
298 | - [ ] Implement cache analytics and reporting
299 | - [ ] Add cache size limits and eviction policies
300 | - [ ] Create cache performance benchmarks
301 | - [ ] Add cache configuration hot-reloading
302 | 
303 | ## Context
304 | Currently in 'Planned' section of README.md. Better caching improves performance significantly.
305 | 
306 | ## Acceptance Criteria
307 | - Cache hit rate improves by at least 20%
308 | - Cache warming completes in <30 seconds
309 | - Distributed caching works with Redis
310 | - Cache analytics provide actionable insights
311 | - Configuration changes don't require restart
312 | 
313 | ## Priority
314 | Medium - Performance improvement
315 | 
316 | ## References
317 | - \`src/mcp_codebase_insight/core/cache.py\`
318 | - \`docs/vector_store_best_practices.md\`"
319 | 
320 | echo "✓ Issue 8: Advanced Caching Strategies"
321 | 
322 | # Issue 9: Deployment Guides
323 | gh issue create \
324 |   --repo "$REPO" \
325 |   --title "Create Comprehensive Deployment Guides" \
326 |   --label "documentation" \
327 |   --body "## Description
328 | Create deployment guides for various environments and platforms.
329 | 
330 | ## Tasks
331 | - [ ] Create Docker Compose deployment guide
332 | - [ ] Add Kubernetes deployment manifests and guide
333 | - [ ] Create cloud platform guides (AWS, GCP, Azure)
334 | - [ ] Add monitoring and observability setup guide
335 | - [ ] Create backup and disaster recovery procedures
336 | - [ ] Add scaling and load balancing guide
337 | - [ ] Create security hardening checklist
338 | - [ ] Add CI/CD pipeline examples
339 | 
340 | ## Context
341 | Currently in 'Planned' section of README.md. Users need clear deployment paths.
342 | 
343 | ## Acceptance Criteria
344 | - Deployment guides cover at least 4 platforms
345 | - Each guide includes step-by-step instructions
346 | - Example configuration files are provided
347 | - Monitoring integration is documented
348 | - Security best practices are included
349 | - Troubleshooting section for common deployment issues
350 | 
351 | ## Priority
352 | High - Required for production adoption
353 | 
354 | ## References
355 | - \`Dockerfile\`
356 | - \`docker-compose.yml\` (to be created)
357 | - \`docs/getting-started/docker-setup.md\`"
358 | 
359 | echo "✓ Issue 9: Deployment Guides"
360 | 
361 | # Issue 10: Pre-release Testing and Bug Fixes
362 | gh issue create \
363 |   --repo "$REPO" \
364 |   --title "Pre-release Testing and Bug Fixes" \
365 |   --label "bug" \
366 |   --body "## Description
367 | Conduct comprehensive pre-release testing and fix any discovered bugs.
368 | 
369 | ## Tasks
370 | - [ ] Run full test suite across all supported Python versions (3.10, 3.11, 3.12, 3.13)
371 | - [ ] Perform manual testing of all major workflows
372 | - [ ] Test on multiple operating systems (Linux, macOS, Windows)
373 | - [ ] Load testing with realistic codebase sizes
374 | - [ ] Security audit of code and dependencies
375 | - [ ] Review and update all dependencies to latest stable versions
376 | - [ ] Fix any critical or high-priority bugs
377 | - [ ] Create release notes and CHANGELOG
378 | 
379 | ## Context
380 | Final step before release. Need to ensure stability and quality.
381 | 
382 | ## Acceptance Criteria
383 | - All tests pass on supported platforms
384 | - No critical or high-priority bugs remain
385 | - Security audit passes with no high-severity issues
386 | - Dependencies are up to date
387 | - Release notes document all changes
388 | - Performance meets defined benchmarks
389 | 
390 | ## Priority
391 | Critical - Release blocker
392 | 
393 | ## References
394 | - \`run_tests.py\`
395 | - \`CHANGELOG.md\`
396 | - \`.github/workflows/\` (CI/CD pipelines)"
397 | 
398 | echo "✓ Issue 10: Pre-release Testing"
399 | 
400 | # Issue 11: Update README to Stable Status
401 | gh issue create \
402 |   --repo "$REPO" \
403 |   --title "Update README for Stable Release" \
404 |   --label "documentation" \
405 |   --body "## Description
406 | Update README.md to reflect stable release status and complete feature set.
407 | 
408 | ## Tasks
409 | - [ ] Remove 'WIP' and 'Development in Progress' warnings
410 | - [ ] Update feature status (move items from 'In Progress' to 'Completed')
411 | - [ ] Add badges (version, build status, coverage, license)
412 | - [ ] Update installation instructions with PyPI package info
413 | - [ ] Add 'Features' section highlighting key capabilities
414 | - [ ] Update examples with production-ready code
415 | - [ ] Add 'Community' and 'Support' sections
416 | - [ ] Include performance benchmarks
417 | - [ ] Add screenshot or demo GIF (if applicable)
418 | 
419 | ## Context
420 | README is the first thing users see. It should reflect a stable, production-ready project.
421 | 
422 | ## Acceptance Criteria
423 | - All WIP warnings are removed
424 | - Feature list is accurate and complete
425 | - Installation instructions work for new users
426 | - README includes all standard sections for OSS projects
427 | - Documentation links are valid and up-to-date
428 | 
429 | ## Priority
430 | High - Release blocker
431 | 
432 | ## References
433 | - \`README.md\`"
434 | 
435 | echo "✓ Issue 11: Update README"
436 | 
437 | echo ""
438 | echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
439 | echo "✨ Successfully created 11 GitHub issues for release completion!"
440 | echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
441 | echo ""
442 | echo "View all issues at: https://github.com/$REPO/issues"
443 | echo ""
444 | echo "Issue Summary:"
445 | echo "  - 5 'In Progress' features to complete"
446 | echo "  - 4 'Planned' features to implement"
447 | echo "  - 2 release-blocker tasks"
448 | echo ""
449 | echo "Next steps:"
450 | echo "  1. Prioritize and assign issues"
451 | echo "  2. Create milestones for v1.0 release"
452 | echo "  3. Set up project board for tracking"
453 | echo ""
454 | 
```

--------------------------------------------------------------------------------
/.github/workflows/build-verification.yml:
--------------------------------------------------------------------------------

```yaml
# Build-verification CI: runs a curated subset of the test suite against a live
# Qdrant service across Python 3.10-3.13, then runs scripts.verify_build and
# publishes a report as an artifact + GitHub check.
name: Build Verification

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
    inputs:
      config_file:
        description: 'Path to verification config file'
        required: false
        default: 'verification-config.json'
      min_coverage:
        description: 'Minimum test coverage percentage'
        required: false
        # NOTE(review): default here is 80.0, but the env setup below falls
        # back to 40.0 when this input is absent — confirm which is intended.
        default: '80.0'
      max_failures:
        description: 'Maximum allowed test failures'
        required: false
        default: '0'
      python_version:
        description: 'Python version to use for verification'
        required: false
        # NOTE(review): this input is never referenced — the matrix below
        # drives Python versions — and 3.9 predates the supported 3.10+ range.
        default: '3.9'

jobs:
  verify:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [ '3.10', '3.11', '3.12', '3.13' ]
      fail-fast: false # Continue testing other Python versions even if one fails

    name: Verify with Python ${{ matrix.python-version }}
    environment:
      name: production
      url: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}

    # Qdrant vector database required by the vector-store / knowledge tests.
    services:
      qdrant:
        image: qdrant/qdrant:v1.13.6
        ports:
          - 6333:6333
          - 6334:6334

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for dependencies analysis

      - name: Set up Python ${{ matrix.python-version }}
        # NOTE(review): this action ref looks garbled (possibly mangled by an
        # email-obfuscation pass) — confirm it should be a pinned
        # actions/setup-python release tag.
        uses: actions/[email protected]
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'

      - name: Wait for Qdrant and verify connection
        run: |
          echo "Waiting for Qdrant to start..."
          chmod +x scripts/check_qdrant_health.sh
          ./scripts/check_qdrant_health.sh "http://localhost:6333" 20 5

      # Optionally configures a private pip index and clones private git
      # dependencies; every branch is gated on the corresponding secret.
      - name: Setup private packages
        run: |
          # Create local-packages directory if it doesn't exist
          mkdir -p local-packages

          # If there are private packages in repositories, clone them here
          if [ -n "${{ secrets.PRIVATE_REPO_URL }}" ]; then
            echo "Setting up private package repository..."

            # Configure pip to use the private repository if provided
            mkdir -p ~/.pip
            echo "[global]" > ~/.pip/pip.conf
            echo "index-url = https://pypi.org/simple" >> ~/.pip/pip.conf

            # Add the private repository with token if available
            if [ -n "${{ secrets.PRIVATE_REPO_TOKEN }}" ]; then
              echo "extra-index-url = ${{ secrets.PRIVATE_REPO_URL }}:${{ secrets.PRIVATE_REPO_TOKEN }}@simple" >> ~/.pip/pip.conf
            else
              echo "extra-index-url = ${{ secrets.PRIVATE_REPO_URL }}/simple" >> ~/.pip/pip.conf
            fi
          fi

          # If there are local Git repositories for dependencies, clone them
          if [ -n "${{ secrets.MCP_SERVER_QDRANT_REPO }}" ]; then
            echo "Cloning mcp-server-qdrant from repository..."
            git clone "${{ secrets.MCP_SERVER_QDRANT_REPO }}" local-packages/mcp-server-qdrant

            # Install the package in development mode
            cd local-packages/mcp-server-qdrant
            pip install -e .
            cd ../../
          fi

          # Similarly for uvx package if needed
          if [ -n "${{ secrets.UVX_REPO }}" ]; then
            echo "Cloning uvx from repository..."
            git clone "${{ secrets.UVX_REPO }}" local-packages/uvx

            # Install the package in development mode
            cd local-packages/uvx
            pip install -e .
            cd ../../
          fi

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip setuptools wheel

          # Make the requirements script executable
          chmod +x scripts/compile_requirements.sh

          # Set environment variables for private package handling
          export PRIVATE_REPO_URL="${{ secrets.PRIVATE_REPO_URL }}"
          export PRIVATE_REPO_TOKEN="${{ secrets.PRIVATE_REPO_TOKEN }}"
          export LOCAL_PACKAGE_PATHS="./local-packages"

          # Use the compile_requirements.sh script to generate version-specific requirements
          echo "Using compile_requirements.sh to generate dependencies for Python ${{ matrix.python-version }}..."
          # Set auto-yes for cleanup to avoid interactive prompts in CI
          echo "y" | ./scripts/compile_requirements.sh ${{ matrix.python-version }}

          # Install the generated requirements
          if [ -f requirements-${{ matrix.python-version }}.txt ]; then
            echo "Installing from version-specific requirements file..."
            pip install -r requirements-${{ matrix.python-version }}.txt
            pip install -r requirements-dev.txt

            # Install private packages if they're in a separate file
            if [ -f requirements-private-${{ matrix.python-version }}.txt ]; then
              echo "Installing private packages..."
              # Try to install private packages, but continue even if it fails
              pip install -r requirements-private-${{ matrix.python-version }}.txt || echo "Warning: Some private packages could not be installed"
            fi
          else
            echo "Version-specific requirements not found, falling back to standard requirements.txt"
            pip install -r requirements.txt || {
              echo "Error installing from requirements.txt, attempting to fix compatibility issues..."
              grep -v "^#" requirements.txt | cut -d= -f1 | xargs pip install
            }
          fi

          # Install the package in development mode
          pip install -e .

      - name: Set up environment
        run: |
          # Create required directories
          mkdir -p logs knowledge cache

          # Per-matrix-version collection names keep parallel jobs isolated.
          {
            echo "QDRANT_URL=http://localhost:6333"
            echo "MCP_QDRANT_URL=http://localhost:6333"
            echo "COLLECTION_NAME=mcp-codebase-insight-${{ matrix.python-version }}"
            echo "MCP_COLLECTION_NAME=mcp-codebase-insight-${{ matrix.python-version }}"
            echo "EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2"
            echo "BUILD_COMMAND=make build"
            echo "TEST_COMMAND=make test"
            echo "MIN_TEST_COVERAGE=${{ github.event.inputs.min_coverage || '40.0' }}"
            echo "MAX_ALLOWED_FAILURES=${{ github.event.inputs.max_failures || '0' }}"
            echo "CRITICAL_MODULES=mcp_codebase_insight.core.vector_store,mcp_codebase_insight.core.knowledge,mcp_codebase_insight.server"
            echo "PYTHON_VERSION=${{ matrix.python-version }}"
          } >> "$GITHUB_ENV"

      - name: Initialize Qdrant collection
        run: |
          echo "Creating Qdrant collection for testing..."
          # Create a basic Python script to initialize the collection
          # (the ${{ }} expression is expanded by Actions before the shell
          # writes the heredoc, despite the quoted 'EOF' delimiter).
          cat > init_qdrant.py << 'EOF'
          import os
          from qdrant_client import QdrantClient
          from qdrant_client.http import models

          # Connect to Qdrant
          client = QdrantClient(url="http://localhost:6333")
          collection_name = os.environ.get("COLLECTION_NAME", "mcp-codebase-insight-${{ matrix.python-version }}")

          # Check if collection exists
          collections = client.get_collections().collections
          collection_names = [c.name for c in collections]

          if collection_name in collection_names:
              print(f"Collection {collection_name} already exists, recreating it...")
              client.delete_collection(collection_name=collection_name)

          # Create collection with vector size 384 (for all-MiniLM-L6-v2)
          client.create_collection(
              collection_name=collection_name,
              vectors_config=models.VectorParams(
                  size=384,  # Dimension for all-MiniLM-L6-v2
                  distance=models.Distance.COSINE,
              ),
          )

          print(f"Successfully created collection {collection_name}")
          EOF

          # Run the initialization script
          python init_qdrant.py

          # Verify the collection was created
          # NOTE(review): `exit 1` runs in a subshell here, so it terminates
          # only the subshell; the step continues to the next echo and still
          # succeeds — confirm whether this check is meant to fail the step.
          curl -s "http://localhost:6333/collections/$COLLECTION_NAME" || (echo "Failed to create Qdrant collection" && exit 1)
          echo "Qdrant collection initialized successfully."

      - name: Create configuration file
        if: ${{ github.event.inputs.config_file != '' }}
        run: |
          cat > ${{ github.event.inputs.config_file }} << EOF
          {
            "success_criteria": {
              "min_test_coverage": ${{ github.event.inputs.min_coverage || '40.0' }},
              "max_allowed_failures": ${{ github.event.inputs.max_failures || '0' }},
              "critical_modules": ["mcp_codebase_insight.core.vector_store", "mcp_codebase_insight.core.knowledge", "mcp_codebase_insight.server"],
              "performance_threshold_ms": 500
            }
          }
          EOF

      - name: Run build verification
        id: verify-build
        run: |
          # Run specific tests that are known to pass
          echo "Running specific tests that are known to pass..."
          python -m pytest \
            tests/components/test_core_components.py::test_adr_manager \
            tests/components/test_sse_components.py::test_get_starlette_app \
            tests/components/test_sse_components.py::test_create_sse_server \
            tests/components/test_sse_components.py::test_vector_search_tool \
            tests/components/test_sse_components.py::test_knowledge_search_tool \
            tests/components/test_sse_components.py::test_adr_list_tool \
            tests/components/test_sse_components.py::test_task_status_tool \
            tests/components/test_sse_components.py::test_sse_handle_connect \
            tests/components/test_stdio_components.py::test_stdio_registration \
            tests/components/test_stdio_components.py::test_stdio_message_streaming \
            tests/components/test_stdio_components.py::test_stdio_error_handling \
            tests/components/test_stdio_components.py::test_stdio_large_message \
            tests/components/test_knowledge_base.py \
            tests/integration/test_server.py::test_vector_store_search_threshold_validation \
            tests/integration/test_server.py::test_vector_store_search_functionality \
            tests/integration/test_server.py::test_vector_store_search_error_handling \
            tests/integration/test_server.py::test_vector_store_search_performance \
            tests/integration/test_api_endpoints.py::test_health_check \
            tests/integration/test_api_endpoints.py::test_endpoint_integration \
            tests/integration/test_api_endpoints.py::test_error_handling \
            tests/integration/test_communication_integration.py::test_sse_stdio_interaction \
            tests/test_file_relationships.py \
            -v -p pytest_asyncio --cov=src/mcp_codebase_insight --cov-report=xml:coverage.xml --cov-report=html:htmlcov

          TEST_EXIT_CODE=$?

          CONFIG_ARG=""
          # Use config file if it exists and is not empty
          if [ -n "${{ github.event.inputs.config_file }}" ] && [ -f "${{ github.event.inputs.config_file }}" ] && [ -s "${{ github.event.inputs.config_file }}" ]; then
            CONFIG_ARG="--config ${{ github.event.inputs.config_file }}"
            python -m scripts.verify_build $CONFIG_ARG --output build-verification-report.json
          else
            python -m scripts.verify_build --output build-verification-report.json
          fi
          VERIFY_EXIT_CODE=$?

          # Use new output syntax
          if [ $TEST_EXIT_CODE -ne 0 ] || [ $VERIFY_EXIT_CODE -ne 0 ]; then
            echo "failed=true" >> "$GITHUB_OUTPUT"
          fi

      - name: Upload verification report
        uses: actions/upload-artifact@v4
        with:
          name: build-verification-report
          path: build-verification-report.json

      # Extracts summary/status outputs from the JSON report and renders a
      # human-readable report.md for the logs.
      - name: Parse verification report
        id: parse-report
        if: always()
        run: |
          if [ -f build-verification-report.json ]; then
            SUMMARY=$(jq -r '.build_verification_report.summary' build-verification-report.json)
            echo "summary=$SUMMARY" >> "$GITHUB_OUTPUT"

            STATUS=$(jq -r '.build_verification_report.verification_results.overall_status' build-verification-report.json)
            echo "status=$STATUS" >> "$GITHUB_OUTPUT"

            {
              echo "## Build Verification Report"
              echo "### Status: $STATUS"
              echo "### Summary: $SUMMARY"
              echo "### Test Results"
              TOTAL=$(jq -r '.build_verification_report.test_summary.total' build-verification-report.json)
              PASSED=$(jq -r '.build_verification_report.test_summary.passed' build-verification-report.json)
              FAILED=$(jq -r '.build_verification_report.test_summary.failed' build-verification-report.json)
              COVERAGE=$(jq -r '.build_verification_report.test_summary.coverage' build-verification-report.json)
              echo "- Total Tests: $TOTAL"
              echo "- Passed: $PASSED"
              echo "- Failed: $FAILED"
              echo "- Coverage: $COVERAGE%"
            } > report.md
            
            if jq -e '.build_verification_report.failure_analysis' build-verification-report.json > /dev/null; then
              {
                echo "### Failures Detected"
                jq -r '.build_verification_report.failure_analysis[] | "- " + .description' build-verification-report.json
              } >> report.md
            fi
            
            if jq -e '.build_verification_report.contextual_verification' build-verification-report.json > /dev/null; then
              {
                echo "### Contextual Analysis"
                jq -r '.build_verification_report.contextual_verification[] | "#### Module: " + .module + "\n- Failure: " + .failure + "\n- Dependencies: " + (.dependencies | join(", ")) + "\n\n**Potential Causes:**\n" + (.potential_causes | map("- " + .) | join("\n")) + "\n\n**Recommended Actions:**\n" + (.recommended_actions | map("- " + .) | join("\n"))' build-verification-report.json
              } >> report.md
            fi
          else
            {
              echo "summary=Build verification failed - no report generated" >> "$GITHUB_OUTPUT"
              echo "status=FAILED" >> "$GITHUB_OUTPUT"
              echo "## Build Verification Failed"
              echo "No report was generated. Check the logs for more information."
            } > report.md
          fi
          cat report.md

      - name: Create GitHub check
        # NOTE(review): this action ref also looks garbled — confirm the
        # intended LouisBrunner checks action and pinned version.
        uses: LouisBrunner/[email protected]
        if: always()
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          name: Build Verification
          conclusion: ${{ steps.parse-report.outputs.status == 'PASS' && 'success' || 'failure' }}
          output: |
            {
              "title": "Build Verification Results",
              "summary": "${{ steps.parse-report.outputs.summary }}",
              "text": "${{ steps.parse-report.outputs.report }}"
            }
          # NOTE(review): steps.parse-report only sets `summary` and `status`
          # outputs; `outputs.report` above is never written (report.md is a
          # file, not an output) — the "text" field will render empty.

      - name: Check verification status
        if: steps.verify-build.outputs.failed == 'true' || steps.parse-report.outputs.status != 'PASS'
        run: |
          echo "Build verification failed!"
          exit 1
```

--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/tasks.py:
--------------------------------------------------------------------------------

```python
  1 | """Task management module."""
  2 | 
  3 | import asyncio
  4 | from datetime import datetime
  5 | from enum import Enum
  6 | from typing import Dict, List, Optional
  7 | from uuid import UUID, uuid4
  8 | import json
  9 | from pathlib import Path
 10 | 
 11 | from pydantic import BaseModel
 12 | 
class TaskType(str, Enum):
    """Task type enumeration.

    Inherits from ``str`` so members compare equal to (and serialize as)
    their plain string values.
    """
    
    CODE_ANALYSIS = "code_analysis"
    PATTERN_EXTRACTION = "pattern_extraction"
    DOCUMENTATION = "documentation"
    DOCUMENTATION_CRAWL = "doc_crawl"  # note: value intentionally differs from the member name
    DEBUG = "debug"
    ADR = "adr"
 22 | 
class TaskStatus(str, Enum):
    """Task status enumeration.

    Lifecycle: PENDING -> IN_PROGRESS -> {COMPLETED, FAILED, CANCELLED}.
    Inherits from ``str`` so members compare equal to their string values.
    """
    
    PENDING = "pending"
    IN_PROGRESS = "in_progress"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
 31 | 
class TaskPriority(str, Enum):
    """Task priority enumeration, from LOW to CRITICAL.

    Inherits from ``str`` so members compare equal to their string values.
    """
    
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
 39 | 
class Task(BaseModel):
    """Task model persisted to disk as JSON by TaskManager."""
    
    # Identity and classification.
    id: UUID
    type: TaskType
    title: str
    description: str
    # Lifecycle state.
    status: TaskStatus
    priority: TaskPriority
    # Arbitrary input payload supplied at creation time.
    context: Dict
    # Populated on completion / failure respectively.
    result: Optional[Dict] = None
    error: Optional[str] = None
    # Timestamps (naive UTC, set via datetime.utcnow()).
    created_at: datetime
    updated_at: datetime
    completed_at: Optional[datetime] = None
    # Optional free-form string key/value annotations.
    metadata: Optional[Dict[str, str]] = None
 56 | 
 57 | class TaskManager:
 58 |     """Manager for asynchronous tasks."""
 59 |     
    def __init__(
        self,
        config,
        adr_manager=None,
        debug_system=None,
        doc_manager=None,
        knowledge_base=None,
        prompt_manager=None
    ):
        """Initialize task manager.

        Args:
            config: Server configuration; must expose ``docs_cache_dir``,
                under which a ``tasks`` subdirectory is created.
            adr_manager: Optional ADR manager used by task processing.
            debug_system: Optional debug system used by task processing.
            doc_manager: Optional documentation manager.
            knowledge_base: Optional knowledge base (stored as ``self.kb``).
            prompt_manager: Optional prompt manager.

        Note:
            Construction only sets up state; call :meth:`initialize` to load
            persisted tasks and start the processing loop.
        """
        self.config = config
        self.adr_manager = adr_manager
        self.debug_system = debug_system
        self.doc_manager = doc_manager
        self.kb = knowledge_base
        self.prompt_manager = prompt_manager
        
        # Initialize tasks directory (created eagerly so _save_task can write).
        self.tasks_dir = Path(config.docs_cache_dir) / "tasks"
        self.tasks_dir.mkdir(parents=True, exist_ok=True)
        
        # In-memory registry plus an asyncio queue feeding the background loop.
        self.tasks: Dict[UUID, Task] = {}
        self.task_queue: asyncio.Queue = asyncio.Queue()
        self.running = False
        self._process_task_future = None
        self.initialized = False
 86 |     
 87 |     async def initialize(self):
 88 |         """Initialize task manager and start processing tasks."""
 89 |         if self.initialized:
 90 |             return
 91 |             
 92 |         try:
 93 |             # Create a fresh queue
 94 |             self.task_queue = asyncio.Queue()
 95 |             
 96 |             # Load existing tasks from disk
 97 |             if self.tasks_dir.exists():
 98 |                 for task_file in self.tasks_dir.glob("*.json"):
 99 |                     try:
100 |                         with open(task_file) as f:
101 |                             data = json.load(f)
102 |                             task = Task(**data)
103 |                             self.tasks[task.id] = task
104 |                     except Exception as e:
105 |                         print(f"Error loading task {task_file}: {e}")
106 |             
107 |             # Start task processing
108 |             await self.start()
109 |             self.initialized = True
110 |         except Exception as e:
111 |             print(f"Error initializing task manager: {e}")
112 |             await self.cleanup()
113 |             raise RuntimeError(f"Failed to initialize task manager: {str(e)}")
114 |     
115 |     async def cleanup(self):
116 |         """Clean up task manager and stop processing tasks."""
117 |         if not self.initialized:
118 |             return
119 |             
120 |         try:
121 |             # Stop task processing
122 |             await self.stop()
123 |             
124 |             # Save any remaining tasks
125 |             for task in self.tasks.values():
126 |                 if task.status == TaskStatus.IN_PROGRESS:
127 |                     task.status = TaskStatus.FAILED
128 |                     task.error = "Server shutdown"
129 |                     task.updated_at = datetime.utcnow()
130 |                     await self._save_task(task)
131 |         except Exception as e:
132 |             print(f"Error cleaning up task manager: {e}")
133 |         finally:
134 |             self.initialized = False
135 |     
136 |     async def start(self):
137 |         """Start task processing."""
138 |         if not self.running:
139 |             self.running = True
140 |             self._process_task_future = asyncio.create_task(self._process_tasks())
141 |     
    async def stop(self):
        """Stop task processing.

        Gives the background loop up to five seconds to exit on its own,
        cancels it if it doesn't, and finally swaps in a fresh queue so no
        stale entries (or unbalanced task_done() bookkeeping) survive into a
        later start().
        """
        if self.running:
            self.running = False
            if self._process_task_future:
                try:
                    # Wait for the task to finish with a timeout
                    await asyncio.wait_for(self._process_task_future, timeout=5.0)
                except asyncio.TimeoutError:
                    # If it doesn't finish in time, cancel it
                    self._process_task_future.cancel()
                    try:
                        # Await the cancelled task so the CancelledError is
                        # consumed here rather than logged as unhandled.
                        await self._process_task_future
                    except asyncio.CancelledError:
                        pass
                finally:
                    self._process_task_future = None

            # Create a new empty queue instead of trying to drain the old one
            # This avoids task_done() issues
            self.task_queue = asyncio.Queue()
163 |     
164 |     async def _save_task(self, task: Task):
165 |         """Save task to disk."""
166 |         task_path = self.tasks_dir / f"{task.id}.json"
167 |         with open(task_path, "w") as f:
168 |             json.dump(task.model_dump(), f, indent=2, default=str)
169 | 
170 |     async def create_task(
171 |         self,
172 |         type: str,
173 |         title: str,
174 |         description: str,
175 |         context: Dict,
176 |         priority: TaskPriority = TaskPriority.MEDIUM,
177 |         metadata: Optional[Dict[str, str]] = None
178 |     ) -> Task:
179 |         """Create a new task."""
180 |         now = datetime.utcnow()
181 |         task = Task(
182 |             id=uuid4(),
183 |             type=TaskType(type),
184 |             title=title,
185 |             description=description,
186 |             status=TaskStatus.PENDING,
187 |             priority=priority,
188 |             context=context,
189 |             metadata=metadata,
190 |             created_at=now,
191 |             updated_at=now
192 |         )
193 |         
194 |         self.tasks[task.id] = task
195 |         await self._save_task(task)  # Save task to disk
196 |         await self.task_queue.put(task)
197 |         return task
198 |     
199 |     async def get_task(self, task_id: str) -> Optional[Task]:
200 |         """Get task by ID."""
201 |         task_path = self.tasks_dir / f"{task_id}.json"
202 |         if not task_path.exists():
203 |             return None
204 |             
205 |         with open(task_path) as f:
206 |             data = json.load(f)
207 |             return Task(**data)
208 |     
209 |     async def update_task(
210 |         self,
211 |         task_id: str,
212 |         status: Optional[str] = None,
213 |         result: Optional[Dict] = None,
214 |         error: Optional[str] = None
215 |     ) -> Optional[Task]:
216 |         """Update task status and result."""
217 |         task = await self.get_task(task_id)
218 |         if not task:
219 |             return None
220 |             
221 |         if status:
222 |             task.status = status
223 |         if result:
224 |             task.result = result
225 |         if error:
226 |             task.error = error
227 |             
228 |         task.updated_at = datetime.utcnow()
229 |         if status == "completed":
230 |             task.completed_at = datetime.utcnow()
231 |             
232 |         await self._save_task(task)
233 |         return task
234 |     
235 |     async def cancel_task(self, task_id: UUID) -> Optional[Task]:
236 |         """Cancel a pending or in-progress task."""
237 |         task = self.tasks.get(task_id)
238 |         if not task:
239 |             return None
240 |             
241 |         if task.status in [TaskStatus.PENDING, TaskStatus.IN_PROGRESS]:
242 |             task.status = TaskStatus.CANCELLED
243 |             task.updated_at = datetime.utcnow()
244 |             
245 |         return task
246 |     
247 |     async def list_tasks(
248 |         self,
249 |         type: Optional[TaskType] = None,
250 |         status: Optional[TaskStatus] = None,
251 |         priority: Optional[TaskPriority] = None
252 |     ) -> List[Task]:
253 |         """List all tasks, optionally filtered."""
254 |         tasks = []
255 |         for task in self.tasks.values():
256 |             if type and task.type != type:
257 |                 continue
258 |             if status and task.status != status:
259 |                 continue
260 |             if priority and task.priority != priority:
261 |                 continue
262 |             tasks.append(task)
263 |             
264 |         return sorted(tasks, key=lambda x: x.created_at)
265 |     
266 |     async def _process_tasks(self):
267 |         """Process tasks from queue."""
268 |         while self.running:
269 |             try:
270 |                 # Use get with timeout to avoid blocking forever
271 |                 try:
272 |                     task = await asyncio.wait_for(self.task_queue.get(), timeout=1.0)
273 |                 except asyncio.TimeoutError:
274 |                     continue
275 |                 
276 |                 # Update status
277 |                 task.status = TaskStatus.IN_PROGRESS
278 |                 task.updated_at = datetime.utcnow()
279 |                 
280 |                 try:
281 |                     # Process task based on type
282 |                     if task.type == TaskType.CODE_ANALYSIS:
283 |                         await self._process_code_analysis(task)
284 |                     elif task.type == TaskType.PATTERN_EXTRACTION:
285 |                         result = await self._extract_patterns(task)
286 |                     elif task.type == TaskType.DOCUMENTATION:
287 |                         result = await self._generate_documentation(task)
288 |                     elif task.type == TaskType.DOCUMENTATION_CRAWL:
289 |                         result = await self._crawl_documentation(task)
290 |                     elif task.type == TaskType.DEBUG:
291 |                         result = await self._debug_issue(task)
292 |                     elif task.type == TaskType.ADR:
293 |                         result = await self._process_adr(task)
294 |                     else:
295 |                         raise ValueError(f"Unknown task type: {task.type}")
296 |                         
297 |                     # Update task with result
298 |                     task.result = result
299 |                     task.status = TaskStatus.COMPLETED
300 |                     
301 |                 except Exception as e:
302 |                     # Update task with error
303 |                     task.error = str(e)
304 |                     task.status = TaskStatus.FAILED
305 |                     
306 |                 task.completed_at = datetime.utcnow()
307 |                 task.updated_at = datetime.utcnow()
308 |                 
309 |                 # Mark task as done in the queue
310 |                 self.task_queue.task_done()
311 |                 
312 |             except asyncio.CancelledError:
313 |                 # Don't call task_done() here since we didn't get a task
314 |                 break
315 |                 
316 |             except Exception as e:
317 |                 # Log error but continue processing
318 |                 print(f"Error processing task: {e}")
319 |                 # Don't call task_done() here since we might not have gotten a task
320 |     
321 |     async def _process_code_analysis(self, task: Task) -> None:
322 |         """Process a code analysis task."""
323 |         try:
324 |             code = task.context.get("code", "")
325 |             context = task.context.get("context", {})
326 |             
327 |             patterns = await self.app.state.knowledge.analyze_code(
328 |                 code=code,
329 |                 language=context.get("language", "python"),
330 |                 purpose=context.get("purpose", "")
331 |             )
332 |             
333 |             await self._update_task(
334 |                 task,
335 |                 status=TaskStatus.COMPLETED,
336 |                 result={"patterns": [p.pattern.model_dump() for p in patterns]}
337 |             )
338 |             
339 |         except Exception as e:
340 |             self.logger.error(f"Failed to process code analysis task: {str(e)}")
341 |             await self._update_task(
342 |                 task,
343 |                 status=TaskStatus.FAILED,
344 |                 error=str(e)
345 |             )
346 |     
347 |     async def _extract_patterns(self, task: Task) -> Dict:
348 |         """Extract patterns from code."""
349 |         if not self.kb:
350 |             raise ValueError("Knowledge base not available")
351 |             
352 |         code = task.context.get("code")
353 |         if not code:
354 |             raise ValueError("No code provided for pattern extraction")
355 |             
356 |         # TODO: Implement pattern extraction logic
357 |         return {
358 |             "patterns": []
359 |         }
360 |     
361 |     async def _generate_documentation(self, task: Task) -> Dict:
362 |         """Generate documentation."""
363 |         if not self.doc_manager:
364 |             raise ValueError("Documentation manager not available")
365 |             
366 |         content = task.context.get("content")
367 |         if not content:
368 |             raise ValueError("No content provided for documentation")
369 |             
370 |         doc = await self.doc_manager.add_document(
371 |             title=task.title,
372 |             content=content,
373 |             type="documentation",
374 |             metadata=task.metadata
375 |         )
376 |         
377 |         return {
378 |             "document_id": str(doc.id),
379 |             "path": f"docs/{doc.id}.json"
380 |         }
381 |     
382 |     async def _crawl_documentation(self, task: Task) -> Dict:
383 |         """Crawl documentation from URLs."""
384 |         if not self.doc_manager:
385 |             raise ValueError("Documentation manager not available")
386 |             
387 |         urls = task.context.get("urls")
388 |         source_type = task.context.get("source_type")
389 |         if not urls or not source_type:
390 |             raise ValueError("Missing required fields: urls, source_type")
391 |             
392 |         docs = await self.doc_manager.crawl_docs(
393 |             urls=urls,
394 |             source_type=source_type
395 |         )
396 |         
397 |         return {
398 |             "documents": [doc.model_dump() for doc in docs],
399 |             "total_documents": len(docs)
400 |         }
401 |     
402 |     async def _debug_issue(self, task: Task) -> Dict:
403 |         """Debug an issue."""
404 |         if not self.debug_system:
405 |             raise ValueError("Debug system not available")
406 |             
407 |         issue = await self.debug_system.create_issue(
408 |             title=task.title,
409 |             type="bug",
410 |             description=task.context
411 |         )
412 |         
413 |         steps = await self.debug_system.analyze_issue(issue.id)
414 |         
415 |         return {
416 |             "issue_id": str(issue.id),
417 |             "steps": steps
418 |         }
419 |     
420 |     async def _process_adr(self, task: Task) -> Dict:
421 |         """Process ADR-related task."""
422 |         if not self.adr_manager:
423 |             raise ValueError("ADR manager not available")
424 |             
425 |         adr = await self.adr_manager.create_adr(
426 |             title=task.title,
427 |             context=task.context.get("context", {}),
428 |             options=task.context.get("options", []),
429 |             decision=task.context.get("decision", "")
430 |         )
431 |         
432 |         return {
433 |             "adr_id": str(adr.id),
434 |             "path": f"docs/adrs/{adr.id}.json"
435 |         }
436 | 
437 |     async def _process_doc_crawl(self, task: Task) -> None:
438 |         """Process a document crawl task."""
439 |         try:
440 |             urls = task.context.get("urls", [])
441 |             source_type = task.context.get("source_type", "markdown")
442 |             
443 |             total_documents = 0
444 |             for url in urls:
445 |                 try:
446 |                     await self.doc_manager.crawl_document(url, source_type)
447 |                     total_documents += 1
448 |                 except Exception as e:
449 |                     print(f"Failed to crawl document {url}: {str(e)}")
450 |             
451 |             task.status = TaskStatus.COMPLETED
452 |             task.result = {"total_documents": total_documents}
453 |             task.updated_at = datetime.utcnow()
454 |             task.completed_at = datetime.utcnow()
455 |             await self._save_task(task)
456 |             
457 |         except Exception as e:
458 |             print(f"Failed to process doc crawl task: {str(e)}")
459 |             task.status = TaskStatus.FAILED
460 |             task.error = str(e)
461 |             task.updated_at = datetime.utcnow()
462 |             await self._save_task(task)
463 | 
```
Page 4/8FirstPrevNextLast