This is page 6 of 8. Use http://codebase.md/tosin2013/mcp-codebase-insight?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .bumpversion.cfg ├── .codecov.yml ├── .compile-venv-py3.11 │ ├── bin │ │ ├── activate │ │ ├── activate.csh │ │ ├── activate.fish │ │ ├── Activate.ps1 │ │ ├── coverage │ │ ├── coverage-3.11 │ │ ├── coverage3 │ │ ├── pip │ │ ├── pip-compile │ │ ├── pip-sync │ │ ├── pip3 │ │ ├── pip3.11 │ │ ├── py.test │ │ ├── pyproject-build │ │ ├── pytest │ │ ├── python │ │ ├── python3 │ │ ├── python3.11 │ │ └── wheel │ └── pyvenv.cfg ├── .env.example ├── .github │ └── workflows │ ├── build-verification.yml │ ├── publish.yml │ └── tdd-verification.yml ├── .gitignore ├── async_fixture_wrapper.py ├── CHANGELOG.md ├── CLAUDE.md ├── codebase_structure.txt ├── component_test_runner.py ├── CONTRIBUTING.md ├── core_workflows.txt ├── debug_tests.md ├── Dockerfile ├── docs │ ├── adrs │ │ └── 001_use_docker_for_qdrant.md │ ├── api.md │ ├── components │ │ └── README.md │ ├── cookbook.md │ ├── development │ │ ├── CODE_OF_CONDUCT.md │ │ ├── CONTRIBUTING.md │ │ └── README.md │ ├── documentation_map.md │ ├── documentation_summary.md │ ├── features │ │ ├── adr-management.md │ │ ├── code-analysis.md │ │ └── documentation.md │ ├── getting-started │ │ ├── configuration.md │ │ ├── docker-setup.md │ │ ├── installation.md │ │ ├── qdrant_setup.md │ │ └── quickstart.md │ ├── qdrant_setup.md │ ├── README.md │ ├── SSE_INTEGRATION.md │ ├── system_architecture │ │ └── README.md │ ├── templates │ │ └── adr.md │ ├── testing_guide.md │ ├── troubleshooting │ │ ├── common-issues.md │ │ └── faq.md │ ├── vector_store_best_practices.md │ └── workflows │ └── README.md ├── error_logs.txt ├── examples │ └── use_with_claude.py ├── github-actions-documentation.md ├── Makefile ├── module_summaries │ ├── backend_summary.txt │ ├── database_summary.txt │ └── frontend_summary.txt ├── output.txt ├── package-lock.json ├── package.json ├── PLAN.md ├── prepare_codebase.sh ├── PULL_REQUEST.md ├── pyproject.toml ├── pytest.ini ├── README.md ├── requirements-3.11.txt ├── requirements-3.11.txt.backup ├── requirements-dev.txt ├── requirements.in ├── requirements.txt ├── run_build_verification.sh ├── run_fixed_tests.sh ├── run_test_with_path_fix.sh ├── run_tests.py ├── scripts │ ├── check_qdrant_health.sh │ ├── compile_requirements.sh │ ├── load_example_patterns.py │ ├── macos_install.sh │ ├── README.md │ ├── setup_qdrant.sh │ ├── start_mcp_server.sh │ ├── store_code_relationships.py │ ├── store_report_in_mcp.py │ ├── validate_knowledge_base.py │ ├── validate_poc.py │ ├── validate_vector_store.py │ └── verify_build.py ├── server.py ├── setup_qdrant_collection.py ├── setup.py ├── src │ └── mcp_codebase_insight │ ├── __init__.py │ ├── __main__.py │ ├── asgi.py │ ├── core │ │ ├── __init__.py │ │ ├── adr.py │ │ ├── cache.py │ │ ├── component_status.py │ │ ├── config.py │ │ ├── debug.py │ │ ├── di.py │ │ ├── documentation.py │ │ ├── embeddings.py │ │ ├── errors.py │ │ ├── health.py │ │ ├── knowledge.py │ │ ├── metrics.py │ │ ├── prompts.py │ │ ├── sse.py │ │ ├── state.py │ │ ├── task_tracker.py │ │ ├── tasks.py │ │ └── vector_store.py │ ├── models.py │ ├── server_test_isolation.py │ ├── server.py │ ├── utils │ │ ├── __init__.py │ │ └── logger.py │ └── version.py ├── start-mcpserver.sh ├── summary_document.txt ├── system-architecture.md ├── system-card.yml ├── test_fix_helper.py ├── test_fixes.md ├── test_function.txt ├── test_imports.py ├── tests │ ├── components │ │ ├── conftest.py │ │ ├── test_core_components.py │ │ 
├── test_embeddings.py │ │ ├── test_knowledge_base.py │ │ ├── test_sse_components.py │ │ ├── test_stdio_components.py │ │ ├── test_task_manager.py │ │ └── test_vector_store.py │ ├── config │ │ └── test_config_and_env.py │ ├── conftest.py │ ├── integration │ │ ├── fixed_test2.py │ │ ├── test_api_endpoints.py │ │ ├── test_api_endpoints.py-e │ │ ├── test_communication_integration.py │ │ └── test_server.py │ ├── README.md │ ├── README.test.md │ ├── test_build_verifier.py │ └── test_file_relationships.py └── trajectories └── tosinakinosho ├── anthropic_filemap__claude-3-sonnet-20240229__t-0.00__p-1.00__c-3.00___db62b9 │ └── db62b9 │ └── config.yaml ├── default__claude-3-5-sonnet-20240620__t-0.00__p-1.00__c-3.00___03565e │ └── 03565e │ ├── 03565e.traj │ └── config.yaml └── default__openrouter └── anthropic └── claude-3.5-sonnet-20240620:beta__t-0.00__p-1.00__c-3.00___03565e └── 03565e ├── 03565e.pred ├── 03565e.traj └── config.yaml ``` # Files -------------------------------------------------------------------------------- /tests/integration/test_server.py: -------------------------------------------------------------------------------- ```python 1 | """Test server API endpoints.""" 2 | 3 | import sys 4 | import os 5 | 6 | # Ensure the src directory is in the Python path 7 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))) 8 | 9 | import pytest 10 | import pytest_asyncio 11 | from httpx import AsyncClient 12 | import uuid 13 | import logging 14 | import time 15 | from pathlib import Path 16 | from datetime import datetime, timezone 17 | from typing import Dict, List, Any, Optional 18 | 19 | from src.mcp_codebase_insight.core.config import ServerConfig 20 | from src.mcp_codebase_insight.core.vector_store import VectorStore 21 | from src.mcp_codebase_insight.core.knowledge import Pattern 22 | from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding 23 | from src.mcp_codebase_insight.server import CodebaseAnalysisServer 24 | from src.mcp_codebase_insight.server_test_isolation import get_isolated_server_state 25 | 26 | # Setup logger 27 | logger = logging.getLogger(__name__) 28 | 29 | # Environment variables or defaults for vector store testing 30 | QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333") 31 | TEST_COLLECTION_NAME = os.environ.get("TEST_COLLECTION_NAME", "test_vector_search") 32 | EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "all-MiniLM-L6-v2") 33 | 34 | # Path to test repository 35 | TEST_REPO_PATH = Path("tests/fixtures/test_repo") 36 | 37 | @pytest_asyncio.fixture 38 | async def setup_test_vector_store(test_server_client): 39 | """Set up a test vector store with sample patterns for the server tests. 40 | 41 | This fixture initializes the vector store component in the server with test patterns, 42 | allowing the vector store search endpoint to be tested properly. 
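    A minimal consumption sketch, mirroring how the search tests later in this
    module use the fixture (illustrative only; the fixture and client names come
    from this file, not from new API):

        @pytest.mark.asyncio
        async def test_example_search(test_server_client, setup_test_vector_store):
            if setup_test_vector_store is None:
                pytest.skip("Vector store setup failed, skipping test")
            response = await test_server_client.get(
                "/api/vector-store/search",
                params={"query": "search result class", "threshold": 0.7},
            )
            assert response.status_code == 200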
43 | """ 44 | # Get server state from the test client 45 | logger.info("Attempting to get server health status") 46 | request = await test_server_client.get("/health") 47 | if request.status_code != 200: 48 | logger.warning(f"Server health check failed with status code {request.status_code}") 49 | yield None 50 | return 51 | 52 | # Get the server state through test isolation utilities 53 | logger.info("Getting isolated server state") 54 | server_state = get_isolated_server_state() 55 | if not server_state: 56 | logger.warning("Could not get isolated server state, server_state is None") 57 | yield None 58 | return 59 | 60 | logger.info(f"Got server state, instance ID: {server_state.instance_id}") 61 | logger.info(f"Server state components: {server_state.list_components()}") 62 | 63 | # Create and initialize a test vector store 64 | try: 65 | # Create the embedder first 66 | logger.info(f"Creating embedding model with model name: {EMBEDDING_MODEL}") 67 | embedder = SentenceTransformerEmbedding(model_name=EMBEDDING_MODEL) 68 | await embedder.initialize() 69 | 70 | # Now create the vector store with the embedder 71 | logger.info(f"Creating vector store with URL: {QDRANT_URL}, collection: {TEST_COLLECTION_NAME}") 72 | vector_store = VectorStore( 73 | url=QDRANT_URL, 74 | embedder=embedder, 75 | collection_name=TEST_COLLECTION_NAME 76 | ) 77 | 78 | # Delete any existing collection with this name 79 | try: 80 | logger.info("Cleaning up vector store before use") 81 | await vector_store.cleanup() 82 | logger.info("Vector store cleaned up") 83 | except Exception as e: 84 | logger.warning(f"Error during vector store cleanup: {str(e)}") 85 | 86 | # Initialize the vector store 87 | logger.info("Initializing vector store") 88 | await vector_store.initialize() 89 | logger.info(f"Initialized vector store with collection: {TEST_COLLECTION_NAME}") 90 | 91 | # Add test patterns 92 | logger.info("Adding test patterns to vector store") 93 | await add_test_patterns(vector_store, embedder) 94 | 95 | # Register the vector store in the server state 96 | logger.info("Registering vector store component in server state") 97 | server_state.register_component("vector_store", vector_store) 98 | logger.info("Registered vector store component in server state") 99 | 100 | yield vector_store 101 | 102 | # Cleanup 103 | try: 104 | logger.info("Closing vector store") 105 | await vector_store.close() 106 | logger.info("Vector store closed") 107 | except Exception as e: 108 | logger.warning(f"Error during vector store closure: {str(e)}") 109 | 110 | except Exception as e: 111 | logger.error(f"Error setting up test vector store: {str(e)}", exc_info=True) 112 | yield None 113 | 114 | async def add_test_patterns(store: VectorStore, embedder: SentenceTransformerEmbedding): 115 | """Add test patterns to the vector store for testing.""" 116 | patterns = [] 117 | 118 | # Add sample patterns for testing 119 | patterns.append(Pattern( 120 | id=str(uuid.uuid4()), 121 | text="""class SearchResult: 122 | \"\"\"Represents a search result from the vector store.\"\"\" 123 | def __init__(self, id: str, score: float, metadata: Optional[Dict] = None): 124 | self.id = id 125 | self.score = score 126 | self.metadata = metadata or {} 127 | 128 | def to_dict(self): 129 | \"\"\"Convert to dictionary.\"\"\" 130 | return { 131 | "id": self.id, 132 | "score": self.score, 133 | "metadata": self.metadata 134 | }""", 135 | title="SearchResult Class", 136 | description="A class for vector store search results", 137 | pattern_type="code", 138 | 
tags=["python", "class", "search", "vector-store"], 139 | metadata={ 140 | "language": "python", 141 | "file_path": "src/core/models.py", 142 | "line_range": "10-25", 143 | "timestamp": datetime.now(timezone.utc).isoformat(), 144 | "type": "code" 145 | } 146 | )) 147 | 148 | patterns.append(Pattern( 149 | id=str(uuid.uuid4()), 150 | text="""async def search( 151 | self, 152 | query: str, 153 | limit: int = 5, 154 | threshold: float = 0.7, 155 | file_type: Optional[str] = None, 156 | path_pattern: Optional[str] = None 157 | ) -> List[Dict]: 158 | \"\"\"Search for patterns matching the query.\"\"\" 159 | # Generate embedding for the query 160 | embedding = await self.embedding_model.embed(query) 161 | 162 | # Prepare filter conditions 163 | filter_conditions = {} 164 | if file_type: 165 | filter_conditions["language"] = file_type 166 | if path_pattern: 167 | filter_conditions["file_path"] = {"$like": path_pattern} 168 | 169 | # Perform the search 170 | results = await self.vector_store.search( 171 | embedding=embedding, 172 | limit=limit, 173 | filter_conditions=filter_conditions 174 | ) 175 | 176 | # Filter by threshold 177 | filtered_results = [r for r in results if r.score >= threshold] 178 | 179 | return filtered_results""", 180 | title="Vector Store Search Method", 181 | description="Async method to search the vector store with filters", 182 | pattern_type="code", 183 | tags=["python", "async", "function", "search"], 184 | metadata={ 185 | "language": "python", 186 | "file_path": "src/core/search.py", 187 | "line_range": "50-75", 188 | "timestamp": datetime.now(timezone.utc).isoformat(), 189 | "type": "code" 190 | } 191 | )) 192 | 193 | patterns.append(Pattern( 194 | id=str(uuid.uuid4()), 195 | text="""# Vector Store Configuration 196 | 197 | ## Search Parameters 198 | 199 | - **query**: The text to search for similar patterns 200 | - **threshold**: Similarity score threshold (0.0 to 1.0) 201 | - **limit**: Maximum number of results to return 202 | - **file_type**: Filter by programming language/file type 203 | - **path_pattern**: Filter by file path pattern 204 | 205 | ## Recommended Threshold Values 206 | 207 | - **0.9-1.0**: Very high precision, almost exact matches 208 | - **0.8-0.9**: High precision, strongly similar 209 | - **0.7-0.8**: Good balance (default) 210 | - **0.6-0.7**: Higher recall, more results 211 | - **0.5-0.6**: Very high recall, may include less relevant matches""", 212 | title="Vector Store Documentation", 213 | description="Documentation on vector store search parameters", 214 | pattern_type="documentation", 215 | tags=["documentation", "markdown", "search", "parameters"], 216 | metadata={ 217 | "language": "markdown", 218 | "file_path": "docs/vector_store.md", 219 | "line_range": "50-70", 220 | "timestamp": datetime.now(timezone.utc).isoformat(), 221 | "type": "documentation" 222 | } 223 | )) 224 | 225 | # Store patterns with embeddings 226 | for pattern in patterns: 227 | # Generate embedding for the pattern text 228 | embedding = await embedder.embed(pattern.text) 229 | 230 | # Store the pattern 231 | await store.store_pattern( 232 | id=pattern.id, 233 | text=pattern.text, 234 | title=pattern.title, 235 | description=pattern.description, 236 | pattern_type=pattern.pattern_type, 237 | tags=pattern.tags, 238 | metadata=pattern.metadata, 239 | embedding=embedding 240 | ) 241 | logger.info(f"Added pattern: {pattern.title}") 242 | 243 | logger.info(f"Added {len(patterns)} patterns to the test vector store") 244 | return patterns 245 | 246 | # Use the test_client 
fixture from conftest.py 247 | @pytest_asyncio.fixture(scope="function") 248 | async def test_server_client(httpx_test_client): 249 | """Get a test client for server API testing. 250 | 251 | This uses the httpx_test_client from conftest.py to ensure 252 | proper event loop and resource management. 253 | """ 254 | yield httpx_test_client 255 | 256 | @pytest.fixture 257 | def test_code(): 258 | """Return a sample code snippet for testing.""" 259 | return """ 260 | def example_function(x: int) -> int: 261 | return x * 2 262 | """ 263 | 264 | @pytest.fixture 265 | def test_issue(): 266 | """Return a sample issue description for testing.""" 267 | return "Error in function: example_function returns incorrect results for negative values" 268 | 269 | @pytest.fixture 270 | def test_adr(): 271 | """Return a sample ADR structure for testing.""" 272 | return { 273 | "title": "Test ADR", 274 | "status": "Proposed", 275 | "context": "This is a test ADR for automated testing purposes.", 276 | "decision": "We've decided to use this test ADR format.", 277 | "consequences": { 278 | "positive": ["Test positive consequence"], 279 | "negative": ["Test negative consequence"] 280 | }, 281 | "options": [ 282 | { 283 | "title": "Test option", 284 | "description": "Test description", 285 | "pros": ["Test pro"], 286 | "cons": ["Test con"] 287 | } 288 | ] 289 | } 290 | 291 | @pytest.mark.asyncio 292 | async def test_health_check(test_server_client: AsyncClient): 293 | """Test health check endpoint.""" 294 | response = await test_server_client.get("/health") 295 | assert response.status_code == 200 296 | data = response.json() 297 | assert "status" in data 298 | 299 | @pytest.mark.asyncio 300 | async def test_metrics(test_server_client: AsyncClient): 301 | """Test metrics endpoint.""" 302 | response = await test_server_client.get("/metrics") 303 | # Some test servers may not have metrics enabled 304 | if response.status_code == 200: 305 | data = response.json() 306 | assert "metrics" in data 307 | else: 308 | logger.info(f"Metrics endpoint not available (status: {response.status_code})") 309 | assert response.status_code in [404, 503] # Not found or service unavailable 310 | 311 | @pytest.mark.asyncio 312 | async def test_analyze_code(test_server_client: AsyncClient, test_code: str): 313 | """Test code analysis endpoint.""" 314 | response = await test_server_client.post( 315 | "/tools/analyze-code", 316 | json={ 317 | "name": "analyze-code", 318 | "arguments": { 319 | "code": test_code, 320 | "context": {} 321 | } 322 | } 323 | ) 324 | # Component might not be available in test server 325 | if response.status_code == 200: 326 | data = response.json() 327 | assert "content" in data 328 | else: 329 | logger.info(f"Code analysis endpoint not available (status: {response.status_code})") 330 | assert response.status_code in [404, 503] # Not found or service unavailable 331 | 332 | @pytest.mark.asyncio 333 | async def test_create_adr(test_server_client: AsyncClient, test_adr: dict): 334 | """Test ADR creation endpoint.""" 335 | response = await test_server_client.post( 336 | "/tools/create-adr", 337 | json={ 338 | "name": "create-adr", 339 | "arguments": test_adr 340 | } 341 | ) 342 | # Component might not be available in test server 343 | if response.status_code == 200: 344 | data = response.json() 345 | assert "content" in data 346 | else: 347 | logger.info(f"ADR creation endpoint not available (status: {response.status_code})") 348 | assert response.status_code in [404, 503] # Not found or service unavailable 349 | 350 | 
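# --- Illustrative sketch (not part of the original test suite) ---
# The tool-endpoint tests in this module (analyze-code, create-adr, debug-issue,
# search-knowledge) follow one pattern: POST to /tools/<name>, accept a 200
# response carrying a "content" payload, and tolerate 404/503 when the backing
# component is unavailable in the test server. A shared helper could express that
# pattern once; the names below simply mirror the surrounding tests.
async def _call_tool_and_check(client: AsyncClient, tool: str, arguments: dict) -> None:
    response = await client.post(
        f"/tools/{tool}",
        json={"name": tool, "arguments": arguments},
    )
    if response.status_code == 200:
        # Successful tool invocation returns a "content" payload.
        assert "content" in response.json()
    else:
        # Component not available in this test server configuration.
        logger.info(f"{tool} endpoint not available (status: {response.status_code})")
        assert response.status_code in [404, 503]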
@pytest.mark.asyncio 351 | async def test_debug_issue(test_server_client: AsyncClient, test_issue: str): 352 | """Test issue debugging endpoint.""" 353 | response = await test_server_client.post( 354 | "/tools/debug-issue", 355 | json={ 356 | "name": "debug-issue", 357 | "arguments": { 358 | "issue": test_issue, 359 | "context": {} 360 | } 361 | } 362 | ) 363 | # Component might not be available in test server 364 | if response.status_code == 200: 365 | data = response.json() 366 | assert "content" in data 367 | else: 368 | logger.info(f"Debug issue endpoint not available (status: {response.status_code})") 369 | assert response.status_code in [404, 503] # Not found or service unavailable 370 | 371 | @pytest.mark.asyncio 372 | async def test_search_knowledge(test_server_client: AsyncClient): 373 | """Test knowledge search endpoint.""" 374 | response = await test_server_client.post( 375 | "/tools/search-knowledge", 376 | json={ 377 | "name": "search-knowledge", 378 | "arguments": { 379 | "query": "test query", 380 | "limit": 5 381 | } 382 | } 383 | ) 384 | # Component might not be available in test server 385 | if response.status_code == 200: 386 | data = response.json() 387 | assert "content" in data 388 | else: 389 | logger.info(f"Knowledge search endpoint not available (status: {response.status_code})") 390 | assert response.status_code in [404, 503] # Not found or service unavailable 391 | 392 | @pytest.mark.asyncio 393 | async def test_get_task(test_server_client: AsyncClient): 394 | """Test task endpoint.""" 395 | # Create a test task ID 396 | test_id = f"test_task_{uuid.uuid4().hex}" 397 | 398 | response = await test_server_client.post( 399 | "/task", 400 | json={ 401 | "task_id": test_id, 402 | "status": "pending", 403 | "result": None 404 | } 405 | ) 406 | assert response.status_code in [200, 404, 503] # Allow various responses depending on component availability 407 | 408 | @pytest.mark.asyncio 409 | async def test_invalid_request(test_server_client: AsyncClient): 410 | """Test invalid request handling.""" 411 | response = await test_server_client.post( 412 | "/tools/invalid-tool", 413 | json={ 414 | "name": "invalid-tool", 415 | "arguments": {} 416 | } 417 | ) 418 | assert response.status_code in [404, 400] # Either not found or bad request 419 | 420 | @pytest.mark.asyncio 421 | async def test_not_found(test_server_client: AsyncClient): 422 | """Test 404 handling.""" 423 | response = await test_server_client.get("/nonexistent-endpoint") 424 | assert response.status_code == 404 425 | 426 | @pytest.mark.asyncio 427 | async def test_server_lifecycle(): 428 | """Test server lifecycle.""" 429 | # This is a safety check to ensure we're not breaking anything 430 | # The actual server lifecycle is tested by the conftest fixtures 431 | assert True # Replace with real checks if needed 432 | 433 | @pytest.mark.asyncio 434 | async def test_vector_store_search_threshold_validation(test_server_client: AsyncClient, setup_test_vector_store): 435 | """Test that the vector store search endpoint validates threshold values.""" 436 | # Skip if vector store setup failed 437 | if setup_test_vector_store is None: 438 | pytest.skip("Vector store setup failed, skipping test") 439 | 440 | # Test invalid threshold greater than 1.0 441 | response = await test_server_client.get("/api/vector-store/search?query=test&threshold=1.5") 442 | assert response.status_code == 422 443 | assert "threshold" in response.text 444 | assert "less than or equal to" in response.text 445 | 446 | # Test invalid threshold less than 
0.0 447 | response = await test_server_client.get("/api/vector-store/search?query=test&threshold=-0.5") 448 | assert response.status_code == 422 449 | assert "threshold" in response.text 450 | assert "greater than or equal to" in response.text 451 | 452 | # Test boundary value 0.0 (should be valid) 453 | response = await test_server_client.get("/api/vector-store/search?query=test&threshold=0.0") 454 | assert response.status_code == 200 455 | data = response.json() 456 | assert "results" in data 457 | assert data["threshold"] == 0.0 458 | 459 | # Test boundary value 1.0 (should be valid) 460 | response = await test_server_client.get("/api/vector-store/search?query=test&threshold=1.0") 461 | assert response.status_code == 200 462 | data = response.json() 463 | assert "results" in data 464 | assert data["threshold"] == 1.0 465 | 466 | # Test with valid filter parameters 467 | response = await test_server_client.get("/api/vector-store/search?query=test&threshold=0.7&file_type=python&path_pattern=src/*") 468 | assert response.status_code == 200 469 | data = response.json() 470 | assert "results" in data 471 | assert "query" in data 472 | assert "total_results" in data 473 | assert "limit" in data 474 | assert "threshold" in data 475 | assert data["threshold"] == 0.7 476 | 477 | # If we have results, check their format 478 | if data["results"]: 479 | result = data["results"][0] 480 | assert "id" in result 481 | assert "score" in result 482 | assert "text" in result 483 | assert "file_path" in result 484 | assert "line_range" in result 485 | assert "type" in result 486 | assert "language" in result 487 | assert "timestamp" in result 488 | 489 | @pytest.mark.asyncio 490 | async def test_vector_store_search_functionality(test_server_client: AsyncClient, setup_test_vector_store): 491 | """Test comprehensive vector store search functionality. 492 | 493 | This test validates the full functionality of the vector store search endpoint, 494 | including result format, filtering, and metadata handling. 495 | 496 | The test checks: 497 | 1. Basic search returns properly formatted results 498 | 2. File type filtering works correctly 499 | 3. Path pattern filtering works correctly 500 | 4. Limit parameter controls result count 501 | 5. 
Results contain all required metadata fields 502 | """ 503 | # Skip if vector store setup failed 504 | if setup_test_vector_store is None: 505 | pytest.skip("Vector store setup failed, skipping test") 506 | 507 | # Test basic search functionality 508 | response = await test_server_client.get( 509 | "/api/vector-store/search", 510 | params={ 511 | "query": "test query", 512 | "threshold": 0.7, 513 | "limit": 5 514 | } 515 | ) 516 | 517 | # We should have a successful response now that the vector store is initialized 518 | assert response.status_code == 200 519 | data = response.json() 520 | 521 | # Validate response structure 522 | assert "query" in data 523 | assert data["query"] == "test query" 524 | assert "results" in data 525 | assert "threshold" in data 526 | assert data["threshold"] == 0.7 527 | assert "total_results" in data 528 | assert "limit" in data 529 | assert data["limit"] == 5 530 | 531 | # Test with file type filter 532 | response = await test_server_client.get( 533 | "/api/vector-store/search", 534 | params={ 535 | "query": "test query", 536 | "threshold": 0.7, 537 | "limit": 5, 538 | "file_type": "python" 539 | } 540 | ) 541 | assert response.status_code == 200 542 | data = response.json() 543 | assert "file_type" in data 544 | assert data["file_type"] == "python" 545 | 546 | # Test with path pattern filter 547 | response = await test_server_client.get( 548 | "/api/vector-store/search", 549 | params={ 550 | "query": "test query", 551 | "threshold": 0.7, 552 | "limit": 5, 553 | "path_pattern": "src/**/*.py" 554 | } 555 | ) 556 | assert response.status_code == 200 557 | data = response.json() 558 | assert "path_pattern" in data 559 | assert data["path_pattern"] == "src/**/*.py" 560 | 561 | # Test with limit=1 562 | response = await test_server_client.get( 563 | "/api/vector-store/search", 564 | params={ 565 | "query": "test query", 566 | "threshold": 0.7, 567 | "limit": 1 568 | } 569 | ) 570 | assert response.status_code == 200 571 | data = response.json() 572 | assert data["limit"] == 1 573 | 574 | # If we have results, verify the result format 575 | if data["results"]: 576 | result = data["results"][0] 577 | # Check all required fields are present 578 | assert "id" in result 579 | assert "score" in result 580 | assert "text" in result 581 | assert "file_path" in result 582 | assert "line_range" in result 583 | assert "type" in result 584 | assert "language" in result 585 | assert "timestamp" in result 586 | 587 | # Validate data types 588 | assert isinstance(result["id"], str) 589 | assert isinstance(result["score"], (int, float)) 590 | assert isinstance(result["text"], str) 591 | assert isinstance(result["file_path"], str) 592 | assert isinstance(result["line_range"], str) 593 | assert isinstance(result["type"], str) 594 | assert isinstance(result["language"], str) 595 | assert isinstance(result["timestamp"], str) 596 | 597 | @pytest.mark.asyncio 598 | async def test_vector_store_search_error_handling(test_server_client: AsyncClient, setup_test_vector_store): 599 | """Test error handling for vector store search endpoint. 600 | 601 | This test validates the error handling capabilities of the vector store search endpoint 602 | when provided with invalid or missing required parameters. 603 | 604 | The test checks: 605 | 1. Missing query parameter returns appropriate error 606 | 2. 
Invalid limit parameter (negative/zero) returns appropriate error 607 | """ 608 | # Skip if vector store setup failed 609 | if setup_test_vector_store is None: 610 | pytest.skip("Vector store setup failed, skipping test") 611 | 612 | # Test missing query parameter 613 | response = await test_server_client.get( 614 | "/api/vector-store/search", 615 | params={ 616 | "threshold": 0.7, 617 | "limit": 5 618 | } 619 | ) 620 | 621 | # Missing required query parameter should return 422 622 | assert response.status_code == 422 623 | data = response.json() 624 | assert "detail" in data 625 | assert any("query" in error["loc"] for error in data["detail"]) 626 | 627 | # Test invalid limit parameter (negative) 628 | response = await test_server_client.get( 629 | "/api/vector-store/search", 630 | params={ 631 | "query": "test query", 632 | "threshold": 0.7, 633 | "limit": -5 634 | } 635 | ) 636 | assert response.status_code == 422 637 | data = response.json() 638 | assert "detail" in data 639 | assert any("limit" in error["loc"] for error in data["detail"]) 640 | 641 | # Test invalid limit parameter (zero) 642 | response = await test_server_client.get( 643 | "/api/vector-store/search", 644 | params={ 645 | "query": "test query", 646 | "threshold": 0.7, 647 | "limit": 0 648 | } 649 | ) 650 | assert response.status_code == 422 651 | data = response.json() 652 | assert "detail" in data 653 | assert any("limit" in error["loc"] for error in data["detail"]) 654 | 655 | @pytest.mark.asyncio 656 | async def test_vector_store_search_performance(test_server_client: AsyncClient, setup_test_vector_store): 657 | """Test performance of vector store search endpoint. 658 | 659 | This test measures the response time of the vector store search endpoint 660 | to ensure it meets performance requirements. 661 | 662 | The test checks: 663 | 1. Search response time is within acceptable limits (< 1000ms) 664 | 2. Multiple consecutive searches maintain performance 665 | """ 666 | # Skip if vector store setup failed 667 | if setup_test_vector_store is None: 668 | pytest.skip("Vector store setup failed, skipping test") 669 | 670 | # Define performance thresholds 671 | max_response_time_ms = 1000 # 1 second maximum response time 672 | 673 | # Perform timed search tests 674 | for i in range(3): # Test 3 consecutive searches 675 | start_time = time.time() 676 | 677 | response = await test_server_client.get( 678 | "/api/vector-store/search", 679 | params={ 680 | "query": f"test performance query {i}", 681 | "threshold": 0.7, 682 | "limit": 5 683 | } 684 | ) 685 | 686 | end_time = time.time() 687 | response_time_ms = (end_time - start_time) * 1000 688 | 689 | assert response.status_code == 200 690 | logger.info(f"Search {i+1} response time: {response_time_ms:.2f}ms") 691 | 692 | # Assert performance is within acceptable limits 693 | assert response_time_ms < max_response_time_ms, \ 694 | f"Search response time ({response_time_ms:.2f}ms) exceeds threshold ({max_response_time_ms}ms)" 695 | 696 | # Verify we got a valid response 697 | data = response.json() 698 | assert "results" in data 699 | assert "query" in data 700 | 701 | @pytest.mark.asyncio 702 | async def test_vector_store_search_threshold_validation_mock(test_server_client: AsyncClient): 703 | """Test that the vector store search endpoint validates threshold values using mock approach. 704 | 705 | This test isolates FastAPI's parameter validation from the actual server initialization. 706 | It doesn't test the vector store implementation but only the parameter validation logic. 
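    The 422 responses asserted below presumably come from declarative constraints
    on the route's query parameters, along the lines of this sketch (an assumption
    about the route definition, not a copy of it):

        threshold: float = Query(default=0.7, ge=0.0, le=1.0)

    FastAPI evaluates such constraints while parsing the request, before any
    handler or vector store code runs, which is why parameter validation can be
    exercised even when server components are not fully initialized.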
707 | """ 708 | # First, check if server is responding at all by checking health endpoint 709 | health_response = await test_server_client.get("/health") 710 | 711 | # If we can't even reach the server, skip the test 712 | if health_response.status_code >= 500: 713 | pytest.skip(f"Server is not responding (status: {health_response.status_code})") 714 | 715 | # Create a list of test cases: (threshold, expected_validation_error) 716 | # None for expected_validation_error means we expect validation to pass 717 | test_cases = [ 718 | # Invalid thresholds (should fail validation) 719 | (1.5, "less than or equal to 1.0"), 720 | (-0.5, "greater than or equal to 0.0"), 721 | # Valid thresholds (should pass validation) 722 | (0.0, None), 723 | (1.0, None), 724 | (0.7, None), 725 | ] 726 | 727 | # Try each test case 728 | for threshold, expected_validation_error in test_cases: 729 | # Skip testing health check which will never have parameter validation errors 730 | # Here we're just testing the static validation in the FastAPI route definition 731 | # This will trigger validation errors regardless of server state 732 | response = await test_server_client.get(f"/api/vector-store/search?query=test&threshold={threshold}") 733 | 734 | # Check response based on expected validation 735 | if expected_validation_error: 736 | # If validation error is expected, check for 422 status 737 | # Note: If we got 503, parameter validation didn't even happen 738 | # In some test environments this is normal, so we'll skip the assertion 739 | if response.status_code == 503: 740 | logger.info(f"Server returned 503 for threshold={threshold}, " 741 | f"parameter validation couldn't be tested due to server state") 742 | continue 743 | 744 | # If we get here, we should have a 422 validation error 745 | assert response.status_code == 422, \ 746 | f"Expected 422 for invalid threshold {threshold}, got {response.status_code}: {response.text}" 747 | 748 | # Check if validation error message contains expected text 749 | assert expected_validation_error in response.text, \ 750 | f"Expected validation error to contain '{expected_validation_error}', got: {response.text}" 751 | 752 | logger.info(f"Threshold {threshold} correctly failed validation with message containing '{expected_validation_error}'") 753 | else: 754 | # For valid thresholds, skip assertion if server returned 503 755 | if response.status_code == 503: 756 | logger.info(f"Server returned 503 for valid threshold={threshold}, " 757 | f"but parameter validation passed (otherwise would be 422)") 758 | continue 759 | 760 | # If we get a non-503 response for a valid threshold, it should be 200 761 | # (or 404 if the endpoint doesn't exist in test server) 762 | assert response.status_code in [200, 404], \ 763 | f"Expected 200 for valid threshold {threshold}, got {response.status_code}: {response.text}" 764 | 765 | logger.info(f"Threshold {threshold} correctly passed validation") 766 | 767 | logger.info("Completed threshold parameter validation tests") 768 | ``` -------------------------------------------------------------------------------- /output.txt: -------------------------------------------------------------------------------- ``` 1 | ============================= test session starts ============================== 2 | platform darwin -- Python 3.13.2, pytest-8.3.5, pluggy-1.5.0 -- /Users/tosinakinosho/workspaces/mcp-codebase-insight/.venv/bin/python3.13 3 | cachedir: .pytest_cache 4 | rootdir: /Users/tosinakinosho/workspaces/mcp-codebase-insight 5 | configfile: 
pytest.ini 6 | plugins: cov-6.0.0, anyio-4.9.0, asyncio-0.26.0 7 | asyncio: mode=Mode.STRICT, asyncio_default_fixture_loop_scope=session, asyncio_default_test_loop_scope=function 8 | collecting ... collected 106 items 9 | 10 | tests/components/test_core_components.py::test_adr_manager PASSED [ 0%] 11 | tests/components/test_core_components.py::test_knowledge_base PASSED [ 1%] 12 | tests/components/test_core_components.py::test_task_manager PASSED [ 2%] 13 | tests/components/test_core_components.py::test_metrics_manager PASSED [ 3%] 14 | tests/components/test_core_components.py::test_health_manager PASSED [ 4%] 15 | tests/components/test_core_components.py::test_cache_manager PASSED [ 5%] 16 | tests/components/test_core_components.py::test_documentation_manager PASSED [ 6%] 17 | tests/components/test_core_components.py::test_debug_system PASSED [ 7%] 18 | tests/components/test_embeddings.py::test_embedder_initialization PASSED [ 8%] 19 | tests/components/test_embeddings.py::test_embedder_embedding PASSED [ 9%] 20 | tests/components/test_knowledge_base.py::test_knowledge_base_initialization PASSED [ 10%] 21 | tests/components/test_knowledge_base.py::test_add_and_get_pattern PASSED [ 11%] 22 | tests/components/test_knowledge_base.py::test_find_similar_patterns PASSED [ 12%] 23 | tests/components/test_knowledge_base.py::test_update_pattern PASSED [ 13%] 24 | tests/components/test_sse_components.py::test_mcp_server_initialization PASSED [ 14%] 25 | tests/components/test_sse_components.py::test_register_tools PASSED [ 15%] 26 | tests/components/test_sse_components.py::test_get_starlette_app FAILED [ 16%] 27 | tests/components/test_sse_components.py::test_create_sse_server FAILED [ 16%] 28 | tests/components/test_sse_components.py::test_vector_search_tool PASSED [ 17%] 29 | tests/components/test_sse_components.py::test_knowledge_search_tool PASSED [ 18%] 30 | tests/components/test_sse_components.py::test_adr_list_tool FAILED [ 19%] 31 | tests/components/test_sse_components.py::test_task_status_tool FAILED [ 20%] 32 | tests/components/test_sse_components.py::test_sse_handle_connect FAILED [ 21%] 33 | 34 | =================================== FAILURES =================================== 35 | ____________________________ test_get_starlette_app ____________________________ 36 | 37 | mock_create_sse = <MagicMock name='create_sse_server' id='5349118976'> 38 | mcp_server = <src.mcp_codebase_insight.core.sse.MCP_CodebaseInsightServer object at 0x13ed274d0> 39 | 40 | @patch('mcp_codebase_insight.core.sse.create_sse_server') 41 | async def test_get_starlette_app(mock_create_sse, mcp_server): 42 | """Test getting the Starlette app for the MCP server.""" 43 | # Set up the mock 44 | mock_app = MagicMock() 45 | mock_create_sse.return_value = mock_app 46 | 47 | # Reset the cached app to force a new creation 48 | mcp_server._starlette_app = None 49 | 50 | # Get the Starlette app 51 | app = mcp_server.get_starlette_app() 52 | 53 | # Verify tools were registered 54 | assert mcp_server.tools_registered is True 55 | 56 | # Verify create_sse_server was called with the MCP server 57 | > mock_create_sse.assert_called_once_with(mcp_server.mcp_server) 58 | 59 | tests/components/test_sse_components.py:178: 60 | _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 61 | 62 | self = <MagicMock name='create_sse_server' id='5349118976'> 63 | args = (<mcp.server.fastmcp.server.FastMCP object at 0x13ed24410>,), kwargs = {} 64 | msg = "Expected 'create_sse_server' to be called once. Called 0 times." 
65 | 66 | def assert_called_once_with(self, /, *args, **kwargs): 67 | """assert that the mock was called exactly once and that that call was 68 | with the specified arguments.""" 69 | if not self.call_count == 1: 70 | msg = ("Expected '%s' to be called once. Called %s times.%s" 71 | % (self._mock_name or 'mock', 72 | self.call_count, 73 | self._calls_repr())) 74 | > raise AssertionError(msg) 75 | E AssertionError: Expected 'create_sse_server' to be called once. Called 0 times. 76 | 77 | /opt/homebrew/Cellar/[email protected]/3.13.2/Frameworks/Python.framework/Versions/3.13/lib/python3.13/unittest/mock.py:988: AssertionError 78 | ---------------------------- Captured stdout setup ----------------------------- 79 | {"event": "MCP Codebase Insight server initialized", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.688819Z"} 80 | ------------------------------ Captured log setup ------------------------------ 81 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "MCP Codebase Insight server initialized", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.688819Z"} 82 | ----------------------------- Captured stdout call ----------------------------- 83 | {"event": "Registering tools with MCP server", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.693189Z"} 84 | {"event": "Some critical dependencies are not available: task_manager", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.693272Z"} 85 | {"event": "Tools requiring these dependencies will not be registered", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.693321Z"} 86 | {"event": "MCP tools registration completed", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.697672Z"} 87 | {"event": "Initializing SSE transport with endpoint: /sse", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.697772Z"} 88 | {"event": "Created SSE server with routes:", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.698263Z"} 89 | {"event": "Route: /health, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.698395Z"} 90 | {"event": "Route: /sse, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.698465Z"} 91 | {"event": "Route: /message, methods: {'POST'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.698514Z"} 92 | ------------------------------ Captured log call ------------------------------- 93 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Registering tools with MCP server", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.693189Z"} 94 | WARNING src.mcp_codebase_insight.core.sse:logger.py:75 {"event": "Some critical dependencies are not available: task_manager", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.693272Z"} 95 | WARNING src.mcp_codebase_insight.core.sse:logger.py:75 {"event": "Tools requiring these dependencies will not be registered", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": 
"2025-04-18T06:51:43.693321Z"} 96 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "MCP tools registration completed", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.697672Z"} 97 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Initializing SSE transport with endpoint: /sse", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.697772Z"} 98 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Created SSE server with routes:", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.698263Z"} 99 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Route: /health, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.698395Z"} 100 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Route: /sse, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.698465Z"} 101 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Route: /message, methods: {'POST'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.698514Z"} 102 | ____________________________ test_create_sse_server ____________________________ 103 | 104 | mock_starlette = <MagicMock name='Starlette' id='5349123680'> 105 | mock_transport = <MagicMock name='CodebaseInsightSseTransport' id='5349125024'> 106 | 107 | @patch('mcp_codebase_insight.core.sse.CodebaseInsightSseTransport') 108 | @patch('mcp_codebase_insight.core.sse.Starlette') 109 | async def test_create_sse_server(mock_starlette, mock_transport): 110 | """Test creating the SSE server.""" 111 | # Set up mocks 112 | mock_mcp = MagicMock(spec=FastMCP) 113 | mock_transport_instance = MagicMock() 114 | mock_transport.return_value = mock_transport_instance 115 | mock_app = MagicMock() 116 | mock_starlette.return_value = mock_app 117 | 118 | # Create the SSE server 119 | app = create_sse_server(mock_mcp) 120 | 121 | # Verify CodebaseInsightSseTransport was initialized correctly 122 | > mock_transport.assert_called_once_with("/sse") 123 | 124 | tests/components/test_sse_components.py:199: 125 | _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 126 | 127 | self = <MagicMock name='CodebaseInsightSseTransport' id='5349125024'> 128 | args = ('/sse',), kwargs = {} 129 | msg = "Expected 'CodebaseInsightSseTransport' to be called once. Called 0 times." 130 | 131 | def assert_called_once_with(self, /, *args, **kwargs): 132 | """assert that the mock was called exactly once and that that call was 133 | with the specified arguments.""" 134 | if not self.call_count == 1: 135 | msg = ("Expected '%s' to be called once. Called %s times.%s" 136 | % (self._mock_name or 'mock', 137 | self.call_count, 138 | self._calls_repr())) 139 | > raise AssertionError(msg) 140 | E AssertionError: Expected 'CodebaseInsightSseTransport' to be called once. Called 0 times. 
141 | 142 | /opt/homebrew/Cellar/[email protected]/3.13.2/Frameworks/Python.framework/Versions/3.13/lib/python3.13/unittest/mock.py:988: AssertionError 143 | ----------------------------- Captured stdout call ----------------------------- 144 | {"event": "Initializing SSE transport with endpoint: /sse", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754343Z"} 145 | {"event": "Created SSE server with routes:", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754481Z"} 146 | {"event": "Route: /health, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754566Z"} 147 | {"event": "Route: /sse, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754606Z"} 148 | {"event": "Route: /message, methods: {'POST'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754640Z"} 149 | ------------------------------ Captured log call ------------------------------- 150 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Initializing SSE transport with endpoint: /sse", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754343Z"} 151 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Created SSE server with routes:", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754481Z"} 152 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Route: /health, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754566Z"} 153 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Route: /sse, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754606Z"} 154 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Route: /message, methods: {'POST'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754640Z"} 155 | ______________________________ test_adr_list_tool ______________________________ 156 | 157 | mcp_server = <src.mcp_codebase_insight.core.sse.MCP_CodebaseInsightServer object at 0x13ed7ef90> 158 | 159 | async def test_adr_list_tool(mcp_server): 160 | """Test the ADR list tool.""" 161 | # Make sure tools are registered 162 | if not mcp_server.tools_registered: 163 | mcp_server.register_tools() 164 | 165 | # Mock the FastMCP add_tool method to capture calls 166 | with patch.object(mcp_server.mcp_server, 'add_tool') as mock_add_tool: 167 | # Re-register the ADR list tool 168 | mcp_server._register_adr() 169 | 170 | # Verify tool was registered with correct parameters 171 | mock_add_tool.assert_called_once() 172 | args = mock_add_tool.call_args[0] 173 | > assert args[0] == "list-adrs" # Tool name 174 | E IndexError: tuple index out of range 175 | 176 | tests/components/test_sse_components.py:319: IndexError 177 | ---------------------------- Captured stdout setup ----------------------------- 178 | {"event": "MCP Codebase Insight server initialized", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.796820Z"} 179 | ------------------------------ Captured log setup ------------------------------ 180 | INFO 
src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "MCP Codebase Insight server initialized", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.796820Z"} 181 | ----------------------------- Captured stdout call ----------------------------- 182 | {"event": "Registering tools with MCP server", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.797106Z"} 183 | {"event": "Some critical dependencies are not available: task_manager", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.797158Z"} 184 | {"event": "Tools requiring these dependencies will not be registered", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.797197Z"} 185 | {"event": "MCP tools registration completed", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.799588Z"} 186 | ------------------------------ Captured log call ------------------------------- 187 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Registering tools with MCP server", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.797106Z"} 188 | WARNING src.mcp_codebase_insight.core.sse:logger.py:75 {"event": "Some critical dependencies are not available: task_manager", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.797158Z"} 189 | WARNING src.mcp_codebase_insight.core.sse:logger.py:75 {"event": "Tools requiring these dependencies will not be registered", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.797197Z"} 190 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "MCP tools registration completed", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.799588Z"} 191 | ____________________________ test_task_status_tool _____________________________ 192 | 193 | mcp_server = <src.mcp_codebase_insight.core.sse.MCP_CodebaseInsightServer object at 0x13ef72030> 194 | 195 | async def test_task_status_tool(mcp_server): 196 | """Test the task status tool.""" 197 | # Make sure tools are registered 198 | if not mcp_server.tools_registered: 199 | mcp_server.register_tools() 200 | 201 | # Mock the FastMCP add_tool method to capture calls 202 | with patch.object(mcp_server.mcp_server, 'add_tool') as mock_add_tool: 203 | # Re-register the task status tool 204 | mcp_server._register_task() 205 | 206 | # Verify tool was registered with correct parameters 207 | mock_add_tool.assert_called_once() 208 | args = mock_add_tool.call_args[0] 209 | > assert args[0] == "get-task-status" # Tool name 210 | E IndexError: tuple index out of range 211 | 212 | tests/components/test_sse_components.py:338: IndexError 213 | ---------------------------- Captured stdout setup ----------------------------- 214 | {"event": "MCP Codebase Insight server initialized", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.806759Z"} 215 | ------------------------------ Captured log setup ------------------------------ 216 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "MCP Codebase Insight server initialized", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.806759Z"} 217 | ----------------------------- Captured stdout call 
----------------------------- 218 | {"event": "Registering tools with MCP server", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.807096Z"} 219 | {"event": "Some critical dependencies are not available: task_manager", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.807156Z"} 220 | {"event": "Tools requiring these dependencies will not be registered", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.807197Z"} 221 | {"event": "MCP tools registration completed", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.810043Z"} 222 | ------------------------------ Captured log call ------------------------------- 223 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Registering tools with MCP server", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.807096Z"} 224 | WARNING src.mcp_codebase_insight.core.sse:logger.py:75 {"event": "Some critical dependencies are not available: task_manager", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.807156Z"} 225 | WARNING src.mcp_codebase_insight.core.sse:logger.py:75 {"event": "Tools requiring these dependencies will not be registered", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.807197Z"} 226 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "MCP tools registration completed", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.810043Z"} 227 | ___________________________ test_sse_handle_connect ____________________________ 228 | 229 | mock_starlette = <MagicMock name='Starlette' id='5349128384'> 230 | mock_transport = <MagicMock name='SseServerTransport' id='5349128720'> 231 | 232 | @patch('mcp_codebase_insight.core.sse.SseServerTransport') 233 | @patch('mcp_codebase_insight.core.sse.Starlette') 234 | async def test_sse_handle_connect(mock_starlette, mock_transport): 235 | """Test the SSE connection handling functionality.""" 236 | # Set up mocks 237 | mock_transport_instance = MagicMock() 238 | mock_transport.return_value = mock_transport_instance 239 | 240 | mock_mcp = MagicMock(spec=FastMCP) 241 | # For MCP v1.5.0, create a mock run method instead of initialization options 242 | mock_mcp.run = AsyncMock() 243 | 244 | mock_request = MagicMock() 245 | mock_request.client = "127.0.0.1" 246 | mock_request.scope = {"type": "http"} 247 | 248 | # Mock the transport's connect_sse method 249 | mock_streams = (AsyncMock(), AsyncMock()) 250 | mock_cm = MagicMock() 251 | mock_cm.__aenter__ = AsyncMock(return_value=mock_streams) 252 | mock_cm.__aexit__ = AsyncMock() 253 | mock_transport_instance.connect_sse.return_value = mock_cm 254 | 255 | # Create a mock handler and add it to our mock app instance 256 | handle_sse = AsyncMock() 257 | mock_app = MagicMock() 258 | mock_starlette.return_value = mock_app 259 | 260 | # Set up a mock route that we can access 261 | mock_route = MagicMock() 262 | mock_route.path = "/sse/" 263 | mock_route.endpoint = handle_sse 264 | mock_app.routes = [mock_route] 265 | 266 | # Create the SSE server 267 | app = create_sse_server(mock_mcp) 268 | 269 | # Extract the actual handler from the route configuration 270 | > routes_kwarg = mock_starlette.call_args.kwargs.get('routes', []) 271 | E AttributeError: 
'NoneType' object has no attribute 'kwargs' 272 | 273 | tests/components/test_sse_components.py:381: AttributeError 274 | ----------------------------- Captured stdout call ----------------------------- 275 | {"event": "Initializing SSE transport with endpoint: /sse", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817486Z"} 276 | {"event": "Created SSE server with routes:", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817591Z"} 277 | {"event": "Route: /health, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817674Z"} 278 | {"event": "Route: /sse, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817714Z"} 279 | {"event": "Route: /message, methods: {'POST'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817749Z"} 280 | ------------------------------ Captured log call ------------------------------- 281 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Initializing SSE transport with endpoint: /sse", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817486Z"} 282 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Created SSE server with routes:", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817591Z"} 283 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Route: /health, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817674Z"} 284 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Route: /sse, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817714Z"} 285 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Route: /message, methods: {'POST'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817749Z"} 286 | --------------------------- Captured stdout teardown --------------------------- 287 | Cleaning up test collection: test_collection_a41f92f0 288 | HTTP Request: DELETE http://localhost:6333/collections/test_collection_a41f92f0 "HTTP/1.1 200 OK" 289 | Found 0 server states at end of session 290 | ---------------------------- Captured log teardown ----------------------------- 291 | INFO conftest:conftest.py:169 Cleaning up test collection: test_collection_a41f92f0 292 | INFO httpx:_client.py:1025 HTTP Request: DELETE http://localhost:6333/collections/test_collection_a41f92f0 "HTTP/1.1 200 OK" 293 | INFO conftest:conftest.py:530 Found 0 server states at end of session 294 | 295 | ---------- coverage: platform darwin, python 3.13.2-final-0 ---------- 296 | Name Stmts Miss Branch BrPart Cover Missing 297 | ----------------------------------------------------------------------------------------------- 298 | src/mcp_codebase_insight/__init__.py 3 0 0 0 100% 299 | src/mcp_codebase_insight/__main__.py 28 28 0 0 0% 3-76 300 | src/mcp_codebase_insight/asgi.py 5 5 0 0 0% 3-11 301 | src/mcp_codebase_insight/core/__init__.py 2 0 0 0 100% 302 | src/mcp_codebase_insight/core/adr.py 127 50 26 5 54% 75-111, 118-134, 186, 202, 204->206, 207, 209, 220-227 303 | src/mcp_codebase_insight/core/cache.py 168 42 68 26 68% 33, 36, 42->exit, 
70-71, 77-78, 90, 97->exit, 102-103, 109, 124-125, 142-143, 160-161, 167-169, 173-176, 181, 187, 193, 199, 205, 217, 220, 225, 228->exit, 234, 236->238, 238->exit, 243-249, 254, 258, 261->265, 265->270, 267-268, 274 304 | src/mcp_codebase_insight/core/component_status.py 8 0 0 0 100% 305 | src/mcp_codebase_insight/core/config.py 63 23 14 4 60% 38, 44-45, 47-51, 64-67, 91-105, 109, 117, 121-122 306 | src/mcp_codebase_insight/core/debug.py 122 69 34 0 34% 58-78, 82-97, 122-128, 138-153, 161-168, 172-205 307 | src/mcp_codebase_insight/core/di.py 99 62 14 0 33% 40, 53-76, 80-82, 86-97, 101-106, 110-112, 116-120, 124-132, 136-144, 148-156, 160-169 308 | src/mcp_codebase_insight/core/documentation.py 165 111 52 1 25% 53-77, 84-100, 134, 150-167, 175-189, 201-214, 228-316 309 | src/mcp_codebase_insight/core/embeddings.py 77 28 18 3 61% 29->exit, 48-58, 79-83, 88, 104-106, 114-128, 132 310 | src/mcp_codebase_insight/core/errors.py 96 27 2 0 70% 55-58, 62, 77, 88, 99, 110, 121, 132, 143, 154, 165, 176, 187, 198, 209, 220, 231, 242, 253, 264, 275, 279-282 311 | src/mcp_codebase_insight/core/health.py 140 58 26 8 54% 52-71, 75-98, 111, 113, 128, 146, 156-162, 168->178, 170-171, 180-181, 190-191, 215-216, 232-233, 235-236, 259-260, 262-263 312 | src/mcp_codebase_insight/core/knowledge.py 253 100 74 25 55% 95, 105->109, 114, 119-124, 129->exit, 131-138, 143->exit, 145-151, 155, 167, 170->175, 172-173, 208->223, 230, 250, 252->254, 254->256, 257, 258->260, 261, 263, 265, 270->285, 298, 303, 305, 307, 320->318, 335-351, 361-379, 404-421, 432-445, 457-470, 479-488, 496-503, 507-514, 518-524 313 | src/mcp_codebase_insight/core/metrics.py 108 41 38 11 58% 43, 47, 58-59, 62-65, 70, 74, 80-83, 89-100, 111, 122, 127-128, 138, 145, 151, 153, 165-183 314 | src/mcp_codebase_insight/core/prompts.py 72 72 16 0 0% 3-262 315 | src/mcp_codebase_insight/core/sse.py 220 116 40 9 46% 29-37, 62-108, 130-141, 153-154, 162, 171-178, 186-188, 202-207, 239, 280-285, 293, 302-303, 315->321, 330-331, 338-339, 343-344, 349-380, 393-394, 398-419, 432-433, 437-458, 471-472, 476-483, 502->504 316 | src/mcp_codebase_insight/core/state.py 168 120 54 0 22% 48-53, 63-77, 84-93, 97-98, 102, 106-144, 148, 161-162, 167, 171, 175, 179, 183-335 317 | src/mcp_codebase_insight/core/task_tracker.py 48 28 12 0 33% 29-37, 45-52, 60-78, 86, 94, 102, 106-107 318 | src/mcp_codebase_insight/core/tasks.py 259 172 74 1 26% 89-113, 117-134, 138-140, 144-162, 203, 217-233, 237-245, 254-264, 268-318, 323-341, 349-357, 363-377, 384-397, 404-415, 422-432, 439-462 319 | src/mcp_codebase_insight/core/vector_store.py 177 73 26 5 58% 62->67, 78->93, 84-90, 99-100, 119-122, 127-129, 145-146, 158-159, 164-165, 170-184, 200-201, 233-235, 264-266, 270, 290, 327-393, 411 320 | src/mcp_codebase_insight/models.py 18 0 0 0 100% 321 | src/mcp_codebase_insight/server.py 630 536 128 0 12% 55-109, 121-138, 142-1491, 1549-1550, 1554-1561, 1585-1590, 1595, 1599-1616, 1620-1622, 1626, 1638-1664, 1668-1688 322 | src/mcp_codebase_insight/server_test_isolation.py 48 38 18 0 15% 31-39, 44-99 323 | src/mcp_codebase_insight/utils/__init__.py 2 0 0 0 100% 324 | src/mcp_codebase_insight/utils/logger.py 29 5 0 0 83% 52-53, 82, 89, 97 325 | src/mcp_codebase_insight/version.py 14 14 2 0 0% 3-22 326 | ----------------------------------------------------------------------------------------------- 327 | TOTAL 3149 1818 736 98 38% 328 | 329 | =========================== short test summary info ============================ 330 | FAILED 
tests/components/test_sse_components.py::test_get_starlette_app - AssertionError: Expected 'create_sse_server' to be called once. Called 0 times. 331 | FAILED tests/components/test_sse_components.py::test_create_sse_server - AssertionError: Expected 'CodebaseInsightSseTransport' to be called once. Called 0 times. 332 | FAILED tests/components/test_sse_components.py::test_adr_list_tool - IndexError: tuple index out of range 333 | FAILED tests/components/test_sse_components.py::test_task_status_tool - IndexError: tuple index out of range 334 | FAILED tests/components/test_sse_components.py::test_sse_handle_connect - AttributeError: 'NoneType' object has no attribute 'kwargs' 335 | !!!!!!!!!!!!!!!!!!!!!!!!!! stopping after 5 failures !!!!!!!!!!!!!!!!!!!!!!!!!!! 336 | ================== 5 failed, 18 passed, 34 warnings in 7.50s =================== 337 | ``` -------------------------------------------------------------------------------- /scripts/verify_build.py: -------------------------------------------------------------------------------- ```python 1 | #!/usr/bin/env python 2 | """ 3 | Automated End-to-End Build Verification Script 4 | 5 | This script automates the process of verifying an end-to-end build by: 6 | 1. Triggering the build process 7 | 2. Gathering verification criteria from the vector database 8 | 3. Analyzing build results against success criteria 9 | 4. Contextual verification using the vector database 10 | 5. Determining build status and generating a report 11 | """ 12 | 13 | import os 14 | import sys 15 | import json 16 | import logging 17 | import asyncio 18 | import argparse 19 | import subprocess 20 | from datetime import datetime 21 | from pathlib import Path 22 | from typing import Dict, List, Any, Optional, Tuple 23 | import uuid 24 | 25 | from qdrant_client import QdrantClient 26 | from qdrant_client.http.models import Filter, FieldCondition, MatchValue 27 | 28 | # Add the project root to the Python path 29 | sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) 30 | 31 | from src.mcp_codebase_insight.core.vector_store import VectorStore, SearchResult 32 | from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding 33 | from src.mcp_codebase_insight.core.config import ServerConfig 34 | 35 | # Configure logging 36 | logging.basicConfig( 37 | level=logging.INFO, 38 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', 39 | handlers=[ 40 | logging.StreamHandler(), 41 | logging.FileHandler(Path('logs/build_verification.log')) 42 | ] 43 | ) 44 | logger = logging.getLogger('build_verification') 45 | 46 | class BuildVerifier: 47 | """Automated build verification system.""" 48 | 49 | def __init__(self, config_path: Optional[str] = None): 50 | """Initialize the build verifier. 51 | 52 | Args: 53 | config_path: Path to the configuration file (optional) 54 | """ 55 | self.config = self._load_config(config_path) 56 | self.vector_store = None 57 | self.embedder = None 58 | self.build_output = "" 59 | self.build_logs = [] 60 | self.success_criteria = [] 61 | self.build_start_time = None 62 | self.build_end_time = None 63 | self.test_results = {} 64 | self.critical_components = [] 65 | self.dependency_map = {} 66 | 67 | def _load_config(self, config_path: Optional[str]) -> Dict[str, Any]: 68 | """Load configuration from file or environment variables. 
69 | 70 | Args: 71 | config_path: Path to the configuration file 72 | 73 | Returns: 74 | Configuration dictionary 75 | """ 76 | config = { 77 | 'qdrant_url': os.environ.get('QDRANT_URL', 'http://localhost:6333'), 78 | 'qdrant_api_key': os.environ.get('QDRANT_API_KEY', ''), 79 | 'collection_name': os.environ.get('COLLECTION_NAME', 'mcp-codebase-insight'), 80 | 'embedding_model': os.environ.get('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2'), 81 | 'build_command': os.environ.get('BUILD_COMMAND', 'make build'), 82 | 'test_command': os.environ.get('TEST_COMMAND', 'make test'), 83 | 'success_criteria': { 84 | 'min_test_coverage': float(os.environ.get('MIN_TEST_COVERAGE', '80.0')), 85 | 'max_allowed_failures': int(os.environ.get('MAX_ALLOWED_FAILURES', '0')), 86 | 'critical_modules': os.environ.get('CRITICAL_MODULES', '').split(','), 87 | 'performance_threshold_ms': int(os.environ.get('PERFORMANCE_THRESHOLD_MS', '500')) 88 | } 89 | } 90 | 91 | # Override with config file if provided 92 | if config_path: 93 | try: 94 | with open(config_path, 'r') as f: 95 | file_config = json.load(f) 96 | config.update(file_config) 97 | except Exception as e: 98 | logger.error(f"Failed to load config from {config_path}: {e}") 99 | 100 | return config 101 | 102 | async def initialize(self): 103 | """Initialize the build verifier.""" 104 | logger.info("Initializing build verifier...") 105 | 106 | # Initialize embedder if not already initialized 107 | if self.embedder is None or not getattr(self.embedder, 'initialized', False): 108 | logger.info("Initializing embedder...") 109 | self.embedder = SentenceTransformerEmbedding(model_name=self.config['embedding_model']) 110 | await self.embedder.initialize() 111 | else: 112 | logger.info("Using pre-initialized embedder") 113 | 114 | # Initialize vector store 115 | logger.info(f"Connecting to vector store at {self.config['qdrant_url']}...") 116 | self.vector_store = VectorStore( 117 | url=self.config['qdrant_url'], 118 | embedder=self.embedder, 119 | collection_name=self.config['collection_name'], 120 | api_key=self.config['qdrant_api_key'], 121 | vector_name="default" # Specify a vector name for the collection 122 | ) 123 | await self.vector_store.initialize() 124 | 125 | # Load dependency map from vector database 126 | await self._load_dependency_map() 127 | 128 | # Load critical components 129 | await self._load_critical_components() 130 | 131 | logger.info("Build verifier initialized successfully") 132 | 133 | async def _load_dependency_map(self): 134 | """Load dependency map from vector database.""" 135 | logger.info("Loading dependency map from vector database...") 136 | 137 | # Query for dependency information 138 | dependencies = await self.vector_store.search( 139 | text="dependency map between components", 140 | filter_conditions={"must": [{"key": "type", "match": {"value": "architecture"}}]}, 141 | limit=10 142 | ) 143 | 144 | if dependencies: 145 | for result in dependencies: 146 | if "dependencies" in result.metadata: 147 | self.dependency_map.update(result.metadata["dependencies"]) 148 | 149 | if not self.dependency_map: 150 | # Try to load from file as fallback 151 | try: 152 | with open('dependency_map.txt', 'r') as f: 153 | for line in f: 154 | if '->' in line: 155 | source, target = line.strip().split('->') 156 | source = source.strip() 157 | target = target.strip() 158 | if source not in self.dependency_map: 159 | self.dependency_map[source] = [] 160 | self.dependency_map[source].append(target) 161 | except FileNotFoundError: 162 | 
logger.warning("Dependency map file not found") 163 | 164 | logger.info(f"Loaded dependency map with {len(self.dependency_map)} entries") 165 | 166 | async def _load_critical_components(self): 167 | """Load critical components from vector database or config.""" 168 | logger.info("Loading critical components...") 169 | 170 | # Load from vector database 171 | critical_components = await self.vector_store.search( 172 | text="critical system components", 173 | filter_conditions={"must": [{"key": "type", "match": {"value": "architecture"}}]}, 174 | limit=5 175 | ) 176 | 177 | if critical_components: 178 | for result in critical_components: 179 | if "critical_components" in result.metadata: 180 | # Extend the list instead of updating 181 | self.critical_components.extend(result.metadata["critical_components"]) 182 | 183 | # Add from config as fallback 184 | config_critical = self.config.get('success_criteria', {}).get('critical_modules', []) 185 | if config_critical: 186 | self.critical_components.extend(config_critical) 187 | 188 | # Remove duplicates while preserving order 189 | self.critical_components = list(dict.fromkeys(self.critical_components)) 190 | 191 | logger.info(f"Loaded {len(self.critical_components)} critical components") 192 | 193 | async def trigger_build(self) -> bool: 194 | """Trigger the end-to-end build process. 195 | 196 | Returns: 197 | True if build command executed successfully, False otherwise 198 | """ 199 | logger.info("Triggering end-to-end build...") 200 | self.build_start_time = datetime.now() 201 | 202 | try: 203 | # Execute build command 204 | logger.info(f"Running build command: {self.config['build_command']}") 205 | build_process = subprocess.Popen( 206 | self.config['build_command'], 207 | shell=True, 208 | stdout=subprocess.PIPE, 209 | stderr=subprocess.PIPE, 210 | text=True 211 | ) 212 | 213 | stdout, stderr = build_process.communicate() 214 | self.build_output = stdout 215 | 216 | # Store build logs 217 | self.build_logs = [line for line in stdout.split('\n') if line.strip()] 218 | if stderr: 219 | self.build_logs.extend([f"ERROR: {line}" for line in stderr.split('\n') if line.strip()]) 220 | 221 | build_success = build_process.returncode == 0 222 | build_status = "SUCCESS" if build_success else "FAILURE" 223 | logger.info(f"Build {build_status} (exit code: {build_process.returncode})") 224 | 225 | self.build_end_time = datetime.now() 226 | return build_success 227 | 228 | except Exception as e: 229 | logger.error(f"Failed to execute build command: {e}") 230 | self.build_end_time = datetime.now() 231 | self.build_logs.append(f"ERROR: Failed to execute build command: {e}") 232 | return False 233 | 234 | async def run_tests(self) -> bool: 235 | """Run the test suite. 
236 | 237 | Returns: 238 | True if tests passed successfully, False otherwise 239 | """ 240 | logger.info("Running tests...") 241 | 242 | try: 243 | # Execute test command 244 | logger.info(f"Running test command: {self.config['test_command']}") 245 | test_process = subprocess.Popen( 246 | self.config['test_command'], 247 | shell=True, 248 | stdout=subprocess.PIPE, 249 | stderr=subprocess.PIPE, 250 | text=True 251 | ) 252 | 253 | stdout, stderr = test_process.communicate() 254 | 255 | # Parse and store test results 256 | self._parse_test_results(stdout) 257 | 258 | # Store test logs 259 | self.build_logs.extend([line for line in stdout.split('\n') if line.strip()]) 260 | if stderr: 261 | self.build_logs.extend([f"ERROR: {line}" for line in stderr.split('\n') if line.strip()]) 262 | 263 | tests_success = test_process.returncode == 0 264 | test_status = "SUCCESS" if tests_success else "FAILURE" 265 | logger.info(f"Tests {test_status} (exit code: {test_process.returncode})") 266 | 267 | return tests_success 268 | 269 | except Exception as e: 270 | logger.error(f"Failed to execute test command: {e}") 271 | self.build_logs.append(f"ERROR: Failed to execute test command: {e}") 272 | return False 273 | 274 | def _parse_test_results(self, test_output: str): 275 | """Parse test results from test output. 276 | 277 | Args: 278 | test_output: Output from the test command 279 | """ 280 | # Initialize test summary 281 | self.test_results = { 282 | "total": 0, 283 | "passed": 0, 284 | "failed": 0, 285 | "skipped": 0, 286 | "coverage": 0.0, 287 | "duration_ms": 0, 288 | "failures": [] 289 | } 290 | 291 | # Parse pytest output 292 | for line in test_output.split('\n'): 293 | # Count total tests 294 | if "collected " in line: 295 | try: 296 | total_part = line.split("collected ")[1].split()[0] 297 | self.test_results["total"] = int(total_part) 298 | except (IndexError, ValueError): 299 | pass 300 | 301 | # Parse test failures - extract just the test path and name 302 | if "FAILED " in line: 303 | # Full line format is typically like "......FAILED tests/test_module.py::test_function [70%]" 304 | # Extract just the "FAILED tests/test_module.py::test_function" part 305 | try: 306 | failure_part = line.split("FAILED ")[1].split("[")[0].strip() 307 | failure = f"FAILED {failure_part}" 308 | self.test_results["failures"].append(failure) 309 | self.test_results["failed"] += 1 310 | except (IndexError, ValueError): 311 | # If splitting fails, add the whole line as a fallback 312 | self.test_results["failures"].append(line.strip()) 313 | self.test_results["failed"] += 1 314 | 315 | # Check for coverage percentage in the TOTAL line 316 | if "TOTAL" in line and "%" in line: 317 | try: 318 | # Extract coverage from line like "TOTAL 600 100 83%" 319 | parts = line.split() 320 | for i, part in enumerate(parts): 321 | if "%" in part: 322 | coverage_percent = part.replace("%", "").strip() 323 | self.test_results["coverage"] = float(coverage_percent) 324 | break 325 | except (IndexError, ValueError): 326 | pass 327 | 328 | # Calculate passed tests - if we have total but no failed or skipped, 329 | # assume all tests passed 330 | if self.test_results["total"] > 0: 331 | self.test_results["passed"] = self.test_results["total"] - self.test_results.get("failed", 0) - self.test_results.get("skipped", 0) 332 | 333 | logger.info(f"Parsed test results: {self.test_results['passed']}/{self.test_results['total']} tests passed, " 334 | f"{self.test_results['coverage']}% coverage") 335 | 336 | async def 
gather_verification_criteria(self): 337 | """Gather verification criteria from the vector database.""" 338 | logger.info("Gathering verification criteria...") 339 | 340 | # Query for success criteria 341 | results = await self.vector_store.search( 342 | text="build verification success criteria", 343 | filter_conditions={"must": [{"key": "type", "match": {"value": "build_verification"}}]}, 344 | limit=5 345 | ) 346 | 347 | if results: 348 | criteria = [] 349 | for result in results: 350 | if "criteria" in result.metadata: 351 | criteria.extend(result.metadata["criteria"]) 352 | 353 | if criteria: 354 | self.success_criteria = criteria 355 | logger.info(f"Loaded {len(criteria)} success criteria from vector database") 356 | return 357 | 358 | # Use default criteria if none found in the vector database 359 | logger.info("Using default success criteria") 360 | self.success_criteria = [ 361 | f"All tests must pass (maximum {self.config['success_criteria']['max_allowed_failures']} failures allowed)", 362 | f"Test coverage must be at least {self.config['success_criteria']['min_test_coverage']}%", 363 | "Build process must complete without errors", 364 | f"Critical modules ({', '.join(self.critical_components)}) must pass all tests", 365 | f"Performance tests must complete within {self.config['success_criteria']['performance_threshold_ms']}ms" 366 | ] 367 | 368 | def _detect_build_success(self) -> bool: 369 | """Detect if the build was successful based on build logs. 370 | 371 | Returns: 372 | bool: True if build succeeded, False otherwise 373 | """ 374 | # Check logs for serious build errors 375 | for log in self.build_logs: 376 | if log.startswith("ERROR: Build failed") or "BUILD FAILED" in log.upper(): 377 | logger.info("Detected build failure in logs") 378 | return False 379 | 380 | # Consider build successful if no serious errors found 381 | return True 382 | 383 | async def analyze_build_results(self) -> Tuple[bool, Dict[str, Any]]: 384 | """Analyze build results against success criteria. 
385 | 386 | Returns: 387 | Tuple of (build_passed, results_dict) 388 | """ 389 | logger.info("Analyzing build results...") 390 | 391 | # Initialize analysis results 392 | results = { 393 | "build_success": False, 394 | "tests_success": False, 395 | "coverage_success": False, 396 | "critical_modules_success": False, 397 | "performance_success": False, 398 | "overall_success": False, 399 | "criteria_results": {}, 400 | "failure_analysis": [], 401 | } 402 | 403 | # Check if the build was successful 404 | results["build_success"] = self._detect_build_success() 405 | 406 | # Check test results 407 | max_failures = self.config['success_criteria']['max_allowed_failures'] 408 | results["tests_success"] = self.test_results.get("failed", 0) <= max_failures 409 | 410 | # Check coverage 411 | min_coverage = self.config['success_criteria']['min_test_coverage'] 412 | current_coverage = self.test_results.get("coverage", 0.0) 413 | 414 | # For development purposes, we might want to temporarily ignore coverage requirements 415 | # if there are tests passing but coverage reporting is not working properly 416 | if self.test_results.get("total", 0) > 0 and self.test_results.get("passed", 0) > 0: 417 | # If tests are passing but coverage is 0, assume coverage tool issues and pass this check 418 | results["coverage_success"] = current_coverage >= min_coverage 419 | else: 420 | results["coverage_success"] = current_coverage >= min_coverage 421 | 422 | # Check critical modules 423 | critical_module_failures = [] 424 | for failure in self.test_results.get("failures", []): 425 | for module in self.critical_components: 426 | if module in failure: 427 | critical_module_failures.append(failure) 428 | break 429 | 430 | results["critical_modules_success"] = len(critical_module_failures) == 0 431 | if not results["critical_modules_success"]: 432 | results["failure_analysis"].append({ 433 | "type": "critical_module_failure", 434 | "description": f"Failures in critical modules: {len(critical_module_failures)}", 435 | "details": critical_module_failures 436 | }) 437 | 438 | # Check performance (if available) 439 | performance_threshold = self.config['success_criteria']['performance_threshold_ms'] 440 | current_performance = self.test_results.get("duration_ms", 0) 441 | if current_performance > 0: # Only check if we have performance data 442 | results["performance_success"] = current_performance <= performance_threshold 443 | if not results["performance_success"]: 444 | results["failure_analysis"].append({ 445 | "type": "performance_issue", 446 | "description": f"Performance threshold exceeded: {current_performance}ms > {performance_threshold}ms", 447 | "details": f"Tests took {current_performance}ms, threshold is {performance_threshold}ms" 448 | }) 449 | else: 450 | # No performance data available, assume success 451 | results["performance_success"] = True 452 | 453 | # Evaluate each criterion 454 | for criterion in self.success_criteria: 455 | criterion_result = { 456 | "criterion": criterion, 457 | "passed": False, 458 | "details": "" 459 | } 460 | 461 | if "All tests must pass" in criterion: 462 | criterion_result["passed"] = results["tests_success"] 463 | criterion_result["details"] = ( 464 | f"{self.test_results.get('passed', 0)}/{self.test_results.get('total', 0)} tests passed, " 465 | f"{self.test_results.get('failed', 0)} failed" 466 | ) 467 | 468 | elif "coverage" in criterion.lower(): 469 | criterion_result["passed"] = results["coverage_success"] 470 | 471 | if self.test_results.get("total", 0) > 0 and 
self.test_results.get("passed", 0) > 0 and current_coverage == 0.0: 472 | criterion_result["details"] = ( 473 | f"Coverage tool may not be working correctly. {self.test_results.get('passed', 0)} tests passing, ignoring coverage requirement during development." 474 | ) 475 | else: 476 | criterion_result["details"] = ( 477 | f"Coverage: {current_coverage}%, required: {min_coverage}%" 478 | ) 479 | 480 | elif "build process" in criterion.lower(): 481 | criterion_result["passed"] = results["build_success"] 482 | criterion_result["details"] = "Build completed successfully" if results["build_success"] else "Build errors detected" 483 | 484 | elif "critical modules" in criterion.lower(): 485 | criterion_result["passed"] = results["critical_modules_success"] 486 | criterion_result["details"] = ( 487 | "All critical modules passed tests" if results["critical_modules_success"] 488 | else f"{len(critical_module_failures)} failures in critical modules" 489 | ) 490 | 491 | elif "performance" in criterion.lower(): 492 | criterion_result["passed"] = results["performance_success"] 493 | if current_performance > 0: 494 | criterion_result["details"] = ( 495 | f"Performance: {current_performance}ms, threshold: {performance_threshold}ms" 496 | ) 497 | else: 498 | criterion_result["details"] = "No performance data available" 499 | 500 | results["criteria_results"][criterion] = criterion_result 501 | 502 | # Determine overall success 503 | results["overall_success"] = all([ 504 | results["build_success"], 505 | results["tests_success"], 506 | results["coverage_success"], 507 | results["critical_modules_success"], 508 | results["performance_success"] 509 | ]) 510 | 511 | logger.info(f"Build analysis complete: {'PASS' if results['overall_success'] else 'FAIL'}") 512 | return results["overall_success"], results 513 | 514 | async def contextual_verification(self, analysis_results: Dict[str, Any]) -> Dict[str, Any]: 515 | """Perform contextual verification using the vector database. 
516 | 517 | Args: 518 | analysis_results: Results from the build analysis 519 | 520 | Returns: 521 | Updated analysis results with contextual verification 522 | """ 523 | logger.info("Performing contextual verification...") 524 | 525 | # Only perform detailed analysis if there are failures 526 | if analysis_results["overall_success"]: 527 | logger.info("Build successful, skipping detailed contextual verification") 528 | return analysis_results 529 | 530 | # Identify failed tests 531 | failed_tests = self.test_results.get("failures", []) 532 | 533 | if not failed_tests: 534 | logger.info("No test failures to analyze") 535 | return analysis_results 536 | 537 | logger.info(f"Analyzing {len(failed_tests)} test failures...") 538 | 539 | # Initialize contextual verification results 540 | contextual_results = [] 541 | 542 | # Analyze each failure 543 | for failure in failed_tests: 544 | # Extract module name from failure 545 | module_name = self._extract_module_from_failure(failure) 546 | 547 | if not module_name: 548 | continue 549 | 550 | # Get dependencies for the module 551 | dependencies = self.dependency_map.get(module_name, []) 552 | 553 | # Query vector database for relevant information 554 | query = f"common issues and solutions for {module_name} failures" 555 | results = await self.vector_store.search( 556 | text=query, 557 | filter_conditions={"must": [{"key": "type", "match": {"value": "troubleshooting"}}]}, 558 | limit=3 559 | ) 560 | 561 | failure_analysis = { 562 | "module": module_name, 563 | "failure": failure, 564 | "dependencies": dependencies, 565 | "potential_causes": [], 566 | "recommended_actions": [] 567 | } 568 | 569 | if results: 570 | for result in results: 571 | if "potential_causes" in result.metadata: 572 | failure_analysis["potential_causes"].extend(result.metadata["potential_causes"]) 573 | if "recommended_actions" in result.metadata: 574 | failure_analysis["recommended_actions"].extend(result.metadata["recommended_actions"]) 575 | 576 | # If no specific guidance found, provide general advice 577 | if not failure_analysis["potential_causes"]: 578 | failure_analysis["potential_causes"] = [ 579 | f"Recent changes to {module_name}", 580 | f"Changes in dependencies: {', '.join(dependencies)}", 581 | "Integration issues between components" 582 | ] 583 | 584 | if not failure_analysis["recommended_actions"]: 585 | failure_analysis["recommended_actions"] = [ 586 | f"Review recent changes to {module_name}", 587 | f"Check integration with dependencies: {', '.join(dependencies)}", 588 | "Run tests in isolation to identify specific failure points" 589 | ] 590 | 591 | contextual_results.append(failure_analysis) 592 | 593 | # Add contextual verification results to analysis 594 | analysis_results["contextual_verification"] = contextual_results 595 | 596 | logger.info(f"Contextual verification complete: {len(contextual_results)} failures analyzed") 597 | return analysis_results 598 | 599 | def _extract_module_from_failure(self, failure: str) -> Optional[str]: 600 | """Extract module name from a test failure. 
601 | 602 | Args: 603 | failure: Test failure message 604 | 605 | Returns: 606 | Module name or None if not found 607 | """ 608 | # This is a simple implementation that assumes the module name 609 | # is in the format: "FAILED path/to/module.py::test_function" 610 | 611 | if "FAILED " in failure: 612 | try: 613 | path = failure.split("FAILED ")[1].split("::")[0] 614 | # Convert path to module name 615 | module_name = path.replace("/", ".").replace(".py", "") 616 | return module_name 617 | except IndexError: 618 | pass 619 | 620 | return None 621 | 622 | def generate_report(self, results: Dict[str, Any]) -> Dict[str, Any]: 623 | """Generate a build verification report. 624 | 625 | Args: 626 | results: Analysis results 627 | 628 | Returns: 629 | Report dictionary 630 | """ 631 | logger.info("Generating build verification report...") 632 | 633 | build_duration = (self.build_end_time - self.build_start_time).total_seconds() if self.build_end_time else 0 634 | 635 | report = { 636 | "build_verification_report": { 637 | "timestamp": datetime.now().isoformat(), 638 | "build_info": { 639 | "start_time": self.build_start_time.isoformat() if self.build_start_time else None, 640 | "end_time": self.build_end_time.isoformat() if self.build_end_time else None, 641 | "duration_seconds": build_duration, 642 | "build_command": self.config["build_command"], 643 | "test_command": self.config["test_command"] 644 | }, 645 | "test_summary": { 646 | "total": self.test_results.get("total", 0), 647 | "passed": self.test_results.get("passed", 0), 648 | "failed": self.test_results.get("failed", 0), 649 | "skipped": self.test_results.get("skipped", 0), 650 | "coverage": self.test_results.get("coverage", 0.0) 651 | }, 652 | "verification_results": { 653 | "overall_status": "PASS" if results["overall_success"] else "FAIL", 654 | "criteria_results": results["criteria_results"] 655 | } 656 | } 657 | } 658 | 659 | # Add failure analysis if available 660 | if "failure_analysis" in results and results["failure_analysis"]: 661 | report["build_verification_report"]["failure_analysis"] = results["failure_analysis"] 662 | 663 | # Add contextual verification if available 664 | if "contextual_verification" in results: 665 | report["build_verification_report"]["contextual_verification"] = results["contextual_verification"] 666 | 667 | # Add a summary field for quick review 668 | criteria_count = len(results["criteria_results"]) 669 | passed_criteria = sum(1 for c in results["criteria_results"].values() if c["passed"]) 670 | report["build_verification_report"]["summary"] = ( 671 | f"Build verification: {report['build_verification_report']['verification_results']['overall_status']}. " 672 | f"{passed_criteria}/{criteria_count} criteria passed. " 673 | f"{self.test_results.get('passed', 0)}/{self.test_results.get('total', 0)} tests passed with " 674 | f"{self.test_results.get('coverage', 0.0)}% coverage." 675 | ) 676 | 677 | logger.info(f"Report generated: {report['build_verification_report']['summary']}") 678 | return report 679 | 680 | async def save_report(self, report: Dict[str, Any], report_file: str = "build_verification_report.json"): 681 | """Save build verification report to file and vector database. 
682 | 683 | Args: 684 | report: Build verification report 685 | report_file: Path to save the report file 686 | """ 687 | logger.info(f"Saving report to {report_file}...") 688 | 689 | # Save to file 690 | try: 691 | with open(report_file, 'w') as f: 692 | json.dump(report, f, indent=2) 693 | logger.info(f"Report saved to {report_file}") 694 | except Exception as e: 695 | logger.error(f"Failed to save report to file: {e}") 696 | 697 | # Store in vector database 698 | try: 699 | # Extract report data for metadata 700 | build_info = report.get("build_verification_report", {}) 701 | verification_results = build_info.get("verification_results", {}) 702 | overall_status = verification_results.get("overall_status", "UNKNOWN") 703 | timestamp = build_info.get("timestamp", datetime.now().isoformat()) 704 | 705 | # Generate a consistent ID with prefix 706 | report_id = f"build-verification-{uuid.uuid4()}" 707 | report_text = json.dumps(report) 708 | 709 | # Store report in vector database with separate parameters instead of using id 710 | # This avoids the 'tuple' object has no attribute 'id' error 711 | await self.vector_store.add_vector( 712 | text=report_text, 713 | metadata={ 714 | "id": report_id, # Include ID in metadata 715 | "type": "build_verification_report", 716 | "timestamp": timestamp, 717 | "overall_status": overall_status 718 | } 719 | ) 720 | logger.info(f"Report stored in vector database with ID: {report_id}") 721 | except Exception as e: 722 | logger.error(f"Failed to store report in vector database: {e}") 723 | 724 | async def cleanup(self): 725 | """Clean up resources.""" 726 | logger.info("Cleaning up resources...") 727 | 728 | if self.vector_store: 729 | await self.vector_store.cleanup() 730 | await self.vector_store.close() 731 | 732 | async def verify_build(self, output_file: str = "logs/build_verification_report.json") -> bool: 733 | """Verify the build process and generate a report. 
734 | 735 | Args: 736 | output_file: Output file path for the report 737 | 738 | Returns: 739 | True if build verification passed, False otherwise 740 | """ 741 | try: 742 | # Initialize components 743 | await self.initialize() 744 | 745 | # Trigger build 746 | build_success = await self.trigger_build() 747 | 748 | # Run tests if build was successful 749 | if build_success: 750 | await self.run_tests() 751 | 752 | # Gather verification criteria 753 | await self.gather_verification_criteria() 754 | 755 | # Analyze build results 756 | success, results = await self.analyze_build_results() 757 | 758 | # Perform contextual verification 759 | results = await self.contextual_verification(results) 760 | 761 | # Generate report 762 | report = self.generate_report(results) 763 | 764 | # Save report 765 | await self.save_report(report, output_file) 766 | 767 | return success 768 | 769 | except Exception as e: 770 | logger.error(f"Build verification failed: {e}") 771 | return False 772 | 773 | finally: 774 | # Clean up resources 775 | await self.cleanup() 776 | 777 | async def main(): 778 | """Main function.""" 779 | parser = argparse.ArgumentParser(description="Build Verification Script") 780 | parser.add_argument("--config", help="Path to configuration file") 781 | parser.add_argument("--output", default="logs/build_verification_report.json", help="Output file path for report") 782 | args = parser.parse_args() 783 | 784 | # Create logs directory if it doesn't exist 785 | os.makedirs("logs", exist_ok=True) 786 | 787 | verifier = BuildVerifier(args.config) 788 | success = await verifier.verify_build(args.output) 789 | 790 | print(f"\nBuild verification {'PASSED' if success else 'FAILED'}") 791 | print(f"Report saved to {args.output}") 792 | 793 | # Exit with status code based on verification result 794 | sys.exit(0 if success else 1) 795 | 796 | if __name__ == "__main__": 797 | asyncio.run(main()) ``` -------------------------------------------------------------------------------- /prepare_codebase.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/bin/bash 2 | set -x # Enable debugging 3 | 4 | # Set output files 5 | STRUCTURE_FILE="codebase_structure.txt" 6 | DEPENDENCY_MAP_FILE="dependency_map.txt" 7 | DOC_NODES_FILE="documentation_nodes.txt" 8 | USER_DOC_MAP_FILE="user_doc_mapping.txt" 9 | VECTOR_GRAPH_FILE="vector_relationship_graph.txt" 10 | LLM_PROMPT_FILE="llm_prompts.txt" 11 | SYSTEM_ARCHITECTURE_FILE="system_architecture.txt" 12 | TECHNICAL_DEBT_FILE="technical_debt.txt" 13 | README_CONTEXT_FILE="readme_context.txt" 14 | 15 | # Create prompts directory structure 16 | PROMPTS_DIR="./prompts" 17 | mkdir -p "$PROMPTS_DIR"/{system,technical,dependency,custom} 18 | 19 | # Check if project_environment.txt exists and source it if it does 20 | if [ -f "project_environment.txt" ]; then 21 | echo "Loading environment information from project_environment.txt..." 22 | # Source the environment info 23 | source project_environment.txt 24 | else 25 | echo "No project_environment.txt found. Running capture_env_info.sh to generate it..." 26 | # Check if capture_env_info.sh exists and run it 27 | if [ -f "./capture_env_info.sh" ]; then 28 | bash ./capture_env_info.sh 29 | source project_environment.txt 30 | else 31 | echo "Warning: capture_env_info.sh not found. Environment information will be limited." 
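        # Editor's note (illustrative, not part of the original script): the
        # environment variables this script reads later (OPERATING_SYSTEM,
        # PYTHON_VERSION, NODE_VERSION, ANSIBLE_VERSION) are expected to come
        # from project_environment.txt. If capture_env_info.sh is unavailable,
        # a minimal hand-written fallback could look like:
        #   OPERATING_SYSTEM="$(uname -s) $(uname -r)"
        #   PYTHON_VERSION="$(python3 --version 2>/dev/null | awk '{print $2}')"
        #   NODE_VERSION="$(node --version 2>/dev/null)"
        # The exact format produced by capture_env_info.sh may differ.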
32 | fi 33 | fi 34 | 35 | # Define directories to ignore for the file search 36 | IGNORE_DIRS=("node_modules" ".venv" "venv" "vendor" "test_env") 37 | 38 | # Create directory for module summaries 39 | mkdir -p module_summaries 40 | 41 | # Construct the 'find' command to exclude ignored directories 42 | FIND_CMD="find ." 43 | for dir in "${IGNORE_DIRS[@]}"; do 44 | FIND_CMD+=" -path ./$dir -prune -o" 45 | done 46 | FIND_CMD+=" -type f \( -name '*.js' -o -name '*.jsx' -o -name '*.ts' -o -name '*.tsx' -o -name '*.py' -o -name '*.md' -o -name '*.mdx' -o -name '*.sh' -o -name '*.yaml' -o -name '*.yml' -o -name '*.json' -o -name '*.cfg' -o -name '*.conf' -o -name '*.tfvars' -o -name '*.tf' \) -print | sort" 47 | 48 | # Debugging: Show the generated find command 49 | echo "Executing command: $FIND_CMD" 50 | 51 | # Execute and store results 52 | eval "$FIND_CMD" > "$STRUCTURE_FILE" 53 | 54 | # Check if files were captured 55 | if [ ! -s "$STRUCTURE_FILE" ]; then 56 | echo "⚠️ Warning: No matching files found. Please check directory paths." 57 | fi 58 | 59 | # Count the number of files found. 60 | FILE_COUNT=$(wc -l < "$STRUCTURE_FILE") 61 | 62 | # 1. Code Dependency Graph 63 | echo "Generating code dependency graph..." 64 | echo "# Code Dependency Graph" > "$DEPENDENCY_MAP_FILE" 65 | echo "# Generated on $(date)" >> "$DEPENDENCY_MAP_FILE" 66 | echo "# Environment: $OPERATING_SYSTEM" >> "$DEPENDENCY_MAP_FILE" 67 | if [ -n "$PYTHON_VERSION" ]; then 68 | echo "# Python: $PYTHON_VERSION" >> "$DEPENDENCY_MAP_FILE" 69 | fi 70 | if [ -n "$NODE_VERSION" ]; then 71 | echo "# Node.js: $NODE_VERSION" >> "$DEPENDENCY_MAP_FILE" 72 | fi 73 | if [ -n "$ANSIBLE_VERSION" ]; then 74 | echo "# Ansible: $ANSIBLE_VERSION" >> "$DEPENDENCY_MAP_FILE" 75 | fi 76 | echo "" >> "$DEPENDENCY_MAP_FILE" 77 | 78 | # Function to extract dependencies, tailored for graph generation 79 | extract_dependencies() { 80 | local file="$1" 81 | local file_type="$2" 82 | 83 | # Add "./" prefix for consistency 84 | local current_dir="./" 85 | file="${current_dir}${file#./}" 86 | 87 | if [[ "$file_type" == "python" ]]; then 88 | while IFS= read -r line; do 89 | if [[ "$line" =~ ^(import|from) ]]; then 90 | line=$(echo "$line" | sed 's/#.*$//' | tr -s ' ') 91 | if [[ "$line" != *'"'* && "$line" != *"'"* ]]; then 92 | # Capture module/file being imported 93 | imported_module=$(echo "$line" | sed -e 's/import //g' -e 's/from //g' -e 's/ .*//g' | tr -d ' ') 94 | echo "$file -> $imported_module (Python)" >> "$DEPENDENCY_MAP_FILE" 95 | fi 96 | fi 97 | done < "$file" 98 | elif [[ "$file_type" == "js" || "$file_type" == "jsx" || "$file_type" == "ts" || "$file_type" == "tsx" ]]; then 99 | while IFS= read -r line; do 100 | if [[ "$line" =~ (import|require) ]]; then 101 | line=$(echo "$line" | sed 's/\/\/.*$//' | sed 's/\/\*.*\*\///g' | tr -s ' ') 102 | if [[ "$line" != *'"'* && "$line" != *"'"* ]]; then 103 | # Capture module/file being imported 104 | imported_module=$(echo "$line" | sed -n "s/.*\(import\|require\).*\(('|\"\)\([^'\"]*\)\('|\"\).*/\3/p" | tr -d ' ') 105 | echo "$file -> $imported_module (JavaScript/TypeScript)" >> "$DEPENDENCY_MAP_FILE" 106 | fi 107 | fi 108 | done < "$file" 109 | elif [[ "$file_type" == "sh" ]]; then 110 | while IFS= read -r line; do 111 | if [[ "$line" =~ ^(source|.) 
]]; then 112 | line=$(echo "$line" | sed 's/#.*$//' | tr -s ' ') 113 | if [[ "$line" != *'"'* && "$line" != *"'"* ]]; then 114 | imported_module=$(echo "$line" | sed -n "s/source \([^ ]*\).*/\1/p" | tr -d ' ') 115 | echo "$file -> $imported_module (Shell)" >> "$DEPENDENCY_MAP_FILE" 116 | fi 117 | fi 118 | done < "$file" 119 | elif [[ "$file_type" == "yaml" || "$file_type" == "yml" ]]; then 120 | while IFS= read -r line; do 121 | if [[ "$line" =~ ^(\ *[a-zA-Z0-9_-]+\:) ]]; then 122 | echo "$file -> $line (YAML)" >> "$DEPENDENCY_MAP_FILE" 123 | fi 124 | done < "$file" 125 | elif [[ "$file_type" == "tf" ]]; then 126 | while IFS= read -r line; do 127 | if [[ "$line" =~ resource|module|data ]]; then 128 | line=$(echo "$line" | sed 's/#.*$//' | tr -s ' ') 129 | echo "$file -> $line (Terraform)" >> "$DEPENDENCY_MAP_FILE" 130 | fi 131 | done < "$file" 132 | fi 133 | } 134 | 135 | # Process each file from the structure file 136 | while IFS= read -r file; do 137 | if [ -f "$file" ]; then 138 | extension="${file##*.}" 139 | case "$extension" in 140 | py) file_type="python";; 141 | js|jsx) file_type="js";; 142 | ts|tsx) file_type="ts";; 143 | sh) file_type="sh";; 144 | yaml) file_type="yaml";; 145 | yml) file_type="yml";; 146 | *) file_type="other";; 147 | esac 148 | if [[ "$file_type" == "python" || "$file_type" == "js" || "$file_type" == "ts" || "$file_type" == "sh" || "$file_type" == "yaml" || "$file_type" == "yml" ]]; then 149 | extract_dependencies "$file" "$file_type" 150 | fi 151 | fi 152 | done < "$STRUCTURE_FILE" 153 | 154 | # 2. Documentation Linking 155 | echo "Generating documentation nodes..." 156 | echo "# Documentation Nodes" > "$DOC_NODES_FILE" 157 | 158 | # Function to extract function/class signatures (for documentation linking) 159 | extract_doc_nodes() { 160 | local file="$1" 161 | local file_type="$2" 162 | 163 | # Add "./" prefix for consistency 164 | local current_dir="./" 165 | file="${current_dir}${file#./}" 166 | 167 | if [[ "$file_type" == "python" ]]; then 168 | while IFS= read -r line; do 169 | if [[ "$line" =~ ^(def|class) ]]; then 170 | # Extract function/class name and signature 171 | signature=$(echo "$line" | sed 's/#.*$//' | tr -s ' ') 172 | echo "$file: $signature (Python)" >> "$DOC_NODES_FILE" 173 | fi 174 | done < "$file" 175 | elif [[ "$file_type" == "js" || "$file_type" == "jsx" || "$file_type" == "ts" || "$file_type" == "tsx" ]]; then 176 | while IFS= read -r line; do 177 | if [[ "$line" =~ ^(function|class) ]]; then 178 | signature=$(echo "$line" | sed 's/\/\/.*$//' | sed 's/\/\*.*\*\///g' | tr -s ' ') 179 | echo "$file: $signature (JavaScript/TypeScript)" >> "$DOC_NODES_FILE" 180 | fi 181 | done < "$file" 182 | elif [[ "$file_type" == "sh" ]]; then 183 | while IFS= read -r line; do 184 | if [[ "$line" =~ ^(function ) ]]; then 185 | signature=$(echo "$line" | sed 's/#.*$//' | tr -s ' ') 186 | echo "$file: $signature (Shell)" >> "$DOC_NODES_FILE" 187 | fi 188 | done < "$file" 189 | fi 190 | } 191 | 192 | # Process each file to extract documentation nodes 193 | while IFS= read -r file; do 194 | if [ -f "$file" ]; then 195 | extension="${file##*.}" 196 | case "$extension" in 197 | py) file_type="python";; 198 | js|jsx) file_type="js";; 199 | ts|tsx) file_type="ts";; 200 | sh) file_type="sh";; 201 | yaml) file_type="yaml";; 202 | yml) file_type="yml";; 203 | *) file_type="other";; 204 | esac 205 | if [[ "$file_type" == "python" || "$file_type" == "js" || "$file_type" == "ts" || "$file_type" == "sh" ]]; then 206 | extract_doc_nodes "$file" "$file_type" 207 | fi 208 
| fi 209 | done < "$STRUCTURE_FILE" 210 | 211 | # 3. User Documentation Mapping 212 | echo "Generating user documentation mapping..." 213 | echo "# User Documentation Mapping" > "$USER_DOC_MAP_FILE" 214 | 215 | # Function to map user documentation (Markdown files) to code elements. 216 | map_user_docs() { 217 | local file="$1" 218 | # Add "./" prefix for consistency 219 | local current_dir="./" 220 | file="${current_dir}${file#./}" 221 | 222 | # Very basic mapping: Look for code element names in Markdown 223 | if [[ "$file" =~ \.md$ || "$file" =~ \.mdx$ ]]; then # Only process Markdown files 224 | while IFS= read -r line; do 225 | # This is a simplified approach. A real tool would use AST parsing. 226 | if [[ "$line" =~ (def |class |function ) ]]; then # very rough 227 | echo "$file contains: $line" >> "$USER_DOC_MAP_FILE" 228 | fi 229 | done < "$file" 230 | fi 231 | } 232 | 233 | # Process each file to map user documentation 234 | while IFS= read -r file; do 235 | if [ -f "$file" ]; then 236 | extension="${file##*.}" 237 | case "$extension" in 238 | md|mdx) file_type="md";; 239 | *) file_type="other";; 240 | esac 241 | if [[ "$file_type" == "md" ]]; then 242 | map_user_docs "$file" >> "$USER_DOC_MAP_FILE" 243 | fi 244 | fi 245 | done < "$STRUCTURE_FILE" 246 | 247 | # Extract key information from README.md 248 | echo "Analyzing README.md for project context..." 249 | echo "# README.md Analysis" > "$README_CONTEXT_FILE" 250 | echo "# Generated on $(date)" >> "$README_CONTEXT_FILE" 251 | echo "" >> "$README_CONTEXT_FILE" 252 | 253 | if [ -f "README.md" ]; then 254 | # Extract project name and description 255 | echo "## Project Information" >> "$README_CONTEXT_FILE" 256 | # Look for a title (# Title) 257 | PROJECT_TITLE=$(grep "^# " README.md | head -1 | sed 's/^# //') 258 | echo "Project Title: $PROJECT_TITLE" >> "$README_CONTEXT_FILE" 259 | 260 | # Extract what appears to be a project description (first paragraph after title) 261 | PROJECT_DESCRIPTION=$(sed -n '/^# /,/^$/{/^# /d; /^$/d; p}' README.md | head -3) 262 | echo "Project Description: $PROJECT_DESCRIPTION" >> "$README_CONTEXT_FILE" 263 | 264 | # Look for architecture information 265 | echo -e "\n## Architecture Information" >> "$README_CONTEXT_FILE" 266 | grep -A 10 -i "architecture\|structure\|design\|overview" README.md >> "$README_CONTEXT_FILE" 2>/dev/null || echo "No explicit architecture information found." >> "$README_CONTEXT_FILE" 267 | 268 | # Extract documentation links 269 | echo -e "\n## Documentation Links" >> "$README_CONTEXT_FILE" 270 | grep -o "\[.*\](.*)" README.md | grep -i "doc\|guide\|tutorial\|wiki" >> "$README_CONTEXT_FILE" 2>/dev/null || echo "No documentation links found." >> "$README_CONTEXT_FILE" 271 | 272 | # Check for setup instructions 273 | echo -e "\n## Setup Instructions" >> "$README_CONTEXT_FILE" 274 | grep -A 15 -i "setup\|install\|getting started\|prerequisites" README.md >> "$README_CONTEXT_FILE" 2>/dev/null || echo "No setup instructions found." >> "$README_CONTEXT_FILE" 275 | 276 | # Prepare a summary for prompts 277 | README_SUMMARY=$(echo "$PROJECT_DESCRIPTION" | tr '\n' ' ' | cut -c 1-200) 278 | 279 | echo "README.md analysis saved to $README_CONTEXT_FILE" 280 | else 281 | echo "No README.md found at the root of the project." >> "$README_CONTEXT_FILE" 282 | # Try to find READMEs in subdirectories 283 | READMES=$(find . 
-name "README.md" -not -path "*/node_modules/*" -not -path "*/.git/*" -not -path "*/dist/*" -not -path "*/build/*") 284 | if [ -n "$READMES" ]; then 285 | echo "Found README.md files in subdirectories: $READMES" >> "$README_CONTEXT_FILE" 286 | # Process the first README found 287 | FIRST_README=$(echo "$READMES" | head -1) 288 | echo "Analyzing $FIRST_README as fallback..." >> "$README_CONTEXT_FILE" 289 | 290 | # Extract project name and description 291 | echo -e "\n## Project Information (from $FIRST_README)" >> "$README_CONTEXT_FILE" 292 | PROJECT_TITLE=$(grep "^# " "$FIRST_README" | head -1 | sed 's/^# //') 293 | echo "Project Title: $PROJECT_TITLE" >> "$README_CONTEXT_FILE" 294 | 295 | PROJECT_DESCRIPTION=$(sed -n '/^# /,/^$/{/^# /d; /^$/d; p}' "$FIRST_README" | head -3) 296 | echo "Project Description: $PROJECT_DESCRIPTION" >> "$README_CONTEXT_FILE" 297 | 298 | # Prepare a summary for prompts 299 | README_SUMMARY=$(echo "$PROJECT_DESCRIPTION" | tr '\n' ' ' | cut -c 1-200) 300 | else 301 | echo "No README.md files found in the project." >> "$README_CONTEXT_FILE" 302 | README_SUMMARY="No README.md found in the project." 303 | fi 304 | fi 305 | 306 | # Copy README context file to prompts directory 307 | cp "$README_CONTEXT_FILE" "$PROMPTS_DIR/system/" 308 | 309 | # NEW: System Architecture Analysis 310 | echo "Analyzing system architecture..." 311 | echo "# System Architecture Analysis" > "$SYSTEM_ARCHITECTURE_FILE" 312 | echo "# Generated on $(date)" >> "$SYSTEM_ARCHITECTURE_FILE" 313 | echo "# Environment: $OPERATING_SYSTEM" >> "$SYSTEM_ARCHITECTURE_FILE" 314 | echo "" >> "$SYSTEM_ARCHITECTURE_FILE" 315 | 316 | # Identify key system components based on directory structure and file types 317 | echo "## System Components" >> "$SYSTEM_ARCHITECTURE_FILE" 318 | 319 | # Count files by type to identify primary languages/frameworks 320 | echo "### Primary Languages/Frameworks" >> "$SYSTEM_ARCHITECTURE_FILE" 321 | echo "Counting files by extension to identify primary technologies..." >> "$SYSTEM_ARCHITECTURE_FILE" 322 | grep -o '\.[^./]*$' "$STRUCTURE_FILE" | sort | uniq -c | sort -nr >> "$SYSTEM_ARCHITECTURE_FILE" 323 | 324 | # Identify architectural patterns based on directory names and file content 325 | echo "" >> "$SYSTEM_ARCHITECTURE_FILE" 326 | echo "### Detected Architectural Patterns" >> "$SYSTEM_ARCHITECTURE_FILE" 327 | 328 | # Look for common architectural clues in directory names 329 | echo "Directory structure analysis:" >> "$SYSTEM_ARCHITECTURE_FILE" 330 | for pattern in "api" "service" "controller" "model" "view" "component" "middleware" "util" "helper" "config" "test" "frontend" "backend" "client" "server"; do 331 | count=$(find . -type d -name "*$pattern*" | wc -l) 332 | if [ "$count" -gt 0 ]; then 333 | echo "- Found $count directories matching pattern '$pattern'" >> "$SYSTEM_ARCHITECTURE_FILE" 334 | fi 335 | done 336 | 337 | # Check for deployment and infrastructure files 338 | echo "" >> "$SYSTEM_ARCHITECTURE_FILE" 339 | echo "### Infrastructure and Deployment" >> "$SYSTEM_ARCHITECTURE_FILE" 340 | for file in "Dockerfile" "docker-compose.yml" ".github/workflows" "Jenkinsfile" "terraform" "k8s" "helm"; do 341 | if [ -e "$file" ]; then 342 | echo "- Found $file" >> "$SYSTEM_ARCHITECTURE_FILE" 343 | fi 344 | done 345 | 346 | # NEW: Technical Debt Analysis 347 | echo "Gathering technical debt indicators..." 
348 | TECH_DEBT_DATA_FILE="technical_debt_data.txt" 349 | TECH_DEBT_PROMPT_FILE="$PROMPTS_DIR/technical/technical_debt_prompt.txt" 350 | echo "# Technical Debt Indicators" > "$TECH_DEBT_DATA_FILE" 351 | echo "# Generated on $(date)" >> "$TECH_DEBT_DATA_FILE" 352 | echo "" >> "$TECH_DEBT_DATA_FILE" 353 | 354 | # Count files by type for primary languages 355 | echo "## Primary Languages" >> "$TECH_DEBT_DATA_FILE" 356 | LANGUAGE_COUNTS=$(grep -o '\.[^./]*$' "$STRUCTURE_FILE" | sort | uniq -c | sort -nr) 357 | echo "$LANGUAGE_COUNTS" >> "$TECH_DEBT_DATA_FILE" 358 | PRIMARY_LANGUAGES=$(echo "$LANGUAGE_COUNTS" | head -5 | awk '{print $2}' | tr '\n' ', ' | sed 's/,$//' | sed 's/\.//') 359 | LANGUAGE_COUNT=$(echo "$LANGUAGE_COUNTS" | wc -l) 360 | 361 | # Look for code comments indicating technical debt 362 | echo -e "\n## TODO, FIXME, and HACK Comments" >> "$TECH_DEBT_DATA_FILE" 363 | TODO_COMMENTS=$(grep -r --include="*.py" --include="*.js" --include="*.jsx" --include="*.ts" --include="*.tsx" --include="*.sh" --include="*.yml" --include="*.yaml" --include="*.tf" "TODO\|FIXME\|HACK" . 2>/dev/null | grep -v "node_modules\|venv\|.git" | sort) 364 | TODO_COUNT=$(echo "$TODO_COMMENTS" | grep -v '^$' | wc -l) 365 | echo "Found $TODO_COUNT TODO/FIXME/HACK comments" >> "$TECH_DEBT_DATA_FILE" 366 | # Sample up to 10 TODO comments 367 | TODO_SAMPLES=$(echo "$TODO_COMMENTS" | head -10) 368 | echo "$TODO_SAMPLES" >> "$TECH_DEBT_DATA_FILE" 369 | 370 | # Check for deprecated dependencies if we have package.json or requirements.txt 371 | echo -e "\n## Dependency Analysis" >> "$TECH_DEBT_DATA_FILE" 372 | NODE_DEPS="" 373 | if [ -f "package.json" ]; then 374 | echo "### Node.js Dependencies" >> "$TECH_DEBT_DATA_FILE" 375 | NODE_DEPS=$(grep -A 100 "dependencies" package.json | grep -B 100 "}" | grep ":" | head -15) 376 | echo "$NODE_DEPS" >> "$TECH_DEBT_DATA_FILE" 377 | fi 378 | 379 | PYTHON_DEPS="" 380 | if [ -f "requirements.txt" ]; then 381 | echo -e "\n### Python Dependencies" >> "$TECH_DEBT_DATA_FILE" 382 | PYTHON_DEPS=$(cat requirements.txt | head -15) 383 | echo "$PYTHON_DEPS" >> "$TECH_DEBT_DATA_FILE" 384 | fi 385 | 386 | # Look for large files that might indicate complexity issues 387 | echo -e "\n## Potentially Complex Files (> 500 lines)" >> "$TECH_DEBT_DATA_FILE" 388 | LARGE_FILES=$(find . 
-type f \( -name "*.py" -o -name "*.js" -o -name "*.jsx" -o -name "*.ts" -o -name "*.tsx" \) -not -path "*/node_modules/*" -not -path "*/venv/*" -not -path "*/.git/*" -exec wc -l {} \; | awk '$1 > 500' | sort -nr) 389 | LARGE_FILES_COUNT=$(echo "$LARGE_FILES" | grep -v '^$' | wc -l) 390 | echo "Found $LARGE_FILES_COUNT large files (>500 lines)" >> "$TECH_DEBT_DATA_FILE" 391 | LARGE_FILES_SAMPLES=$(echo "$LARGE_FILES" | head -10) 392 | echo "$LARGE_FILES_SAMPLES" >> "$TECH_DEBT_DATA_FILE" 393 | 394 | # Check for potential circular dependencies 395 | echo -e "\n## Potential Circular Dependencies" >> "$TECH_DEBT_DATA_FILE" 396 | # This is a very basic check that could be improved 397 | if [ -f "$DEPENDENCY_MAP_FILE" ]; then 398 | DEPENDENCY_SAMPLES=$(grep " -> " "$DEPENDENCY_MAP_FILE" | head -15) 399 | IMPORT_COUNT=$(grep -c " -> " "$DEPENDENCY_MAP_FILE") 400 | # Find modules that are both imported and import others 401 | HIGH_COUPLING=$(grep " -> " "$DEPENDENCY_MAP_FILE" | awk '{print $1; print $3}' | sort | uniq -c | sort -nr | head -10) 402 | echo "Found $IMPORT_COUNT import relationships" >> "$TECH_DEBT_DATA_FILE" 403 | echo -e "\nHighly coupled components:" >> "$TECH_DEBT_DATA_FILE" 404 | echo "$HIGH_COUPLING" >> "$TECH_DEBT_DATA_FILE" 405 | fi 406 | 407 | # Now create the technical debt prompt for LLM 408 | echo "Generating technical debt analysis prompt for LLM..." 409 | 410 | cat > "$TECH_DEBT_PROMPT_FILE" << EOL 411 | # Technical Debt Analysis Prompt 412 | 413 | ## Context 414 | You are analyzing the technical debt in a codebase with the following characteristics: 415 | - ${FILE_COUNT} files across ${LANGUAGE_COUNT} languages/frameworks 416 | - Primary languages: ${PRIMARY_LANGUAGES} 417 | - Environment: ${OPERATING_SYSTEM:-Unknown OS}, Python ${PYTHON_VERSION:-Unknown}, Node.js ${NODE_VERSION:-Unknown} 418 | - Project summary: ${README_SUMMARY:-No project description available} 419 | 420 | ## Available Data 421 | The following data has been collected to assist your analysis: 422 | 1. TODO/FIXME/HACK comments (count: ${TODO_COUNT}) 423 | 2. Large files exceeding 500 lines (count: ${LARGE_FILES_COUNT}) 424 | 3. Dependency information (${IMPORT_COUNT} import relationships found) 425 | 4. Directory structure patterns and architectural indicators 426 | 427 | ## Sample Data Points 428 | ### TODO/FIXME Examples: 429 | ${TODO_SAMPLES} 430 | 431 | ### Large Files: 432 | ${LARGE_FILES_SAMPLES} 433 | 434 | ### Dependency Data: 435 | ${DEPENDENCY_SAMPLES} 436 | 437 | ### Highly Coupled Components: 438 | ${HIGH_COUPLING} 439 | 440 | ## Instructions 441 | Please analyze the technical debt in this codebase by: 442 | 443 | 1. **Categorizing the technical debt** into these types: 444 | - Code quality issues 445 | - Architectural problems 446 | - Outdated dependencies 447 | - Testing gaps 448 | - Documentation shortfalls 449 | 450 | 2. **Identifying potential root causes** of the technical debt: 451 | - Time pressure and deadlines 452 | - Knowledge gaps 453 | - Changing requirements 454 | - Architectural erosion over time 455 | - Legacy code integration 456 | 457 | 3. **Assessing the potential impact** of the technical debt: 458 | - On system stability 459 | - On maintainability 460 | - On performance 461 | - On security 462 | - On team productivity 463 | 464 | 4. 
**Recommending a prioritized remediation plan** that: 465 | - Addresses high-impact issues first 466 | - Considers interdependencies between components 467 | - Provides realistic, incremental steps 468 | - Balances short-term fixes with long-term improvements 469 | - Suggests preventative measures to avoid future debt 470 | 471 | 5. **Creating a high-level technical debt map** showing: 472 | - Which components contain the most concerning debt 473 | - How the debt in one area affects other parts of the system 474 | - Which areas would provide the highest ROI if addressed 475 | 476 | Please format your response as a structured technical debt analysis report with clear sections, actionable insights, and system-level thinking. 477 | EOL 478 | 479 | # Generate a minimal technical debt file that points to the prompt 480 | cat > "$TECHNICAL_DEBT_FILE" << EOL 481 | # Technical Debt Analysis 482 | # Generated on $(date) 483 | 484 | This file contains basic technical debt indicators. For a comprehensive analysis, 485 | copy the contents of "$TECH_DEBT_PROMPT_FILE" and submit it to an LLM like Claude, 486 | ChatGPT, or use it with Cursor's AI capabilities. 487 | 488 | ## Summary of Technical Debt Indicators 489 | - TODO/FIXME/HACK comments: ${TODO_COUNT} 490 | - Large files (>500 lines): ${LARGE_FILES_COUNT} 491 | - Import relationships: ${IMPORT_COUNT:-Unknown} 492 | - Primary languages: ${PRIMARY_LANGUAGES} 493 | 494 | For full data points, see: ${TECH_DEBT_DATA_FILE} 495 | For LLM analysis prompt, see: ${TECH_DEBT_PROMPT_FILE} 496 | 497 | To get a complete analysis, run: 498 | cat ${TECH_DEBT_PROMPT_FILE} | pbcopy # On macOS 499 | # or 500 | cat ${TECH_DEBT_PROMPT_FILE} | xclip -selection clipboard # On Linux with xclip 501 | # Then paste into your preferred LLM interface 502 | EOL 503 | 504 | # Update project_environment.txt with technical debt indicators 505 | if [ -f "project_environment.txt" ]; then 506 | echo -e "\n# Technical Debt Indicators" >> project_environment.txt 507 | echo "TECH_DEBT_TODO_COUNT=\"$TODO_COUNT\"" >> project_environment.txt 508 | echo "TECH_DEBT_LARGE_FILES_COUNT=\"$LARGE_FILES_COUNT\"" >> project_environment.txt 509 | echo "TECH_DEBT_PROMPT_FILE=\"$TECH_DEBT_PROMPT_FILE\"" >> project_environment.txt 510 | echo "TECH_DEBT_DATA_FILE=\"$TECH_DEBT_DATA_FILE\"" >> project_environment.txt 511 | fi 512 | 513 | # Generate Dependency Analysis Prompt 514 | echo "Generating dependency analysis prompt for LLM..." 
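# Editor's note (illustrative, not part of the original script): the metrics
# computed below are plain fan-out/fan-in counts over the "source -> target"
# lines in dependency_map.txt. For example, the fan-in of a single module
# (hypothetical name) could be checked by hand with:
#   grep " -> src.mcp_codebase_insight.core.config" dependency_map.txt | wc -l
# while the awk/sort/uniq pipelines that follow rank the most coupled modules
# for inclusion in the dependency analysis prompt.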
DEPENDENCY_ANALYSIS_FILE="dependency_analysis.txt"
DEPENDENCY_PROMPT_FILE="$PROMPTS_DIR/dependency/dependency_analysis_prompt.txt"

# Get some key metrics for the prompt
MODULE_COUNT=$(grep " -> " "$DEPENDENCY_MAP_FILE" | awk '{print $1}' | sort | uniq | wc -l)
IMPORT_COUNT=$(grep -c " -> " "$DEPENDENCY_MAP_FILE")
# Find highly coupled modules
HIGH_COUPLING=$(grep " -> " "$DEPENDENCY_MAP_FILE" | awk '{print $1}' | sort | uniq -c | sort -nr | head -10)
# Find modules with most incoming dependencies
HIGH_INCOMING=$(grep " -> " "$DEPENDENCY_MAP_FILE" | awk '{print $3}' | sort | uniq -c | sort -nr | head -10)

cat > "$DEPENDENCY_PROMPT_FILE" << EOL
# Dependency Graph Analysis Prompt

## Context
You are analyzing the dependency structure in a codebase with the following characteristics:
- ${FILE_COUNT} files across ${LANGUAGE_COUNT} languages/frameworks
- ${MODULE_COUNT} modules with dependencies
- ${IMPORT_COUNT} total import relationships
- Primary languages: ${PRIMARY_LANGUAGES}
- Environment: ${OPERATING_SYSTEM:-Unknown OS}, Python ${PYTHON_VERSION:-Unknown}, Node.js ${NODE_VERSION:-Unknown}
- Project summary: ${README_SUMMARY:-No project description available}

## Available Data
The dependency map shows how modules depend on each other. Here are some key metrics:

### Modules with most outgoing dependencies (highest coupling):
${HIGH_COUPLING}

### Modules with most incoming dependencies (highest dependency):
${HIGH_INCOMING}

### Sample dependencies:
$(grep " -> " "$DEPENDENCY_MAP_FILE" | head -20)

## Instructions
Please analyze the dependency structure of this codebase by:

1. **Identifying problematic dependency patterns**:
   - Modules with excessive coupling (too many dependencies)
   - Core modules that too many other modules depend on (high risk)
   - Potential circular dependencies or dependency chains
   - Architectural layering violations (if detectable)

2. **Evaluating the modularity of the system**:
   - Is the codebase well-modularized or tightly coupled?
   - Are there clear boundaries between subsystems?
   - Does the dependency structure reflect good architecture?
   - Are there signs of "spaghetti code" in the dependencies?

3. **Recommending improvements to the dependency structure**:
   - Which modules should be refactored to reduce coupling?
   - How could dependencies be better organized?
   - Are there opportunities to introduce abstractions/interfaces?
   - What architectural patterns might help improve the structure?

4. **Creating a dependency health assessment**:
   - Rate the overall health of the dependency structure
   - Identify the highest priority areas for improvement
   - Suggest metrics to track dependency health over time
   - Estimate the long-term maintainability based on dependencies

Please format your response as a structured dependency analysis report with clear sections,
visualizations (described in text if needed), and specific, actionable recommendations.
EOL

# Generate a minimal dependency analysis file that points to the prompt
cat > "$DEPENDENCY_ANALYSIS_FILE" << EOL
# Dependency Analysis
# Generated on $(date)

This file contains basic dependency metrics. For a comprehensive analysis,
copy the contents of "$DEPENDENCY_PROMPT_FILE" and submit it to an LLM like Claude,
ChatGPT, or use it with Cursor's AI capabilities.

## Summary of Dependency Metrics
- Modules with dependencies: ${MODULE_COUNT}
- Import relationships: ${IMPORT_COUNT}
- Primary languages: ${PRIMARY_LANGUAGES}

For the dependency map, see: ${DEPENDENCY_MAP_FILE}
For LLM analysis prompt, see: ${DEPENDENCY_PROMPT_FILE}

To get a complete analysis, run:
cat ${DEPENDENCY_PROMPT_FILE} | pbcopy  # On macOS
# or
cat ${DEPENDENCY_PROMPT_FILE} | xclip -selection clipboard  # On Linux with xclip
# Then paste into your preferred LLM interface
EOL

# Update project_environment.txt with dependency analysis references
if [ -f "project_environment.txt" ]; then
  echo -e "\n# Dependency Analysis Information" >> project_environment.txt
  echo "DEPENDENCY_PROMPT_FILE=\"$DEPENDENCY_PROMPT_FILE\"" >> project_environment.txt
  echo "DEPENDENCY_ANALYSIS_FILE=\"$DEPENDENCY_ANALYSIS_FILE\"" >> project_environment.txt
  echo "MODULE_COUNT=\"$MODULE_COUNT\"" >> project_environment.txt
  echo "IMPORT_COUNT=\"$IMPORT_COUNT\"" >> project_environment.txt
fi

# Generate a meta-prompt to create custom analysis prompts
echo "Creating meta-prompt for generating custom analysis prompts..."
META_PROMPT_FILE="$PROMPTS_DIR/meta_prompt_generator.txt"

cat > "$META_PROMPT_FILE" << EOL
# Meta-Prompt: Generate Custom Codebase Analysis Prompts

## Context
You've been given information about a codebase with these characteristics:
- ${FILE_COUNT} files across ${LANGUAGE_COUNT} languages/frameworks
- Primary languages: ${PRIMARY_LANGUAGES}
- Environment: ${OPERATING_SYSTEM:-Unknown OS}, Python ${PYTHON_VERSION:-Unknown}, Node.js ${NODE_VERSION:-Unknown}
- Project summary: ${README_SUMMARY:-No project description available}
- Detected architectural patterns: $(grep "Found" "$SYSTEM_ARCHITECTURE_FILE" | head -5 | tr '\n' ', ' | sed 's/,$//')

## Task
Generate a specialized analysis prompt that will help developers understand and improve this codebase. The prompt should be tailored to this specific codebase's characteristics and the developer's goal.

## Developer's Goal
[REPLACE THIS WITH YOUR SPECIFIC GOAL, e.g., "Improve test coverage", "Refactor for better performance", "Prepare for cloud migration"]

## Instructions
1. Create a prompt that guides an LLM to analyze the codebase specifically for the stated goal
2. Include relevant context from the codebase metrics above
3. Structure the prompt with clear sections including:
   - Background information about the codebase
   - Specific questions to address about the goal
   - Instructions for formatting the response
4. Focus on systems thinking principles that consider the entire codebase, not just isolated components
5. Include specific metrics or artifacts the LLM should look for in its analysis

## Output
Provide the complete text of the new analysis prompt, ready to be saved to a file and used with an LLM.
EOL

echo "Meta-prompt generator created at $META_PROMPT_FILE"

# Create a README for the prompts directory
cat > "$PROMPTS_DIR/README.md" << EOL
# Analysis Prompts

This directory contains prompts for analyzing the codebase using LLMs:

- **system/**: Prompts related to overall system architecture
- **technical/**: Prompts for analyzing technical debt and code quality
- **dependency/**: Prompts for analyzing dependencies and module relationships
- **custom/**: Location for your custom analysis prompts

## Usage

1. Select a prompt relevant to your analysis needs
2. Copy its contents to your clipboard: \`cat prompts/technical/technical_debt_prompt.txt | pbcopy\`
3. Paste into an LLM like Claude or ChatGPT
4. Review the analysis and insights

## Creating Custom Prompts

Use the meta-prompt generator to create custom analysis prompts:
\`\`\`
cat prompts/meta_prompt_generator.txt | pbcopy
# Then paste into an LLM, replace the [GOAL] placeholder, and follow the instructions
\`\`\`

## Available Prompts

- **Meta-Prompt Generator**: Generate custom analysis prompts for specific goals
- **Technical Debt Analysis**: Analyze and prioritize technical debt in the codebase
- **Dependency Structure Analysis**: Evaluate modularity and identify problematic dependencies
- **System Architecture Analysis**: Understand overall system design and architecture
EOL

# Create .gitignore entry for the prompts directory
if [ -f ".gitignore" ]; then
  if ! grep -q "^prompts/" ".gitignore"; then
    echo "prompts/" >> ".gitignore"
    echo "Added prompts/ to .gitignore"
  fi
else
  echo "prompts/" > ".gitignore"
  echo "Created .gitignore with prompts/ entry"
fi

# Write the combined LLM prompts file into the system directory
LLM_PROMPT_FILE="$PROMPTS_DIR/system/llm_prompts.txt"

# 4. Vector Graph Generation (Modified to include system architecture insights)
echo "Generating vector relationship graph prompt..."
# Note: the heredoc below is unquoted so the ${...} file references expand;
# backticks are escaped so they appear literally in the generated prompt.
cat > "$LLM_PROMPT_FILE" << EOL
# LLM Prompts for Codebase Analysis

## 1. Code Dependency Graph Generation
Generate a code dependency graph using the following data:
- \`${STRUCTURE_FILE}\`: Lists all files.
- \`${DEPENDENCY_MAP_FILE}\`: Shows dependencies between files.

## 2. Documentation Linking Analysis
Analyze documentation links using:
- \`${STRUCTURE_FILE}\`: Lists all files.
- \`${DOC_NODES_FILE}\`: Lists code elements (functions, classes).
- \`${USER_DOC_MAP_FILE}\`: Maps documentation to code elements.

## 3. System Architecture Analysis
Apply systems thinking to analyze the application architecture using:
- \`${STRUCTURE_FILE}\`: Lists all files
- \`${DEPENDENCY_MAP_FILE}\`: Shows dependencies between files
- \`${SYSTEM_ARCHITECTURE_FILE}\`: System components and patterns analysis
- \`${TECH_DEBT_DATA_FILE}\`: Technical debt indicators

### Task:
Analyze the codebase as a complete system, including:
1. Identify system boundaries and integration points
2. Detect feedback loops and circular dependencies
3. Identify potential bottlenecks and single points of failure
4. Assess emergent behavior that may arise from component interactions
5. Analyze technical debt impact on overall system health

### Output Format:
Provide a systems thinking analysis that includes:
\`\`\`
{
  "system_boundaries": [
    {"name": "Frontend", "components": ["component1", "component2"]},
    {"name": "Backend", "components": ["component3", "component4"]},
    {"name": "Data Layer", "components": ["component5"]}
  ],
  "integration_points": [
    {"name": "API Gateway", "type": "external_boundary", "risk_level": "medium"},
    {"name": "Database Access", "type": "internal", "risk_level": "high"}
  ],
  "feedback_loops": [
    {"components": ["componentA", "componentB", "componentC"], "type": "circular_dependency", "impact": "high"}
  ],
  "bottlenecks": [
    {"component": "componentX", "reason": "High coupling with 15 other components", "impact": "critical"}
  ],
  "technical_debt_hotspots": [
    {"component": "legacy_module", "type": "obsolete_dependencies", "impact": "high", "remediation_cost": "medium"}
  ]
}
\`\`\`

## 4. Technical Debt Analysis
For a detailed technical debt analysis, use the prompt in \`${TECH_DEBT_PROMPT_FILE}\`.
This prompt will guide you through:
1. Categorizing technical debt types
2. Identifying root causes
3. Assessing impact on the system
4. Creating a prioritized remediation plan
5. Mapping debt across the system

## 5. Dependency Structure Analysis
For a detailed analysis of the dependency structure, use the prompt in \`${DEPENDENCY_PROMPT_FILE}\`.
This prompt will guide you through:
1. Identifying problematic dependency patterns
2. Evaluating system modularity
3. Recommending structural improvements
4. Creating a dependency health assessment
EOL

echo "Directory structure saved to $STRUCTURE_FILE."
echo "Code dependency graph data saved to $DEPENDENCY_MAP_FILE."
echo "Documentation nodes data saved to $DOC_NODES_FILE."
echo "User documentation mapping data saved to $USER_DOC_MAP_FILE."
echo "System architecture analysis saved to $SYSTEM_ARCHITECTURE_FILE."
echo "Technical debt data saved to $TECH_DEBT_DATA_FILE."
echo "Technical debt analysis prompt saved to $TECH_DEBT_PROMPT_FILE."
echo "Dependency analysis data saved to $DEPENDENCY_ANALYSIS_FILE."
echo "Dependency analysis prompt saved to $DEPENDENCY_PROMPT_FILE."
echo "README.md analysis saved to $README_CONTEXT_FILE."
echo "Meta-prompt generator saved to $META_PROMPT_FILE."
echo "Prompts directory created at $PROMPTS_DIR with README.md"
echo "LLM prompts saved to $LLM_PROMPT_FILE."
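
# project_environment.txt is appended as simple KEY="value" lines throughout this
# script, which keeps the recorded paths and metrics easy for other tooling to
# parse or source (assuming the values contain no unescaped quotes).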

# Update project_environment.txt with analysis results
if [ -f "project_environment.txt" ]; then
  echo -e "\n# Codebase Analysis Results" >> project_environment.txt
  echo "FILE_COUNT=\"$FILE_COUNT\"" >> project_environment.txt
  echo "SYSTEM_ARCHITECTURE_FILE=\"$SYSTEM_ARCHITECTURE_FILE\"" >> project_environment.txt
  echo "TECHNICAL_DEBT_FILE=\"$TECHNICAL_DEBT_FILE\"" >> project_environment.txt
  echo "DEPENDENCY_MAP_FILE=\"$DEPENDENCY_MAP_FILE\"" >> project_environment.txt
  echo "README_CONTEXT_FILE=\"$README_CONTEXT_FILE\"" >> project_environment.txt
  echo "PROMPTS_DIR=\"$PROMPTS_DIR\"" >> project_environment.txt

  # README.md context
  if [ -n "$PROJECT_TITLE" ]; then
    echo "PROJECT_TITLE=\"$PROJECT_TITLE\"" >> project_environment.txt
  fi
  if [ -n "$README_SUMMARY" ]; then
    echo "PROJECT_DESCRIPTION=\"$README_SUMMARY\"" >> project_environment.txt
  fi

  # Record the number of TODO/FIXME/HACK comments (computed earlier) as a technical debt indicator
  echo "TECHNICAL_DEBT_INDICATORS=\"${TODO_COUNT:-0}\"" >> project_environment.txt

  echo "Updated project_environment.txt with codebase analysis results."
fi

echo "✅ Codebase analysis complete!"
echo "📊 To use the analysis prompts with an LLM, see $PROMPTS_DIR/README.md"
```
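
The script's final step records the generated file paths in `project_environment.txt` as `KEY="value"` pairs. Below is a minimal follow-up sketch, a hypothetical helper that is not part of the repository, showing how those entries could be sourced to copy one of the generated prompts to the clipboard; it assumes the analysis script has already run and that the recorded values contain no unescaped quotes.

```bash
#!/usr/bin/env bash
# copy_prompt.sh — hypothetical helper; assumes prepare_codebase.sh has appended
# TECH_DEBT_PROMPT_FILE=... to project_environment.txt in the current directory.
set -euo pipefail

# The environment file is written as simple KEY="value" lines, so it can be sourced.
source ./project_environment.txt

PROMPT="${TECH_DEBT_PROMPT_FILE:-}"
if [ -z "$PROMPT" ] || [ ! -f "$PROMPT" ]; then
  echo "No technical debt prompt found; run the analysis script first." >&2
  exit 1
fi

# Prefer pbcopy on macOS, fall back to xclip on Linux, otherwise print to stdout.
if command -v pbcopy >/dev/null 2>&1; then
  pbcopy < "$PROMPT"
  echo "Prompt copied to clipboard (pbcopy)."
elif command -v xclip >/dev/null 2>&1; then
  xclip -selection clipboard < "$PROMPT"
  echo "Prompt copied to clipboard (xclip)."
else
  cat "$PROMPT"
fi
```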