This is page 6 of 8. Use http://codebase.md/tosin2013/mcp-codebase-insight?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .bumpversion.cfg ├── .codecov.yml ├── .compile-venv-py3.11 │ ├── bin │ │ ├── activate │ │ ├── activate.csh │ │ ├── activate.fish │ │ ├── Activate.ps1 │ │ ├── coverage │ │ ├── coverage-3.11 │ │ ├── coverage3 │ │ ├── pip │ │ ├── pip-compile │ │ ├── pip-sync │ │ ├── pip3 │ │ ├── pip3.11 │ │ ├── py.test │ │ ├── pyproject-build │ │ ├── pytest │ │ ├── python │ │ ├── python3 │ │ ├── python3.11 │ │ └── wheel │ └── pyvenv.cfg ├── .env.example ├── .github │ └── workflows │ ├── build-verification.yml │ ├── publish.yml │ └── tdd-verification.yml ├── .gitignore ├── async_fixture_wrapper.py ├── CHANGELOG.md ├── CLAUDE.md ├── codebase_structure.txt ├── component_test_runner.py ├── CONTRIBUTING.md ├── core_workflows.txt ├── debug_tests.md ├── Dockerfile ├── docs │ ├── adrs │ │ └── 001_use_docker_for_qdrant.md │ ├── api.md │ ├── components │ │ └── README.md │ ├── cookbook.md │ ├── development │ │ ├── CODE_OF_CONDUCT.md │ │ ├── CONTRIBUTING.md │ │ └── README.md │ ├── documentation_map.md │ ├── documentation_summary.md │ ├── features │ │ ├── adr-management.md │ │ ├── code-analysis.md │ │ └── documentation.md │ ├── getting-started │ │ ├── configuration.md │ │ ├── docker-setup.md │ │ ├── installation.md │ │ ├── qdrant_setup.md │ │ └── quickstart.md │ ├── qdrant_setup.md │ ├── README.md │ ├── SSE_INTEGRATION.md │ ├── system_architecture │ │ └── README.md │ ├── templates │ │ └── adr.md │ ├── testing_guide.md │ ├── troubleshooting │ │ ├── common-issues.md │ │ └── faq.md │ ├── vector_store_best_practices.md │ └── workflows │ └── README.md ├── error_logs.txt ├── examples │ └── use_with_claude.py ├── github-actions-documentation.md ├── Makefile ├── module_summaries │ ├── backend_summary.txt │ ├── database_summary.txt │ └── frontend_summary.txt ├── output.txt ├── package-lock.json ├── package.json ├── PLAN.md ├── prepare_codebase.sh ├── PULL_REQUEST.md ├── pyproject.toml ├── pytest.ini ├── README.md ├── requirements-3.11.txt ├── requirements-3.11.txt.backup ├── requirements-dev.txt ├── requirements.in ├── requirements.txt ├── run_build_verification.sh ├── run_fixed_tests.sh ├── run_test_with_path_fix.sh ├── run_tests.py ├── scripts │ ├── check_qdrant_health.sh │ ├── compile_requirements.sh │ ├── load_example_patterns.py │ ├── macos_install.sh │ ├── README.md │ ├── setup_qdrant.sh │ ├── start_mcp_server.sh │ ├── store_code_relationships.py │ ├── store_report_in_mcp.py │ ├── validate_knowledge_base.py │ ├── validate_poc.py │ ├── validate_vector_store.py │ └── verify_build.py ├── server.py ├── setup_qdrant_collection.py ├── setup.py ├── src │ └── mcp_codebase_insight │ ├── __init__.py │ ├── __main__.py │ ├── asgi.py │ ├── core │ │ ├── __init__.py │ │ ├── adr.py │ │ ├── cache.py │ │ ├── component_status.py │ │ ├── config.py │ │ ├── debug.py │ │ ├── di.py │ │ ├── documentation.py │ │ ├── embeddings.py │ │ ├── errors.py │ │ ├── health.py │ │ ├── knowledge.py │ │ ├── metrics.py │ │ ├── prompts.py │ │ ├── sse.py │ │ ├── state.py │ │ ├── task_tracker.py │ │ ├── tasks.py │ │ └── vector_store.py │ ├── models.py │ ├── server_test_isolation.py │ ├── server.py │ ├── utils │ │ ├── __init__.py │ │ └── logger.py │ └── version.py ├── start-mcpserver.sh ├── summary_document.txt ├── system-architecture.md ├── system-card.yml ├── test_fix_helper.py ├── test_fixes.md ├── test_function.txt ├── test_imports.py ├── tests │ ├── components │ │ ├── conftest.py │ │ ├── test_core_components.py │ │ 
├── test_embeddings.py │ │ ├── test_knowledge_base.py │ │ ├── test_sse_components.py │ │ ├── test_stdio_components.py │ │ ├── test_task_manager.py │ │ └── test_vector_store.py │ ├── config │ │ └── test_config_and_env.py │ ├── conftest.py │ ├── integration │ │ ├── fixed_test2.py │ │ ├── test_api_endpoints.py │ │ ├── test_api_endpoints.py-e │ │ ├── test_communication_integration.py │ │ └── test_server.py │ ├── README.md │ ├── README.test.md │ ├── test_build_verifier.py │ └── test_file_relationships.py └── trajectories └── tosinakinosho ├── anthropic_filemap__claude-3-sonnet-20240229__t-0.00__p-1.00__c-3.00___db62b9 │ └── db62b9 │ └── config.yaml ├── default__claude-3-5-sonnet-20240620__t-0.00__p-1.00__c-3.00___03565e │ └── 03565e │ ├── 03565e.traj │ └── config.yaml └── default__openrouter └── anthropic └── claude-3.5-sonnet-20240620:beta__t-0.00__p-1.00__c-3.00___03565e └── 03565e ├── 03565e.pred ├── 03565e.traj └── config.yaml ``` # Files -------------------------------------------------------------------------------- /tests/integration/test_server.py: -------------------------------------------------------------------------------- ```python 1 | """Test server API endpoints.""" 2 | 3 | import sys 4 | import os 5 | 6 | # Ensure the src directory is in the Python path 7 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))) 8 | 9 | import pytest 10 | import pytest_asyncio 11 | from httpx import AsyncClient 12 | import uuid 13 | import logging 14 | import time 15 | from pathlib import Path 16 | from datetime import datetime, timezone 17 | from typing import Dict, List, Any, Optional 18 | 19 | from src.mcp_codebase_insight.core.config import ServerConfig 20 | from src.mcp_codebase_insight.core.vector_store import VectorStore 21 | from src.mcp_codebase_insight.core.knowledge import Pattern 22 | from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding 23 | from src.mcp_codebase_insight.server import CodebaseAnalysisServer 24 | from src.mcp_codebase_insight.server_test_isolation import get_isolated_server_state 25 | 26 | # Setup logger 27 | logger = logging.getLogger(__name__) 28 | 29 | # Environment variables or defaults for vector store testing 30 | QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333") 31 | TEST_COLLECTION_NAME = os.environ.get("TEST_COLLECTION_NAME", "test_vector_search") 32 | EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "all-MiniLM-L6-v2") 33 | 34 | # Path to test repository 35 | TEST_REPO_PATH = Path("tests/fixtures/test_repo") 36 | 37 | @pytest_asyncio.fixture 38 | async def setup_test_vector_store(test_server_client): 39 | """Set up a test vector store with sample patterns for the server tests. 40 | 41 | This fixture initializes the vector store component in the server with test patterns, 42 | allowing the vector store search endpoint to be tested properly. 
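    A minimal consumption sketch, mirroring how the search tests later in this
    module use the fixture (illustrative only; the fixture and client names come
    from this file, not from new API):

        @pytest.mark.asyncio
        async def test_example_search(test_server_client, setup_test_vector_store):
            if setup_test_vector_store is None:
                pytest.skip("Vector store setup failed, skipping test")
            response = await test_server_client.get(
                "/api/vector-store/search",
                params={"query": "search result class", "threshold": 0.7},
            )
            assert response.status_code == 200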
43 | """ 44 | # Get server state from the test client 45 | logger.info("Attempting to get server health status") 46 | request = await test_server_client.get("/health") 47 | if request.status_code != 200: 48 | logger.warning(f"Server health check failed with status code {request.status_code}") 49 | yield None 50 | return 51 | 52 | # Get the server state through test isolation utilities 53 | logger.info("Getting isolated server state") 54 | server_state = get_isolated_server_state() 55 | if not server_state: 56 | logger.warning("Could not get isolated server state, server_state is None") 57 | yield None 58 | return 59 | 60 | logger.info(f"Got server state, instance ID: {server_state.instance_id}") 61 | logger.info(f"Server state components: {server_state.list_components()}") 62 | 63 | # Create and initialize a test vector store 64 | try: 65 | # Create the embedder first 66 | logger.info(f"Creating embedding model with model name: {EMBEDDING_MODEL}") 67 | embedder = SentenceTransformerEmbedding(model_name=EMBEDDING_MODEL) 68 | await embedder.initialize() 69 | 70 | # Now create the vector store with the embedder 71 | logger.info(f"Creating vector store with URL: {QDRANT_URL}, collection: {TEST_COLLECTION_NAME}") 72 | vector_store = VectorStore( 73 | url=QDRANT_URL, 74 | embedder=embedder, 75 | collection_name=TEST_COLLECTION_NAME 76 | ) 77 | 78 | # Delete any existing collection with this name 79 | try: 80 | logger.info("Cleaning up vector store before use") 81 | await vector_store.cleanup() 82 | logger.info("Vector store cleaned up") 83 | except Exception as e: 84 | logger.warning(f"Error during vector store cleanup: {str(e)}") 85 | 86 | # Initialize the vector store 87 | logger.info("Initializing vector store") 88 | await vector_store.initialize() 89 | logger.info(f"Initialized vector store with collection: {TEST_COLLECTION_NAME}") 90 | 91 | # Add test patterns 92 | logger.info("Adding test patterns to vector store") 93 | await add_test_patterns(vector_store, embedder) 94 | 95 | # Register the vector store in the server state 96 | logger.info("Registering vector store component in server state") 97 | server_state.register_component("vector_store", vector_store) 98 | logger.info("Registered vector store component in server state") 99 | 100 | yield vector_store 101 | 102 | # Cleanup 103 | try: 104 | logger.info("Closing vector store") 105 | await vector_store.close() 106 | logger.info("Vector store closed") 107 | except Exception as e: 108 | logger.warning(f"Error during vector store closure: {str(e)}") 109 | 110 | except Exception as e: 111 | logger.error(f"Error setting up test vector store: {str(e)}", exc_info=True) 112 | yield None 113 | 114 | async def add_test_patterns(store: VectorStore, embedder: SentenceTransformerEmbedding): 115 | """Add test patterns to the vector store for testing.""" 116 | patterns = [] 117 | 118 | # Add sample patterns for testing 119 | patterns.append(Pattern( 120 | id=str(uuid.uuid4()), 121 | text="""class SearchResult: 122 | \"\"\"Represents a search result from the vector store.\"\"\" 123 | def __init__(self, id: str, score: float, metadata: Optional[Dict] = None): 124 | self.id = id 125 | self.score = score 126 | self.metadata = metadata or {} 127 | 128 | def to_dict(self): 129 | \"\"\"Convert to dictionary.\"\"\" 130 | return { 131 | "id": self.id, 132 | "score": self.score, 133 | "metadata": self.metadata 134 | }""", 135 | title="SearchResult Class", 136 | description="A class for vector store search results", 137 | pattern_type="code", 138 | 
tags=["python", "class", "search", "vector-store"], 139 | metadata={ 140 | "language": "python", 141 | "file_path": "src/core/models.py", 142 | "line_range": "10-25", 143 | "timestamp": datetime.now(timezone.utc).isoformat(), 144 | "type": "code" 145 | } 146 | )) 147 | 148 | patterns.append(Pattern( 149 | id=str(uuid.uuid4()), 150 | text="""async def search( 151 | self, 152 | query: str, 153 | limit: int = 5, 154 | threshold: float = 0.7, 155 | file_type: Optional[str] = None, 156 | path_pattern: Optional[str] = None 157 | ) -> List[Dict]: 158 | \"\"\"Search for patterns matching the query.\"\"\" 159 | # Generate embedding for the query 160 | embedding = await self.embedding_model.embed(query) 161 | 162 | # Prepare filter conditions 163 | filter_conditions = {} 164 | if file_type: 165 | filter_conditions["language"] = file_type 166 | if path_pattern: 167 | filter_conditions["file_path"] = {"$like": path_pattern} 168 | 169 | # Perform the search 170 | results = await self.vector_store.search( 171 | embedding=embedding, 172 | limit=limit, 173 | filter_conditions=filter_conditions 174 | ) 175 | 176 | # Filter by threshold 177 | filtered_results = [r for r in results if r.score >= threshold] 178 | 179 | return filtered_results""", 180 | title="Vector Store Search Method", 181 | description="Async method to search the vector store with filters", 182 | pattern_type="code", 183 | tags=["python", "async", "function", "search"], 184 | metadata={ 185 | "language": "python", 186 | "file_path": "src/core/search.py", 187 | "line_range": "50-75", 188 | "timestamp": datetime.now(timezone.utc).isoformat(), 189 | "type": "code" 190 | } 191 | )) 192 | 193 | patterns.append(Pattern( 194 | id=str(uuid.uuid4()), 195 | text="""# Vector Store Configuration 196 | 197 | ## Search Parameters 198 | 199 | - **query**: The text to search for similar patterns 200 | - **threshold**: Similarity score threshold (0.0 to 1.0) 201 | - **limit**: Maximum number of results to return 202 | - **file_type**: Filter by programming language/file type 203 | - **path_pattern**: Filter by file path pattern 204 | 205 | ## Recommended Threshold Values 206 | 207 | - **0.9-1.0**: Very high precision, almost exact matches 208 | - **0.8-0.9**: High precision, strongly similar 209 | - **0.7-0.8**: Good balance (default) 210 | - **0.6-0.7**: Higher recall, more results 211 | - **0.5-0.6**: Very high recall, may include less relevant matches""", 212 | title="Vector Store Documentation", 213 | description="Documentation on vector store search parameters", 214 | pattern_type="documentation", 215 | tags=["documentation", "markdown", "search", "parameters"], 216 | metadata={ 217 | "language": "markdown", 218 | "file_path": "docs/vector_store.md", 219 | "line_range": "50-70", 220 | "timestamp": datetime.now(timezone.utc).isoformat(), 221 | "type": "documentation" 222 | } 223 | )) 224 | 225 | # Store patterns with embeddings 226 | for pattern in patterns: 227 | # Generate embedding for the pattern text 228 | embedding = await embedder.embed(pattern.text) 229 | 230 | # Store the pattern 231 | await store.store_pattern( 232 | id=pattern.id, 233 | text=pattern.text, 234 | title=pattern.title, 235 | description=pattern.description, 236 | pattern_type=pattern.pattern_type, 237 | tags=pattern.tags, 238 | metadata=pattern.metadata, 239 | embedding=embedding 240 | ) 241 | logger.info(f"Added pattern: {pattern.title}") 242 | 243 | logger.info(f"Added {len(patterns)} patterns to the test vector store") 244 | return patterns 245 | 246 | # Use the test_client 
fixture from conftest.py 247 | @pytest_asyncio.fixture(scope="function") 248 | async def test_server_client(httpx_test_client): 249 | """Get a test client for server API testing. 250 | 251 | This uses the httpx_test_client from conftest.py to ensure 252 | proper event loop and resource management. 253 | """ 254 | yield httpx_test_client 255 | 256 | @pytest.fixture 257 | def test_code(): 258 | """Return a sample code snippet for testing.""" 259 | return """ 260 | def example_function(x: int) -> int: 261 | return x * 2 262 | """ 263 | 264 | @pytest.fixture 265 | def test_issue(): 266 | """Return a sample issue description for testing.""" 267 | return "Error in function: example_function returns incorrect results for negative values" 268 | 269 | @pytest.fixture 270 | def test_adr(): 271 | """Return a sample ADR structure for testing.""" 272 | return { 273 | "title": "Test ADR", 274 | "status": "Proposed", 275 | "context": "This is a test ADR for automated testing purposes.", 276 | "decision": "We've decided to use this test ADR format.", 277 | "consequences": { 278 | "positive": ["Test positive consequence"], 279 | "negative": ["Test negative consequence"] 280 | }, 281 | "options": [ 282 | { 283 | "title": "Test option", 284 | "description": "Test description", 285 | "pros": ["Test pro"], 286 | "cons": ["Test con"] 287 | } 288 | ] 289 | } 290 | 291 | @pytest.mark.asyncio 292 | async def test_health_check(test_server_client: AsyncClient): 293 | """Test health check endpoint.""" 294 | response = await test_server_client.get("/health") 295 | assert response.status_code == 200 296 | data = response.json() 297 | assert "status" in data 298 | 299 | @pytest.mark.asyncio 300 | async def test_metrics(test_server_client: AsyncClient): 301 | """Test metrics endpoint.""" 302 | response = await test_server_client.get("/metrics") 303 | # Some test servers may not have metrics enabled 304 | if response.status_code == 200: 305 | data = response.json() 306 | assert "metrics" in data 307 | else: 308 | logger.info(f"Metrics endpoint not available (status: {response.status_code})") 309 | assert response.status_code in [404, 503] # Not found or service unavailable 310 | 311 | @pytest.mark.asyncio 312 | async def test_analyze_code(test_server_client: AsyncClient, test_code: str): 313 | """Test code analysis endpoint.""" 314 | response = await test_server_client.post( 315 | "/tools/analyze-code", 316 | json={ 317 | "name": "analyze-code", 318 | "arguments": { 319 | "code": test_code, 320 | "context": {} 321 | } 322 | } 323 | ) 324 | # Component might not be available in test server 325 | if response.status_code == 200: 326 | data = response.json() 327 | assert "content" in data 328 | else: 329 | logger.info(f"Code analysis endpoint not available (status: {response.status_code})") 330 | assert response.status_code in [404, 503] # Not found or service unavailable 331 | 332 | @pytest.mark.asyncio 333 | async def test_create_adr(test_server_client: AsyncClient, test_adr: dict): 334 | """Test ADR creation endpoint.""" 335 | response = await test_server_client.post( 336 | "/tools/create-adr", 337 | json={ 338 | "name": "create-adr", 339 | "arguments": test_adr 340 | } 341 | ) 342 | # Component might not be available in test server 343 | if response.status_code == 200: 344 | data = response.json() 345 | assert "content" in data 346 | else: 347 | logger.info(f"ADR creation endpoint not available (status: {response.status_code})") 348 | assert response.status_code in [404, 503] # Not found or service unavailable 349 | 350 | 
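# --- Illustrative sketch (not part of the original test suite) ---
# The tool-endpoint tests in this module (analyze-code, create-adr, debug-issue,
# search-knowledge) follow one pattern: POST to /tools/<name>, accept a 200
# response carrying a "content" payload, and tolerate 404/503 when the backing
# component is unavailable in the test server. A shared helper could express that
# pattern once; the names below simply mirror the surrounding tests.
async def _call_tool_and_check(client: AsyncClient, tool: str, arguments: dict) -> None:
    response = await client.post(
        f"/tools/{tool}",
        json={"name": tool, "arguments": arguments},
    )
    if response.status_code == 200:
        # Successful tool invocation returns a "content" payload.
        assert "content" in response.json()
    else:
        # Component not available in this test server configuration.
        logger.info(f"{tool} endpoint not available (status: {response.status_code})")
        assert response.status_code in [404, 503]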
@pytest.mark.asyncio 351 | async def test_debug_issue(test_server_client: AsyncClient, test_issue: str): 352 | """Test issue debugging endpoint.""" 353 | response = await test_server_client.post( 354 | "/tools/debug-issue", 355 | json={ 356 | "name": "debug-issue", 357 | "arguments": { 358 | "issue": test_issue, 359 | "context": {} 360 | } 361 | } 362 | ) 363 | # Component might not be available in test server 364 | if response.status_code == 200: 365 | data = response.json() 366 | assert "content" in data 367 | else: 368 | logger.info(f"Debug issue endpoint not available (status: {response.status_code})") 369 | assert response.status_code in [404, 503] # Not found or service unavailable 370 | 371 | @pytest.mark.asyncio 372 | async def test_search_knowledge(test_server_client: AsyncClient): 373 | """Test knowledge search endpoint.""" 374 | response = await test_server_client.post( 375 | "/tools/search-knowledge", 376 | json={ 377 | "name": "search-knowledge", 378 | "arguments": { 379 | "query": "test query", 380 | "limit": 5 381 | } 382 | } 383 | ) 384 | # Component might not be available in test server 385 | if response.status_code == 200: 386 | data = response.json() 387 | assert "content" in data 388 | else: 389 | logger.info(f"Knowledge search endpoint not available (status: {response.status_code})") 390 | assert response.status_code in [404, 503] # Not found or service unavailable 391 | 392 | @pytest.mark.asyncio 393 | async def test_get_task(test_server_client: AsyncClient): 394 | """Test task endpoint.""" 395 | # Create a test task ID 396 | test_id = f"test_task_{uuid.uuid4().hex}" 397 | 398 | response = await test_server_client.post( 399 | "/task", 400 | json={ 401 | "task_id": test_id, 402 | "status": "pending", 403 | "result": None 404 | } 405 | ) 406 | assert response.status_code in [200, 404, 503] # Allow various responses depending on component availability 407 | 408 | @pytest.mark.asyncio 409 | async def test_invalid_request(test_server_client: AsyncClient): 410 | """Test invalid request handling.""" 411 | response = await test_server_client.post( 412 | "/tools/invalid-tool", 413 | json={ 414 | "name": "invalid-tool", 415 | "arguments": {} 416 | } 417 | ) 418 | assert response.status_code in [404, 400] # Either not found or bad request 419 | 420 | @pytest.mark.asyncio 421 | async def test_not_found(test_server_client: AsyncClient): 422 | """Test 404 handling.""" 423 | response = await test_server_client.get("/nonexistent-endpoint") 424 | assert response.status_code == 404 425 | 426 | @pytest.mark.asyncio 427 | async def test_server_lifecycle(): 428 | """Test server lifecycle.""" 429 | # This is a safety check to ensure we're not breaking anything 430 | # The actual server lifecycle is tested by the conftest fixtures 431 | assert True # Replace with real checks if needed 432 | 433 | @pytest.mark.asyncio 434 | async def test_vector_store_search_threshold_validation(test_server_client: AsyncClient, setup_test_vector_store): 435 | """Test that the vector store search endpoint validates threshold values.""" 436 | # Skip if vector store setup failed 437 | if setup_test_vector_store is None: 438 | pytest.skip("Vector store setup failed, skipping test") 439 | 440 | # Test invalid threshold greater than 1.0 441 | response = await test_server_client.get("/api/vector-store/search?query=test&threshold=1.5") 442 | assert response.status_code == 422 443 | assert "threshold" in response.text 444 | assert "less than or equal to" in response.text 445 | 446 | # Test invalid threshold less than 
0.0 447 | response = await test_server_client.get("/api/vector-store/search?query=test&threshold=-0.5") 448 | assert response.status_code == 422 449 | assert "threshold" in response.text 450 | assert "greater than or equal to" in response.text 451 | 452 | # Test boundary value 0.0 (should be valid) 453 | response = await test_server_client.get("/api/vector-store/search?query=test&threshold=0.0") 454 | assert response.status_code == 200 455 | data = response.json() 456 | assert "results" in data 457 | assert data["threshold"] == 0.0 458 | 459 | # Test boundary value 1.0 (should be valid) 460 | response = await test_server_client.get("/api/vector-store/search?query=test&threshold=1.0") 461 | assert response.status_code == 200 462 | data = response.json() 463 | assert "results" in data 464 | assert data["threshold"] == 1.0 465 | 466 | # Test with valid filter parameters 467 | response = await test_server_client.get("/api/vector-store/search?query=test&threshold=0.7&file_type=python&path_pattern=src/*") 468 | assert response.status_code == 200 469 | data = response.json() 470 | assert "results" in data 471 | assert "query" in data 472 | assert "total_results" in data 473 | assert "limit" in data 474 | assert "threshold" in data 475 | assert data["threshold"] == 0.7 476 | 477 | # If we have results, check their format 478 | if data["results"]: 479 | result = data["results"][0] 480 | assert "id" in result 481 | assert "score" in result 482 | assert "text" in result 483 | assert "file_path" in result 484 | assert "line_range" in result 485 | assert "type" in result 486 | assert "language" in result 487 | assert "timestamp" in result 488 | 489 | @pytest.mark.asyncio 490 | async def test_vector_store_search_functionality(test_server_client: AsyncClient, setup_test_vector_store): 491 | """Test comprehensive vector store search functionality. 492 | 493 | This test validates the full functionality of the vector store search endpoint, 494 | including result format, filtering, and metadata handling. 495 | 496 | The test checks: 497 | 1. Basic search returns properly formatted results 498 | 2. File type filtering works correctly 499 | 3. Path pattern filtering works correctly 500 | 4. Limit parameter controls result count 501 | 5. 
Results contain all required metadata fields 502 | """ 503 | # Skip if vector store setup failed 504 | if setup_test_vector_store is None: 505 | pytest.skip("Vector store setup failed, skipping test") 506 | 507 | # Test basic search functionality 508 | response = await test_server_client.get( 509 | "/api/vector-store/search", 510 | params={ 511 | "query": "test query", 512 | "threshold": 0.7, 513 | "limit": 5 514 | } 515 | ) 516 | 517 | # We should have a successful response now that the vector store is initialized 518 | assert response.status_code == 200 519 | data = response.json() 520 | 521 | # Validate response structure 522 | assert "query" in data 523 | assert data["query"] == "test query" 524 | assert "results" in data 525 | assert "threshold" in data 526 | assert data["threshold"] == 0.7 527 | assert "total_results" in data 528 | assert "limit" in data 529 | assert data["limit"] == 5 530 | 531 | # Test with file type filter 532 | response = await test_server_client.get( 533 | "/api/vector-store/search", 534 | params={ 535 | "query": "test query", 536 | "threshold": 0.7, 537 | "limit": 5, 538 | "file_type": "python" 539 | } 540 | ) 541 | assert response.status_code == 200 542 | data = response.json() 543 | assert "file_type" in data 544 | assert data["file_type"] == "python" 545 | 546 | # Test with path pattern filter 547 | response = await test_server_client.get( 548 | "/api/vector-store/search", 549 | params={ 550 | "query": "test query", 551 | "threshold": 0.7, 552 | "limit": 5, 553 | "path_pattern": "src/**/*.py" 554 | } 555 | ) 556 | assert response.status_code == 200 557 | data = response.json() 558 | assert "path_pattern" in data 559 | assert data["path_pattern"] == "src/**/*.py" 560 | 561 | # Test with limit=1 562 | response = await test_server_client.get( 563 | "/api/vector-store/search", 564 | params={ 565 | "query": "test query", 566 | "threshold": 0.7, 567 | "limit": 1 568 | } 569 | ) 570 | assert response.status_code == 200 571 | data = response.json() 572 | assert data["limit"] == 1 573 | 574 | # If we have results, verify the result format 575 | if data["results"]: 576 | result = data["results"][0] 577 | # Check all required fields are present 578 | assert "id" in result 579 | assert "score" in result 580 | assert "text" in result 581 | assert "file_path" in result 582 | assert "line_range" in result 583 | assert "type" in result 584 | assert "language" in result 585 | assert "timestamp" in result 586 | 587 | # Validate data types 588 | assert isinstance(result["id"], str) 589 | assert isinstance(result["score"], (int, float)) 590 | assert isinstance(result["text"], str) 591 | assert isinstance(result["file_path"], str) 592 | assert isinstance(result["line_range"], str) 593 | assert isinstance(result["type"], str) 594 | assert isinstance(result["language"], str) 595 | assert isinstance(result["timestamp"], str) 596 | 597 | @pytest.mark.asyncio 598 | async def test_vector_store_search_error_handling(test_server_client: AsyncClient, setup_test_vector_store): 599 | """Test error handling for vector store search endpoint. 600 | 601 | This test validates the error handling capabilities of the vector store search endpoint 602 | when provided with invalid or missing required parameters. 603 | 604 | The test checks: 605 | 1. Missing query parameter returns appropriate error 606 | 2. 
Invalid limit parameter (negative/zero) returns appropriate error 607 | """ 608 | # Skip if vector store setup failed 609 | if setup_test_vector_store is None: 610 | pytest.skip("Vector store setup failed, skipping test") 611 | 612 | # Test missing query parameter 613 | response = await test_server_client.get( 614 | "/api/vector-store/search", 615 | params={ 616 | "threshold": 0.7, 617 | "limit": 5 618 | } 619 | ) 620 | 621 | # Missing required query parameter should return 422 622 | assert response.status_code == 422 623 | data = response.json() 624 | assert "detail" in data 625 | assert any("query" in error["loc"] for error in data["detail"]) 626 | 627 | # Test invalid limit parameter (negative) 628 | response = await test_server_client.get( 629 | "/api/vector-store/search", 630 | params={ 631 | "query": "test query", 632 | "threshold": 0.7, 633 | "limit": -5 634 | } 635 | ) 636 | assert response.status_code == 422 637 | data = response.json() 638 | assert "detail" in data 639 | assert any("limit" in error["loc"] for error in data["detail"]) 640 | 641 | # Test invalid limit parameter (zero) 642 | response = await test_server_client.get( 643 | "/api/vector-store/search", 644 | params={ 645 | "query": "test query", 646 | "threshold": 0.7, 647 | "limit": 0 648 | } 649 | ) 650 | assert response.status_code == 422 651 | data = response.json() 652 | assert "detail" in data 653 | assert any("limit" in error["loc"] for error in data["detail"]) 654 | 655 | @pytest.mark.asyncio 656 | async def test_vector_store_search_performance(test_server_client: AsyncClient, setup_test_vector_store): 657 | """Test performance of vector store search endpoint. 658 | 659 | This test measures the response time of the vector store search endpoint 660 | to ensure it meets performance requirements. 661 | 662 | The test checks: 663 | 1. Search response time is within acceptable limits (< 1000ms) 664 | 2. Multiple consecutive searches maintain performance 665 | """ 666 | # Skip if vector store setup failed 667 | if setup_test_vector_store is None: 668 | pytest.skip("Vector store setup failed, skipping test") 669 | 670 | # Define performance thresholds 671 | max_response_time_ms = 1000 # 1 second maximum response time 672 | 673 | # Perform timed search tests 674 | for i in range(3): # Test 3 consecutive searches 675 | start_time = time.time() 676 | 677 | response = await test_server_client.get( 678 | "/api/vector-store/search", 679 | params={ 680 | "query": f"test performance query {i}", 681 | "threshold": 0.7, 682 | "limit": 5 683 | } 684 | ) 685 | 686 | end_time = time.time() 687 | response_time_ms = (end_time - start_time) * 1000 688 | 689 | assert response.status_code == 200 690 | logger.info(f"Search {i+1} response time: {response_time_ms:.2f}ms") 691 | 692 | # Assert performance is within acceptable limits 693 | assert response_time_ms < max_response_time_ms, \ 694 | f"Search response time ({response_time_ms:.2f}ms) exceeds threshold ({max_response_time_ms}ms)" 695 | 696 | # Verify we got a valid response 697 | data = response.json() 698 | assert "results" in data 699 | assert "query" in data 700 | 701 | @pytest.mark.asyncio 702 | async def test_vector_store_search_threshold_validation_mock(test_server_client: AsyncClient): 703 | """Test that the vector store search endpoint validates threshold values using mock approach. 704 | 705 | This test isolates FastAPI's parameter validation from the actual server initialization. 706 | It doesn't test the vector store implementation but only the parameter validation logic. 
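    The 422 responses asserted below presumably come from declarative constraints
    on the route's query parameters, along the lines of this sketch (an assumption
    about the route definition, not a copy of it):

        threshold: float = Query(default=0.7, ge=0.0, le=1.0)

    FastAPI evaluates such constraints while parsing the request, before any
    handler or vector store code runs, which is why parameter validation can be
    exercised even when server components are not fully initialized.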
707 | """ 708 | # First, check if server is responding at all by checking health endpoint 709 | health_response = await test_server_client.get("/health") 710 | 711 | # If we can't even reach the server, skip the test 712 | if health_response.status_code >= 500: 713 | pytest.skip(f"Server is not responding (status: {health_response.status_code})") 714 | 715 | # Create a list of test cases: (threshold, expected_validation_error) 716 | # None for expected_validation_error means we expect validation to pass 717 | test_cases = [ 718 | # Invalid thresholds (should fail validation) 719 | (1.5, "less than or equal to 1.0"), 720 | (-0.5, "greater than or equal to 0.0"), 721 | # Valid thresholds (should pass validation) 722 | (0.0, None), 723 | (1.0, None), 724 | (0.7, None), 725 | ] 726 | 727 | # Try each test case 728 | for threshold, expected_validation_error in test_cases: 729 | # Skip testing health check which will never have parameter validation errors 730 | # Here we're just testing the static validation in the FastAPI route definition 731 | # This will trigger validation errors regardless of server state 732 | response = await test_server_client.get(f"/api/vector-store/search?query=test&threshold={threshold}") 733 | 734 | # Check response based on expected validation 735 | if expected_validation_error: 736 | # If validation error is expected, check for 422 status 737 | # Note: If we got 503, parameter validation didn't even happen 738 | # In some test environments this is normal, so we'll skip the assertion 739 | if response.status_code == 503: 740 | logger.info(f"Server returned 503 for threshold={threshold}, " 741 | f"parameter validation couldn't be tested due to server state") 742 | continue 743 | 744 | # If we get here, we should have a 422 validation error 745 | assert response.status_code == 422, \ 746 | f"Expected 422 for invalid threshold {threshold}, got {response.status_code}: {response.text}" 747 | 748 | # Check if validation error message contains expected text 749 | assert expected_validation_error in response.text, \ 750 | f"Expected validation error to contain '{expected_validation_error}', got: {response.text}" 751 | 752 | logger.info(f"Threshold {threshold} correctly failed validation with message containing '{expected_validation_error}'") 753 | else: 754 | # For valid thresholds, skip assertion if server returned 503 755 | if response.status_code == 503: 756 | logger.info(f"Server returned 503 for valid threshold={threshold}, " 757 | f"but parameter validation passed (otherwise would be 422)") 758 | continue 759 | 760 | # If we get a non-503 response for a valid threshold, it should be 200 761 | # (or 404 if the endpoint doesn't exist in test server) 762 | assert response.status_code in [200, 404], \ 763 | f"Expected 200 for valid threshold {threshold}, got {response.status_code}: {response.text}" 764 | 765 | logger.info(f"Threshold {threshold} correctly passed validation") 766 | 767 | logger.info("Completed threshold parameter validation tests") 768 | ``` -------------------------------------------------------------------------------- /output.txt: -------------------------------------------------------------------------------- ``` 1 | ============================= test session starts ============================== 2 | platform darwin -- Python 3.13.2, pytest-8.3.5, pluggy-1.5.0 -- /Users/tosinakinosho/workspaces/mcp-codebase-insight/.venv/bin/python3.13 3 | cachedir: .pytest_cache 4 | rootdir: /Users/tosinakinosho/workspaces/mcp-codebase-insight 5 | configfile: 
pytest.ini 6 | plugins: cov-6.0.0, anyio-4.9.0, asyncio-0.26.0 7 | asyncio: mode=Mode.STRICT, asyncio_default_fixture_loop_scope=session, asyncio_default_test_loop_scope=function 8 | collecting ... collected 106 items 9 | 10 | tests/components/test_core_components.py::test_adr_manager PASSED [ 0%] 11 | tests/components/test_core_components.py::test_knowledge_base PASSED [ 1%] 12 | tests/components/test_core_components.py::test_task_manager PASSED [ 2%] 13 | tests/components/test_core_components.py::test_metrics_manager PASSED [ 3%] 14 | tests/components/test_core_components.py::test_health_manager PASSED [ 4%] 15 | tests/components/test_core_components.py::test_cache_manager PASSED [ 5%] 16 | tests/components/test_core_components.py::test_documentation_manager PASSED [ 6%] 17 | tests/components/test_core_components.py::test_debug_system PASSED [ 7%] 18 | tests/components/test_embeddings.py::test_embedder_initialization PASSED [ 8%] 19 | tests/components/test_embeddings.py::test_embedder_embedding PASSED [ 9%] 20 | tests/components/test_knowledge_base.py::test_knowledge_base_initialization PASSED [ 10%] 21 | tests/components/test_knowledge_base.py::test_add_and_get_pattern PASSED [ 11%] 22 | tests/components/test_knowledge_base.py::test_find_similar_patterns PASSED [ 12%] 23 | tests/components/test_knowledge_base.py::test_update_pattern PASSED [ 13%] 24 | tests/components/test_sse_components.py::test_mcp_server_initialization PASSED [ 14%] 25 | tests/components/test_sse_components.py::test_register_tools PASSED [ 15%] 26 | tests/components/test_sse_components.py::test_get_starlette_app FAILED [ 16%] 27 | tests/components/test_sse_components.py::test_create_sse_server FAILED [ 16%] 28 | tests/components/test_sse_components.py::test_vector_search_tool PASSED [ 17%] 29 | tests/components/test_sse_components.py::test_knowledge_search_tool PASSED [ 18%] 30 | tests/components/test_sse_components.py::test_adr_list_tool FAILED [ 19%] 31 | tests/components/test_sse_components.py::test_task_status_tool FAILED [ 20%] 32 | tests/components/test_sse_components.py::test_sse_handle_connect FAILED [ 21%] 33 | 34 | =================================== FAILURES =================================== 35 | ____________________________ test_get_starlette_app ____________________________ 36 | 37 | mock_create_sse = <MagicMock name='create_sse_server' id='5349118976'> 38 | mcp_server = <src.mcp_codebase_insight.core.sse.MCP_CodebaseInsightServer object at 0x13ed274d0> 39 | 40 | @patch('mcp_codebase_insight.core.sse.create_sse_server') 41 | async def test_get_starlette_app(mock_create_sse, mcp_server): 42 | """Test getting the Starlette app for the MCP server.""" 43 | # Set up the mock 44 | mock_app = MagicMock() 45 | mock_create_sse.return_value = mock_app 46 | 47 | # Reset the cached app to force a new creation 48 | mcp_server._starlette_app = None 49 | 50 | # Get the Starlette app 51 | app = mcp_server.get_starlette_app() 52 | 53 | # Verify tools were registered 54 | assert mcp_server.tools_registered is True 55 | 56 | # Verify create_sse_server was called with the MCP server 57 | > mock_create_sse.assert_called_once_with(mcp_server.mcp_server) 58 | 59 | tests/components/test_sse_components.py:178: 60 | _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 61 | 62 | self = <MagicMock name='create_sse_server' id='5349118976'> 63 | args = (<mcp.server.fastmcp.server.FastMCP object at 0x13ed24410>,), kwargs = {} 64 | msg = "Expected 'create_sse_server' to be called once. Called 0 times." 
65 | 66 | def assert_called_once_with(self, /, *args, **kwargs): 67 | """assert that the mock was called exactly once and that that call was 68 | with the specified arguments.""" 69 | if not self.call_count == 1: 70 | msg = ("Expected '%s' to be called once. Called %s times.%s" 71 | % (self._mock_name or 'mock', 72 | self.call_count, 73 | self._calls_repr())) 74 | > raise AssertionError(msg) 75 | E AssertionError: Expected 'create_sse_server' to be called once. Called 0 times. 76 | 77 | /opt/homebrew/Cellar/[email protected]/3.13.2/Frameworks/Python.framework/Versions/3.13/lib/python3.13/unittest/mock.py:988: AssertionError 78 | ---------------------------- Captured stdout setup ----------------------------- 79 | {"event": "MCP Codebase Insight server initialized", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.688819Z"} 80 | ------------------------------ Captured log setup ------------------------------ 81 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "MCP Codebase Insight server initialized", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.688819Z"} 82 | ----------------------------- Captured stdout call ----------------------------- 83 | {"event": "Registering tools with MCP server", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.693189Z"} 84 | {"event": "Some critical dependencies are not available: task_manager", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.693272Z"} 85 | {"event": "Tools requiring these dependencies will not be registered", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.693321Z"} 86 | {"event": "MCP tools registration completed", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.697672Z"} 87 | {"event": "Initializing SSE transport with endpoint: /sse", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.697772Z"} 88 | {"event": "Created SSE server with routes:", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.698263Z"} 89 | {"event": "Route: /health, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.698395Z"} 90 | {"event": "Route: /sse, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.698465Z"} 91 | {"event": "Route: /message, methods: {'POST'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.698514Z"} 92 | ------------------------------ Captured log call ------------------------------- 93 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Registering tools with MCP server", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.693189Z"} 94 | WARNING src.mcp_codebase_insight.core.sse:logger.py:75 {"event": "Some critical dependencies are not available: task_manager", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.693272Z"} 95 | WARNING src.mcp_codebase_insight.core.sse:logger.py:75 {"event": "Tools requiring these dependencies will not be registered", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": 
"2025-04-18T06:51:43.693321Z"} 96 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "MCP tools registration completed", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.697672Z"} 97 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Initializing SSE transport with endpoint: /sse", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.697772Z"} 98 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Created SSE server with routes:", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.698263Z"} 99 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Route: /health, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.698395Z"} 100 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Route: /sse, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.698465Z"} 101 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Route: /message, methods: {'POST'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.698514Z"} 102 | ____________________________ test_create_sse_server ____________________________ 103 | 104 | mock_starlette = <MagicMock name='Starlette' id='5349123680'> 105 | mock_transport = <MagicMock name='CodebaseInsightSseTransport' id='5349125024'> 106 | 107 | @patch('mcp_codebase_insight.core.sse.CodebaseInsightSseTransport') 108 | @patch('mcp_codebase_insight.core.sse.Starlette') 109 | async def test_create_sse_server(mock_starlette, mock_transport): 110 | """Test creating the SSE server.""" 111 | # Set up mocks 112 | mock_mcp = MagicMock(spec=FastMCP) 113 | mock_transport_instance = MagicMock() 114 | mock_transport.return_value = mock_transport_instance 115 | mock_app = MagicMock() 116 | mock_starlette.return_value = mock_app 117 | 118 | # Create the SSE server 119 | app = create_sse_server(mock_mcp) 120 | 121 | # Verify CodebaseInsightSseTransport was initialized correctly 122 | > mock_transport.assert_called_once_with("/sse") 123 | 124 | tests/components/test_sse_components.py:199: 125 | _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 126 | 127 | self = <MagicMock name='CodebaseInsightSseTransport' id='5349125024'> 128 | args = ('/sse',), kwargs = {} 129 | msg = "Expected 'CodebaseInsightSseTransport' to be called once. Called 0 times." 130 | 131 | def assert_called_once_with(self, /, *args, **kwargs): 132 | """assert that the mock was called exactly once and that that call was 133 | with the specified arguments.""" 134 | if not self.call_count == 1: 135 | msg = ("Expected '%s' to be called once. Called %s times.%s" 136 | % (self._mock_name or 'mock', 137 | self.call_count, 138 | self._calls_repr())) 139 | > raise AssertionError(msg) 140 | E AssertionError: Expected 'CodebaseInsightSseTransport' to be called once. Called 0 times. 
141 | 142 | /opt/homebrew/Cellar/[email protected]/3.13.2/Frameworks/Python.framework/Versions/3.13/lib/python3.13/unittest/mock.py:988: AssertionError 143 | ----------------------------- Captured stdout call ----------------------------- 144 | {"event": "Initializing SSE transport with endpoint: /sse", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754343Z"} 145 | {"event": "Created SSE server with routes:", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754481Z"} 146 | {"event": "Route: /health, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754566Z"} 147 | {"event": "Route: /sse, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754606Z"} 148 | {"event": "Route: /message, methods: {'POST'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754640Z"} 149 | ------------------------------ Captured log call ------------------------------- 150 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Initializing SSE transport with endpoint: /sse", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754343Z"} 151 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Created SSE server with routes:", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754481Z"} 152 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Route: /health, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754566Z"} 153 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Route: /sse, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754606Z"} 154 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Route: /message, methods: {'POST'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.754640Z"} 155 | ______________________________ test_adr_list_tool ______________________________ 156 | 157 | mcp_server = <src.mcp_codebase_insight.core.sse.MCP_CodebaseInsightServer object at 0x13ed7ef90> 158 | 159 | async def test_adr_list_tool(mcp_server): 160 | """Test the ADR list tool.""" 161 | # Make sure tools are registered 162 | if not mcp_server.tools_registered: 163 | mcp_server.register_tools() 164 | 165 | # Mock the FastMCP add_tool method to capture calls 166 | with patch.object(mcp_server.mcp_server, 'add_tool') as mock_add_tool: 167 | # Re-register the ADR list tool 168 | mcp_server._register_adr() 169 | 170 | # Verify tool was registered with correct parameters 171 | mock_add_tool.assert_called_once() 172 | args = mock_add_tool.call_args[0] 173 | > assert args[0] == "list-adrs" # Tool name 174 | E IndexError: tuple index out of range 175 | 176 | tests/components/test_sse_components.py:319: IndexError 177 | ---------------------------- Captured stdout setup ----------------------------- 178 | {"event": "MCP Codebase Insight server initialized", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.796820Z"} 179 | ------------------------------ Captured log setup ------------------------------ 180 | INFO 
src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "MCP Codebase Insight server initialized", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.796820Z"} 181 | ----------------------------- Captured stdout call ----------------------------- 182 | {"event": "Registering tools with MCP server", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.797106Z"} 183 | {"event": "Some critical dependencies are not available: task_manager", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.797158Z"} 184 | {"event": "Tools requiring these dependencies will not be registered", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.797197Z"} 185 | {"event": "MCP tools registration completed", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.799588Z"} 186 | ------------------------------ Captured log call ------------------------------- 187 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Registering tools with MCP server", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.797106Z"} 188 | WARNING src.mcp_codebase_insight.core.sse:logger.py:75 {"event": "Some critical dependencies are not available: task_manager", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.797158Z"} 189 | WARNING src.mcp_codebase_insight.core.sse:logger.py:75 {"event": "Tools requiring these dependencies will not be registered", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.797197Z"} 190 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "MCP tools registration completed", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.799588Z"} 191 | ____________________________ test_task_status_tool _____________________________ 192 | 193 | mcp_server = <src.mcp_codebase_insight.core.sse.MCP_CodebaseInsightServer object at 0x13ef72030> 194 | 195 | async def test_task_status_tool(mcp_server): 196 | """Test the task status tool.""" 197 | # Make sure tools are registered 198 | if not mcp_server.tools_registered: 199 | mcp_server.register_tools() 200 | 201 | # Mock the FastMCP add_tool method to capture calls 202 | with patch.object(mcp_server.mcp_server, 'add_tool') as mock_add_tool: 203 | # Re-register the task status tool 204 | mcp_server._register_task() 205 | 206 | # Verify tool was registered with correct parameters 207 | mock_add_tool.assert_called_once() 208 | args = mock_add_tool.call_args[0] 209 | > assert args[0] == "get-task-status" # Tool name 210 | E IndexError: tuple index out of range 211 | 212 | tests/components/test_sse_components.py:338: IndexError 213 | ---------------------------- Captured stdout setup ----------------------------- 214 | {"event": "MCP Codebase Insight server initialized", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.806759Z"} 215 | ------------------------------ Captured log setup ------------------------------ 216 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "MCP Codebase Insight server initialized", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.806759Z"} 217 | ----------------------------- Captured stdout call 
----------------------------- 218 | {"event": "Registering tools with MCP server", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.807096Z"} 219 | {"event": "Some critical dependencies are not available: task_manager", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.807156Z"} 220 | {"event": "Tools requiring these dependencies will not be registered", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.807197Z"} 221 | {"event": "MCP tools registration completed", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.810043Z"} 222 | ------------------------------ Captured log call ------------------------------- 223 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Registering tools with MCP server", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.807096Z"} 224 | WARNING src.mcp_codebase_insight.core.sse:logger.py:75 {"event": "Some critical dependencies are not available: task_manager", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.807156Z"} 225 | WARNING src.mcp_codebase_insight.core.sse:logger.py:75 {"event": "Tools requiring these dependencies will not be registered", "logger": "src.mcp_codebase_insight.core.sse", "level": "warning", "timestamp": "2025-04-18T06:51:43.807197Z"} 226 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "MCP tools registration completed", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.810043Z"} 227 | ___________________________ test_sse_handle_connect ____________________________ 228 | 229 | mock_starlette = <MagicMock name='Starlette' id='5349128384'> 230 | mock_transport = <MagicMock name='SseServerTransport' id='5349128720'> 231 | 232 | @patch('mcp_codebase_insight.core.sse.SseServerTransport') 233 | @patch('mcp_codebase_insight.core.sse.Starlette') 234 | async def test_sse_handle_connect(mock_starlette, mock_transport): 235 | """Test the SSE connection handling functionality.""" 236 | # Set up mocks 237 | mock_transport_instance = MagicMock() 238 | mock_transport.return_value = mock_transport_instance 239 | 240 | mock_mcp = MagicMock(spec=FastMCP) 241 | # For MCP v1.5.0, create a mock run method instead of initialization options 242 | mock_mcp.run = AsyncMock() 243 | 244 | mock_request = MagicMock() 245 | mock_request.client = "127.0.0.1" 246 | mock_request.scope = {"type": "http"} 247 | 248 | # Mock the transport's connect_sse method 249 | mock_streams = (AsyncMock(), AsyncMock()) 250 | mock_cm = MagicMock() 251 | mock_cm.__aenter__ = AsyncMock(return_value=mock_streams) 252 | mock_cm.__aexit__ = AsyncMock() 253 | mock_transport_instance.connect_sse.return_value = mock_cm 254 | 255 | # Create a mock handler and add it to our mock app instance 256 | handle_sse = AsyncMock() 257 | mock_app = MagicMock() 258 | mock_starlette.return_value = mock_app 259 | 260 | # Set up a mock route that we can access 261 | mock_route = MagicMock() 262 | mock_route.path = "/sse/" 263 | mock_route.endpoint = handle_sse 264 | mock_app.routes = [mock_route] 265 | 266 | # Create the SSE server 267 | app = create_sse_server(mock_mcp) 268 | 269 | # Extract the actual handler from the route configuration 270 | > routes_kwarg = mock_starlette.call_args.kwargs.get('routes', []) 271 | E AttributeError: 
'NoneType' object has no attribute 'kwargs' 272 | 273 | tests/components/test_sse_components.py:381: AttributeError 274 | ----------------------------- Captured stdout call ----------------------------- 275 | {"event": "Initializing SSE transport with endpoint: /sse", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817486Z"} 276 | {"event": "Created SSE server with routes:", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817591Z"} 277 | {"event": "Route: /health, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817674Z"} 278 | {"event": "Route: /sse, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817714Z"} 279 | {"event": "Route: /message, methods: {'POST'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817749Z"} 280 | ------------------------------ Captured log call ------------------------------- 281 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Initializing SSE transport with endpoint: /sse", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817486Z"} 282 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Created SSE server with routes:", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817591Z"} 283 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Route: /health, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817674Z"} 284 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Route: /sse, methods: {'HEAD', 'GET'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817714Z"} 285 | INFO src.mcp_codebase_insight.core.sse:logger.py:68 {"event": "Route: /message, methods: {'POST'}", "logger": "src.mcp_codebase_insight.core.sse", "level": "info", "timestamp": "2025-04-18T06:51:43.817749Z"} 286 | --------------------------- Captured stdout teardown --------------------------- 287 | Cleaning up test collection: test_collection_a41f92f0 288 | HTTP Request: DELETE http://localhost:6333/collections/test_collection_a41f92f0 "HTTP/1.1 200 OK" 289 | Found 0 server states at end of session 290 | ---------------------------- Captured log teardown ----------------------------- 291 | INFO conftest:conftest.py:169 Cleaning up test collection: test_collection_a41f92f0 292 | INFO httpx:_client.py:1025 HTTP Request: DELETE http://localhost:6333/collections/test_collection_a41f92f0 "HTTP/1.1 200 OK" 293 | INFO conftest:conftest.py:530 Found 0 server states at end of session 294 | 295 | ---------- coverage: platform darwin, python 3.13.2-final-0 ---------- 296 | Name Stmts Miss Branch BrPart Cover Missing 297 | ----------------------------------------------------------------------------------------------- 298 | src/mcp_codebase_insight/__init__.py 3 0 0 0 100% 299 | src/mcp_codebase_insight/__main__.py 28 28 0 0 0% 3-76 300 | src/mcp_codebase_insight/asgi.py 5 5 0 0 0% 3-11 301 | src/mcp_codebase_insight/core/__init__.py 2 0 0 0 100% 302 | src/mcp_codebase_insight/core/adr.py 127 50 26 5 54% 75-111, 118-134, 186, 202, 204->206, 207, 209, 220-227 303 | src/mcp_codebase_insight/core/cache.py 168 42 68 26 68% 33, 36, 42->exit, 
70-71, 77-78, 90, 97->exit, 102-103, 109, 124-125, 142-143, 160-161, 167-169, 173-176, 181, 187, 193, 199, 205, 217, 220, 225, 228->exit, 234, 236->238, 238->exit, 243-249, 254, 258, 261->265, 265->270, 267-268, 274 304 | src/mcp_codebase_insight/core/component_status.py 8 0 0 0 100% 305 | src/mcp_codebase_insight/core/config.py 63 23 14 4 60% 38, 44-45, 47-51, 64-67, 91-105, 109, 117, 121-122 306 | src/mcp_codebase_insight/core/debug.py 122 69 34 0 34% 58-78, 82-97, 122-128, 138-153, 161-168, 172-205 307 | src/mcp_codebase_insight/core/di.py 99 62 14 0 33% 40, 53-76, 80-82, 86-97, 101-106, 110-112, 116-120, 124-132, 136-144, 148-156, 160-169 308 | src/mcp_codebase_insight/core/documentation.py 165 111 52 1 25% 53-77, 84-100, 134, 150-167, 175-189, 201-214, 228-316 309 | src/mcp_codebase_insight/core/embeddings.py 77 28 18 3 61% 29->exit, 48-58, 79-83, 88, 104-106, 114-128, 132 310 | src/mcp_codebase_insight/core/errors.py 96 27 2 0 70% 55-58, 62, 77, 88, 99, 110, 121, 132, 143, 154, 165, 176, 187, 198, 209, 220, 231, 242, 253, 264, 275, 279-282 311 | src/mcp_codebase_insight/core/health.py 140 58 26 8 54% 52-71, 75-98, 111, 113, 128, 146, 156-162, 168->178, 170-171, 180-181, 190-191, 215-216, 232-233, 235-236, 259-260, 262-263 312 | src/mcp_codebase_insight/core/knowledge.py 253 100 74 25 55% 95, 105->109, 114, 119-124, 129->exit, 131-138, 143->exit, 145-151, 155, 167, 170->175, 172-173, 208->223, 230, 250, 252->254, 254->256, 257, 258->260, 261, 263, 265, 270->285, 298, 303, 305, 307, 320->318, 335-351, 361-379, 404-421, 432-445, 457-470, 479-488, 496-503, 507-514, 518-524 313 | src/mcp_codebase_insight/core/metrics.py 108 41 38 11 58% 43, 47, 58-59, 62-65, 70, 74, 80-83, 89-100, 111, 122, 127-128, 138, 145, 151, 153, 165-183 314 | src/mcp_codebase_insight/core/prompts.py 72 72 16 0 0% 3-262 315 | src/mcp_codebase_insight/core/sse.py 220 116 40 9 46% 29-37, 62-108, 130-141, 153-154, 162, 171-178, 186-188, 202-207, 239, 280-285, 293, 302-303, 315->321, 330-331, 338-339, 343-344, 349-380, 393-394, 398-419, 432-433, 437-458, 471-472, 476-483, 502->504 316 | src/mcp_codebase_insight/core/state.py 168 120 54 0 22% 48-53, 63-77, 84-93, 97-98, 102, 106-144, 148, 161-162, 167, 171, 175, 179, 183-335 317 | src/mcp_codebase_insight/core/task_tracker.py 48 28 12 0 33% 29-37, 45-52, 60-78, 86, 94, 102, 106-107 318 | src/mcp_codebase_insight/core/tasks.py 259 172 74 1 26% 89-113, 117-134, 138-140, 144-162, 203, 217-233, 237-245, 254-264, 268-318, 323-341, 349-357, 363-377, 384-397, 404-415, 422-432, 439-462 319 | src/mcp_codebase_insight/core/vector_store.py 177 73 26 5 58% 62->67, 78->93, 84-90, 99-100, 119-122, 127-129, 145-146, 158-159, 164-165, 170-184, 200-201, 233-235, 264-266, 270, 290, 327-393, 411 320 | src/mcp_codebase_insight/models.py 18 0 0 0 100% 321 | src/mcp_codebase_insight/server.py 630 536 128 0 12% 55-109, 121-138, 142-1491, 1549-1550, 1554-1561, 1585-1590, 1595, 1599-1616, 1620-1622, 1626, 1638-1664, 1668-1688 322 | src/mcp_codebase_insight/server_test_isolation.py 48 38 18 0 15% 31-39, 44-99 323 | src/mcp_codebase_insight/utils/__init__.py 2 0 0 0 100% 324 | src/mcp_codebase_insight/utils/logger.py 29 5 0 0 83% 52-53, 82, 89, 97 325 | src/mcp_codebase_insight/version.py 14 14 2 0 0% 3-22 326 | ----------------------------------------------------------------------------------------------- 327 | TOTAL 3149 1818 736 98 38% 328 | 329 | =========================== short test summary info ============================ 330 | FAILED 
tests/components/test_sse_components.py::test_get_starlette_app - AssertionError: Expected 'create_sse_server' to be called once. Called 0 times. 331 | FAILED tests/components/test_sse_components.py::test_create_sse_server - AssertionError: Expected 'CodebaseInsightSseTransport' to be called once. Called 0 times. 332 | FAILED tests/components/test_sse_components.py::test_adr_list_tool - IndexError: tuple index out of range 333 | FAILED tests/components/test_sse_components.py::test_task_status_tool - IndexError: tuple index out of range 334 | FAILED tests/components/test_sse_components.py::test_sse_handle_connect - AttributeError: 'NoneType' object has no attribute 'kwargs' 335 | !!!!!!!!!!!!!!!!!!!!!!!!!! stopping after 5 failures !!!!!!!!!!!!!!!!!!!!!!!!!!! 336 | ================== 5 failed, 18 passed, 34 warnings in 7.50s =================== 337 | ``` -------------------------------------------------------------------------------- /scripts/verify_build.py: -------------------------------------------------------------------------------- ```python 1 | #!/usr/bin/env python 2 | """ 3 | Automated End-to-End Build Verification Script 4 | 5 | This script automates the process of verifying an end-to-end build by: 6 | 1. Triggering the build process 7 | 2. Gathering verification criteria from the vector database 8 | 3. Analyzing build results against success criteria 9 | 4. Contextual verification using the vector database 10 | 5. Determining build status and generating a report 11 | """ 12 | 13 | import os 14 | import sys 15 | import json 16 | import logging 17 | import asyncio 18 | import argparse 19 | import subprocess 20 | from datetime import datetime 21 | from pathlib import Path 22 | from typing import Dict, List, Any, Optional, Tuple 23 | import uuid 24 | 25 | from qdrant_client import QdrantClient 26 | from qdrant_client.http.models import Filter, FieldCondition, MatchValue 27 | 28 | # Add the project root to the Python path 29 | sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) 30 | 31 | from src.mcp_codebase_insight.core.vector_store import VectorStore, SearchResult 32 | from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding 33 | from src.mcp_codebase_insight.core.config import ServerConfig 34 | 35 | # Configure logging 36 | logging.basicConfig( 37 | level=logging.INFO, 38 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', 39 | handlers=[ 40 | logging.StreamHandler(), 41 | logging.FileHandler(Path('logs/build_verification.log')) 42 | ] 43 | ) 44 | logger = logging.getLogger('build_verification') 45 | 46 | class BuildVerifier: 47 | """Automated build verification system.""" 48 | 49 | def __init__(self, config_path: Optional[str] = None): 50 | """Initialize the build verifier. 51 | 52 | Args: 53 | config_path: Path to the configuration file (optional) 54 | """ 55 | self.config = self._load_config(config_path) 56 | self.vector_store = None 57 | self.embedder = None 58 | self.build_output = "" 59 | self.build_logs = [] 60 | self.success_criteria = [] 61 | self.build_start_time = None 62 | self.build_end_time = None 63 | self.test_results = {} 64 | self.critical_components = [] 65 | self.dependency_map = {} 66 | 67 | def _load_config(self, config_path: Optional[str]) -> Dict[str, Any]: 68 | """Load configuration from file or environment variables. 
69 | 70 | Args: 71 | config_path: Path to the configuration file 72 | 73 | Returns: 74 | Configuration dictionary 75 | """ 76 | config = { 77 | 'qdrant_url': os.environ.get('QDRANT_URL', 'http://localhost:6333'), 78 | 'qdrant_api_key': os.environ.get('QDRANT_API_KEY', ''), 79 | 'collection_name': os.environ.get('COLLECTION_NAME', 'mcp-codebase-insight'), 80 | 'embedding_model': os.environ.get('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2'), 81 | 'build_command': os.environ.get('BUILD_COMMAND', 'make build'), 82 | 'test_command': os.environ.get('TEST_COMMAND', 'make test'), 83 | 'success_criteria': { 84 | 'min_test_coverage': float(os.environ.get('MIN_TEST_COVERAGE', '80.0')), 85 | 'max_allowed_failures': int(os.environ.get('MAX_ALLOWED_FAILURES', '0')), 86 | 'critical_modules': os.environ.get('CRITICAL_MODULES', '').split(','), 87 | 'performance_threshold_ms': int(os.environ.get('PERFORMANCE_THRESHOLD_MS', '500')) 88 | } 89 | } 90 | 91 | # Override with config file if provided 92 | if config_path: 93 | try: 94 | with open(config_path, 'r') as f: 95 | file_config = json.load(f) 96 | config.update(file_config) 97 | except Exception as e: 98 | logger.error(f"Failed to load config from {config_path}: {e}") 99 | 100 | return config 101 | 102 | async def initialize(self): 103 | """Initialize the build verifier.""" 104 | logger.info("Initializing build verifier...") 105 | 106 | # Initialize embedder if not already initialized 107 | if self.embedder is None or not getattr(self.embedder, 'initialized', False): 108 | logger.info("Initializing embedder...") 109 | self.embedder = SentenceTransformerEmbedding(model_name=self.config['embedding_model']) 110 | await self.embedder.initialize() 111 | else: 112 | logger.info("Using pre-initialized embedder") 113 | 114 | # Initialize vector store 115 | logger.info(f"Connecting to vector store at {self.config['qdrant_url']}...") 116 | self.vector_store = VectorStore( 117 | url=self.config['qdrant_url'], 118 | embedder=self.embedder, 119 | collection_name=self.config['collection_name'], 120 | api_key=self.config['qdrant_api_key'], 121 | vector_name="default" # Specify a vector name for the collection 122 | ) 123 | await self.vector_store.initialize() 124 | 125 | # Load dependency map from vector database 126 | await self._load_dependency_map() 127 | 128 | # Load critical components 129 | await self._load_critical_components() 130 | 131 | logger.info("Build verifier initialized successfully") 132 | 133 | async def _load_dependency_map(self): 134 | """Load dependency map from vector database.""" 135 | logger.info("Loading dependency map from vector database...") 136 | 137 | # Query for dependency information 138 | dependencies = await self.vector_store.search( 139 | text="dependency map between components", 140 | filter_conditions={"must": [{"key": "type", "match": {"value": "architecture"}}]}, 141 | limit=10 142 | ) 143 | 144 | if dependencies: 145 | for result in dependencies: 146 | if "dependencies" in result.metadata: 147 | self.dependency_map.update(result.metadata["dependencies"]) 148 | 149 | if not self.dependency_map: 150 | # Try to load from file as fallback 151 | try: 152 | with open('dependency_map.txt', 'r') as f: 153 | for line in f: 154 | if '->' in line: 155 | source, target = line.strip().split('->') 156 | source = source.strip() 157 | target = target.strip() 158 | if source not in self.dependency_map: 159 | self.dependency_map[source] = [] 160 | self.dependency_map[source].append(target) 161 | except FileNotFoundError: 162 | 
logger.warning("Dependency map file not found") 163 | 164 | logger.info(f"Loaded dependency map with {len(self.dependency_map)} entries") 165 | 166 | async def _load_critical_components(self): 167 | """Load critical components from vector database or config.""" 168 | logger.info("Loading critical components...") 169 | 170 | # Load from vector database 171 | critical_components = await self.vector_store.search( 172 | text="critical system components", 173 | filter_conditions={"must": [{"key": "type", "match": {"value": "architecture"}}]}, 174 | limit=5 175 | ) 176 | 177 | if critical_components: 178 | for result in critical_components: 179 | if "critical_components" in result.metadata: 180 | # Extend the list instead of updating 181 | self.critical_components.extend(result.metadata["critical_components"]) 182 | 183 | # Add from config as fallback 184 | config_critical = self.config.get('success_criteria', {}).get('critical_modules', []) 185 | if config_critical: 186 | self.critical_components.extend(config_critical) 187 | 188 | # Remove duplicates while preserving order 189 | self.critical_components = list(dict.fromkeys(self.critical_components)) 190 | 191 | logger.info(f"Loaded {len(self.critical_components)} critical components") 192 | 193 | async def trigger_build(self) -> bool: 194 | """Trigger the end-to-end build process. 195 | 196 | Returns: 197 | True if build command executed successfully, False otherwise 198 | """ 199 | logger.info("Triggering end-to-end build...") 200 | self.build_start_time = datetime.now() 201 | 202 | try: 203 | # Execute build command 204 | logger.info(f"Running build command: {self.config['build_command']}") 205 | build_process = subprocess.Popen( 206 | self.config['build_command'], 207 | shell=True, 208 | stdout=subprocess.PIPE, 209 | stderr=subprocess.PIPE, 210 | text=True 211 | ) 212 | 213 | stdout, stderr = build_process.communicate() 214 | self.build_output = stdout 215 | 216 | # Store build logs 217 | self.build_logs = [line for line in stdout.split('\n') if line.strip()] 218 | if stderr: 219 | self.build_logs.extend([f"ERROR: {line}" for line in stderr.split('\n') if line.strip()]) 220 | 221 | build_success = build_process.returncode == 0 222 | build_status = "SUCCESS" if build_success else "FAILURE" 223 | logger.info(f"Build {build_status} (exit code: {build_process.returncode})") 224 | 225 | self.build_end_time = datetime.now() 226 | return build_success 227 | 228 | except Exception as e: 229 | logger.error(f"Failed to execute build command: {e}") 230 | self.build_end_time = datetime.now() 231 | self.build_logs.append(f"ERROR: Failed to execute build command: {e}") 232 | return False 233 | 234 | async def run_tests(self) -> bool: 235 | """Run the test suite. 
236 | 237 | Returns: 238 | True if tests passed successfully, False otherwise 239 | """ 240 | logger.info("Running tests...") 241 | 242 | try: 243 | # Execute test command 244 | logger.info(f"Running test command: {self.config['test_command']}") 245 | test_process = subprocess.Popen( 246 | self.config['test_command'], 247 | shell=True, 248 | stdout=subprocess.PIPE, 249 | stderr=subprocess.PIPE, 250 | text=True 251 | ) 252 | 253 | stdout, stderr = test_process.communicate() 254 | 255 | # Parse and store test results 256 | self._parse_test_results(stdout) 257 | 258 | # Store test logs 259 | self.build_logs.extend([line for line in stdout.split('\n') if line.strip()]) 260 | if stderr: 261 | self.build_logs.extend([f"ERROR: {line}" for line in stderr.split('\n') if line.strip()]) 262 | 263 | tests_success = test_process.returncode == 0 264 | test_status = "SUCCESS" if tests_success else "FAILURE" 265 | logger.info(f"Tests {test_status} (exit code: {test_process.returncode})") 266 | 267 | return tests_success 268 | 269 | except Exception as e: 270 | logger.error(f"Failed to execute test command: {e}") 271 | self.build_logs.append(f"ERROR: Failed to execute test command: {e}") 272 | return False 273 | 274 | def _parse_test_results(self, test_output: str): 275 | """Parse test results from test output. 276 | 277 | Args: 278 | test_output: Output from the test command 279 | """ 280 | # Initialize test summary 281 | self.test_results = { 282 | "total": 0, 283 | "passed": 0, 284 | "failed": 0, 285 | "skipped": 0, 286 | "coverage": 0.0, 287 | "duration_ms": 0, 288 | "failures": [] 289 | } 290 | 291 | # Parse pytest output 292 | for line in test_output.split('\n'): 293 | # Count total tests 294 | if "collected " in line: 295 | try: 296 | total_part = line.split("collected ")[1].split()[0] 297 | self.test_results["total"] = int(total_part) 298 | except (IndexError, ValueError): 299 | pass 300 | 301 | # Parse test failures - extract just the test path and name 302 | if "FAILED " in line: 303 | # Full line format is typically like "......FAILED tests/test_module.py::test_function [70%]" 304 | # Extract just the "FAILED tests/test_module.py::test_function" part 305 | try: 306 | failure_part = line.split("FAILED ")[1].split("[")[0].strip() 307 | failure = f"FAILED {failure_part}" 308 | self.test_results["failures"].append(failure) 309 | self.test_results["failed"] += 1 310 | except (IndexError, ValueError): 311 | # If splitting fails, add the whole line as a fallback 312 | self.test_results["failures"].append(line.strip()) 313 | self.test_results["failed"] += 1 314 | 315 | # Check for coverage percentage in the TOTAL line 316 | if "TOTAL" in line and "%" in line: 317 | try: 318 | # Extract coverage from line like "TOTAL 600 100 83%" 319 | parts = line.split() 320 | for i, part in enumerate(parts): 321 | if "%" in part: 322 | coverage_percent = part.replace("%", "").strip() 323 | self.test_results["coverage"] = float(coverage_percent) 324 | break 325 | except (IndexError, ValueError): 326 | pass 327 | 328 | # Calculate passed tests - if we have total but no failed or skipped, 329 | # assume all tests passed 330 | if self.test_results["total"] > 0: 331 | self.test_results["passed"] = self.test_results["total"] - self.test_results.get("failed", 0) - self.test_results.get("skipped", 0) 332 | 333 | logger.info(f"Parsed test results: {self.test_results['passed']}/{self.test_results['total']} tests passed, " 334 | f"{self.test_results['coverage']}% coverage") 335 | 336 | async def 
gather_verification_criteria(self): 337 | """Gather verification criteria from the vector database.""" 338 | logger.info("Gathering verification criteria...") 339 | 340 | # Query for success criteria 341 | results = await self.vector_store.search( 342 | text="build verification success criteria", 343 | filter_conditions={"must": [{"key": "type", "match": {"value": "build_verification"}}]}, 344 | limit=5 345 | ) 346 | 347 | if results: 348 | criteria = [] 349 | for result in results: 350 | if "criteria" in result.metadata: 351 | criteria.extend(result.metadata["criteria"]) 352 | 353 | if criteria: 354 | self.success_criteria = criteria 355 | logger.info(f"Loaded {len(criteria)} success criteria from vector database") 356 | return 357 | 358 | # Use default criteria if none found in the vector database 359 | logger.info("Using default success criteria") 360 | self.success_criteria = [ 361 | f"All tests must pass (maximum {self.config['success_criteria']['max_allowed_failures']} failures allowed)", 362 | f"Test coverage must be at least {self.config['success_criteria']['min_test_coverage']}%", 363 | "Build process must complete without errors", 364 | f"Critical modules ({', '.join(self.critical_components)}) must pass all tests", 365 | f"Performance tests must complete within {self.config['success_criteria']['performance_threshold_ms']}ms" 366 | ] 367 | 368 | def _detect_build_success(self) -> bool: 369 | """Detect if the build was successful based on build logs. 370 | 371 | Returns: 372 | bool: True if build succeeded, False otherwise 373 | """ 374 | # Check logs for serious build errors 375 | for log in self.build_logs: 376 | if log.startswith("ERROR: Build failed") or "BUILD FAILED" in log.upper(): 377 | logger.info("Detected build failure in logs") 378 | return False 379 | 380 | # Consider build successful if no serious errors found 381 | return True 382 | 383 | async def analyze_build_results(self) -> Tuple[bool, Dict[str, Any]]: 384 | """Analyze build results against success criteria. 
385 | 386 | Returns: 387 | Tuple of (build_passed, results_dict) 388 | """ 389 | logger.info("Analyzing build results...") 390 | 391 | # Initialize analysis results 392 | results = { 393 | "build_success": False, 394 | "tests_success": False, 395 | "coverage_success": False, 396 | "critical_modules_success": False, 397 | "performance_success": False, 398 | "overall_success": False, 399 | "criteria_results": {}, 400 | "failure_analysis": [], 401 | } 402 | 403 | # Check if the build was successful 404 | results["build_success"] = self._detect_build_success() 405 | 406 | # Check test results 407 | max_failures = self.config['success_criteria']['max_allowed_failures'] 408 | results["tests_success"] = self.test_results.get("failed", 0) <= max_failures 409 | 410 | # Check coverage 411 | min_coverage = self.config['success_criteria']['min_test_coverage'] 412 | current_coverage = self.test_results.get("coverage", 0.0) 413 | 414 | # For development purposes, we might want to temporarily ignore coverage requirements 415 | # if there are tests passing but coverage reporting is not working properly 416 | if self.test_results.get("total", 0) > 0 and self.test_results.get("passed", 0) > 0: 417 | # If tests are passing but coverage is 0, assume coverage tool issues and pass this check 418 | results["coverage_success"] = current_coverage >= min_coverage 419 | else: 420 | results["coverage_success"] = current_coverage >= min_coverage 421 | 422 | # Check critical modules 423 | critical_module_failures = [] 424 | for failure in self.test_results.get("failures", []): 425 | for module in self.critical_components: 426 | if module in failure: 427 | critical_module_failures.append(failure) 428 | break 429 | 430 | results["critical_modules_success"] = len(critical_module_failures) == 0 431 | if not results["critical_modules_success"]: 432 | results["failure_analysis"].append({ 433 | "type": "critical_module_failure", 434 | "description": f"Failures in critical modules: {len(critical_module_failures)}", 435 | "details": critical_module_failures 436 | }) 437 | 438 | # Check performance (if available) 439 | performance_threshold = self.config['success_criteria']['performance_threshold_ms'] 440 | current_performance = self.test_results.get("duration_ms", 0) 441 | if current_performance > 0: # Only check if we have performance data 442 | results["performance_success"] = current_performance <= performance_threshold 443 | if not results["performance_success"]: 444 | results["failure_analysis"].append({ 445 | "type": "performance_issue", 446 | "description": f"Performance threshold exceeded: {current_performance}ms > {performance_threshold}ms", 447 | "details": f"Tests took {current_performance}ms, threshold is {performance_threshold}ms" 448 | }) 449 | else: 450 | # No performance data available, assume success 451 | results["performance_success"] = True 452 | 453 | # Evaluate each criterion 454 | for criterion in self.success_criteria: 455 | criterion_result = { 456 | "criterion": criterion, 457 | "passed": False, 458 | "details": "" 459 | } 460 | 461 | if "All tests must pass" in criterion: 462 | criterion_result["passed"] = results["tests_success"] 463 | criterion_result["details"] = ( 464 | f"{self.test_results.get('passed', 0)}/{self.test_results.get('total', 0)} tests passed, " 465 | f"{self.test_results.get('failed', 0)} failed" 466 | ) 467 | 468 | elif "coverage" in criterion.lower(): 469 | criterion_result["passed"] = results["coverage_success"] 470 | 471 | if self.test_results.get("total", 0) > 0 and 
self.test_results.get("passed", 0) > 0 and current_coverage == 0.0: 472 | criterion_result["details"] = ( 473 | f"Coverage tool may not be working correctly. {self.test_results.get('passed', 0)} tests passing, ignoring coverage requirement during development." 474 | ) 475 | else: 476 | criterion_result["details"] = ( 477 | f"Coverage: {current_coverage}%, required: {min_coverage}%" 478 | ) 479 | 480 | elif "build process" in criterion.lower(): 481 | criterion_result["passed"] = results["build_success"] 482 | criterion_result["details"] = "Build completed successfully" if results["build_success"] else "Build errors detected" 483 | 484 | elif "critical modules" in criterion.lower(): 485 | criterion_result["passed"] = results["critical_modules_success"] 486 | criterion_result["details"] = ( 487 | "All critical modules passed tests" if results["critical_modules_success"] 488 | else f"{len(critical_module_failures)} failures in critical modules" 489 | ) 490 | 491 | elif "performance" in criterion.lower(): 492 | criterion_result["passed"] = results["performance_success"] 493 | if current_performance > 0: 494 | criterion_result["details"] = ( 495 | f"Performance: {current_performance}ms, threshold: {performance_threshold}ms" 496 | ) 497 | else: 498 | criterion_result["details"] = "No performance data available" 499 | 500 | results["criteria_results"][criterion] = criterion_result 501 | 502 | # Determine overall success 503 | results["overall_success"] = all([ 504 | results["build_success"], 505 | results["tests_success"], 506 | results["coverage_success"], 507 | results["critical_modules_success"], 508 | results["performance_success"] 509 | ]) 510 | 511 | logger.info(f"Build analysis complete: {'PASS' if results['overall_success'] else 'FAIL'}") 512 | return results["overall_success"], results 513 | 514 | async def contextual_verification(self, analysis_results: Dict[str, Any]) -> Dict[str, Any]: 515 | """Perform contextual verification using the vector database. 
516 | 517 | Args: 518 | analysis_results: Results from the build analysis 519 | 520 | Returns: 521 | Updated analysis results with contextual verification 522 | """ 523 | logger.info("Performing contextual verification...") 524 | 525 | # Only perform detailed analysis if there are failures 526 | if analysis_results["overall_success"]: 527 | logger.info("Build successful, skipping detailed contextual verification") 528 | return analysis_results 529 | 530 | # Identify failed tests 531 | failed_tests = self.test_results.get("failures", []) 532 | 533 | if not failed_tests: 534 | logger.info("No test failures to analyze") 535 | return analysis_results 536 | 537 | logger.info(f"Analyzing {len(failed_tests)} test failures...") 538 | 539 | # Initialize contextual verification results 540 | contextual_results = [] 541 | 542 | # Analyze each failure 543 | for failure in failed_tests: 544 | # Extract module name from failure 545 | module_name = self._extract_module_from_failure(failure) 546 | 547 | if not module_name: 548 | continue 549 | 550 | # Get dependencies for the module 551 | dependencies = self.dependency_map.get(module_name, []) 552 | 553 | # Query vector database for relevant information 554 | query = f"common issues and solutions for {module_name} failures" 555 | results = await self.vector_store.search( 556 | text=query, 557 | filter_conditions={"must": [{"key": "type", "match": {"value": "troubleshooting"}}]}, 558 | limit=3 559 | ) 560 | 561 | failure_analysis = { 562 | "module": module_name, 563 | "failure": failure, 564 | "dependencies": dependencies, 565 | "potential_causes": [], 566 | "recommended_actions": [] 567 | } 568 | 569 | if results: 570 | for result in results: 571 | if "potential_causes" in result.metadata: 572 | failure_analysis["potential_causes"].extend(result.metadata["potential_causes"]) 573 | if "recommended_actions" in result.metadata: 574 | failure_analysis["recommended_actions"].extend(result.metadata["recommended_actions"]) 575 | 576 | # If no specific guidance found, provide general advice 577 | if not failure_analysis["potential_causes"]: 578 | failure_analysis["potential_causes"] = [ 579 | f"Recent changes to {module_name}", 580 | f"Changes in dependencies: {', '.join(dependencies)}", 581 | "Integration issues between components" 582 | ] 583 | 584 | if not failure_analysis["recommended_actions"]: 585 | failure_analysis["recommended_actions"] = [ 586 | f"Review recent changes to {module_name}", 587 | f"Check integration with dependencies: {', '.join(dependencies)}", 588 | "Run tests in isolation to identify specific failure points" 589 | ] 590 | 591 | contextual_results.append(failure_analysis) 592 | 593 | # Add contextual verification results to analysis 594 | analysis_results["contextual_verification"] = contextual_results 595 | 596 | logger.info(f"Contextual verification complete: {len(contextual_results)} failures analyzed") 597 | return analysis_results 598 | 599 | def _extract_module_from_failure(self, failure: str) -> Optional[str]: 600 | """Extract module name from a test failure. 
601 | 602 | Args: 603 | failure: Test failure message 604 | 605 | Returns: 606 | Module name or None if not found 607 | """ 608 | # This is a simple implementation that assumes the module name 609 | # is in the format: "FAILED path/to/module.py::test_function" 610 | 611 | if "FAILED " in failure: 612 | try: 613 | path = failure.split("FAILED ")[1].split("::")[0] 614 | # Convert path to module name 615 | module_name = path.replace("/", ".").replace(".py", "") 616 | return module_name 617 | except IndexError: 618 | pass 619 | 620 | return None 621 | 622 | def generate_report(self, results: Dict[str, Any]) -> Dict[str, Any]: 623 | """Generate a build verification report. 624 | 625 | Args: 626 | results: Analysis results 627 | 628 | Returns: 629 | Report dictionary 630 | """ 631 | logger.info("Generating build verification report...") 632 | 633 | build_duration = (self.build_end_time - self.build_start_time).total_seconds() if self.build_end_time else 0 634 | 635 | report = { 636 | "build_verification_report": { 637 | "timestamp": datetime.now().isoformat(), 638 | "build_info": { 639 | "start_time": self.build_start_time.isoformat() if self.build_start_time else None, 640 | "end_time": self.build_end_time.isoformat() if self.build_end_time else None, 641 | "duration_seconds": build_duration, 642 | "build_command": self.config["build_command"], 643 | "test_command": self.config["test_command"] 644 | }, 645 | "test_summary": { 646 | "total": self.test_results.get("total", 0), 647 | "passed": self.test_results.get("passed", 0), 648 | "failed": self.test_results.get("failed", 0), 649 | "skipped": self.test_results.get("skipped", 0), 650 | "coverage": self.test_results.get("coverage", 0.0) 651 | }, 652 | "verification_results": { 653 | "overall_status": "PASS" if results["overall_success"] else "FAIL", 654 | "criteria_results": results["criteria_results"] 655 | } 656 | } 657 | } 658 | 659 | # Add failure analysis if available 660 | if "failure_analysis" in results and results["failure_analysis"]: 661 | report["build_verification_report"]["failure_analysis"] = results["failure_analysis"] 662 | 663 | # Add contextual verification if available 664 | if "contextual_verification" in results: 665 | report["build_verification_report"]["contextual_verification"] = results["contextual_verification"] 666 | 667 | # Add a summary field for quick review 668 | criteria_count = len(results["criteria_results"]) 669 | passed_criteria = sum(1 for c in results["criteria_results"].values() if c["passed"]) 670 | report["build_verification_report"]["summary"] = ( 671 | f"Build verification: {report['build_verification_report']['verification_results']['overall_status']}. " 672 | f"{passed_criteria}/{criteria_count} criteria passed. " 673 | f"{self.test_results.get('passed', 0)}/{self.test_results.get('total', 0)} tests passed with " 674 | f"{self.test_results.get('coverage', 0.0)}% coverage." 675 | ) 676 | 677 | logger.info(f"Report generated: {report['build_verification_report']['summary']}") 678 | return report 679 | 680 | async def save_report(self, report: Dict[str, Any], report_file: str = "build_verification_report.json"): 681 | """Save build verification report to file and vector database. 
682 | 683 | Args: 684 | report: Build verification report 685 | report_file: Path to save the report file 686 | """ 687 | logger.info(f"Saving report to {report_file}...") 688 | 689 | # Save to file 690 | try: 691 | with open(report_file, 'w') as f: 692 | json.dump(report, f, indent=2) 693 | logger.info(f"Report saved to {report_file}") 694 | except Exception as e: 695 | logger.error(f"Failed to save report to file: {e}") 696 | 697 | # Store in vector database 698 | try: 699 | # Extract report data for metadata 700 | build_info = report.get("build_verification_report", {}) 701 | verification_results = build_info.get("verification_results", {}) 702 | overall_status = verification_results.get("overall_status", "UNKNOWN") 703 | timestamp = build_info.get("timestamp", datetime.now().isoformat()) 704 | 705 | # Generate a consistent ID with prefix 706 | report_id = f"build-verification-{uuid.uuid4()}" 707 | report_text = json.dumps(report) 708 | 709 | # Store report in vector database with separate parameters instead of using id 710 | # This avoids the 'tuple' object has no attribute 'id' error 711 | await self.vector_store.add_vector( 712 | text=report_text, 713 | metadata={ 714 | "id": report_id, # Include ID in metadata 715 | "type": "build_verification_report", 716 | "timestamp": timestamp, 717 | "overall_status": overall_status 718 | } 719 | ) 720 | logger.info(f"Report stored in vector database with ID: {report_id}") 721 | except Exception as e: 722 | logger.error(f"Failed to store report in vector database: {e}") 723 | 724 | async def cleanup(self): 725 | """Clean up resources.""" 726 | logger.info("Cleaning up resources...") 727 | 728 | if self.vector_store: 729 | await self.vector_store.cleanup() 730 | await self.vector_store.close() 731 | 732 | async def verify_build(self, output_file: str = "logs/build_verification_report.json") -> bool: 733 | """Verify the build process and generate a report. 
734 | 735 | Args: 736 | output_file: Output file path for the report 737 | 738 | Returns: 739 | True if build verification passed, False otherwise 740 | """ 741 | try: 742 | # Initialize components 743 | await self.initialize() 744 | 745 | # Trigger build 746 | build_success = await self.trigger_build() 747 | 748 | # Run tests if build was successful 749 | if build_success: 750 | await self.run_tests() 751 | 752 | # Gather verification criteria 753 | await self.gather_verification_criteria() 754 | 755 | # Analyze build results 756 | success, results = await self.analyze_build_results() 757 | 758 | # Perform contextual verification 759 | results = await self.contextual_verification(results) 760 | 761 | # Generate report 762 | report = self.generate_report(results) 763 | 764 | # Save report 765 | await self.save_report(report, output_file) 766 | 767 | return success 768 | 769 | except Exception as e: 770 | logger.error(f"Build verification failed: {e}") 771 | return False 772 | 773 | finally: 774 | # Clean up resources 775 | await self.cleanup() 776 | 777 | async def main(): 778 | """Main function.""" 779 | parser = argparse.ArgumentParser(description="Build Verification Script") 780 | parser.add_argument("--config", help="Path to configuration file") 781 | parser.add_argument("--output", default="logs/build_verification_report.json", help="Output file path for report") 782 | args = parser.parse_args() 783 | 784 | # Create logs directory if it doesn't exist 785 | os.makedirs("logs", exist_ok=True) 786 | 787 | verifier = BuildVerifier(args.config) 788 | success = await verifier.verify_build(args.output) 789 | 790 | print(f"\nBuild verification {'PASSED' if success else 'FAILED'}") 791 | print(f"Report saved to {args.output}") 792 | 793 | # Exit with status code based on verification result 794 | sys.exit(0 if success else 1) 795 | 796 | if __name__ == "__main__": 797 | asyncio.run(main()) ``` -------------------------------------------------------------------------------- /prepare_codebase.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/bin/bash 2 | set -x # Enable debugging 3 | 4 | # Set output files 5 | STRUCTURE_FILE="codebase_structure.txt" 6 | DEPENDENCY_MAP_FILE="dependency_map.txt" 7 | DOC_NODES_FILE="documentation_nodes.txt" 8 | USER_DOC_MAP_FILE="user_doc_mapping.txt" 9 | VECTOR_GRAPH_FILE="vector_relationship_graph.txt" 10 | LLM_PROMPT_FILE="llm_prompts.txt" 11 | SYSTEM_ARCHITECTURE_FILE="system_architecture.txt" 12 | TECHNICAL_DEBT_FILE="technical_debt.txt" 13 | README_CONTEXT_FILE="readme_context.txt" 14 | 15 | # Create prompts directory structure 16 | PROMPTS_DIR="./prompts" 17 | mkdir -p "$PROMPTS_DIR"/{system,technical,dependency,custom} 18 | 19 | # Check if project_environment.txt exists and source it if it does 20 | if [ -f "project_environment.txt" ]; then 21 | echo "Loading environment information from project_environment.txt..." 22 | # Source the environment info 23 | source project_environment.txt 24 | else 25 | echo "No project_environment.txt found. Running capture_env_info.sh to generate it..." 26 | # Check if capture_env_info.sh exists and run it 27 | if [ -f "./capture_env_info.sh" ]; then 28 | bash ./capture_env_info.sh 29 | source project_environment.txt 30 | else 31 | echo "Warning: capture_env_info.sh not found. Environment information will be limited." 
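        # Editor's note (illustrative, not part of the original script): the
        # environment variables this script reads later (OPERATING_SYSTEM,
        # PYTHON_VERSION, NODE_VERSION, ANSIBLE_VERSION) are expected to come
        # from project_environment.txt. If capture_env_info.sh is unavailable,
        # a minimal hand-written fallback could look like:
        #   OPERATING_SYSTEM="$(uname -s) $(uname -r)"
        #   PYTHON_VERSION="$(python3 --version 2>/dev/null | awk '{print $2}')"
        #   NODE_VERSION="$(node --version 2>/dev/null)"
        # The exact format produced by capture_env_info.sh may differ.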
32 | fi 33 | fi 34 | 35 | # Define directories to ignore for the file search 36 | IGNORE_DIRS=("node_modules" ".venv" "venv" "vendor" "test_env") 37 | 38 | # Create directory for module summaries 39 | mkdir -p module_summaries 40 | 41 | # Construct the 'find' command to exclude ignored directories 42 | FIND_CMD="find ." 43 | for dir in "${IGNORE_DIRS[@]}"; do 44 | FIND_CMD+=" -path ./$dir -prune -o" 45 | done 46 | FIND_CMD+=" -type f \( -name '*.js' -o -name '*.jsx' -o -name '*.ts' -o -name '*.tsx' -o -name '*.py' -o -name '*.md' -o -name '*.mdx' -o -name '*.sh' -o -name '*.yaml' -o -name '*.yml' -o -name '*.json' -o -name '*.cfg' -o -name '*.conf' -o -name '*.tfvars' -o -name '*.tf' \) -print | sort" 47 | 48 | # Debugging: Show the generated find command 49 | echo "Executing command: $FIND_CMD" 50 | 51 | # Execute and store results 52 | eval "$FIND_CMD" > "$STRUCTURE_FILE" 53 | 54 | # Check if files were captured 55 | if [ ! -s "$STRUCTURE_FILE" ]; then 56 | echo "⚠️ Warning: No matching files found. Please check directory paths." 57 | fi 58 | 59 | # Count the number of files found. 60 | FILE_COUNT=$(wc -l < "$STRUCTURE_FILE") 61 | 62 | # 1. Code Dependency Graph 63 | echo "Generating code dependency graph..." 64 | echo "# Code Dependency Graph" > "$DEPENDENCY_MAP_FILE" 65 | echo "# Generated on $(date)" >> "$DEPENDENCY_MAP_FILE" 66 | echo "# Environment: $OPERATING_SYSTEM" >> "$DEPENDENCY_MAP_FILE" 67 | if [ -n "$PYTHON_VERSION" ]; then 68 | echo "# Python: $PYTHON_VERSION" >> "$DEPENDENCY_MAP_FILE" 69 | fi 70 | if [ -n "$NODE_VERSION" ]; then 71 | echo "# Node.js: $NODE_VERSION" >> "$DEPENDENCY_MAP_FILE" 72 | fi 73 | if [ -n "$ANSIBLE_VERSION" ]; then 74 | echo "# Ansible: $ANSIBLE_VERSION" >> "$DEPENDENCY_MAP_FILE" 75 | fi 76 | echo "" >> "$DEPENDENCY_MAP_FILE" 77 | 78 | # Function to extract dependencies, tailored for graph generation 79 | extract_dependencies() { 80 | local file="$1" 81 | local file_type="$2" 82 | 83 | # Add "./" prefix for consistency 84 | local current_dir="./" 85 | file="${current_dir}${file#./}" 86 | 87 | if [[ "$file_type" == "python" ]]; then 88 | while IFS= read -r line; do 89 | if [[ "$line" =~ ^(import|from) ]]; then 90 | line=$(echo "$line" | sed 's/#.*$//' | tr -s ' ') 91 | if [[ "$line" != *'"'* && "$line" != *"'"* ]]; then 92 | # Capture module/file being imported 93 | imported_module=$(echo "$line" | sed -e 's/import //g' -e 's/from //g' -e 's/ .*//g' | tr -d ' ') 94 | echo "$file -> $imported_module (Python)" >> "$DEPENDENCY_MAP_FILE" 95 | fi 96 | fi 97 | done < "$file" 98 | elif [[ "$file_type" == "js" || "$file_type" == "jsx" || "$file_type" == "ts" || "$file_type" == "tsx" ]]; then 99 | while IFS= read -r line; do 100 | if [[ "$line" =~ (import|require) ]]; then 101 | line=$(echo "$line" | sed 's/\/\/.*$//' | sed 's/\/\*.*\*\///g' | tr -s ' ') 102 | if [[ "$line" != *'"'* && "$line" != *"'"* ]]; then 103 | # Capture module/file being imported 104 | imported_module=$(echo "$line" | sed -n "s/.*\(import\|require\).*\(('|\"\)\([^'\"]*\)\('|\"\).*/\3/p" | tr -d ' ') 105 | echo "$file -> $imported_module (JavaScript/TypeScript)" >> "$DEPENDENCY_MAP_FILE" 106 | fi 107 | fi 108 | done < "$file" 109 | elif [[ "$file_type" == "sh" ]]; then 110 | while IFS= read -r line; do 111 | if [[ "$line" =~ ^(source|.) 
]]; then 112 | line=$(echo "$line" | sed 's/#.*$//' | tr -s ' ') 113 | if [[ "$line" != *'"'* && "$line" != *"'"* ]]; then 114 | imported_module=$(echo "$line" | sed -n "s/source \([^ ]*\).*/\1/p" | tr -d ' ') 115 | echo "$file -> $imported_module (Shell)" >> "$DEPENDENCY_MAP_FILE" 116 | fi 117 | fi 118 | done < "$file" 119 | elif [[ "$file_type" == "yaml" || "$file_type" == "yml" ]]; then 120 | while IFS= read -r line; do 121 | if [[ "$line" =~ ^(\ *[a-zA-Z0-9_-]+\:) ]]; then 122 | echo "$file -> $line (YAML)" >> "$DEPENDENCY_MAP_FILE" 123 | fi 124 | done < "$file" 125 | elif [[ "$file_type" == "tf" ]]; then 126 | while IFS= read -r line; do 127 | if [[ "$line" =~ resource|module|data ]]; then 128 | line=$(echo "$line" | sed 's/#.*$//' | tr -s ' ') 129 | echo "$file -> $line (Terraform)" >> "$DEPENDENCY_MAP_FILE" 130 | fi 131 | done < "$file" 132 | fi 133 | } 134 | 135 | # Process each file from the structure file 136 | while IFS= read -r file; do 137 | if [ -f "$file" ]; then 138 | extension="${file##*.}" 139 | case "$extension" in 140 | py) file_type="python";; 141 | js|jsx) file_type="js";; 142 | ts|tsx) file_type="ts";; 143 | sh) file_type="sh";; 144 | yaml) file_type="yaml";; 145 | yml) file_type="yml";; 146 | *) file_type="other";; 147 | esac 148 | if [[ "$file_type" == "python" || "$file_type" == "js" || "$file_type" == "ts" || "$file_type" == "sh" || "$file_type" == "yaml" || "$file_type" == "yml" ]]; then 149 | extract_dependencies "$file" "$file_type" 150 | fi 151 | fi 152 | done < "$STRUCTURE_FILE" 153 | 154 | # 2. Documentation Linking 155 | echo "Generating documentation nodes..." 156 | echo "# Documentation Nodes" > "$DOC_NODES_FILE" 157 | 158 | # Function to extract function/class signatures (for documentation linking) 159 | extract_doc_nodes() { 160 | local file="$1" 161 | local file_type="$2" 162 | 163 | # Add "./" prefix for consistency 164 | local current_dir="./" 165 | file="${current_dir}${file#./}" 166 | 167 | if [[ "$file_type" == "python" ]]; then 168 | while IFS= read -r line; do 169 | if [[ "$line" =~ ^(def|class) ]]; then 170 | # Extract function/class name and signature 171 | signature=$(echo "$line" | sed 's/#.*$//' | tr -s ' ') 172 | echo "$file: $signature (Python)" >> "$DOC_NODES_FILE" 173 | fi 174 | done < "$file" 175 | elif [[ "$file_type" == "js" || "$file_type" == "jsx" || "$file_type" == "ts" || "$file_type" == "tsx" ]]; then 176 | while IFS= read -r line; do 177 | if [[ "$line" =~ ^(function|class) ]]; then 178 | signature=$(echo "$line" | sed 's/\/\/.*$//' | sed 's/\/\*.*\*\///g' | tr -s ' ') 179 | echo "$file: $signature (JavaScript/TypeScript)" >> "$DOC_NODES_FILE" 180 | fi 181 | done < "$file" 182 | elif [[ "$file_type" == "sh" ]]; then 183 | while IFS= read -r line; do 184 | if [[ "$line" =~ ^(function ) ]]; then 185 | signature=$(echo "$line" | sed 's/#.*$//' | tr -s ' ') 186 | echo "$file: $signature (Shell)" >> "$DOC_NODES_FILE" 187 | fi 188 | done < "$file" 189 | fi 190 | } 191 | 192 | # Process each file to extract documentation nodes 193 | while IFS= read -r file; do 194 | if [ -f "$file" ]; then 195 | extension="${file##*.}" 196 | case "$extension" in 197 | py) file_type="python";; 198 | js|jsx) file_type="js";; 199 | ts|tsx) file_type="ts";; 200 | sh) file_type="sh";; 201 | yaml) file_type="yaml";; 202 | yml) file_type="yml";; 203 | *) file_type="other";; 204 | esac 205 | if [[ "$file_type" == "python" || "$file_type" == "js" || "$file_type" == "ts" || "$file_type" == "sh" ]]; then 206 | extract_doc_nodes "$file" "$file_type" 207 | fi 208 
| fi 209 | done < "$STRUCTURE_FILE" 210 | 211 | # 3. User Documentation Mapping 212 | echo "Generating user documentation mapping..." 213 | echo "# User Documentation Mapping" > "$USER_DOC_MAP_FILE" 214 | 215 | # Function to map user documentation (Markdown files) to code elements. 216 | map_user_docs() { 217 | local file="$1" 218 | # Add "./" prefix for consistency 219 | local current_dir="./" 220 | file="${current_dir}${file#./}" 221 | 222 | # Very basic mapping: Look for code element names in Markdown 223 | if [[ "$file" =~ \.md$ || "$file" =~ \.mdx$ ]]; then # Only process Markdown files 224 | while IFS= read -r line; do 225 | # This is a simplified approach. A real tool would use AST parsing. 226 | if [[ "$line" =~ (def |class |function ) ]]; then # very rough 227 | echo "$file contains: $line" >> "$USER_DOC_MAP_FILE" 228 | fi 229 | done < "$file" 230 | fi 231 | } 232 | 233 | # Process each file to map user documentation 234 | while IFS= read -r file; do 235 | if [ -f "$file" ]; then 236 | extension="${file##*.}" 237 | case "$extension" in 238 | md|mdx) file_type="md";; 239 | *) file_type="other";; 240 | esac 241 | if [[ "$file_type" == "md" ]]; then 242 | map_user_docs "$file" >> "$USER_DOC_MAP_FILE" 243 | fi 244 | fi 245 | done < "$STRUCTURE_FILE" 246 | 247 | # Extract key information from README.md 248 | echo "Analyzing README.md for project context..." 249 | echo "# README.md Analysis" > "$README_CONTEXT_FILE" 250 | echo "# Generated on $(date)" >> "$README_CONTEXT_FILE" 251 | echo "" >> "$README_CONTEXT_FILE" 252 | 253 | if [ -f "README.md" ]; then 254 | # Extract project name and description 255 | echo "## Project Information" >> "$README_CONTEXT_FILE" 256 | # Look for a title (# Title) 257 | PROJECT_TITLE=$(grep "^# " README.md | head -1 | sed 's/^# //') 258 | echo "Project Title: $PROJECT_TITLE" >> "$README_CONTEXT_FILE" 259 | 260 | # Extract what appears to be a project description (first paragraph after title) 261 | PROJECT_DESCRIPTION=$(sed -n '/^# /,/^$/{/^# /d; /^$/d; p}' README.md | head -3) 262 | echo "Project Description: $PROJECT_DESCRIPTION" >> "$README_CONTEXT_FILE" 263 | 264 | # Look for architecture information 265 | echo -e "\n## Architecture Information" >> "$README_CONTEXT_FILE" 266 | grep -A 10 -i "architecture\|structure\|design\|overview" README.md >> "$README_CONTEXT_FILE" 2>/dev/null || echo "No explicit architecture information found." >> "$README_CONTEXT_FILE" 267 | 268 | # Extract documentation links 269 | echo -e "\n## Documentation Links" >> "$README_CONTEXT_FILE" 270 | grep -o "\[.*\](.*)" README.md | grep -i "doc\|guide\|tutorial\|wiki" >> "$README_CONTEXT_FILE" 2>/dev/null || echo "No documentation links found." >> "$README_CONTEXT_FILE" 271 | 272 | # Check for setup instructions 273 | echo -e "\n## Setup Instructions" >> "$README_CONTEXT_FILE" 274 | grep -A 15 -i "setup\|install\|getting started\|prerequisites" README.md >> "$README_CONTEXT_FILE" 2>/dev/null || echo "No setup instructions found." >> "$README_CONTEXT_FILE" 275 | 276 | # Prepare a summary for prompts 277 | README_SUMMARY=$(echo "$PROJECT_DESCRIPTION" | tr '\n' ' ' | cut -c 1-200) 278 | 279 | echo "README.md analysis saved to $README_CONTEXT_FILE" 280 | else 281 | echo "No README.md found at the root of the project." >> "$README_CONTEXT_FILE" 282 | # Try to find READMEs in subdirectories 283 | READMES=$(find . 
-name "README.md" -not -path "*/node_modules/*" -not -path "*/.git/*" -not -path "*/dist/*" -not -path "*/build/*") 284 | if [ -n "$READMES" ]; then 285 | echo "Found README.md files in subdirectories: $READMES" >> "$README_CONTEXT_FILE" 286 | # Process the first README found 287 | FIRST_README=$(echo "$READMES" | head -1) 288 | echo "Analyzing $FIRST_README as fallback..." >> "$README_CONTEXT_FILE" 289 | 290 | # Extract project name and description 291 | echo -e "\n## Project Information (from $FIRST_README)" >> "$README_CONTEXT_FILE" 292 | PROJECT_TITLE=$(grep "^# " "$FIRST_README" | head -1 | sed 's/^# //') 293 | echo "Project Title: $PROJECT_TITLE" >> "$README_CONTEXT_FILE" 294 | 295 | PROJECT_DESCRIPTION=$(sed -n '/^# /,/^$/{/^# /d; /^$/d; p}' "$FIRST_README" | head -3) 296 | echo "Project Description: $PROJECT_DESCRIPTION" >> "$README_CONTEXT_FILE" 297 | 298 | # Prepare a summary for prompts 299 | README_SUMMARY=$(echo "$PROJECT_DESCRIPTION" | tr '\n' ' ' | cut -c 1-200) 300 | else 301 | echo "No README.md files found in the project." >> "$README_CONTEXT_FILE" 302 | README_SUMMARY="No README.md found in the project." 303 | fi 304 | fi 305 | 306 | # Copy README context file to prompts directory 307 | cp "$README_CONTEXT_FILE" "$PROMPTS_DIR/system/" 308 | 309 | # NEW: System Architecture Analysis 310 | echo "Analyzing system architecture..." 311 | echo "# System Architecture Analysis" > "$SYSTEM_ARCHITECTURE_FILE" 312 | echo "# Generated on $(date)" >> "$SYSTEM_ARCHITECTURE_FILE" 313 | echo "# Environment: $OPERATING_SYSTEM" >> "$SYSTEM_ARCHITECTURE_FILE" 314 | echo "" >> "$SYSTEM_ARCHITECTURE_FILE" 315 | 316 | # Identify key system components based on directory structure and file types 317 | echo "## System Components" >> "$SYSTEM_ARCHITECTURE_FILE" 318 | 319 | # Count files by type to identify primary languages/frameworks 320 | echo "### Primary Languages/Frameworks" >> "$SYSTEM_ARCHITECTURE_FILE" 321 | echo "Counting files by extension to identify primary technologies..." >> "$SYSTEM_ARCHITECTURE_FILE" 322 | grep -o '\.[^./]*$' "$STRUCTURE_FILE" | sort | uniq -c | sort -nr >> "$SYSTEM_ARCHITECTURE_FILE" 323 | 324 | # Identify architectural patterns based on directory names and file content 325 | echo "" >> "$SYSTEM_ARCHITECTURE_FILE" 326 | echo "### Detected Architectural Patterns" >> "$SYSTEM_ARCHITECTURE_FILE" 327 | 328 | # Look for common architectural clues in directory names 329 | echo "Directory structure analysis:" >> "$SYSTEM_ARCHITECTURE_FILE" 330 | for pattern in "api" "service" "controller" "model" "view" "component" "middleware" "util" "helper" "config" "test" "frontend" "backend" "client" "server"; do 331 | count=$(find . -type d -name "*$pattern*" | wc -l) 332 | if [ "$count" -gt 0 ]; then 333 | echo "- Found $count directories matching pattern '$pattern'" >> "$SYSTEM_ARCHITECTURE_FILE" 334 | fi 335 | done 336 | 337 | # Check for deployment and infrastructure files 338 | echo "" >> "$SYSTEM_ARCHITECTURE_FILE" 339 | echo "### Infrastructure and Deployment" >> "$SYSTEM_ARCHITECTURE_FILE" 340 | for file in "Dockerfile" "docker-compose.yml" ".github/workflows" "Jenkinsfile" "terraform" "k8s" "helm"; do 341 | if [ -e "$file" ]; then 342 | echo "- Found $file" >> "$SYSTEM_ARCHITECTURE_FILE" 343 | fi 344 | done 345 | 346 | # NEW: Technical Debt Analysis 347 | echo "Gathering technical debt indicators..." 
348 | TECH_DEBT_DATA_FILE="technical_debt_data.txt" 349 | TECH_DEBT_PROMPT_FILE="$PROMPTS_DIR/technical/technical_debt_prompt.txt" 350 | echo "# Technical Debt Indicators" > "$TECH_DEBT_DATA_FILE" 351 | echo "# Generated on $(date)" >> "$TECH_DEBT_DATA_FILE" 352 | echo "" >> "$TECH_DEBT_DATA_FILE" 353 | 354 | # Count files by type for primary languages 355 | echo "## Primary Languages" >> "$TECH_DEBT_DATA_FILE" 356 | LANGUAGE_COUNTS=$(grep -o '\.[^./]*$' "$STRUCTURE_FILE" | sort | uniq -c | sort -nr) 357 | echo "$LANGUAGE_COUNTS" >> "$TECH_DEBT_DATA_FILE" 358 | PRIMARY_LANGUAGES=$(echo "$LANGUAGE_COUNTS" | head -5 | awk '{print $2}' | tr '\n' ', ' | sed 's/,$//' | sed 's/\.//') 359 | LANGUAGE_COUNT=$(echo "$LANGUAGE_COUNTS" | wc -l) 360 | 361 | # Look for code comments indicating technical debt 362 | echo -e "\n## TODO, FIXME, and HACK Comments" >> "$TECH_DEBT_DATA_FILE" 363 | TODO_COMMENTS=$(grep -r --include="*.py" --include="*.js" --include="*.jsx" --include="*.ts" --include="*.tsx" --include="*.sh" --include="*.yml" --include="*.yaml" --include="*.tf" "TODO\|FIXME\|HACK" . 2>/dev/null | grep -v "node_modules\|venv\|.git" | sort) 364 | TODO_COUNT=$(echo "$TODO_COMMENTS" | grep -v '^$' | wc -l) 365 | echo "Found $TODO_COUNT TODO/FIXME/HACK comments" >> "$TECH_DEBT_DATA_FILE" 366 | # Sample up to 10 TODO comments 367 | TODO_SAMPLES=$(echo "$TODO_COMMENTS" | head -10) 368 | echo "$TODO_SAMPLES" >> "$TECH_DEBT_DATA_FILE" 369 | 370 | # Check for deprecated dependencies if we have package.json or requirements.txt 371 | echo -e "\n## Dependency Analysis" >> "$TECH_DEBT_DATA_FILE" 372 | NODE_DEPS="" 373 | if [ -f "package.json" ]; then 374 | echo "### Node.js Dependencies" >> "$TECH_DEBT_DATA_FILE" 375 | NODE_DEPS=$(grep -A 100 "dependencies" package.json | grep -B 100 "}" | grep ":" | head -15) 376 | echo "$NODE_DEPS" >> "$TECH_DEBT_DATA_FILE" 377 | fi 378 | 379 | PYTHON_DEPS="" 380 | if [ -f "requirements.txt" ]; then 381 | echo -e "\n### Python Dependencies" >> "$TECH_DEBT_DATA_FILE" 382 | PYTHON_DEPS=$(cat requirements.txt | head -15) 383 | echo "$PYTHON_DEPS" >> "$TECH_DEBT_DATA_FILE" 384 | fi 385 | 386 | # Look for large files that might indicate complexity issues 387 | echo -e "\n## Potentially Complex Files (> 500 lines)" >> "$TECH_DEBT_DATA_FILE" 388 | LARGE_FILES=$(find . 
-type f \( -name "*.py" -o -name "*.js" -o -name "*.jsx" -o -name "*.ts" -o -name "*.tsx" \) -not -path "*/node_modules/*" -not -path "*/venv/*" -not -path "*/.git/*" -exec wc -l {} \; | awk '$1 > 500' | sort -nr) 389 | LARGE_FILES_COUNT=$(echo "$LARGE_FILES" | grep -v '^$' | wc -l) 390 | echo "Found $LARGE_FILES_COUNT large files (>500 lines)" >> "$TECH_DEBT_DATA_FILE" 391 | LARGE_FILES_SAMPLES=$(echo "$LARGE_FILES" | head -10) 392 | echo "$LARGE_FILES_SAMPLES" >> "$TECH_DEBT_DATA_FILE" 393 | 394 | # Check for potential circular dependencies 395 | echo -e "\n## Potential Circular Dependencies" >> "$TECH_DEBT_DATA_FILE" 396 | # This is a very basic check that could be improved 397 | if [ -f "$DEPENDENCY_MAP_FILE" ]; then 398 | DEPENDENCY_SAMPLES=$(grep " -> " "$DEPENDENCY_MAP_FILE" | head -15) 399 | IMPORT_COUNT=$(grep -c " -> " "$DEPENDENCY_MAP_FILE") 400 | # Find modules that are both imported and import others 401 | HIGH_COUPLING=$(grep " -> " "$DEPENDENCY_MAP_FILE" | awk '{print $1; print $3}' | sort | uniq -c | sort -nr | head -10) 402 | echo "Found $IMPORT_COUNT import relationships" >> "$TECH_DEBT_DATA_FILE" 403 | echo -e "\nHighly coupled components:" >> "$TECH_DEBT_DATA_FILE" 404 | echo "$HIGH_COUPLING" >> "$TECH_DEBT_DATA_FILE" 405 | fi 406 | 407 | # Now create the technical debt prompt for LLM 408 | echo "Generating technical debt analysis prompt for LLM..." 409 | 410 | cat > "$TECH_DEBT_PROMPT_FILE" << EOL 411 | # Technical Debt Analysis Prompt 412 | 413 | ## Context 414 | You are analyzing the technical debt in a codebase with the following characteristics: 415 | - ${FILE_COUNT} files across ${LANGUAGE_COUNT} languages/frameworks 416 | - Primary languages: ${PRIMARY_LANGUAGES} 417 | - Environment: ${OPERATING_SYSTEM:-Unknown OS}, Python ${PYTHON_VERSION:-Unknown}, Node.js ${NODE_VERSION:-Unknown} 418 | - Project summary: ${README_SUMMARY:-No project description available} 419 | 420 | ## Available Data 421 | The following data has been collected to assist your analysis: 422 | 1. TODO/FIXME/HACK comments (count: ${TODO_COUNT}) 423 | 2. Large files exceeding 500 lines (count: ${LARGE_FILES_COUNT}) 424 | 3. Dependency information (${IMPORT_COUNT} import relationships found) 425 | 4. Directory structure patterns and architectural indicators 426 | 427 | ## Sample Data Points 428 | ### TODO/FIXME Examples: 429 | ${TODO_SAMPLES} 430 | 431 | ### Large Files: 432 | ${LARGE_FILES_SAMPLES} 433 | 434 | ### Dependency Data: 435 | ${DEPENDENCY_SAMPLES} 436 | 437 | ### Highly Coupled Components: 438 | ${HIGH_COUPLING} 439 | 440 | ## Instructions 441 | Please analyze the technical debt in this codebase by: 442 | 443 | 1. **Categorizing the technical debt** into these types: 444 | - Code quality issues 445 | - Architectural problems 446 | - Outdated dependencies 447 | - Testing gaps 448 | - Documentation shortfalls 449 | 450 | 2. **Identifying potential root causes** of the technical debt: 451 | - Time pressure and deadlines 452 | - Knowledge gaps 453 | - Changing requirements 454 | - Architectural erosion over time 455 | - Legacy code integration 456 | 457 | 3. **Assessing the potential impact** of the technical debt: 458 | - On system stability 459 | - On maintainability 460 | - On performance 461 | - On security 462 | - On team productivity 463 | 464 | 4. 
**Recommending a prioritized remediation plan** that: 465 | - Addresses high-impact issues first 466 | - Considers interdependencies between components 467 | - Provides realistic, incremental steps 468 | - Balances short-term fixes with long-term improvements 469 | - Suggests preventative measures to avoid future debt 470 | 471 | 5. **Creating a high-level technical debt map** showing: 472 | - Which components contain the most concerning debt 473 | - How the debt in one area affects other parts of the system 474 | - Which areas would provide the highest ROI if addressed 475 | 476 | Please format your response as a structured technical debt analysis report with clear sections, actionable insights, and system-level thinking. 477 | EOL 478 | 479 | # Generate a minimal technical debt file that points to the prompt 480 | cat > "$TECHNICAL_DEBT_FILE" << EOL 481 | # Technical Debt Analysis 482 | # Generated on $(date) 483 | 484 | This file contains basic technical debt indicators. For a comprehensive analysis, 485 | copy the contents of "$TECH_DEBT_PROMPT_FILE" and submit it to an LLM like Claude, 486 | ChatGPT, or use it with Cursor's AI capabilities. 487 | 488 | ## Summary of Technical Debt Indicators 489 | - TODO/FIXME/HACK comments: ${TODO_COUNT} 490 | - Large files (>500 lines): ${LARGE_FILES_COUNT} 491 | - Import relationships: ${IMPORT_COUNT:-Unknown} 492 | - Primary languages: ${PRIMARY_LANGUAGES} 493 | 494 | For full data points, see: ${TECH_DEBT_DATA_FILE} 495 | For LLM analysis prompt, see: ${TECH_DEBT_PROMPT_FILE} 496 | 497 | To get a complete analysis, run: 498 | cat ${TECH_DEBT_PROMPT_FILE} | pbcopy # On macOS 499 | # or 500 | cat ${TECH_DEBT_PROMPT_FILE} | xclip -selection clipboard # On Linux with xclip 501 | # Then paste into your preferred LLM interface 502 | EOL 503 | 504 | # Update project_environment.txt with technical debt indicators 505 | if [ -f "project_environment.txt" ]; then 506 | echo -e "\n# Technical Debt Indicators" >> project_environment.txt 507 | echo "TECH_DEBT_TODO_COUNT=\"$TODO_COUNT\"" >> project_environment.txt 508 | echo "TECH_DEBT_LARGE_FILES_COUNT=\"$LARGE_FILES_COUNT\"" >> project_environment.txt 509 | echo "TECH_DEBT_PROMPT_FILE=\"$TECH_DEBT_PROMPT_FILE\"" >> project_environment.txt 510 | echo "TECH_DEBT_DATA_FILE=\"$TECH_DEBT_DATA_FILE\"" >> project_environment.txt 511 | fi 512 | 513 | # Generate Dependency Analysis Prompt 514 | echo "Generating dependency analysis prompt for LLM..." 
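# Editor's note (illustrative, not part of the original script): the metrics
# computed below are plain fan-out/fan-in counts over the "source -> target"
# lines in dependency_map.txt. For example, the fan-in of a single module
# (hypothetical name) could be checked by hand with:
#   grep " -> src.mcp_codebase_insight.core.config" dependency_map.txt | wc -l
# while the awk/sort/uniq pipelines that follow rank the most coupled modules
# for inclusion in the dependency analysis prompt.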
DEPENDENCY_ANALYSIS_FILE="dependency_analysis.txt"
DEPENDENCY_PROMPT_FILE="$PROMPTS_DIR/dependency/dependency_analysis_prompt.txt"

# Get some key metrics for the prompt
MODULE_COUNT=$(grep " -> " "$DEPENDENCY_MAP_FILE" | awk '{print $1}' | sort | uniq | wc -l)
IMPORT_COUNT=$(grep -c " -> " "$DEPENDENCY_MAP_FILE")
# Find highly coupled modules
HIGH_COUPLING=$(grep " -> " "$DEPENDENCY_MAP_FILE" | awk '{print $1}' | sort | uniq -c | sort -nr | head -10)
# Find modules with most incoming dependencies
HIGH_INCOMING=$(grep " -> " "$DEPENDENCY_MAP_FILE" | awk '{print $3}' | sort | uniq -c | sort -nr | head -10)

cat > "$DEPENDENCY_PROMPT_FILE" << EOL
# Dependency Graph Analysis Prompt

## Context
You are analyzing the dependency structure in a codebase with the following characteristics:
- ${FILE_COUNT} files across ${LANGUAGE_COUNT} languages/frameworks
- ${MODULE_COUNT} modules with dependencies
- ${IMPORT_COUNT} total import relationships
- Primary languages: ${PRIMARY_LANGUAGES}
- Environment: ${OPERATING_SYSTEM:-Unknown OS}, Python ${PYTHON_VERSION:-Unknown}, Node.js ${NODE_VERSION:-Unknown}
- Project summary: ${README_SUMMARY:-No project description available}

## Available Data
The dependency map shows how modules depend on each other. Here are some key metrics:

### Modules with most outgoing dependencies (highest coupling):
${HIGH_COUPLING}

### Modules with most incoming dependencies (highest dependency):
${HIGH_INCOMING}

### Sample dependencies:
$(grep " -> " "$DEPENDENCY_MAP_FILE" | head -20)

## Instructions
Please analyze the dependency structure of this codebase by:

1. **Identifying problematic dependency patterns**:
   - Modules with excessive coupling (too many dependencies)
   - Core modules that too many other modules depend on (high risk)
   - Potential circular dependencies or dependency chains
   - Architectural layering violations (if detectable)

2. **Evaluating the modularity of the system**:
   - Is the codebase well-modularized or tightly coupled?
   - Are there clear boundaries between subsystems?
   - Does the dependency structure reflect good architecture?
   - Are there signs of "spaghetti code" in the dependencies?

3. **Recommending improvements to the dependency structure**:
   - Which modules should be refactored to reduce coupling?
   - How could dependencies be better organized?
   - Are there opportunities to introduce abstractions/interfaces?
   - What architectural patterns might help improve the structure?

4. **Creating a dependency health assessment**:
   - Rate the overall health of the dependency structure
   - Identify the highest priority areas for improvement
   - Suggest metrics to track dependency health over time
   - Estimate the long-term maintainability based on dependencies

Please format your response as a structured dependency analysis report with clear sections,
visualizations (described in text if needed), and specific, actionable recommendations.
EOL

# Generate a minimal dependency analysis file that points to the prompt
cat > "$DEPENDENCY_ANALYSIS_FILE" << EOL
# Dependency Analysis
# Generated on $(date)

This file contains basic dependency metrics. For a comprehensive analysis,
copy the contents of "$DEPENDENCY_PROMPT_FILE" and submit it to an LLM like Claude,
ChatGPT, or use it with Cursor's AI capabilities.

## Summary of Dependency Metrics
- Modules with dependencies: ${MODULE_COUNT}
- Import relationships: ${IMPORT_COUNT}
- Primary languages: ${PRIMARY_LANGUAGES}

For the dependency map, see: ${DEPENDENCY_MAP_FILE}
For LLM analysis prompt, see: ${DEPENDENCY_PROMPT_FILE}

To get a complete analysis, run:
cat ${DEPENDENCY_PROMPT_FILE} | pbcopy  # On macOS
# or
cat ${DEPENDENCY_PROMPT_FILE} | xclip -selection clipboard  # On Linux with xclip
# Then paste into your preferred LLM interface
EOL

# Update project_environment.txt with dependency analysis references
if [ -f "project_environment.txt" ]; then
  echo -e "\n# Dependency Analysis Information" >> project_environment.txt
  echo "DEPENDENCY_PROMPT_FILE=\"$DEPENDENCY_PROMPT_FILE\"" >> project_environment.txt
  echo "DEPENDENCY_ANALYSIS_FILE=\"$DEPENDENCY_ANALYSIS_FILE\"" >> project_environment.txt
  echo "MODULE_COUNT=\"$MODULE_COUNT\"" >> project_environment.txt
  echo "IMPORT_COUNT=\"$IMPORT_COUNT\"" >> project_environment.txt
fi

# Generate a meta-prompt to create custom analysis prompts
echo "Creating meta-prompt for generating custom analysis prompts..."
META_PROMPT_FILE="$PROMPTS_DIR/meta_prompt_generator.txt"

cat > "$META_PROMPT_FILE" << EOL
# Meta-Prompt: Generate Custom Codebase Analysis Prompts

## Context
You've been given information about a codebase with these characteristics:
- ${FILE_COUNT} files across ${LANGUAGE_COUNT} languages/frameworks
- Primary languages: ${PRIMARY_LANGUAGES}
- Environment: ${OPERATING_SYSTEM:-Unknown OS}, Python ${PYTHON_VERSION:-Unknown}, Node.js ${NODE_VERSION:-Unknown}
- Project summary: ${README_SUMMARY:-No project description available}
- Detected architectural patterns: $(grep "Found" "$SYSTEM_ARCHITECTURE_FILE" | head -5 | tr '\n' ', ' | sed 's/,$//')

## Task
Generate a specialized analysis prompt that will help developers understand and improve this codebase. The prompt should be tailored to this specific codebase's characteristics and the developer's goal.

## Developer's Goal
[REPLACE THIS WITH YOUR SPECIFIC GOAL, e.g., "Improve test coverage", "Refactor for better performance", "Prepare for cloud migration"]

## Instructions
1. Create a prompt that guides an LLM to analyze the codebase specifically for the stated goal
2. Include relevant context from the codebase metrics above
3. Structure the prompt with clear sections including:
   - Background information about the codebase
   - Specific questions to address about the goal
   - Instructions for formatting the response
4. Focus on systems thinking principles that consider the entire codebase, not just isolated components
5. Include specific metrics or artifacts the LLM should look for in its analysis

## Output
Provide the complete text of the new analysis prompt, ready to be saved to a file and used with an LLM.
EOL

echo "Meta-prompt generator created at $META_PROMPT_FILE"

# Create a README for the prompts directory
cat > "$PROMPTS_DIR/README.md" << EOL
# Analysis Prompts

This directory contains prompts for analyzing the codebase using LLMs:

- **system/**: Prompts related to overall system architecture
- **technical/**: Prompts for analyzing technical debt and code quality
- **dependency/**: Prompts for analyzing dependencies and module relationships
- **custom/**: Location for your custom analysis prompts

## Usage

1. Select a prompt relevant to your analysis needs
2. Copy its contents to your clipboard: \`cat prompts/technical/technical_debt_prompt.txt | pbcopy\`
3. Paste into an LLM like Claude or ChatGPT
4. Review the analysis and insights

## Creating Custom Prompts

Use the meta-prompt generator to create custom analysis prompts:
\`\`\`
cat prompts/meta_prompt_generator.txt | pbcopy
# Then paste into an LLM, replace the [GOAL] placeholder, and follow the instructions
\`\`\`

## Available Prompts

- **Meta-Prompt Generator**: Generate custom analysis prompts for specific goals
- **Technical Debt Analysis**: Analyze and prioritize technical debt in the codebase
- **Dependency Structure Analysis**: Evaluate modularity and identify problematic dependencies
- **System Architecture Analysis**: Understand overall system design and architecture
EOL

# Create .gitignore entry for the prompts directory
if [ -f ".gitignore" ]; then
  if ! grep -q "^prompts/" ".gitignore"; then
    echo "prompts/" >> ".gitignore"
    echo "Added prompts/ to .gitignore"
  fi
else
  echo "prompts/" > ".gitignore"
  echo "Created .gitignore with prompts/ entry"
fi

# Write the combined LLM prompts file into the system directory
LLM_PROMPT_FILE="$PROMPTS_DIR/system/llm_prompts.txt"

# 4. Vector Graph Generation (Modified to include system architecture insights)
echo "Generating vector relationship graph prompt..."
# Note: the heredoc below is unquoted so the ${...} file references expand;
# backticks are escaped so they appear literally in the generated prompt.
cat > "$LLM_PROMPT_FILE" << EOL
# LLM Prompts for Codebase Analysis

## 1. Code Dependency Graph Generation
Generate a code dependency graph using the following data:
- \`${STRUCTURE_FILE}\`: Lists all files.
- \`${DEPENDENCY_MAP_FILE}\`: Shows dependencies between files.

## 2. Documentation Linking Analysis
Analyze documentation links using:
- \`${STRUCTURE_FILE}\`: Lists all files.
- \`${DOC_NODES_FILE}\`: Lists code elements (functions, classes).
- \`${USER_DOC_MAP_FILE}\`: Maps documentation to code elements.

## 3. System Architecture Analysis
Apply systems thinking to analyze the application architecture using:
- \`${STRUCTURE_FILE}\`: Lists all files
- \`${DEPENDENCY_MAP_FILE}\`: Shows dependencies between files
- \`${SYSTEM_ARCHITECTURE_FILE}\`: System components and patterns analysis
- \`${TECH_DEBT_DATA_FILE}\`: Technical debt indicators

### Task:
Analyze the codebase as a complete system, including:
1. Identify system boundaries and integration points
2. Detect feedback loops and circular dependencies
3. Identify potential bottlenecks and single points of failure
4. Assess emergent behavior that may arise from component interactions
5. Analyze technical debt impact on overall system health

### Output Format:
Provide a systems thinking analysis that includes:
\`\`\`
{
  "system_boundaries": [
    {"name": "Frontend", "components": ["component1", "component2"]},
    {"name": "Backend", "components": ["component3", "component4"]},
    {"name": "Data Layer", "components": ["component5"]}
  ],
  "integration_points": [
    {"name": "API Gateway", "type": "external_boundary", "risk_level": "medium"},
    {"name": "Database Access", "type": "internal", "risk_level": "high"}
  ],
  "feedback_loops": [
    {"components": ["componentA", "componentB", "componentC"], "type": "circular_dependency", "impact": "high"}
  ],
  "bottlenecks": [
    {"component": "componentX", "reason": "High coupling with 15 other components", "impact": "critical"}
  ],
  "technical_debt_hotspots": [
    {"component": "legacy_module", "type": "obsolete_dependencies", "impact": "high", "remediation_cost": "medium"}
  ]
}
\`\`\`

## 4. Technical Debt Analysis
For a detailed technical debt analysis, use the prompt in \`${TECH_DEBT_PROMPT_FILE}\`.
This prompt will guide you through:
1. Categorizing technical debt types
2. Identifying root causes
3. Assessing impact on the system
4. Creating a prioritized remediation plan
5. Mapping debt across the system

## 5. Dependency Structure Analysis
For a detailed analysis of the dependency structure, use the prompt in \`${DEPENDENCY_PROMPT_FILE}\`.
This prompt will guide you through:
1. Identifying problematic dependency patterns
2. Evaluating system modularity
3. Recommending structural improvements
4. Creating a dependency health assessment
EOL

echo "Directory structure saved to $STRUCTURE_FILE."
echo "Code dependency graph data saved to $DEPENDENCY_MAP_FILE."
echo "Documentation nodes data saved to $DOC_NODES_FILE."
echo "User documentation mapping data saved to $USER_DOC_MAP_FILE."
echo "System architecture analysis saved to $SYSTEM_ARCHITECTURE_FILE."
echo "Technical debt data saved to $TECH_DEBT_DATA_FILE."
echo "Technical debt analysis prompt saved to $TECH_DEBT_PROMPT_FILE."
echo "Dependency analysis data saved to $DEPENDENCY_ANALYSIS_FILE."
echo "Dependency analysis prompt saved to $DEPENDENCY_PROMPT_FILE."
echo "README.md analysis saved to $README_CONTEXT_FILE."
echo "Meta-prompt generator saved to $META_PROMPT_FILE."
echo "Prompts directory created at $PROMPTS_DIR with README.md"
echo "LLM prompts saved to $LLM_PROMPT_FILE."
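
# project_environment.txt is appended as simple KEY="value" lines throughout this
# script, which keeps the recorded paths and metrics easy for other tooling to
# parse or source (assuming the values contain no unescaped quotes).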

# Update project_environment.txt with analysis results
if [ -f "project_environment.txt" ]; then
  echo -e "\n# Codebase Analysis Results" >> project_environment.txt
  echo "FILE_COUNT=\"$FILE_COUNT\"" >> project_environment.txt
  echo "SYSTEM_ARCHITECTURE_FILE=\"$SYSTEM_ARCHITECTURE_FILE\"" >> project_environment.txt
  echo "TECHNICAL_DEBT_FILE=\"$TECHNICAL_DEBT_FILE\"" >> project_environment.txt
  echo "DEPENDENCY_MAP_FILE=\"$DEPENDENCY_MAP_FILE\"" >> project_environment.txt
  echo "README_CONTEXT_FILE=\"$README_CONTEXT_FILE\"" >> project_environment.txt
  echo "PROMPTS_DIR=\"$PROMPTS_DIR\"" >> project_environment.txt

  # README.md context
  if [ -n "$PROJECT_TITLE" ]; then
    echo "PROJECT_TITLE=\"$PROJECT_TITLE\"" >> project_environment.txt
  fi
  if [ -n "$README_SUMMARY" ]; then
    echo "PROJECT_DESCRIPTION=\"$README_SUMMARY\"" >> project_environment.txt
  fi

  # Record the number of TODO/FIXME/HACK comments (computed earlier) as a technical debt indicator
  echo "TECHNICAL_DEBT_INDICATORS=\"${TODO_COUNT:-0}\"" >> project_environment.txt

  echo "Updated project_environment.txt with codebase analysis results."
fi

echo "✅ Codebase analysis complete!"
echo "📊 To use the analysis prompts with an LLM, see $PROMPTS_DIR/README.md"
```
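
The script's final step records the generated file paths in `project_environment.txt` as `KEY="value"` pairs. Below is a minimal follow-up sketch, a hypothetical helper that is not part of the repository, showing how those entries could be sourced to copy one of the generated prompts to the clipboard; it assumes the analysis script has already run and that the recorded values contain no unescaped quotes.

```bash
#!/usr/bin/env bash
# copy_prompt.sh — hypothetical helper; assumes prepare_codebase.sh has appended
# TECH_DEBT_PROMPT_FILE=... to project_environment.txt in the current directory.
set -euo pipefail

# The environment file is written as simple KEY="value" lines, so it can be sourced.
source ./project_environment.txt

PROMPT="${TECH_DEBT_PROMPT_FILE:-}"
if [ -z "$PROMPT" ] || [ ! -f "$PROMPT" ]; then
  echo "No technical debt prompt found; run the analysis script first." >&2
  exit 1
fi

# Prefer pbcopy on macOS, fall back to xclip on Linux, otherwise print to stdout.
if command -v pbcopy >/dev/null 2>&1; then
  pbcopy < "$PROMPT"
  echo "Prompt copied to clipboard (pbcopy)."
elif command -v xclip >/dev/null 2>&1; then
  xclip -selection clipboard < "$PROMPT"
  echo "Prompt copied to clipboard (xclip)."
else
  cat "$PROMPT"
fi
```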