This is page 5 of 8. Use http://codebase.md/tosin2013/mcp-codebase-insight?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .bumpversion.cfg
├── .codecov.yml
├── .compile-venv-py3.11
│   ├── bin
│   │   ├── activate
│   │   ├── activate.csh
│   │   ├── activate.fish
│   │   ├── Activate.ps1
│   │   ├── coverage
│   │   ├── coverage-3.11
│   │   ├── coverage3
│   │   ├── pip
│   │   ├── pip-compile
│   │   ├── pip-sync
│   │   ├── pip3
│   │   ├── pip3.11
│   │   ├── py.test
│   │   ├── pyproject-build
│   │   ├── pytest
│   │   ├── python
│   │   ├── python3
│   │   ├── python3.11
│   │   └── wheel
│   └── pyvenv.cfg
├── .env.example
├── .github
│   ├── agents
│   │   ├── DebugAgent.agent.md
│   │   ├── DocAgent.agent.md
│   │   ├── README.md
│   │   ├── TestAgent.agent.md
│   │   └── VectorStoreAgent.agent.md
│   ├── copilot-instructions.md
│   └── workflows
│       ├── build-verification.yml
│       ├── publish.yml
│       └── tdd-verification.yml
├── .gitignore
├── async_fixture_wrapper.py
├── CHANGELOG.md
├── CLAUDE.md
├── codebase_structure.txt
├── component_test_runner.py
├── CONTRIBUTING.md
├── core_workflows.txt
├── create_release_issues.sh
├── debug_tests.md
├── Dockerfile
├── docs
│   ├── adrs
│   │   └── 001_use_docker_for_qdrant.md
│   ├── api.md
│   ├── components
│   │   └── README.md
│   ├── cookbook.md
│   ├── development
│   │   ├── CODE_OF_CONDUCT.md
│   │   ├── CONTRIBUTING.md
│   │   └── README.md
│   ├── documentation_map.md
│   ├── documentation_summary.md
│   ├── features
│   │   ├── adr-management.md
│   │   ├── code-analysis.md
│   │   └── documentation.md
│   ├── getting-started
│   │   ├── configuration.md
│   │   ├── docker-setup.md
│   │   ├── installation.md
│   │   ├── qdrant_setup.md
│   │   └── quickstart.md
│   ├── qdrant_setup.md
│   ├── README.md
│   ├── SSE_INTEGRATION.md
│   ├── system_architecture
│   │   └── README.md
│   ├── templates
│   │   └── adr.md
│   ├── testing_guide.md
│   ├── troubleshooting
│   │   ├── common-issues.md
│   │   └── faq.md
│   ├── vector_store_best_practices.md
│   └── workflows
│       └── README.md
├── error_logs.txt
├── examples
│   └── use_with_claude.py
├── github-actions-documentation.md
├── Makefile
├── module_summaries
│   ├── backend_summary.txt
│   ├── database_summary.txt
│   └── frontend_summary.txt
├── output.txt
├── package-lock.json
├── package.json
├── PLAN.md
├── prepare_codebase.sh
├── PULL_REQUEST.md
├── pyproject.toml
├── pytest.ini
├── README.md
├── requirements-3.11.txt
├── requirements-3.11.txt.backup
├── requirements-dev.txt
├── requirements.in
├── requirements.txt
├── run_build_verification.sh
├── run_fixed_tests.sh
├── run_test_with_path_fix.sh
├── run_tests.py
├── scripts
│   ├── check_qdrant_health.sh
│   ├── compile_requirements.sh
│   ├── load_example_patterns.py
│   ├── macos_install.sh
│   ├── README.md
│   ├── setup_qdrant.sh
│   ├── start_mcp_server.sh
│   ├── store_code_relationships.py
│   ├── store_report_in_mcp.py
│   ├── validate_knowledge_base.py
│   ├── validate_poc.py
│   ├── validate_vector_store.py
│   └── verify_build.py
├── server.py
├── setup_qdrant_collection.py
├── setup.py
├── src
│   └── mcp_codebase_insight
│       ├── __init__.py
│       ├── __main__.py
│       ├── asgi.py
│       ├── core
│       │   ├── __init__.py
│       │   ├── adr.py
│       │   ├── cache.py
│       │   ├── component_status.py
│       │   ├── config.py
│       │   ├── debug.py
│       │   ├── di.py
│       │   ├── documentation.py
│       │   ├── embeddings.py
│       │   ├── errors.py
│       │   ├── health.py
│       │   ├── knowledge.py
│       │   ├── metrics.py
│       │   ├── prompts.py
│       │   ├── sse.py
│       │   ├── state.py
│       │   ├── task_tracker.py
│       │   ├── tasks.py
│       │   └── vector_store.py
│       ├── models.py
│       ├── server_test_isolation.py
│       ├── server.py
│       ├── utils
│       │   ├── __init__.py
│       │   └── logger.py
│       └── version.py
├── start-mcpserver.sh
├── summary_document.txt
├── system-architecture.md
├── system-card.yml
├── test_fix_helper.py
├── test_fixes.md
├── test_function.txt
├── test_imports.py
├── tests
│   ├── components
│   │   ├── conftest.py
│   │   ├── test_core_components.py
│   │   ├── test_embeddings.py
│   │   ├── test_knowledge_base.py
│   │   ├── test_sse_components.py
│   │   ├── test_stdio_components.py
│   │   ├── test_task_manager.py
│   │   └── test_vector_store.py
│   ├── config
│   │   └── test_config_and_env.py
│   ├── conftest.py
│   ├── integration
│   │   ├── fixed_test2.py
│   │   ├── test_api_endpoints.py
│   │   ├── test_api_endpoints.py-e
│   │   ├── test_communication_integration.py
│   │   └── test_server.py
│   ├── README.md
│   ├── README.test.md
│   ├── test_build_verifier.py
│   └── test_file_relationships.py
└── trajectories
    └── tosinakinosho
        ├── anthropic_filemap__claude-3-sonnet-20240229__t-0.00__p-1.00__c-3.00___db62b9
        │   └── db62b9
        │       └── config.yaml
        ├── default__claude-3-5-sonnet-20240620__t-0.00__p-1.00__c-3.00___03565e
        │   └── 03565e
        │       ├── 03565e.traj
        │       └── config.yaml
        └── default__openrouter
            └── anthropic
                └── claude-3.5-sonnet-20240620:beta__t-0.00__p-1.00__c-3.00___03565e
                    └── 03565e
                        ├── 03565e.pred
                        ├── 03565e.traj
                        └── config.yaml
```

# Files

--------------------------------------------------------------------------------
/component_test_runner.py:
--------------------------------------------------------------------------------

```python
  1 | #!/usr/bin/env python
  2 | """
  3 | Component Test Runner
  4 | 
  5 | A specialized runner for executing component tests with proper async fixture handling.
  6 | This bypasses the standard pytest fixture mechanisms to handle async fixtures correctly
  7 | in isolated execution environments.
  8 | """
  9 | import os
 10 | import sys
 11 | import uuid
 12 | import asyncio
 13 | import importlib
 14 | from pathlib import Path
 15 | import inspect
 16 | import logging
 17 | import re
 18 | from typing import Dict, Any, List, Callable, Tuple, Optional, Set, Awaitable
 19 | 
 20 | # Configure logging
 21 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 22 | logger = logging.getLogger("component-test-runner")
 23 | 
 24 | # Ensure the repository root is on sys.path so the `src` package imports resolve
 25 | # (note: this path is hardcoded to the original author's workspace)
 26 | sys.path.insert(0, '/Users/tosinakinosho/workspaces/mcp-codebase-insight')
 27 | 
 28 | # Import required components directly to avoid fixture resolution issues
 29 | from src.mcp_codebase_insight.core.config import ServerConfig
 30 | from src.mcp_codebase_insight.core.vector_store import VectorStore
 31 | from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding
 32 | from src.mcp_codebase_insight.core.knowledge import KnowledgeBase
 33 | from src.mcp_codebase_insight.core.tasks import TaskManager
 34 | 
 35 | 
 36 | async def create_test_config() -> ServerConfig:
 37 |     """Create a server configuration for tests."""
 38 |     # Generate a unique collection name for this test run
 39 |     collection_name = f"test_collection_{uuid.uuid4().hex[:8]}"
 40 |     
 41 |     # Check if MCP_COLLECTION_NAME is set in env, use that instead if available
 42 |     if "MCP_COLLECTION_NAME" in os.environ:
 43 |         collection_name = os.environ["MCP_COLLECTION_NAME"]
 44 |     
 45 |     logger.info(f"Using test collection: {collection_name}")
 46 |     
 47 |     config = ServerConfig(
 48 |         host="localhost",
 49 |         port=8000,
 50 |         log_level="DEBUG",
 51 |         qdrant_url="http://localhost:6333",
 52 |         docs_cache_dir=Path(".test_cache") / "docs",
 53 |         adr_dir=Path(".test_cache") / "docs/adrs",
 54 |         kb_storage_dir=Path(".test_cache") / "knowledge",
 55 |         embedding_model="all-MiniLM-L6-v2",
 56 |         collection_name=collection_name,
 57 |         debug_mode=True,
 58 |         metrics_enabled=False,
 59 |         cache_enabled=True,
 60 |         memory_cache_size=1000,
 61 |         disk_cache_dir=Path(".test_cache") / "cache"
 62 |     )
 63 |     return config
 64 | 
 65 | 
 66 | async def create_embedder() -> SentenceTransformerEmbedding:
 67 |     """Create an embedder for tests."""
 68 |     logger.info("Initializing the embedder...")
 69 |     return SentenceTransformerEmbedding()
 70 | 
 71 | 
 72 | async def create_vector_store(config: ServerConfig, embedder: SentenceTransformerEmbedding) -> VectorStore:
 73 |     """Create a vector store for tests."""
 74 |     logger.info("Initializing the vector store...")
 75 |     store = VectorStore(config.qdrant_url, embedder)
 76 |     try:
 77 |         await store.initialize()
 78 |         logger.info("Vector store initialized successfully")
 79 |         return store
 80 |     except Exception as e:
 81 |         logger.error(f"Failed to initialize vector store: {e}")
 82 |         raise RuntimeError(f"Failed to initialize vector store: {e}")
 83 | 
 84 | 
 85 | async def create_knowledge_base(config: ServerConfig, vector_store: VectorStore) -> KnowledgeBase:
 86 |     """Create a knowledge base for tests."""
 87 |     logger.info("Initializing the knowledge base...")
 88 |     kb = KnowledgeBase(config, vector_store)
 89 |     try:
 90 |         await kb.initialize()
 91 |         logger.info("Knowledge base initialized successfully")
 92 |         return kb
 93 |     except Exception as e:
 94 |         logger.error(f"Failed to initialize knowledge base: {e}")
 95 |         raise RuntimeError(f"Failed to initialize knowledge base: {e}")
 96 | 
 97 | 
 98 | async def create_task_manager(config: ServerConfig) -> TaskManager:
 99 |     """Create a task manager for tests."""
100 |     logger.info("Initializing the task manager...")
101 |     manager = TaskManager(config)
102 |     try:
103 |         await manager.initialize()
104 |         logger.info("Task manager initialized successfully")
105 |         return manager
106 |     except Exception as e:
107 |         logger.error(f"Failed to initialize task manager: {e}")
108 |         raise RuntimeError(f"Failed to initialize task manager: {e}")
109 | 
110 | 
111 | async def create_test_metadata() -> Dict[str, Any]:
112 |     """Standard test metadata for consistency across tests."""
113 |     return {
114 |         "type": "code",
115 |         "language": "python",
116 |         "title": "Test Code",
117 |         "description": "Test code snippet for vector store testing",
118 |         "tags": ["test", "vector"]
119 |     }
120 | 
121 | 
122 | def create_test_code() -> str:
123 |     """Provide sample code for testing task-related functionality."""
124 |     return """
125 | def example_function():
126 |     \"\"\"This is a test function for task manager tests.\"\"\"
127 |     return "Hello, world!"
128 | 
129 | class TestClass:
130 |     def __init__(self):
131 |         self.value = 42
132 |         
133 |     def method(self):
134 |         return self.value
135 | """
136 | 
137 | 
138 | async def cleanup_vector_store(vector_store: VectorStore) -> None:
139 |     """Cleanup a vector store after tests."""
140 |     if vector_store and hasattr(vector_store, 'cleanup'):
141 |         logger.info("Cleaning up vector store...")
142 |         try:
143 |             await vector_store.cleanup()
144 |             logger.info("Vector store cleanup completed")
145 |         except Exception as e:
146 |             logger.error(f"Error during vector store cleanup: {e}")
147 | 
148 | 
149 | async def cleanup_knowledge_base(kb: KnowledgeBase) -> None:
150 |     """Cleanup a knowledge base after tests."""
151 |     if kb and hasattr(kb, 'cleanup'):
152 |         logger.info("Cleaning up knowledge base...")
153 |         try:
154 |             await kb.cleanup()
155 |             logger.info("Knowledge base cleanup completed")
156 |         except Exception as e:
157 |             logger.error(f"Error during knowledge base cleanup: {e}")
158 | 
159 | 
160 | async def cleanup_task_manager(manager: TaskManager) -> None:
161 |     """Cleanup a task manager after tests."""
162 |     if manager and hasattr(manager, 'cleanup'):
163 |         logger.info("Cleaning up task manager...")
164 |         try:
165 |             await manager.cleanup()
166 |             logger.info("Task manager cleanup completed")
167 |         except Exception as e:
168 |             logger.error(f"Error cleaning up task manager: {e}")
169 | 
170 | 
171 | def get_module_tests(module_path: str) -> List[str]:
172 |     """Get the list of tests in a module."""
173 |     logger.info(f"Analyzing module: {module_path}")
174 |     with open(module_path, 'r') as file:
175 |         content = file.read()
176 |     
177 |     # Pattern to match async test function definitions (fixtures are filtered out below)
178 |     pattern = r'async\s+def\s+(test_\w+)\s*\('
179 |     
180 |     # Find test functions that are not fixtures (exclude lines with @pytest.fixture)
181 |     lines = content.split('\n')
182 |     test_functions = []
183 |     
184 |     for i, line in enumerate(lines):
185 |         if i > 0 and '@pytest.fixture' in lines[i-1]:
186 |             continue  # Skip this as it's a fixture, not a test
187 |         
188 |         match = re.search(pattern, line)
189 |         if match:
190 |             test_functions.append(match.group(1))
191 |     
192 |     logger.info(f"Found {len(test_functions)} tests in {module_path}")
193 |     return test_functions
194 | 
195 | def load_test_module(module_path: str):
196 |     """Load a test module with proper path handling."""
197 |     # Convert file path to module path
198 |     if module_path.endswith('.py'):
199 |         module_path = module_path[:-3]  # Remove .py extension
200 |     
201 |     # Convert path separators to module separators
202 |     module_name = module_path.replace('/', '.').replace('\\', '.')
203 |     
204 |     # Ensure we use the correct Python path
205 |     if not any(p == '.' for p in sys.path):
206 |         sys.path.append('.')
207 |     
208 |     logger.info(f"Attempting to import module: {module_name}")
209 |     try:
210 |         return importlib.import_module(module_name)
211 |     except ImportError as e:
212 |         logger.error(f"Failed to import test module {module_name}: {e}")
213 |         return None
214 | 
215 | 
216 | async def run_component_test(module_path: str, test_name: str) -> bool:
217 |     """
218 |     Dynamically load and run a component test with proper fixture initialization.
219 |     
220 |     Args:
221 |         module_path: Path to the test module
222 |         test_name: Name of the test function to run
223 |         
224 |     Returns:
225 |         True if test passed, False if it failed
226 |     """
227 |     logger.info(f"Running test: {module_path}::{test_name}")
228 |     
229 |     # Import the test module
230 |     test_module = load_test_module(module_path)
231 |     if not test_module:
232 |         return False
233 |     
234 |     # Get the test function
235 |     if not hasattr(test_module, test_name):
236 |         logger.error(f"Test function {test_name} not found in module {module_name}")
237 |         return False
238 |     
239 |     test_func = getattr(test_module, test_name)
240 |     
241 |     # Determine which fixtures the test needs
242 |     required_fixtures = inspect.signature(test_func).parameters
243 |     logger.info(f"Test requires fixtures: {list(required_fixtures.keys())}")
244 |     
245 |     # Initialize the required fixtures
246 |     fixture_values = {}
247 |     resources_to_cleanup = []
248 |     
249 |     try:
250 |         # Create ServerConfig first since many other fixtures depend on it
251 |         if "test_config" in required_fixtures:
252 |             logger.info("Setting up test_config fixture")
253 |             fixture_values["test_config"] = await create_test_config()
254 |         
255 |         # Create embedder if needed
256 |         if "embedder" in required_fixtures:
257 |             logger.info("Setting up embedder fixture")
258 |             fixture_values["embedder"] = await create_embedder()
259 |         
260 |         # Create test metadata if needed
261 |         if "test_metadata" in required_fixtures:
262 |             logger.info("Setting up test_metadata fixture")
263 |             fixture_values["test_metadata"] = await create_test_metadata()
264 |         
265 |         # Create test code if needed
266 |         if "test_code" in required_fixtures:
267 |             logger.info("Setting up test_code fixture")
268 |             fixture_values["test_code"] = create_test_code()
269 |         
270 |         # Create vector store if needed
271 |         if "vector_store" in required_fixtures:
272 |             logger.info("Setting up vector_store fixture")
273 |             if "test_config" not in fixture_values:
274 |                 fixture_values["test_config"] = await create_test_config()
275 |             if "embedder" not in fixture_values:
276 |                 fixture_values["embedder"] = await create_embedder()
277 |             
278 |             fixture_values["vector_store"] = await create_vector_store(
279 |                 fixture_values["test_config"], 
280 |                 fixture_values["embedder"]
281 |             )
282 |             resources_to_cleanup.append(("vector_store", fixture_values["vector_store"]))
283 |         
284 |         # Create knowledge base if needed
285 |         if "knowledge_base" in required_fixtures:
286 |             logger.info("Setting up knowledge_base fixture")
287 |             if "test_config" not in fixture_values:
288 |                 fixture_values["test_config"] = await create_test_config()
289 |             if "vector_store" not in fixture_values:
290 |                 if "embedder" not in fixture_values:
291 |                     fixture_values["embedder"] = await create_embedder()
292 |                 fixture_values["vector_store"] = await create_vector_store(
293 |                     fixture_values["test_config"], 
294 |                     fixture_values["embedder"]
295 |                 )
296 |                 resources_to_cleanup.append(("vector_store", fixture_values["vector_store"]))
297 |             
298 |             fixture_values["knowledge_base"] = await create_knowledge_base(
299 |                 fixture_values["test_config"], 
300 |                 fixture_values["vector_store"]
301 |             )
302 |             resources_to_cleanup.append(("knowledge_base", fixture_values["knowledge_base"]))
303 |         
304 |         # Create task manager if needed
305 |         if "task_manager" in required_fixtures:
306 |             logger.info("Setting up task_manager fixture")
307 |             if "test_config" not in fixture_values:
308 |                 fixture_values["test_config"] = await create_test_config()
309 |             
310 |             fixture_values["task_manager"] = await create_task_manager(fixture_values["test_config"])
311 |             resources_to_cleanup.append(("task_manager", fixture_values["task_manager"]))
312 |         
313 |         # Ensure all required fixtures are initialized
314 |         missing_fixtures = set(required_fixtures.keys()) - set(fixture_values.keys())
315 |         if missing_fixtures:
316 |             logger.error(f"Missing required fixtures: {missing_fixtures}")
317 |             return False
318 |         
319 |         # Run the actual test
320 |         logger.info(f"Executing test with fixtures: {list(fixture_values.keys())}")
321 |         test_kwargs = {name: value for name, value in fixture_values.items() if name in required_fixtures}
322 |         
323 |         # Check if the test function is an async function
324 |         if inspect.iscoroutinefunction(test_func):
325 |             # For async test functions, await them
326 |             logger.info(f"Running async test: {test_name}")
327 |             await test_func(**test_kwargs)
328 |         else:
329 |             # For regular test functions, just call them
330 |             logger.info(f"Running synchronous test: {test_name}")
331 |             test_func(**test_kwargs)
332 |         
333 |         logger.info(f"Test {test_name} completed successfully")
334 |         return True
335 |     
336 |     except Exception as e:
337 |         logger.error(f"Test {test_name} failed with error: {e}")
338 |         import traceback
339 |         logger.error(traceback.format_exc())
340 |         return False
341 |     
342 |     finally:
343 |         # Clean up resources in reverse order (LIFO)
344 |         logger.info("Cleaning up resources...")
345 |         for resource_type, resource in reversed(resources_to_cleanup):
346 |             try:
347 |                 if resource_type == "vector_store":
348 |                     await cleanup_vector_store(resource)
349 |                 elif resource_type == "knowledge_base":
350 |                     await cleanup_knowledge_base(resource)
351 |                 elif resource_type == "task_manager":
352 |                     await cleanup_task_manager(resource)
353 |             except Exception as e:
354 |                 logger.error(f"Error cleaning up {resource_type}: {e}")
355 | 
356 | 
357 | def main():
358 |     """Run a component test with proper async fixture handling."""
359 |     if len(sys.argv) < 2:
360 |         print("Usage: python component_test_runner.py <module_path> <test_name>")
361 |         sys.exit(1)
362 |     
363 |     module_path = sys.argv[1]
364 |     
365 |     # Configure event loop policy for macOS if needed
366 |     if sys.platform == 'darwin':
367 |         import platform
368 |         if int(platform.mac_ver()[0].split('.')[0]) >= 10:
369 |             asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
370 |     
371 |     try:
372 |         if len(sys.argv) < 3:
373 |             # No specific test provided, use module discovery
374 |             tests = get_module_tests(module_path)
375 |             if not tests:
376 |                 logger.error(f"No tests found in {module_path}")
377 |                 sys.exit(1)
378 |             
379 |             # Run all tests in the module
380 |             successful_tests = 0
381 |             for test_name in tests:
382 |                 loop = asyncio.new_event_loop()
383 |                 asyncio.set_event_loop(loop)
384 |                 test_result = loop.run_until_complete(run_component_test(module_path, test_name))
385 |                 loop.close()
386 |                 if test_result:
387 |                     successful_tests += 1
388 |             
389 |             # Report test results
390 |             logger.info(f"Test Results: {successful_tests}/{len(tests)} tests passed")
391 |             sys.exit(0 if successful_tests == len(tests) else 1)
392 |         else:
393 |             # Run a specific test
394 |             test_name = sys.argv[2]
395 |             
396 |             loop = asyncio.new_event_loop()
397 |             asyncio.set_event_loop(loop)
398 |             result = loop.run_until_complete(run_component_test(module_path, test_name))
399 |             loop.close()
400 |             sys.exit(0 if result else 1)
401 |     except KeyboardInterrupt:
402 |         logger.info("Test execution interrupted")
403 |         sys.exit(130)  # 130 is the standard exit code for SIGINT
404 |     except Exception as e:
405 |         logger.error(f"Unhandled exception during test execution: {e}")
406 |         import traceback
407 |         logger.error(traceback.format_exc())
408 |         sys.exit(1)
409 | 
410 | 
411 | if __name__ == "__main__":
412 |     main()
413 | 
```
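For reference, here is a minimal sketch of driving this runner programmatically rather than through the CLI in `main()`. It assumes you run from the repository root with Qdrant reachable at `http://localhost:6333` (as the fixtures above expect); the test module path is illustrative, taken from the `tests/components` directory in the tree above.

```python
# Hedged usage sketch for component_test_runner (assumptions noted above).
import asyncio

from component_test_runner import get_module_tests, run_component_test

async def run_module(module: str) -> bool:
    """Discover and run every async test in one module.

    Unlike main(), which creates a fresh event loop per test, this runs
    all tests on one shared loop.
    """
    tests = get_module_tests(module)
    results = [await run_component_test(module, name) for name in tests]
    return bool(tests) and all(results)

if __name__ == "__main__":
    ok = asyncio.run(run_module("tests/components/test_vector_store.py"))
    raise SystemExit(0 if ok else 1)
```

The equivalent CLI invocations are `python component_test_runner.py tests/components/test_vector_store.py` for a whole module, or the same command with a trailing test name for a single test.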

--------------------------------------------------------------------------------
/trajectories/tosinakinosho/default__openrouter/anthropic/claude-3.5-sonnet-20240620:beta__t-0.00__p-1.00__c-3.00___03565e/03565e/config.yaml:
--------------------------------------------------------------------------------

```yaml
  1 | '{"env":{"deployment":{"image":"python:3.11","port":null,"docker_args":[],"startup_timeout":180.0,"pull":"missing","remove_images":false,"python_standalone_dir":"/root","platform":null,"type":"docker"},"repo":{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight","base_commit":"HEAD","type":"local"},"post_startup_commands":[],"post_startup_command_timeout":500,"name":"main"},"agent":{"name":"main","templates":{"system_template":"SETTING:
  2 |   You are an autonomous programmer, and you''re working directly in the command line
  3 |   with a special interface.\n\nThe special interface consists of a file editor that
  4 |   shows you {{WINDOW}} lines of a file at a time.\nIn addition to typical bash commands,
  5 |   you can also use specific commands to help you navigate and edit files.\nTo call
  6 |   a command, you need to invoke it with a function call/tool call.\n\nPlease note
  7 |   that THE EDIT COMMAND REQUIRES PROPER INDENTATION.\n\nFor example, if you are looking
  8 |   at this file:\n\ndef fct():\n    print(\"Hello world\")\n\nand you want to edit
  9 |   the file to read:\n\ndef fct():\n    print(\"Hello\")\n    print(\"world\")\n\nyou
 10 |   search string should be `Hello world` and your replace string should be `\"Hello\"\\n    print(\"world\")`\n(note
 11 |   the extra spaces before the print statement!).\n\nYou could also get the same result
 12 |   by search for `    print(\"Hello world\")` and replace with `    print(\"Hello\")\\n    print(\"world\")`.\n\nRESPONSE
 13 |   FORMAT:\nYour shell prompt is formatted as follows:\n(Open file: <path>)\n(Current
 14 |   directory: <cwd>)\nbash-$\n\nFirst, you should _always_ include a general thought
 15 |   about what you''re going to do next.\nThen, for every response, you must include
 16 |   exactly _ONE_ tool call/function call.\n\nRemember, you should always include a
 17 |   _SINGLE_ tool call/function call and then wait for a response from the shell before
 18 |   continuing with more discussion and commands. Everything you include in the DISCUSSION
 19 |   section will be saved for future reference.\nIf you''d like to issue two commands
 20 |   at once, PLEASE DO NOT DO THAT! Please instead first submit just the first tool
 21 |   call, and then after receiving a response you''ll be able to issue the second .\nNote
 22 |   that the environment does NOT support interactive session commands (e.g. python,
 23 |   vim), so please do not invoke them.","instance_template":"We''re currently solving
 24 |   the following issue within our repository. Here''s the issue text:\nISSUE:\n{{problem_statement}}\n\nINSTRUCTIONS:\nNow,
 25 |   you''re going to solve this issue on your own. Your terminal session has started
 26 |   and you''re in the repository''s root directory. You can use any bash commands or
 27 |   the special interface to help you. Edit all the files you need to and run any checks
 28 |   or tests that you want.\nRemember, YOU SHOULD ALWAYS INCLUDE EXACTLY ONE TOOL CALL/FUNCTION
 29 |   CALL PER RESPONSE.\nWhen you''re satisfied with all of the changes you''ve made,
 30 |   you can submit your changes to the code base by simply running the submit command.\nNote
 31 |   however that you cannot use any interactive session commands (e.g. python, vim)
 32 |   in this environment, but you can write scripts and run them. E.g. you can write
 33 |   a python script and then run it with the python command.\n\nNOTE ABOUT THE EDIT
 34 |   COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate
 35 |   indentation before each line!\n\nGENERAL IMPORTANT TIPS:\n\n1. If you run a command
 36 |   and it doesn''t work, try running a different command. A command that did not work
 37 |   once will not work the second time unless you modify it!\n\n2. If you open a file
 38 |   and need to get to an area around a specific line that is not in the first 100 lines,
 39 |   say line 583, don''t just use the scroll_down command multiple times. Instead, use
 40 |   the goto 583 command. It''s much quicker.\n\n3. If the bug reproduction script requires
 41 |   inputting/reading a specific file, such as buggy-input.png, and you''d like to understand
 42 |   how to input that file, conduct a search in the existing repo code, to see whether
 43 |   someone else has already done that. Do this by running the command: find_file \"buggy-input.png\"
 44 |   If that doesn''t work, use the linux ''find'' command.\n\n4. Always make sure to
 45 |   look at the currently open file and the current working directory (which appears
 46 |   right after the currently open file). The currently open file might be in a different
 47 |   directory than the working directory! Note that some commands, such as ''create'',
 48 |   open files, so they might change the current open file.\n\n5. When editing files,
 49 |   it is easy to accidentally to write code with incorrect indentation or make other
 50 |   mistakes. Always check the code after you issue an edit to make sure that it reflects
 51 |   what you wanted to accomplish. If it didn''t, issue another command to fix it.\n\n6.
 52 |   When editing files, first explain the code you want to edit and why it is causing
 53 |   the problem. Then explain the edit you want to make and how it fixes the problem.
 54 |   Explain how the edit does not break existing functionality.\n\n7. Do not try to
 55 |   install any packages with `pip`, `conda`, or any other way. This will usually not
 56 |   work. If the environment is not set up correctly, try to fix the issue without executing
 57 |   python code or running any tests that require the package installed.\n\nSTRATEGY:\n\n1.
 58 |   Always start by trying to replicate the bug that the issues discusses.\n  If the
 59 |   issue includes code for reproducing the bug, we recommend that you re-implement
 60 |   that in your environment, and run it to make sure you can reproduce the bug.\n  Then
 61 |   start trying to fix it.\n\n  If the bug reproduction script does not print anything
 62 |   when it successfully runs, we recommend adding a print(\"Script completed successfully,
 63 |   no errors.\") command at the end of the file,\n  so that you can be sure that the
 64 |   script indeed ran fine all the way through.\n\n2. Locate relevant code using the
 65 |   find and search commands. `open` the file you want to edit.\n\n3. Use the `edit`
 66 |   command to perform edits.\n\n4. When you think you''ve fixed the bug, re-run the
 67 |   bug reproduction script to make sure that the bug has indeed been fixed.\n\n5. Create
 68 |   additional tests to verify the fix in a style similar to the existing reproduction
 69 |   script. In particular, make sure to test edge cases.\n   If you find any issues,
 70 |   go back to the file you edited and perform further edits.\n\n(Open file: {{open_file}})\n(Current
 71 |   directory: {{working_dir}})\nbash-$","next_step_template":"{{observation}}\n(Open
 72 |   file: {{open_file}})\n(Current directory: {{working_dir}})\nbash-$","next_step_truncated_observation_template":"Observation:
 73 |   {{observation}}<response clipped><NOTE>Observations should not exceeded {{max_observation_length}}
 74 |   characters. {{elided_chars}} characters were elided. Please try a different command
 75 |   that produces less output or use head/tail/grep/redirect the output to a file. Do
 76 |   not use interactive pagers.</NOTE>","max_observation_length":100000,"next_step_no_output_template":"Your
 77 |   command ran successfully and did not produce any output.\n(Open file: {{open_file}})\n(Current
 78 |   directory: {{working_dir}})\nbash-$","strategy_template":null,"demonstration_template":"Here
 79 |   is a demonstration of how to correctly accomplish this task.\nIt is included to
 80 |   show you how to correctly use the interface.\nYou do not need to follow exactly
 81 |   what is done in the demonstration.\n--- DEMONSTRATION ---\n{{demonstration}}\n---
 82 |   END OF DEMONSTRATION ---\n","demonstrations":["/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj"],"put_demos_in_history":true,"shell_check_error_template":"Your
 83 |   bash command contained syntax errors and was NOT executed. Please fix the syntax
 84 |   errors and try again. This can be the result of not adhering to the syntax for multi-line
 85 |   commands. Here is the output of `bash -n`:\n{{bash_stdout}}\n{{bash_stderr}}","command_cancelled_timeout_template":"The
 86 |   command ''{{command}}'' was cancelled because it took more than {{timeout}} seconds.
 87 |   Please try a different command that completes more quickly."},"tools":{"filter":{"blocklist_error_template":"Operation
 88 |   ''{{action}}'' is not supported by this environment.","blocklist":["vim","vi","emacs","nano","nohup","gdb","less","tail
 89 |   -f","python -m venv","make"],"blocklist_standalone":["python","python3","ipython","bash","sh","/bin/bash","/bin/sh","nohup","vi","vim","emacs","nano","su"],"block_unless_regex":{"radare2":"\\b(?:radare2)\\b.*\\s+-c\\s+.*","r2":"\\b(?:radare2)\\b.*\\s+-c\\s+.*"}},"bundles":[{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/registry","hidden_tools":[]},{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/defaults","hidden_tools":[]},{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/search","hidden_tools":[]},{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/edit_replace","hidden_tools":[]},{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/submit","hidden_tools":[]}],"env_variables":{"WINDOW":100,"OVERLAP":2},"registry_variables":{},"submit_command":"submit","parse_function":{"error_message":"Your
 90 |   output was not formatted correctly. You must always include one discussion and one
 91 |   command as part of your response. Make sure you do not have multiple discussion/command
 92 |   tags.\nPlease make sure your output precisely matches the following format:\nDISCUSSION\nDiscuss
 93 |   here with yourself about what your planning and what you''re going to do in this
 94 |   step.\n\n```\ncommand(s) that you''re going to run\n```\n","type":"thought_action"},"enable_bash_tool":true,"format_error_template":"Your
 95 |   output was not formatted correctly. You must always include one discussion and one
 96 |   command as part of your response. Make sure you do not have multiple discussion/command
 97 |   tags.\nPlease make sure your output precisely matches the following format:\nDISCUSSION\nDiscuss
 98 |   here with yourself about what your planning and what you''re going to do in this
 99 |   step.\n\n```\ncommand(s) that you''re going to run\n```\n","command_docs":"bash:\n  docstring:
100 |   runs the given command directly in bash\n  signature: <command>\n  arguments:\n    -
101 |   command (string) [required]: The bash command to execute.\n\ngoto:\n  docstring:
102 |   moves the window to show <line_number>\n  signature: goto <line_number>\n  arguments:\n    -
103 |   line_number (integer) [required]: the line number to move the window to\n\nopen:\n  docstring:
104 |   opens the file at the given path in the editor. If line_number is provided, the
105 |   window will be move to include that line\n  signature: open \"<path>\" [<line_number>]\n  arguments:\n    -
106 |   path (string) [required]: the path to the file to open\n    - line_number (integer)
107 |   [optional]: the line number to move the window to (if not provided, the window will
108 |   start at the top of the file)\n\ncreate:\n  docstring: creates and opens a new file
109 |   with the given name\n  signature: create <filename>\n  arguments:\n    - filename
110 |   (string) [required]: the name of the file to create\n\nscroll_up:\n  docstring:
111 |   moves the window up 100 lines\n  signature: scroll_up\n\nscroll_down:\n  docstring:
112 |   moves the window down 100 lines\n  signature: scroll_down\n\nfind_file:\n  docstring:
113 |   finds all files with the given name or pattern in dir. If dir is not provided, searches
114 |   in the current directory\n  signature: find_file <file_name> [<dir>]\n  arguments:\n    -
115 |   file_name (string) [required]: the name of the file or pattern to search for. supports
116 |   shell-style wildcards (e.g. *.py)\n    - dir (string) [optional]: the directory
117 |   to search in (if not provided, searches in the current directory)\n\nsearch_dir:\n  docstring:
118 |   searches for search_term in all files in dir. If dir is not provided, searches in
119 |   the current directory\n  signature: search_dir <search_term> [<dir>]\n  arguments:\n    -
120 |   search_term (string) [required]: the term to search for\n    - dir (string) [optional]:
121 |   the directory to search in (if not provided, searches in the current directory)\n\nsearch_file:\n  docstring:
122 |   searches for search_term in file. If file is not provided, searches in the current
123 |   open file\n  signature: search_file <search_term> [<file>]\n  arguments:\n    -
124 |   search_term (string) [required]: the term to search for\n    - file (string) [optional]:
125 |   the file to search in (if not provided, searches in the current open file)\n\nedit:\n  docstring:
126 |   Replace first occurrence of <search> with <replace> in the currently displayed lines.
127 |   If replace-all is True , replace all occurrences of <search> with <replace>.\nFor
128 |   example, if you are looking at this file:\ndef fct():\n    print(\"Hello world\")\n\nand
129 |   you want to edit the file to read:\ndef fct():\n    print(\"Hello\")\n    print(\"world\")\n\nyou
130 |   can search for `Hello world` and replace with `\"Hello\"\\n    print(\"world\")`
131 |   (note the extra spaces before the print statement!).\nTips:\n1. Always include proper
132 |   whitespace/indentation 2. When you are adding an if/with/try statement, you need
133 |   to INDENT the block that follows, so make sure to include it in both your search
134 |   and replace strings! 3. If you are wrapping code in a try statement, make sure to
135 |   also add an ''except'' or ''finally'' block.\nBefore every edit, please\n1. Explain
136 |   the code you want to edit and why it is causing the problem 2. Explain the edit
137 |   you want to make and how it fixes the problem 3. Explain how the edit does not break
138 |   existing functionality\n\n  signature: edit <search> <replace> [<replace-all>]\n\n  arguments:\n    -
139 |   search (string) [required]: the text to search for (make sure to include proper
140 |   whitespace if needed)\n    - replace (string) [required]: the text to replace the
141 |   search with (make sure to include proper whitespace if needed)\n    - replace-all
142 |   (boolean) [optional]: replace all occurrences rather than the first occurrence within
143 |   the displayed lines\n\ninsert:\n  docstring: Insert <text> at the end of the currently
144 |   opened file or after <line> if specified.\n\n  signature: insert <text> [<line>]\n\n  arguments:\n    -
145 |   text (string) [required]: the text to insert\n    - line (integer) [optional]: the
146 |   line number to insert the text as new lines after\n\nsubmit:\n  docstring: submits
147 |   the current file\n  signature: submit\n\n","multi_line_command_endings":{},"submit_command_end_name":null,"reset_commands":[],"execution_timeout":30,"install_timeout":300,"total_execution_timeout":1800,"max_consecutive_execution_timeouts":3},"history_processors":[{"n":5,"polling":1,"always_remove_output_for_tags":["remove_output"],"always_keep_output_for_tags":["keep_output"],"type":"last_n_observations"}],"model":{"name":"openrouter/anthropic/claude-3.5-sonnet-20240620:beta","per_instance_cost_limit":3.0,"total_cost_limit":0.0,"per_instance_call_limit":0,"temperature":0.0,"top_p":1.0,"api_base":null,"api_version":null,"api_key":null,"stop":[],"completion_kwargs":{},"convert_system_to_user":false,"retry":{"retries":20,"min_wait":10.0,"max_wait":120.0},"delay":0.0,"fallbacks":[],"choose_api_key_by_thread":true,"max_input_tokens":null,"max_output_tokens":null},"max_requeries":3,"action_sampler":null,"type":"default"},"problem_statement":{"text":"#
148 |   Debug MCP Codebase Insight Tests","extra_fields":{},"type":"text","id":"03565e"},"output_dir":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/trajectories/tosinakinosho/default__openrouter/anthropic/claude-3.5-sonnet-20240620:beta__t-0.00__p-1.00__c-3.00___03565e","actions":{"open_pr":false,"pr_config":{"skip_if_commits_reference_issue":true},"apply_patch_locally":false},"env_var_path":null}'
149 | 
```

--------------------------------------------------------------------------------
/trajectories/tosinakinosho/default__claude-3-5-sonnet-20240620__t-0.00__p-1.00__c-3.00___03565e/03565e/config.yaml:
--------------------------------------------------------------------------------

```yaml
  1 | '{"env":{"deployment":{"image":"python:3.11","port":null,"docker_args":[],"startup_timeout":180.0,"pull":"missing","remove_images":false,"python_standalone_dir":"/root","platform":null,"type":"docker"},"repo":null,"post_startup_commands":[],"post_startup_command_timeout":500,"name":"main"},"agent":{"name":"main","templates":{"system_template":"SETTING:
  2 |   You are an autonomous programmer, and you''re working directly in the command line
  3 |   with a special interface.\n\nThe special interface consists of a file editor that
  4 |   shows you {{WINDOW}} lines of a file at a time.\nIn addition to typical bash commands,
  5 |   you can also use specific commands to help you navigate and edit files.\nTo call
  6 |   a command, you need to invoke it with a function call/tool call.\n\nPlease note
  7 |   that THE EDIT COMMAND REQUIRES PROPER INDENTATION.\n\nFor example, if you are looking
  8 |   at this file:\n\ndef fct():\n    print(\"Hello world\")\n\nand you want to edit
  9 |   the file to read:\n\ndef fct():\n    print(\"Hello\")\n    print(\"world\")\n\nyou
 10 |   search string should be `Hello world` and your replace string should be `\"Hello\"\\n    print(\"world\")`\n(note
 11 |   the extra spaces before the print statement!).\n\nYou could also get the same result
 12 |   by search for `    print(\"Hello world\")` and replace with `    print(\"Hello\")\\n    print(\"world\")`.\n\nRESPONSE
 13 |   FORMAT:\nYour shell prompt is formatted as follows:\n(Open file: <path>)\n(Current
 14 |   directory: <cwd>)\nbash-$\n\nFirst, you should _always_ include a general thought
 15 |   about what you''re going to do next.\nThen, for every response, you must include
 16 |   exactly _ONE_ tool call/function call.\n\nRemember, you should always include a
 17 |   _SINGLE_ tool call/function call and then wait for a response from the shell before
 18 |   continuing with more discussion and commands. Everything you include in the DISCUSSION
 19 |   section will be saved for future reference.\nIf you''d like to issue two commands
 20 |   at once, PLEASE DO NOT DO THAT! Please instead first submit just the first tool
 21 |   call, and then after receiving a response you''ll be able to issue the second .\nNote
 22 |   that the environment does NOT support interactive session commands (e.g. python,
 23 |   vim), so please do not invoke them.","instance_template":"We''re currently solving
 24 |   the following issue within our repository. Here''s the issue text:\nISSUE:\n{{problem_statement}}\n\nINSTRUCTIONS:\nNow,
 25 |   you''re going to solve this issue on your own. Your terminal session has started
 26 |   and you''re in the repository''s root directory. You can use any bash commands or
 27 |   the special interface to help you. Edit all the files you need to and run any checks
 28 |   or tests that you want.\nRemember, YOU SHOULD ALWAYS INCLUDE EXACTLY ONE TOOL CALL/FUNCTION
 29 |   CALL PER RESPONSE.\nWhen you''re satisfied with all of the changes you''ve made,
 30 |   you can submit your changes to the code base by simply running the submit command.\nNote
 31 |   however that you cannot use any interactive session commands (e.g. python, vim)
 32 |   in this environment, but you can write scripts and run them. E.g. you can write
 33 |   a python script and then run it with the python command.\n\nNOTE ABOUT THE EDIT
 34 |   COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate
 35 |   indentation before each line!\n\nGENERAL IMPORTANT TIPS:\n\n1. If you run a command
 36 |   and it doesn''t work, try running a different command. A command that did not work
 37 |   once will not work the second time unless you modify it!\n\n2. If you open a file
 38 |   and need to get to an area around a specific line that is not in the first 100 lines,
 39 |   say line 583, don''t just use the scroll_down command multiple times. Instead, use
 40 |   the goto 583 command. It''s much quicker.\n\n3. If the bug reproduction script requires
 41 |   inputting/reading a specific file, such as buggy-input.png, and you''d like to understand
 42 |   how to input that file, conduct a search in the existing repo code, to see whether
 43 |   someone else has already done that. Do this by running the command: find_file \"buggy-input.png\"
 44 |   If that doesn''t work, use the linux ''find'' command.\n\n4. Always make sure to
 45 |   look at the currently open file and the current working directory (which appears
 46 |   right after the currently open file). The currently open file might be in a different
 47 |   directory than the working directory! Note that some commands, such as ''create'',
 48 |   open files, so they might change the current open file.\n\n5. When editing files,
 49 |   it is easy to accidentally to write code with incorrect indentation or make other
 50 |   mistakes. Always check the code after you issue an edit to make sure that it reflects
 51 |   what you wanted to accomplish. If it didn''t, issue another command to fix it.\n\n6.
 52 |   When editing files, first explain the code you want to edit and why it is causing
 53 |   the problem. Then explain the edit you want to make and how it fixes the problem.
 54 |   Explain how the edit does not break existing functionality.\n\n7. Do not try to
 55 |   install any packages with `pip`, `conda`, or any other way. This will usually not
 56 |   work. If the environment is not set up correctly, try to fix the issue without executing
 57 |   python code or running any tests that require the package installed.\n\nSTRATEGY:\n\n1.
 58 |   Always start by trying to replicate the bug that the issues discusses.\n  If the
 59 |   issue includes code for reproducing the bug, we recommend that you re-implement
 60 |   that in your environment, and run it to make sure you can reproduce the bug.\n  Then
 61 |   start trying to fix it.\n\n  If the bug reproduction script does not print anything
 62 |   when it successfully runs, we recommend adding a print(\"Script completed successfully,
 63 |   no errors.\") command at the end of the file,\n  so that you can be sure that the
 64 |   script indeed ran fine all the way through.\n\n2. Locate relevant code using the
 65 |   find and search commands. `open` the file you want to edit.\n\n3. Use the `edit`
 66 |   command to perform edits.\n\n4. When you think you''ve fixed the bug, re-run the
 67 |   bug reproduction script to make sure that the bug has indeed been fixed.\n\n5. Create
 68 |   additional tests to verify the fix in a style similar to the existing reproduction
 69 |   script. In particular, make sure to test edge cases.\n   If you find any issues,
 70 |   go back to the file you edited and perform further edits.\n\n(Open file: {{open_file}})\n(Current
 71 |   directory: {{working_dir}})\nbash-$","next_step_template":"{{observation}}\n(Open
 72 |   file: {{open_file}})\n(Current directory: {{working_dir}})\nbash-$","next_step_truncated_observation_template":"Observation:
 73 |   {{observation}}<response clipped><NOTE>Observations should not exceeded {{max_observation_length}}
 74 |   characters. {{elided_chars}} characters were elided. Please try a different command
 75 |   that produces less output or use head/tail/grep/redirect the output to a file. Do
 76 |   not use interactive pagers.</NOTE>","max_observation_length":100000,"next_step_no_output_template":"Your
 77 |   command ran successfully and did not produce any output.\n(Open file: {{open_file}})\n(Current
 78 |   directory: {{working_dir}})\nbash-$","strategy_template":null,"demonstration_template":"Here
 79 |   is a demonstration of how to correctly accomplish this task.\nIt is included to
 80 |   show you how to correctly use the interface.\nYou do not need to follow exactly
 81 |   what is done in the demonstration.\n--- DEMONSTRATION ---\n{{demonstration}}\n---
 82 |   END OF DEMONSTRATION ---\n","demonstrations":["/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj"],"put_demos_in_history":true,"shell_check_error_template":"Your
 83 |   bash command contained syntax errors and was NOT executed. Please fix the syntax
 84 |   errors and try again. This can be the result of not adhering to the syntax for multi-line
 85 |   commands. Here is the output of `bash -n`:\n{{bash_stdout}}\n{{bash_stderr}}","command_cancelled_timeout_template":"The
 86 |   command ''{{command}}'' was cancelled because it took more than {{timeout}} seconds.
 87 |   Please try a different command that completes more quickly."},"tools":{"filter":{"blocklist_error_template":"Operation
 88 |   ''{{action}}'' is not supported by this environment.","blocklist":["vim","vi","emacs","nano","nohup","gdb","less","tail
 89 |   -f","python -m venv","make"],"blocklist_standalone":["python","python3","ipython","bash","sh","/bin/bash","/bin/sh","nohup","vi","vim","emacs","nano","su"],"block_unless_regex":{"radare2":"\\b(?:radare2)\\b.*\\s+-c\\s+.*","r2":"\\b(?:radare2)\\b.*\\s+-c\\s+.*"}},"bundles":[{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/registry","hidden_tools":[]},{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/defaults","hidden_tools":[]},{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/search","hidden_tools":[]},{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/edit_replace","hidden_tools":[]},{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/submit","hidden_tools":[]}],"env_variables":{"WINDOW":100,"OVERLAP":2},"registry_variables":{},"submit_command":"submit","parse_function":{"error_message":"{%-
 90 |   if error_code == \"missing\" -%}\nYour last output did not use any tool calls!\nPlease
 91 |   make sure your output includes exactly _ONE_ function call!\nYou must invoke the
 92 |   function directly using the function call format.\nYou cannot invoke commands with
 93 |   ```, you have to use the function call format.\nIf you think you have already resolved
 94 |   the issue, please submit your changes by running the `submit` command.\nIf you think
 95 |   you cannot solve the problem, please run `exit_forfeit` (if available) or `submit`.\nElse,
 96 |   please continue with a new tool call!\n{%- elif error_code == \"multiple\" -%}\nYour
 97 |   last output included multiple tool calls!\nPlease make sure your output includes
 98 |   a thought and exactly _ONE_ function call.\n{%- elif error_code == \"unexpected_arg\"
 99 |   -%}\nYour action could not be parsed properly: {{exception_message}}.\nMake sure
100 |   your function call doesn''t include any extra arguments that are not in the allowed
101 |   arguments, and only use the allowed commands.\n{%- else -%}\nYour action could not
102 |   be parsed properly: {{exception_message}}.\n{% endif %}\n","type":"function_calling"},"enable_bash_tool":true,"format_error_template":"{%-
103 |   if error_code == \"missing\" -%}\nYour last output did not use any tool calls!\nPlease
104 |   make sure your output includes exactly _ONE_ function call!\nYou must invoke the
105 |   function directly using the function call format.\nYou cannot invoke commands with
106 |   ```, you have to use the function call format.\nIf you think you have already resolved
107 |   the issue, please submit your changes by running the `submit` command.\nIf you think
108 |   you cannot solve the problem, please run `exit_forfeit` (if available) or `submit`.\nElse,
109 |   please continue with a new tool call!\n{%- elif error_code == \"multiple\" -%}\nYour
110 |   last output included multiple tool calls!\nPlease make sure your output includes
111 |   a thought and exactly _ONE_ function call.\n{%- elif error_code == \"unexpected_arg\"
112 |   -%}\nYour action could not be parsed properly: {{exception_message}}.\nMake sure
113 |   your function call doesn''t include any extra arguments that are not in the allowed
114 |   arguments, and only use the allowed commands.\n{%- else -%}\nYour action could not
115 |   be parsed properly: {{exception_message}}.\n{% endif %}\n","command_docs":"bash:\n  docstring:
116 |   runs the given command directly in bash\n  signature: <command>\n  arguments:\n    -
117 |   command (string) [required]: The bash command to execute.\n\ngoto:\n  docstring:
118 |   moves the window to show <line_number>\n  signature: goto <line_number>\n  arguments:\n    -
119 |   line_number (integer) [required]: the line number to move the window to\n\nopen:\n  docstring:
120 |   opens the file at the given path in the editor. If line_number is provided, the
121 |   window will be move to include that line\n  signature: open \"<path>\" [<line_number>]\n  arguments:\n    -
122 |   path (string) [required]: the path to the file to open\n    - line_number (integer)
123 |   [optional]: the line number to move the window to (if not provided, the window will
124 |   start at the top of the file)\n\ncreate:\n  docstring: creates and opens a new file
125 |   with the given name\n  signature: create <filename>\n  arguments:\n    - filename
126 |   (string) [required]: the name of the file to create\n\nscroll_up:\n  docstring:
127 |   moves the window up 100 lines\n  signature: scroll_up\n\nscroll_down:\n  docstring:
128 |   moves the window down 100 lines\n  signature: scroll_down\n\nfind_file:\n  docstring:
129 |   finds all files with the given name or pattern in dir. If dir is not provided, searches
130 |   in the current directory\n  signature: find_file <file_name> [<dir>]\n  arguments:\n    -
131 |   file_name (string) [required]: the name of the file or pattern to search for. supports
132 |   shell-style wildcards (e.g. *.py)\n    - dir (string) [optional]: the directory
133 |   to search in (if not provided, searches in the current directory)\n\nsearch_dir:\n  docstring:
134 |   searches for search_term in all files in dir. If dir is not provided, searches in
135 |   the current directory\n  signature: search_dir <search_term> [<dir>]\n  arguments:\n    -
136 |   search_term (string) [required]: the term to search for\n    - dir (string) [optional]:
137 |   the directory to search in (if not provided, searches in the current directory)\n\nsearch_file:\n  docstring:
138 |   searches for search_term in file. If file is not provided, searches in the current
139 |   open file\n  signature: search_file <search_term> [<file>]\n  arguments:\n    -
140 |   search_term (string) [required]: the term to search for\n    - file (string) [optional]:
141 |   the file to search in (if not provided, searches in the current open file)\n\nedit:\n  docstring:
142 |   Replace first occurrence of <search> with <replace> in the currently displayed lines.
143 |   If replace-all is True , replace all occurrences of <search> with <replace>.\nFor
144 |   example, if you are looking at this file:\ndef fct():\n    print(\"Hello world\")\n\nand
145 |   you want to edit the file to read:\ndef fct():\n    print(\"Hello\")\n    print(\"world\")\n\nyou
146 |   can search for `Hello world` and replace with `\"Hello\"\\n    print(\"world\")`
147 |   (note the extra spaces before the print statement!).\nTips:\n1. Always include proper
148 |   whitespace/indentation 2. When you are adding an if/with/try statement, you need
149 |   to INDENT the block that follows, so make sure to include it in both your search
150 |   and replace strings! 3. If you are wrapping code in a try statement, make sure to
151 |   also add an ''except'' or ''finally'' block.\nBefore every edit, please\n1. Explain
152 |   the code you want to edit and why it is causing the problem 2. Explain the edit
153 |   you want to make and how it fixes the problem 3. Explain how the edit does not break
154 |   existing functionality\n\n  signature: edit <search> <replace> [<replace-all>]\n\n  arguments:\n    -
155 |   search (string) [required]: the text to search for (make sure to include proper
156 |   whitespace if needed)\n    - replace (string) [required]: the text to replace the
157 |   search with (make sure to include proper whitespace if needed)\n    - replace-all
158 |   (boolean) [optional]: replace all occurrences rather than the first occurrence within
159 |   the displayed lines\n\ninsert:\n  docstring: Insert <text> at the end of the currently
160 |   opened file or after <line> if specified.\n\n  signature: insert <text> [<line>]\n\n  arguments:\n    -
161 |   text (string) [required]: the text to insert\n    - line (integer) [optional]: the
162 |   line number to insert the text as new lines after\n\nsubmit:\n  docstring: submits
163 |   the current file\n  signature: submit\n\n","multi_line_command_endings":{},"submit_command_end_name":null,"reset_commands":[],"execution_timeout":30,"install_timeout":300,"total_execution_timeout":1800,"max_consecutive_execution_timeouts":3},"history_processors":[{"n":5,"polling":1,"always_remove_output_for_tags":["remove_output"],"always_keep_output_for_tags":["keep_output"],"type":"last_n_observations"}],"model":{"name":"claude-3-5-sonnet-20240620","per_instance_cost_limit":3.0,"total_cost_limit":0.0,"per_instance_call_limit":0,"temperature":0.0,"top_p":1.0,"api_base":null,"api_version":null,"api_key":null,"stop":[],"completion_kwargs":{},"convert_system_to_user":false,"retry":{"retries":20,"min_wait":10.0,"max_wait":120.0},"delay":0.0,"fallbacks":[],"choose_api_key_by_thread":true,"max_input_tokens":null,"max_output_tokens":null},"max_requeries":3,"action_sampler":null,"type":"default"},"problem_statement":{"text":"#
164 |   Debug MCP Codebase Insight Tests","extra_fields":{},"type":"text","id":"03565e"},"output_dir":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/trajectories/tosinakinosho/default__claude-3-5-sonnet-20240620__t-0.00__p-1.00__c-3.00___03565e","actions":{"open_pr":false,"pr_config":{"skip_if_commits_reference_issue":true},"apply_patch_locally":false},"env_var_path":null}'
165 | 
```

--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/sse.py:
--------------------------------------------------------------------------------

```python
  1 | """Server-Sent Events (SSE) transport implementation for MCP."""
  2 | 
  3 | import asyncio
  4 | import logging
  5 | import json
  6 | from typing import Any, Callable, Dict, List, Optional, Tuple
  7 | from datetime import datetime
  8 | from starlette.applications import Starlette
  9 | from starlette.routing import Mount, Route
 10 | from starlette.requests import Request
 11 | from starlette.responses import Response, JSONResponse, RedirectResponse, StreamingResponse
 12 | import uuid
 13 | from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
 14 | from starlette.middleware.cors import CORSMiddleware
 15 | 
 16 | from mcp.server.fastmcp import FastMCP
 17 | from mcp.server.sse import SseServerTransport
 18 | from ..utils.logger import get_logger
 19 | 
 20 | logger = get_logger(__name__)
 21 | 
 22 | async def send_heartbeats(queue: asyncio.Queue, interval: int = 30):
 23 |     """Send periodic heartbeat messages to keep the connection alive.
 24 |     
 25 |     Args:
 26 |         queue: The queue to send heartbeats to
 27 |         interval: Time between heartbeats in seconds
 28 |     """
 29 |     while True:
 30 |         try:
 31 |             await queue.put({"type": "heartbeat", "timestamp": datetime.utcnow().isoformat()})
 32 |             await asyncio.sleep(interval)
 33 |         except asyncio.CancelledError:
 34 |             break
 35 |         except Exception as e:
 36 |             logger.error(f"Error sending heartbeat: {e}")
 37 |             await asyncio.sleep(1)  # Brief pause before retrying
 38 | 
 39 | class CodebaseInsightSseTransport(SseServerTransport):
 40 |     """Custom SSE transport implementation for Codebase Insight."""
 41 |     
 42 |     def __init__(self, endpoint: str):
 43 |         """Initialize the SSE transport.
 44 |         
 45 |         Args:
 46 |             endpoint: The endpoint path for SSE connections
 47 |         """
 48 |         super().__init__(endpoint)
 49 |         self.connections = {}
 50 |         self.message_queue = asyncio.Queue()
 51 |         logger.info(f"Initializing SSE transport with endpoint: {endpoint}")
 52 |         
 53 |     async def handle_sse(self, request: Request) -> StreamingResponse:
 54 |         """Handle incoming SSE connection requests.
 55 |         
 56 |         Args:
 57 |             request: The incoming HTTP request
 58 |             
 59 |         Returns:
 60 |             StreamingResponse for the SSE connection
 61 |         """
 62 |         connection_id = str(uuid.uuid4())
 63 |         queue = asyncio.Queue()
 64 |         self.connections[connection_id] = queue
 65 |         
 66 |         logger.info(f"New SSE connection established: {connection_id}")
 67 |         logger.debug(f"Request headers: {dict(request.headers)}")
 68 |         logger.debug(f"Active connections: {len(self.connections)}")
 69 |         
 70 |         async def event_generator():
 71 |             heartbeat_task = asyncio.create_task(send_heartbeats(queue))  # start before try so finally can always cancel it
 72 |             try:
 73 |                 logger.debug(f"Starting event generator for connection {connection_id}")
 74 |                 logger.debug(f"Heartbeat task started for connection {connection_id}")
 75 |                 
 76 |                 while True:
 77 |                     try:
 78 |                         message = await queue.get()
 79 |                         logger.debug(f"Connection {connection_id} received message: {message}")
 80 |                         
 81 |                         if isinstance(message, dict):
 82 |                             data = json.dumps(message)
 83 |                         else:
 84 |                             data = str(message)
 85 |                             
 86 |                         yield f"data: {data}\n\n"
 87 |                         logger.debug(f"Sent message to connection {connection_id}")
 88 |                         
 89 |                     except asyncio.CancelledError:
 90 |                         logger.info(f"Event generator cancelled for connection {connection_id}")
 91 |                         break
 92 |                     except Exception as e:
 93 |                         logger.error(f"Error in event generator for connection {connection_id}: {e}")
 94 |                         break
 95 |                         
 96 |             finally:
 97 |                 heartbeat_task.cancel()
 98 |                 try:
 99 |                     await heartbeat_task
100 |                 except asyncio.CancelledError:
101 |                     pass
102 |                     
103 |                 if connection_id in self.connections:
104 |                     del self.connections[connection_id]
105 |                 logger.info(f"Event generator cleaned up for connection {connection_id}")
106 |                 logger.debug(f"Remaining active connections: {len(self.connections)}")
107 |                 
108 |         return StreamingResponse(
109 |             event_generator(),
110 |             media_type="text/event-stream",
111 |             headers={
112 |                 "Cache-Control": "no-cache",
113 |                 "Connection": "keep-alive",
114 |                 "X-Accel-Buffering": "no",
115 |                 "Access-Control-Allow-Origin": "*",  # Allow CORS
116 |                 "Access-Control-Allow-Headers": "Content-Type",
117 |                 "Access-Control-Allow-Methods": "GET, POST"
118 |             }
119 |         )
120 |         
121 |     async def handle_message(self, request: Request) -> Response:
122 |         """Handle incoming messages to be broadcast over SSE.
123 |         
124 |         Args:
125 |             request: The incoming HTTP request with the message
126 |             
127 |         Returns:
128 |             HTTP response indicating message handling status
129 |         """
130 |         try:
131 |             message = await request.json()
132 |             
133 |             # Broadcast to all connections
134 |             for queue in self.connections.values():
135 |                 await queue.put(message)
136 |                 
137 |             return JSONResponse({"status": "message sent"})
138 |             
139 |         except Exception as e:
140 |             logger.error(f"Error handling message: {e}")
141 |             return JSONResponse(
142 |                 {"error": str(e)},
143 |                 status_code=500
144 |             )
145 |             
146 |     async def send(self, message: Any) -> None:
147 |         """Send a message to all connected clients.
148 |         
149 |         Args:
150 |             message: The message to send
151 |         """
152 |         # Put message in queue for all connections
153 |         for queue in self.connections.values():
154 |             await queue.put(message)
155 |             
156 |     async def broadcast(self, message: Any) -> None:
157 |         """Broadcast a message to all connected clients.
158 |         
159 |         Args:
160 |             message: The message to broadcast
161 |         """
162 |         await self.send(message)
163 |         
164 |     async def connect(self) -> Tuple[MemoryObjectReceiveStream, MemoryObjectSendStream]:
165 |         """Create a new SSE connection.
166 |         
167 |         Returns:
168 |             Tuple of receive and send streams for the connection
169 |         """
170 |         # anyio memory streams must come from the factory, not direct instantiation
171 |         import anyio  # local import; module-level imports stay unchanged
172 |         send_stream, receive_stream = anyio.create_memory_object_stream()
173 |         
174 |         # Store the connection
175 |         connection_id = str(uuid.uuid4())
176 |         self.connections[connection_id] = send_stream
177 |         
178 |         return receive_stream, send_stream
179 |         
180 |     async def disconnect(self, connection_id: str) -> None:
181 |         """Disconnect a client.
182 |         
183 |         Args:
184 |             connection_id: The ID of the connection to disconnect
185 |         """
186 |         if connection_id in self.connections:
187 |             del self.connections[connection_id]
188 |             logger.info(f"Disconnected client: {connection_id}")
189 | 
190 | async def verify_routes(app: Starlette) -> Dict[str, List[str]]:
191 |     """Verify and log all registered routes in the application.
192 |     
193 |     Args:
194 |         app: The Starlette application to verify
195 |         
196 |     Returns:
197 |         Dictionary mapping route paths to their methods
198 |     """
199 |     routes = {}
200 |     for route in app.routes:
201 |         if isinstance(route, Mount):
202 |             logger.info(f"Mount point: {route.path}")
203 |             # Recursively verify mounted routes
204 |             mounted_routes = await verify_routes(route.app)
205 |             for path, methods in mounted_routes.items():
206 |                 full_path = f"{route.path}{path}"
207 |                 routes[full_path] = methods
208 |         else:
209 |             routes[route.path] = route.methods
210 |             logger.info(f"Route: {route.path}, methods: {route.methods}")
211 |     return routes
212 | 
213 | def create_sse_server(mcp_server: Optional[FastMCP] = None) -> Starlette:
214 |     """Create an SSE server instance.
215 |     
216 |     Args:
217 |         mcp_server: Optional FastMCP instance. Currently unused by this transport; reserved for wiring MCP tools into the SSE app.
218 |         
219 |     Returns:
220 |         Starlette application configured for SSE
221 |     """
222 |     app = Starlette(debug=True)  # Enable debug mode for better error reporting
223 |     
224 |     # Create SSE transport
225 |     transport = CodebaseInsightSseTransport("/sse")
226 |     
227 |     # Add CORS middleware
228 |     app.add_middleware(
229 |         CORSMiddleware,
230 |         allow_origins=["*"],  # Allow all origins
231 |         allow_credentials=True,
232 |         allow_methods=["GET", "POST", "OPTIONS"],
233 |         allow_headers=["*"],
234 |         expose_headers=["*"]
235 |     )
236 |     
237 |     async def health_check(request: Request) -> JSONResponse:
238 |         """Health check endpoint."""
239 |         return JSONResponse({
240 |             "status": "ok",
241 |             "timestamp": datetime.utcnow().isoformat(),
242 |             "connections": len(transport.connections)
243 |         })
244 |     
245 |     # Add routes
246 |     app.add_route("/health", health_check, methods=["GET"])
247 |     app.add_route("/sse", transport.handle_sse, methods=["GET"])
248 |     app.add_route("/message", transport.handle_message, methods=["POST"])
249 |     
250 |     logger.info("Created SSE server with routes:")
251 |     app.add_event_handler("startup", lambda: asyncio.create_task(verify_routes(app)))  # create_task needs a running loop, so defer to startup
252 |     
253 |     return app
254 | 
255 | class MCP_CodebaseInsightServer:
256 |     """MCP server implementation for Codebase Insight.
257 |     
258 |     This class manages the Model Context Protocol server, connecting it to
259 |     the Codebase Insight's core components and exposing them as MCP tools.
260 |     """
261 |     
262 |     def __init__(self, server_state):
263 |         """Initialize the MCP server with access to the application state.
264 |         
265 |         Args:
266 |             server_state: The global server state providing access to components
267 |         """
268 |         self.state = server_state
269 |         self.mcp_server = FastMCP(name="MCP-Codebase-Insight")
270 |         self.tools_registered = False
271 |         self._starlette_app = None  # Cache the Starlette app
272 |         logger.info("MCP Codebase Insight server initialized")
273 |         
274 |     async def cleanup(self):
275 |         """Clean up resources used by the MCP server.
276 |         
277 |         This method ensures proper shutdown of the MCP server and
278 |         releases any resources it might be holding.
279 |         """
280 |         logger.info("Cleaning up MCP server resources")
281 |         # If the MCP server has a shutdown or cleanup method, call it here
282 |         # For now, just log the cleanup attempt
283 |         self.tools_registered = False
284 |         self._starlette_app = None
285 |         logger.info("MCP server cleanup completed")
286 |     
287 |     def is_initialized(self) -> bool:
288 |         """Check if the MCP server is properly initialized.
289 |         
290 |         Returns:
291 |             True if the server is initialized and ready to use, False otherwise
292 |         """
293 |         return self.tools_registered and self._starlette_app is not None
294 |         
295 |     def register_tools(self):
296 |         """Register all available tools with the MCP server.
297 |         
298 |         This connects the MCP protocol to the Codebase Insight core components,
299 |         exposing their functionality through the MCP interface.
300 |         """
301 |         if self.tools_registered:
302 |             logger.debug("Tools already registered, skipping")
303 |             return
304 |             
305 |         logger.info("Registering tools with MCP server")
306 |         
307 |         # Check if critical dependencies are available
308 |         critical_dependencies = ["vector_store", "knowledge_base", "task_manager", "adr_manager"]
309 |         missing_dependencies = []
310 |         
311 |         for dependency in critical_dependencies:
312 |             if not self.state.get_component(dependency):
313 |                 missing_dependencies.append(dependency)
314 |                 
315 |         if missing_dependencies:
316 |             logger.warning(f"Some critical dependencies are not available: {', '.join(missing_dependencies)}")
317 |             logger.warning("Tools requiring these dependencies will not be registered")
318 |             # Don't fail registration completely - continue with available tools
319 |         
320 |         # Register available tools
321 |         try:
322 |             self._register_vector_search()
323 |             self._register_knowledge()
324 |             self._register_adr()
325 |             self._register_task()
326 |             
327 |             # Mark tools as registered even if some failed
328 |             self.tools_registered = True
329 |             logger.info("MCP tools registration completed")
330 |         except Exception as e:
331 |             logger.error(f"Error registering MCP tools: {e}", exc_info=True)
332 |             # Don't mark as registered if there was an error
333 |         
334 |     def _register_vector_search(self):
335 |         """Register the vector search tool with the MCP server."""
336 |         vector_store = self.state.get_component("vector_store")
337 |         if not vector_store:
338 |             logger.warning("Vector store component not available, skipping tool registration")
339 |             return
340 |             
341 |         # Verify that the vector store is properly initialized
342 |         if not hasattr(vector_store, 'search') or not callable(getattr(vector_store, 'search')):
343 |             logger.warning("Vector store component does not have a search method, skipping tool registration")
344 |             return
345 |             
346 |         async def vector_search(query: str, limit: int = 5, threshold: float = 0.7, 
347 |                           file_type: Optional[str] = None, path_pattern: Optional[str] = None):
348 |             """Search for code snippets semantically similar to the query text."""
349 |             logger.debug(f"MCP vector search request: {query=}, {limit=}, {threshold=}")
350 |             
351 |             # Prepare filters if provided
352 |             filter_conditions = {}
353 |             if file_type:
354 |                 filter_conditions["file_type"] = {"$eq": file_type}
355 |             if path_pattern:
356 |                 filter_conditions["path"] = {"$like": path_pattern}
357 |                 
358 |             results = await vector_store.search(
359 |                 text=query,
360 |                 filter_conditions=filter_conditions if filter_conditions else None,
361 |                 limit=limit
362 |             )
363 |             
364 |             # Format results
365 |             formatted_results = [
366 |                 {
367 |                     "id": result.id,
368 |                     "score": result.score,
369 |                     "text": result.metadata.get("text", ""),
370 |                     "file_path": result.metadata.get("file_path", ""),
371 |                     "line_range": result.metadata.get("line_range", ""),
372 |                     "type": result.metadata.get("type", "code"),
373 |                     "language": result.metadata.get("language", ""),
374 |                     "timestamp": result.metadata.get("timestamp", "")
375 |                 }
376 |                 for result in results
377 |                 if result.score >= threshold
378 |             ]
379 |             
380 |             return {"results": formatted_results}
381 |             
382 |         self.mcp_server.add_tool(
383 |             name="vector-search",
384 |             fn=vector_search,
385 |             description="Search for code snippets semantically similar to the query text"
386 |         )
387 |         logger.debug("Vector search tool registered")
388 |         
389 |     def _register_knowledge(self):
390 |         """Register the knowledge base tool with the MCP server."""
391 |         knowledge_base = self.state.get_component("knowledge_base")
392 |         if not knowledge_base:
393 |             logger.warning("Knowledge base component not available, skipping tool registration")
394 |             return
395 |             
396 |         async def search_knowledge(query: str, pattern_type: str = "code", limit: int = 5):
397 |             """Search for patterns in the knowledge base."""
398 |             logger.debug(f"MCP knowledge search request: {query=}, {pattern_type=}, {limit=}")
399 |             
400 |             results = await knowledge_base.search_patterns(
401 |                 query=query,
402 |                 pattern_type=pattern_type,
403 |                 limit=limit
404 |             )
405 |             
406 |             # Format results
407 |             formatted_results = [
408 |                 {
409 |                     "id": result.id,
410 |                     "pattern": result.pattern,
411 |                     "description": result.description,
412 |                     "type": result.type,
413 |                     "confidence": result.confidence,
414 |                     "metadata": result.metadata
415 |                 }
416 |                 for result in results
417 |             ]
418 |             
419 |             return {"results": formatted_results}
420 |             
421 |         self.mcp_server.add_tool(
422 |             name="knowledge-search",
423 |             fn=search_knowledge,
424 |             description="Search for patterns in the knowledge base"
425 |         )
426 |         logger.debug("Knowledge search tool registered")
427 |         
428 |     def _register_adr(self):
429 |         """Register the ADR management tool with the MCP server."""
430 |         adr_manager = self.state.get_component("adr_manager")
431 |         if not adr_manager:
432 |             logger.warning("ADR manager component not available, skipping tool registration")
433 |             return
434 |             
435 |         async def list_adrs(status: Optional[str] = None, limit: int = 10):
436 |             """List architectural decision records."""
437 |             logger.debug(f"MCP ADR list request: {status=}, {limit=}")
438 |             
439 |             try:
440 |                 adrs = await adr_manager.list_adrs(status=status, limit=limit)
441 |                 
442 |                 # Format results
443 |                 formatted_results = [
444 |                     {
445 |                         "id": adr.id,
446 |                         "title": adr.title,
447 |                         "status": adr.status,
448 |                         "date": adr.date.isoformat() if adr.date else None,
449 |                         "authors": adr.authors,
450 |                         "summary": adr.summary
451 |                     }
452 |                     for adr in adrs
453 |                 ]
454 |                 
455 |                 return {"adrs": formatted_results}
456 |             except Exception as e:
457 |                 logger.error(f"Error listing ADRs: {e}", exc_info=True)
458 |                 return {"error": str(e), "adrs": []}
459 |                 
460 |         self.mcp_server.add_tool(
461 |             name="adr-list",
462 |             fn=list_adrs,
463 |             description="List architectural decision records"
464 |         )
465 |         logger.debug("ADR management tool registered")
466 |         
467 |     def _register_task(self):
468 |         """Register the task management tool with the MCP server."""
469 |         task_tracker = self.state.get_component("task_tracker")
470 |         if not task_tracker:
471 |             logger.warning("Task tracker component not available, skipping tool registration")
472 |             return
473 |             
474 |         async def get_task_status(task_id: str):
475 |             """Get the status of a specific task."""
476 |             logger.debug(f"MCP task status request: {task_id=}")
477 |             
478 |             try:
479 |                 status = await task_tracker.get_task_status(task_id)
480 |                 return status
481 |             except Exception as e:
482 |                 logger.error(f"Error getting task status: {e}", exc_info=True)
483 |                 return {"error": str(e), "status": "unknown"}
484 |                 
485 |         self.mcp_server.add_tool(
486 |             name="task-status",
487 |             fn=get_task_status,
488 |             description="Get the status of a specific task"
489 |         )
490 |         logger.debug("Task management tool registered")
491 |         
492 |     def get_starlette_app(self) -> Starlette:
493 |         """Get the Starlette application for the MCP server.
494 |         
495 |         Returns:
496 |             Configured Starlette application
497 |         """
498 |         # Ensure tools are registered
499 |         self.register_tools()
500 |         
501 |         # Create and return the Starlette app for SSE
502 |         if self._starlette_app is None:
503 |             self._starlette_app = create_sse_server(self.mcp_server)
504 |         return self._starlette_app
505 | 
```
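
A minimal client sketch (not from the repository) shows how the transport above can be exercised end to end. It assumes the Starlette app returned by `create_sse_server()` is served at `http://localhost:8000`; the `/sse` and `/message` paths come from the routes registered above, and every other name here is illustrative:

```python
import asyncio

import httpx


async def listen(client: httpx.AsyncClient) -> None:
    """Consume the SSE stream and print each event's data payload."""
    async with client.stream("GET", "/sse") as response:
        async for line in response.aiter_lines():
            # SSE frames arrive as "data: ..." lines separated by blank lines
            if line.startswith("data: "):
                print("event:", line[len("data: "):])


async def main() -> None:
    # timeout=None keeps the long-lived SSE stream from timing out
    async with httpx.AsyncClient(base_url="http://localhost:8000", timeout=None) as client:
        listener = asyncio.create_task(listen(client))
        await asyncio.sleep(1)  # give the SSE connection time to establish
        # Broadcast a message to all connected clients via handle_message
        await client.post("/message", json={"type": "demo", "payload": "hello"})
        await asyncio.sleep(2)  # wait long enough to observe the event
        listener.cancel()
        try:
            await listener
        except asyncio.CancelledError:
            pass


if __name__ == "__main__":
    asyncio.run(main())
```

Heartbeat frames from `send_heartbeats` arrive on the same stream every 30 seconds, so an idle listener still sees periodic traffic.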

--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------

```python
  1 | """Test fixtures for the codebase insight server."""
  2 | 
  3 | import asyncio
  4 | import logging
  5 | import os
  6 | import sys
  7 | import threading
  8 | import uuid
  9 | import warnings
 10 | from contextlib import ExitStack
 11 | from pathlib import Path
 12 | from threading import Lock
 13 | from typing import AsyncGenerator, Dict, Generator, Optional, Set
 14 | import tracemalloc
 15 | 
 16 | import httpx
 17 | import pytest
 18 | import pytest_asyncio
 19 | from fastapi import FastAPI
 20 | 
 21 | # Ensure the src directory is in the Python path
 22 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
 23 | 
 24 | from src.mcp_codebase_insight.core.config import ServerConfig
 25 | from src.mcp_codebase_insight.server import CodebaseAnalysisServer
 26 | from src.mcp_codebase_insight.server_test_isolation import get_isolated_server_state
 27 | 
 28 | logger = logging.getLogger(__name__)
 29 | 
 30 | # Enable tracemalloc for debugging resource warnings and coroutine tracking
 31 | tracemalloc.start(25)  # Keep 25 frames to provide good traceback info
 32 | 
 33 | # Track process-specific event loops with mutex protection
 34 | _event_loops: Dict[int, asyncio.AbstractEventLoop] = {}
 35 | _loops_lock = Lock()
 36 | _active_test_ids: Set[str] = set()
 37 | _tests_lock = Lock()
 38 | 
 39 | # Configure logging for better debug info
 40 | logging.basicConfig(level=logging.INFO)
 41 | asyncio_logger = logging.getLogger("asyncio")
 42 | asyncio_logger.setLevel(logging.INFO)
 43 | 
 44 | def _get_test_id():
 45 |     """Get a unique identifier for the current test."""
 46 |     return f"{os.getpid()}_{threading.get_ident()}"
 47 | 
 48 | # Primary event loop with session scope for compatibility with pytest-asyncio
 49 | @pytest.fixture(scope="session")
 50 | def event_loop():
 51 |     """Create a session-scoped event loop for the test session."""
 52 |     pid = os.getpid()
 53 |     logger.info(f"Creating session-scoped event loop for process {pid}")
 54 | 
 55 |     # Create and set a new loop for this session
 56 |     policy = asyncio.get_event_loop_policy()
 57 |     loop = policy.new_event_loop()
 58 |     asyncio.set_event_loop(loop)
 59 | 
 60 |     with _loops_lock:
 61 |         _event_loops[pid] = loop
 62 | 
 63 |     yield loop
 64 | 
 65 |     # Final cleanup
 66 |     with _loops_lock:
 67 |         if pid in _event_loops:
 68 |             del _event_loops[pid]
 69 | 
 70 |     # Close the loop to prevent asyncio related warnings
 71 |     try:
 72 |         if not loop.is_closed():
 73 |             loop.run_until_complete(loop.shutdown_asyncgens())
 74 |             loop.close()
 75 |     except Exception:
 76 |         logger.exception("Error closing session event loop")
 77 | 
 78 | # To address the event_loop fixture scope mismatch issue, we'll use a different approach
 79 | # We'll have a single session-scoped event loop that's accessible to function-scoped fixtures
 80 | @pytest.fixture(scope="function")
 81 | def function_event_loop(event_loop):
 82 |     """
 83 |     Create a function-scoped event loop proxy for test isolation.
 84 | 
 85 |     This approach avoids the ScopeMismatch error by using the session-scoped event_loop
 86 |     but providing function-level isolation.
 87 |     """
 88 |     # Return the session loop, but track the test in our isolation system
 89 |     test_id = _get_test_id()
 90 |     logger.debug(f"Using function-level event loop isolation for test {test_id}")
 91 | 
 92 |     with _tests_lock:
 93 |         _active_test_ids.add(test_id)
 94 | 
 95 |     yield event_loop
 96 | 
 97 |     with _tests_lock:
 98 |         if test_id in _active_test_ids:
 99 |             _active_test_ids.remove(test_id)
100 | 
101 | @pytest.fixture(scope="session")
102 | def anyio_backend():
103 |     """Configure pytest-asyncio to use asyncio backend."""
104 |     return "asyncio"
105 | 
106 | @pytest.fixture(scope="session")
107 | def test_server_config():
108 |     """Create a server configuration for tests."""
109 |     # For CI/CD environment, use the environment variables if available
110 |     qdrant_url = os.environ.get("QDRANT_URL", "http://localhost:6333")
111 | 
112 |     # Use the CI/CD collection name if provided, otherwise generate a unique one
113 |     collection_name = os.environ.get("COLLECTION_NAME", f"test_collection_{uuid.uuid4().hex[:8]}")
114 | 
115 |     # Optional: Use a shorter embedding model for tests to save resources
116 |     embedding_model = os.environ.get("EMBEDDING_MODEL", "all-MiniLM-L6-v2")
117 | 
118 |     logger.info(f"Configuring test server with Qdrant URL: {qdrant_url}, collection: {collection_name}")
119 | 
120 |     config = ServerConfig(
121 |         host="localhost",
122 |         port=8000,
123 |         log_level="DEBUG",
124 |         qdrant_url=qdrant_url,
125 |         docs_cache_dir=Path(".test_cache") / "docs",
126 |         adr_dir=Path(".test_cache") / "docs/adrs",
127 |         kb_storage_dir=Path(".test_cache") / "knowledge",
128 |         embedding_model=embedding_model,
129 |         collection_name=collection_name,
130 |         debug_mode=True,
131 |         metrics_enabled=False,
132 |         cache_enabled=True,
133 |         memory_cache_size=1000,
134 |         disk_cache_dir=Path(".test_cache") / "cache"
135 |     )
136 |     return config
137 | 
138 | # Make the qdrant_client fixture session-scoped to avoid connection issues
139 | @pytest.fixture(scope="session")
140 | def qdrant_client(test_server_config):
141 |     """Create a shared Qdrant client for tests."""
142 |     from qdrant_client import QdrantClient
143 |     from qdrant_client.http import models
144 | 
145 |     # Connect to Qdrant
146 |     client = QdrantClient(url=test_server_config.qdrant_url)
147 | 
148 |     # Create the collection if it doesn't exist
149 |     try:
150 |         collections = client.get_collections().collections
151 |         collection_names = [c.name for c in collections]
152 | 
153 |         # If collection doesn't exist, create it
154 |         if test_server_config.collection_name not in collection_names:
155 |             logger.info(f"Creating test collection: {test_server_config.collection_name}")
156 |             client.create_collection(
157 |                 collection_name=test_server_config.collection_name,
158 |                 vectors_config=models.VectorParams(
159 |                     size=384,  # Dimension for all-MiniLM-L6-v2
160 |                     distance=models.Distance.COSINE,
161 |                 ),
162 |             )
163 |         else:
164 |             logger.info(f"Collection {test_server_config.collection_name} already exists")
165 |     except Exception as e:
166 |         logger.warning(f"Error checking/creating Qdrant collection: {e}")
167 | 
168 |     yield client
169 | 
170 |     # Cleanup - delete the collection at the end of the session
171 |     try:
172 |         if test_server_config.collection_name.startswith("test_"):
173 |             logger.info(f"Cleaning up test collection: {test_server_config.collection_name}")
174 |             client.delete_collection(collection_name=test_server_config.collection_name)
175 |     except Exception as e:
176 |         logger.warning(f"Error deleting Qdrant collection: {e}")
177 | 
178 | # Session-scoped server instance for shared resources
179 | @pytest_asyncio.fixture(scope="session")
180 | async def session_test_server(event_loop, test_server_config):
181 |     """Create a session-scoped server instance for shared tests."""
182 |     logger.info("Creating session-scoped test server instance")
183 | 
184 |     # Create the server instance with the provided test configuration
185 |     server = CodebaseAnalysisServer(test_server_config)
186 | 
187 |     # Initialize the server state
188 |     logger.info("Initializing server state...")
189 |     await server.state.initialize()
190 |     logger.info("Server state initialized successfully")
191 | 
192 |     # Initialize the server
193 |     logger.info("Initializing server...")
194 |     await server.initialize()
195 |     logger.info("Server initialized successfully")
196 | 
197 |     # Create and mount MCP server
198 |     from src.mcp_codebase_insight.core.sse import MCP_CodebaseInsightServer, create_sse_server
199 |     from src.mcp_codebase_insight.core.state import ComponentStatus
200 | 
201 |     logger.info("Creating and mounting MCP server...")
202 |     try:
203 |         # Create SSE server
204 |         sse_server = create_sse_server()
205 |         logger.info("Created SSE server")
206 | 
207 |         # Mount SSE server
208 |         server.app.mount("/mcp", sse_server)
209 |         logger.info("Mounted SSE server at /mcp")
210 | 
211 |         # Create MCP server instance
212 |         mcp_server = MCP_CodebaseInsightServer(server.state)
213 |         logger.info("Created MCP server instance")
214 | 
215 |         # Register tools
216 |         mcp_server.register_tools()
217 |         logger.info("Registered MCP server tools")
218 | 
219 |         # Update component status
220 |         server.state.update_component_status(
221 |             "mcp_server",
222 |             ComponentStatus.INITIALIZED,
223 |             instance=mcp_server
224 |         )
225 |         logger.info("Updated MCP server component status")
226 | 
227 |     except Exception as e:
228 |         logger.error(f"Failed to create/mount MCP server: {e}", exc_info=True)
229 |         raise RuntimeError(f"Failed to create/mount MCP server: {e}")
230 | 
231 |     # Add test-specific endpoints
232 |     @server.app.get("/direct-sse")
233 |     async def direct_sse_endpoint():
234 |         """Test endpoint for direct SSE connection."""
235 |         from starlette.responses import Response
236 |         return Response(
237 |             content="data: Direct SSE test endpoint\n\n",
238 |             media_type="text/event-stream",
239 |             headers={
240 |                 "Cache-Control": "no-cache",
241 |                 "Connection": "keep-alive",
242 |                 "X-Accel-Buffering": "no"
243 |             }
244 |         )
245 | 
246 |     @server.app.get("/mcp/sse-mock")
247 |     async def mock_sse_endpoint():
248 |         """Mock SSE endpoint for testing."""
249 |         from starlette.responses import Response
250 |         return Response(
251 |             content="data: Mock SSE endpoint\n\n",
252 |             media_type="text/event-stream",
253 |             headers={
254 |                 "Cache-Control": "no-cache",
255 |                 "Connection": "keep-alive",
256 |                 "X-Accel-Buffering": "no"
257 |             }
258 |         )
259 | 
260 |     @server.app.get("/debug/routes")
261 |     async def debug_routes():
262 |         """Debug endpoint to list all registered routes."""
263 |         from starlette.responses import Response
264 |         routes = []
265 |         for route in server.app.routes:
266 |             route_info = {
267 |                 "path": getattr(route, "path", str(route)),
268 |                 "methods": getattr(route, "methods", set()),
269 |                 "name": getattr(route, "name", None),
270 |                 "endpoint": str(getattr(route, "endpoint", None))
271 |             }
272 |             routes.append(route_info)
273 |         return {"routes": routes}
274 | 
275 |     @server.app.get("/health")
276 |     async def health_check_test():
277 |         """Health check endpoint for testing."""
278 |         mcp_server = server.state.get_component("mcp_server")
279 |         return {
280 |             "status": "ok",
281 |             "initialized": server.state.initialized,
282 |             "mcp_available": mcp_server is not None,
283 |             "instance_id": server.state.instance_id,
284 |             "components": server.state.list_components()
285 |         }
286 | 
287 |     # The server is already initialized, no need to start it
288 |     logger.info("Test server ready")
289 | 
290 |     yield server
291 | 
292 |     # Cleanup
293 |     logger.info("Cleaning up test server...")
294 |     await server.shutdown()
295 |     logger.info("Test server cleanup complete")
296 | 
297 | # Function-scoped server instance for isolated tests
298 | @pytest_asyncio.fixture
299 | async def test_server_instance(function_event_loop, test_server_config):
300 |     """Create a function-scoped server instance for isolated tests."""
301 |     logger.info(f"Creating function-scoped test server instance for test {_get_test_id()}")
302 | 
303 |     # Create server with isolated state
304 |     server = CodebaseAnalysisServer(test_server_config)
305 |     instance_id = f"test_server_{uuid.uuid4().hex}"
306 |     server.state = get_isolated_server_state(instance_id)
307 | 
308 |     try:
309 |         # Initialize state
310 |         if not server.state.initialized:
311 |             logger.info("Initializing server state...")
312 |             await server.state.initialize()
313 |             logger.info("Server state initialized successfully")
314 | 
315 |         # Initialize server
316 |         if not server.is_initialized:
317 |             logger.info("Initializing server...")
318 |             await server.initialize()
319 |             logger.info("Server initialized successfully")
320 | 
321 |         yield server
322 |     finally:
323 |         try:
324 |             # Clean up server state
325 |             logger.info("Starting server cleanup...")
326 | 
327 |             # Check server.state exists and is initialized
328 |             if hasattr(server, 'state') and server.state and hasattr(server.state, 'initialized') and server.state.initialized:
329 |                 logger.info("Cleaning up server state...")
330 |                 try:
331 |                     await server.state.cleanup()
332 |                     logger.info("Server state cleanup completed")
333 |                 except Exception as e:
334 |                     logger.error(f"Error during server state cleanup: {e}")
335 | 
336 |             # Check server is initialized
337 |             if hasattr(server, 'is_initialized') and server.is_initialized:
338 |                 logger.info("Shutting down server...")
339 |                 try:
340 |                     await server.shutdown()
341 |                     logger.info("Server shutdown completed")
342 |                 except Exception as e:
343 |                     logger.error(f"Error during server shutdown: {e}")
344 |         except Exception as e:
345 |             logger.error(f"Error during overall server cleanup: {e}")
346 | 
347 | # Session-scoped httpx client
348 | @pytest_asyncio.fixture(scope="session")
349 | async def session_httpx_client(session_test_server):
350 |     """Create a session-scoped httpx client for shared tests."""
351 |     logger.info("Creating session-scoped httpx test client")
352 | 
353 |     # Configure transport with proper ASGI handling
354 |     transport = httpx.ASGITransport(
355 |         app=session_test_server.app,
356 |         raise_app_exceptions=False,
357 |     )
358 | 
359 |     # Create client
360 |     client = httpx.AsyncClient(
361 |         transport=transport,
362 |         base_url="http://testserver",
363 |         follow_redirects=True,
364 |         timeout=30.0
365 |     )
366 | 
367 |     logger.info("Session-scoped httpx test client created")
368 | 
369 |     try:
370 |         yield client
371 |     finally:
372 |         try:
373 |             await client.aclose()
374 |             logger.info("Session-scoped httpx test client closed")
375 |         except Exception as e:
376 |             logger.error(f"Error during session client cleanup: {e}")
377 | 
378 | # Function-scoped httpx client
379 | @pytest_asyncio.fixture
380 | async def httpx_test_client(test_server_instance):
381 |     """Create a function-scoped httpx client for isolated tests."""
382 |     logger.info(f"Creating function-scoped httpx test client for test {_get_test_id()}")
383 | 
384 |     # Configure transport with proper ASGI handling
385 |     transport = httpx.ASGITransport(
386 |         app=test_server_instance.app,
387 |         raise_app_exceptions=False,
388 |     )
389 | 
390 |     # Create client
391 |     client = httpx.AsyncClient(
392 |         transport=transport,
393 |         base_url="http://testserver",
394 |         follow_redirects=True,
395 |         timeout=30.0
396 |     )
397 | 
398 |     logger.info("Function-scoped httpx test client created")
399 | 
400 |     try:
401 |         yield client
402 |     finally:
403 |         try:
404 |             await client.aclose()
405 |             logger.info("Function-scoped httpx test client closed")
406 |         except Exception as e:
407 |             logger.error(f"Error during client cleanup: {e}")
408 | 
409 | # Default client for tests (currently using session-scoped client)
410 | @pytest_asyncio.fixture
411 | async def client(session_httpx_client) -> AsyncGenerator[httpx.AsyncClient, None]:
412 |     """Return the current httpx test client.
413 | 
414 |     This is a function-scoped async fixture that yields the session-scoped client.
415 |     Tests can override this to use the function-scoped client if needed.
416 |     """
417 |     yield session_httpx_client
418 | 
419 | # Test data fixtures
420 | @pytest.fixture
421 | def test_code():
422 |     """Provide sample code for tests."""
423 |     return """
424 |     def factorial(n):
425 |         if n <= 1:
426 |             return 1
427 |         return n * factorial(n-1)
428 |     """
429 | 
430 | @pytest.fixture
431 | def test_issue():
432 |     """Provide a sample issue for tests."""
433 |     return {
434 |         "title": "Test Issue",
435 |         "description": "This is a test issue for debugging",
436 |         "code": "print('hello world')",
437 |         "error": "TypeError: unsupported operand type(s)",
438 |     }
439 | 
440 | @pytest.fixture
441 | def test_adr():
442 |     """Provide a sample ADR for tests."""
443 |     return {
444 |         "title": "Test ADR",
445 |         "status": "proposed",
446 |         "context": {
447 |             "problem": "This is a test ADR for testing",
448 |             "constraints": ["Test constraint"],
449 |             "assumptions": ["Test assumption"],
450 |             "background": "Test background"
451 |         },
452 |         "decision": "We decided to test the ADR system",
453 |         "consequences": "Testing will be successful",
454 |         "options": [
455 |             {
456 |                 "title": "Test Option",
457 |                 "description": "A test option for the ADR.",
458 |                 "pros": ["Easy to implement"],
459 |                 "cons": ["Not production ready"]
460 |             }
461 |         ]
462 |     }
463 | 
464 | # Define custom pytest hooks
465 | def pytest_collection_modifyitems(items):
466 |     """Add the isolated_event_loop marker to integration tests."""
467 |     for item in items:
468 |         module_name = item.module.__name__ if hasattr(item, 'module') else ''
469 |         if 'integration' in module_name:
470 |             # Add our custom marker to all integration tests
471 |             item.add_marker(pytest.mark.isolated_event_loop)
472 | 
473 | def pytest_configure(config):
474 |     """Configure pytest with our specific settings."""
475 |     config.addinivalue_line(
476 |         "markers", "isolated_event_loop: mark test to use an isolated event loop"
477 |     )
478 | 
479 |     # Suppress event loop warnings
480 |     warnings.filterwarnings(
481 |         "ignore",
482 |         message="There is no current event loop",
483 |         category=DeprecationWarning
484 |     )
485 |     warnings.filterwarnings(
486 |         "ignore",
487 |         message="The loop argument is deprecated",
488 |         category=DeprecationWarning
489 |     )
490 | 
491 | def pytest_runtest_setup(item):
492 |     """Set up for each test."""
493 |     # Get the module name for the test
494 |     module_name = item.module.__name__ if hasattr(item, 'module') else ''
495 | 
496 |     # Set an environment variable with the current test module
497 |     # This helps with test isolation in the server code
498 |     os.environ['CURRENT_TEST_MODULE'] = module_name
499 |     os.environ['CURRENT_TEST_NAME'] = item.name if hasattr(item, 'name') else ''
500 | 
501 |     # For any async test, ensure we have a valid event loop
502 |     if 'asyncio' in item.keywords:
503 |         try:
504 |             loop = asyncio.get_event_loop()
505 |             if loop.is_closed():
506 |                 logger.warning(f"Found closed loop in {module_name}:{item.name}, creating new loop")
507 |                 loop = asyncio.new_event_loop()
508 |                 asyncio.set_event_loop(loop)
509 |         except RuntimeError:
510 |             logger.warning(f"No event loop found in {module_name}:{item.name}, creating new loop")
511 |             loop = asyncio.new_event_loop()
512 |             asyncio.set_event_loop(loop)
513 | 
514 | def pytest_runtest_teardown(item):
515 |     """Clean up after each test."""
516 |     # Clear the current test environment variables
517 |     if 'CURRENT_TEST_MODULE' in os.environ:
518 |         del os.environ['CURRENT_TEST_MODULE']
519 |     if 'CURRENT_TEST_NAME' in os.environ:
520 |         del os.environ['CURRENT_TEST_NAME']
521 | 
522 | # Cleanup fixture
523 | @pytest.fixture(autouse=True, scope="session")
524 | def cleanup_server_states(event_loop: asyncio.AbstractEventLoop):
525 |     """Clean up any lingering server states."""
526 |     from src.mcp_codebase_insight.server_test_isolation import _server_states
527 | 
528 |     yield
529 | 
530 |     try:
531 |         # Report any unclosed instances
532 |         logger.info(f"Found {len(_server_states)} server states at end of session")
533 |         for instance_id, state in list(_server_states.items()):
534 |             logger.info(f"Cleaning up state for instance: {instance_id}")
535 |             try:
536 |                 if state.initialized:
537 |                     try:
538 |                         # Use the event loop for cleanup
539 |                         if not event_loop.is_closed():
540 |                             event_loop.run_until_complete(state.cleanup())
541 |                     except Exception as e:
542 |                         logger.error(f"Error cleaning up state: {e}")
543 |             except Exception as e:
544 |                 logger.error(f"Error checking state initialized: {e}")
545 |     except Exception as e:
546 |         logger.error(f"Error during server states cleanup: {e}")
547 | 
548 |     try:
549 |         # Cancel any remaining tasks
550 |         for pid, loop in list(_event_loops.items()):
551 |             if not loop.is_closed():
552 |                 for task in asyncio.all_tasks(loop):
553 |                     if not task.done() and not task.cancelled():
554 |                         logger.warning(f"Force cancelling task: {task.get_name()}")
555 |                         task.cancel()
556 |     except Exception as e:
557 |         logger.error(f"Error cancelling tasks: {e}")
558 | 
```
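
For orientation, here is a hypothetical test (assumed, not taken from the repository) that consumes these fixtures; the function-scoped `client` fixture yields the session-scoped `httpx.AsyncClient` bound to the test server's ASGI app, so requests never leave the process:

```python
import pytest


@pytest.mark.asyncio
async def test_health_endpoint(client):
    """The test server should report a healthy, initialized state."""
    response = await client.get("/health")
    assert response.status_code == 200

    body = response.json()
    # These keys are populated by the /health handler defined in conftest.py
    assert body["status"] == "ok"
    assert body["initialized"] is True
    assert "mcp_available" in body
```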

--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/vector_store.py:
--------------------------------------------------------------------------------

```python
  1 | """Vector store for pattern similarity search using Qdrant."""
  2 | 
  3 | from typing import Dict, List, Optional
  4 | import asyncio
  5 | import logging
  6 | import uuid
  7 | from datetime import datetime
  8 | 
  9 | from qdrant_client import QdrantClient
 10 | from qdrant_client.http import models as rest
 11 | from qdrant_client.http.models import Distance, VectorParams
 12 | from qdrant_client.http.exceptions import UnexpectedResponse
 13 | 
 14 | logger = logging.getLogger(__name__)
 15 | 
 16 | # Note: Parameter changes between Qdrant client versions:
 17 | # - In v1.13.3+, the parameter 'query_vector' was renamed to 'query' in the query_points method
 18 | # - The store_pattern and update_pattern methods now accept 'id' instead of 'pattern_id'
 19 | # For backward compatibility, we support both parameter styles.
 20 | 
 21 | class SearchResult:
 22 |     """Search result from vector store."""
 23 |     
 24 |     def __init__(self, id: str, score: float, metadata: Optional[Dict] = None):
 25 |         """Initialize search result."""
 26 |         self.id = id
 27 |         self.score = score
 28 |         self.metadata = metadata or {}  # Initialize with empty dict or provided metadata
 29 |     
 30 |     def __repr__(self):
 31 |         """String representation of search result."""
 32 |         return f"SearchResult(id={self.id}, score={self.score}, metadata={self.metadata})"
 33 | 
 34 | class VectorStore:
 35 |     """Vector store for pattern similarity search."""
 36 |     
 37 |     def __init__(
 38 |         self,
 39 |         url: str,
 40 |         embedder,
 41 |         collection_name: str = "codebase_patterns",
 42 |         vector_size: int = 384,  # Default for all-MiniLM-L6-v2
 43 |         api_key: Optional[str] = None,
 44 |         vector_name: str = "default"  # Name of the vector field in the Qdrant collection
 45 |     ):
 46 |         """Initialize vector store."""
 47 |         self.url = url
 48 |         self.embedder = embedder
 49 |         self.collection_name = collection_name
 50 |         self.vector_size = vector_size
 51 |         self.api_key = api_key
 52 |         self.vector_name = vector_name  # Store the vector name
 53 |         self.initialized = False
 54 |         self.client = None
 55 |     
 56 |     async def initialize(self):
 57 |         """Initialize vector store."""
 58 |         if self.initialized:
 59 |             return
 60 |             
 61 |         try:
 62 |             # Initialize embedder first
 63 |             logger.debug("Initializing embedder")
 64 |             await self.embedder.initialize()
 65 |             
 66 |             # Update vector size from embedder if available
 67 |             if hasattr(self.embedder, 'vector_size'):
 68 |                 self.vector_size = self.embedder.vector_size
 69 |                 logger.debug(f"Using vector size {self.vector_size} from embedder")
 70 |             
 71 |             # Initialize Qdrant client with additional parameters
 72 |             logger.debug(f"Connecting to Qdrant at {self.url}")
 73 |             self.client = QdrantClient(
 74 |                 url=self.url,
 75 |                 api_key=self.api_key,
 76 |                 timeout=10.0,
 77 |                 prefer_grpc=False
 78 |             )
 79 |             
 80 |             # Attempt to test connection and set up collection; skip on failure
 81 |             try:
 82 |                 # Test connection with retry
 83 |                 max_retries = 3
 84 |                 retry_delay = 1
 85 |                 for attempt in range(max_retries):
 86 |                     try:
 87 |                         logger.debug(f"Testing Qdrant connection (attempt {attempt+1}/{max_retries})")
 88 |                         self.client.get_collections()
 89 |                         logger.debug("Connection successful")
 90 |                         break
 91 |                     except Exception as e:
 92 |                         if attempt < max_retries - 1:
 93 |                             logger.warning(f"Connection attempt {attempt+1} failed: {e}, retrying in {retry_delay}s")
 94 |                             await asyncio.sleep(retry_delay)
 95 |                             retry_delay *= 2
 96 |                         else:
 97 |                             raise
 98 |                 
 99 |                 # Create collection if it doesn't exist
100 |                 logger.debug(f"Checking for collection {self.collection_name}")
101 |                 collections = self.client.get_collections().collections
102 |                 if not any(c.name == self.collection_name for c in collections):
103 |                     logger.debug(f"Creating collection {self.collection_name}")
104 |                     self.client.create_collection(
105 |                         collection_name=self.collection_name,
106 |                         vectors_config=VectorParams(
107 |                             size=self.vector_size,
108 |                             distance=Distance.COSINE,
109 |                             on_disk=True
110 |                         ),
111 |                         optimizers_config=rest.OptimizersConfigDiff(
112 |                             indexing_threshold=0,
113 |                             memmap_threshold=0
114 |                         )
115 |                     )
116 |                 logger.debug("Vector store collection setup complete")
117 |             except Exception as e:
118 |                 logger.warning(f"Qdrant is unavailable, skipping collection setup: {e}")
119 |             
120 |             # Finalize initialization regardless of Qdrant availability
121 |             self.initialized = True
122 |             logger.debug("Vector store initialization complete")
123 |             
124 |         except Exception as e:
125 |             logger.error(f"Vector store initialization failed: {str(e)}")
126 |             raise RuntimeError(f"Failed to initialize vector store: {str(e)}")
127 |     
128 |     async def cleanup(self):
129 |         """Clean up vector store resources."""
130 |         if not self.initialized:
131 |             logger.debug(f"Vector store not initialized, skipping cleanup for {self.collection_name}")
132 |             return
133 |             
134 |         try:
135 |             logger.debug(f"Cleaning up collection {self.collection_name}")
136 |             
137 |             # Check if collection exists first
138 |             collections = self.client.get_collections().collections
139 |             exists = any(c.name == self.collection_name for c in collections)
140 |             
141 |             if not exists:
142 |                 logger.debug(f"Collection {self.collection_name} does not exist, nothing to clean")
143 |                 return
144 |                 
145 |             # Delete all points in the collection
146 |             try:
147 |                 logger.debug(f"Deleting all points in collection {self.collection_name}")
148 |                 self.client.delete(
149 |                     collection_name=self.collection_name,
150 |                     points_selector=rest.FilterSelector(
151 |                         filter=rest.Filter()  # Empty filter means all points
152 |                     )
153 |                 )
154 |                 logger.debug(f"Successfully deleted all points from {self.collection_name}")
155 |             except Exception as e:
156 |                 logger.warning(f"Error deleting points from collection {self.collection_name}: {e}")
157 |                 
158 |             # Reset initialized state to ensure proper re-initialization if needed
159 |             self.initialized = False
160 |             logger.debug(f"Reset initialized state for vector store with collection {self.collection_name}")
161 |         except Exception as e:
162 |             logger.error(f"Error during vector store cleanup: {e}")
163 |             # Don't raise the exception to avoid breaking test teardowns
164 |     
165 |     async def close(self):
166 |         """Close vector store connection and clean up resources."""
167 |         try:
168 |             logger.debug("Starting vector store closure process")
169 |             await self.cleanup()
170 |         finally:
171 |             if self.client:
172 |                 try:
173 |                     logger.debug("Closing Qdrant client connection")
174 |                     self.client.close()
175 |                     logger.debug("Qdrant client connection closed")
176 |                 except Exception as e:
177 |                     logger.error(f"Error closing Qdrant client: {e}")
178 |             
179 |             # Ensure initialized state is reset
180 |             self.initialized = False
181 |             logger.debug("Vector store fully closed")
182 |     
183 |     async def store_pattern(
184 |         self, id: str, text: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None,
185 |         pattern_type: Optional[str] = None, tags: Optional[List[str]] = None, embedding: Optional[List[float]] = None, metadata: Optional[Dict] = None
186 |     ) -> bool:
187 |         """Store a pattern in the vector store.
188 |         
189 |         This method supports two calling patterns:
190 |         1. With text and metadata for automatic embedding generation
191 |         2. With explicit title, description, pattern_type, tags, and embedding
192 |         
193 |         Args:
194 |             id: ID for the pattern
195 |             text: Text to generate embedding from (if embedding not provided)
196 |             title: Title of the pattern
197 |             description: Description of the pattern
198 |             pattern_type: Type of the pattern
199 |             tags: Tags for the pattern
200 |             embedding: Pre-computed embedding
201 |             metadata: Optional metadata dictionary
202 |             
203 |         Returns:
204 |             True if stored successfully
205 |         """
206 |         try:
207 |             # Ensure we're initialized
208 |             if not self.initialized:
209 |                 await self.initialize()
210 |                 
211 |             # Validate the collection exists and has the correct vector configuration
212 |             try:
213 |                 collection_info = self.client.get_collection(self.collection_name)
214 |                 # With a non-named vector configuration, we just need to verify the collection exists
215 |                 logger.info(f"Collection {self.collection_name} exists")
216 |             except Exception as e:
217 |                 logger.error(f"Error validating collection: {str(e)}")
218 |             
219 |             # Case 1: Using text and metadata
220 |             if text is not None and embedding is None:
221 |                 # Generate embedding from text
222 |                 embedding = await self.embedder.embed(text)
223 |                 
224 |                 # Handle metadata
225 |                 metadata = metadata or {}
226 |                 
227 |                 # Extract or use defaults for required fields
228 |                 title = metadata.get("title", title) or "Untitled"
229 |                 description = metadata.get("description", description) or text[:100]
230 |                 pattern_type = metadata.get("pattern_type", pattern_type) or metadata.get("type", "code")
231 |                 tags = metadata.get("tags", tags) or []
232 |                 
233 |                 # Create payload with all metadata plus required fields
234 |                 payload = {
235 |                     "id": id,
236 |                     "title": title,
237 |                     "description": description,
238 |                     "pattern_type": pattern_type,
239 |                     "type": pattern_type,  # Add 'type' field for consistency
240 |                     "tags": tags,
241 |                     "timestamp": datetime.now().isoformat(),
242 |                     **metadata  # Include all original metadata fields
243 |                 }
244 |             # Case 2: Using explicit parameters
245 |             else:
246 |                 # Ensure we have all required data
247 |                 if embedding is None:
248 |                     raise ValueError("Embedding must be provided if text is not provided")
249 |                     
250 |                 title = title or "Untitled"
251 |                 description = description or ""
252 |                 pattern_type = pattern_type or "code"
253 |                 tags = tags or []
254 |                 
255 |                 payload = {
256 |                     "id": id,
257 |                     "title": title,
258 |                     "description": description,
259 |                     "pattern_type": pattern_type,
260 |                     "type": pattern_type,  # Add 'type' field for consistency
261 |                     "tags": tags,
262 |                     "timestamp": datetime.now().isoformat(),
263 |                 }
264 |                 
265 |                 # Merge with metadata if provided
266 |                 if metadata:
267 |                     payload.update(metadata)
268 |             
269 |             # Debug logs
270 |             logger.info(f"PointStruct data - id: {id}")
271 |             logger.info(f"PointStruct data - vector_name: {self.vector_name}")
272 |             logger.info(f"PointStruct data - embedding length: {len(embedding)}")
273 |             logger.info(f"PointStruct data - payload keys: {payload.keys()}")
274 |             
275 |             # For Qdrant client 1.13.3, use vector parameter
276 |             point = rest.PointStruct(
277 |                 id=id,
278 |                 vector=embedding,  # Use vector parameter for this version of Qdrant client
279 |                 payload=payload
280 |             )
281 |             
282 |             self.client.upsert(
283 |                 collection_name=self.collection_name,
284 |                 points=[point],
285 |                 wait=True
286 |             )
287 |             logger.info(f"Successfully stored pattern with id: {id}")
288 |             return True
289 |         except Exception as e:
290 |             logger.error(f"Error storing pattern: {str(e)}")
291 |             raise RuntimeError(f"Failed to store pattern: {str(e)}")
292 |             
293 |     # Legacy signature kept as a thin wrapper around store_pattern for backward compatibility
294 |     async def _store_pattern_legacy(
295 |         self, pattern_id: str, title: str, description: str, pattern_type: str, tags: List[str], embedding: List[float]
296 |     ) -> bool:
297 |         """Legacy version of store_pattern for backward compatibility."""
298 |         return await self.store_pattern(
299 |             id=pattern_id,
300 |             title=title,
301 |             description=description,
302 |             pattern_type=pattern_type,
303 |             tags=tags,
304 |             embedding=embedding
305 |         )
306 |     
307 |     async def update_pattern(
308 |         self, id: str, title: str, description: str, pattern_type: str, tags: List[str], embedding: List[float]
309 |     ) -> bool:
310 |         """Update a pattern in the vector store."""
311 |         try:
312 |             payload = {
313 |                 "id": id,
314 |                 "title": title,
315 |                 "description": description,
316 |                 "pattern_type": pattern_type,
317 |                 "type": pattern_type,  # Add 'type' field for consistency
318 |                 "tags": tags,
319 |                 "timestamp": datetime.now().isoformat(),
320 |             }
321 |             
322 |             point = rest.PointStruct(
323 |                 id=id,
324 |                 vector=embedding,  # Use vector parameter for this version of Qdrant client
325 |                 payload=payload
326 |             )
327 |             
328 |             self.client.upsert(
329 |                 collection_name=self.collection_name,
330 |                 points=[point],
331 |                 wait=True
332 |             )
333 |             return True
334 |         except Exception as e:
335 |             logger.error(f"Error updating pattern: {str(e)}")
336 |             raise RuntimeError(f"Failed to update pattern: {str(e)}")
337 |     
338 |     async def delete_pattern(self, id: str) -> None:
339 |         """Delete pattern from vector store."""
340 |         self.client.delete(
341 |             collection_name=self.collection_name,
342 |             points_selector=rest.PointIdsList(
343 |                 points=[id]
344 |             )
345 |         )
346 |     
347 |     async def search(
348 |         self,
349 |         text: str,
350 |         filter_conditions: Optional[Dict] = None,
351 |         limit: int = 5
352 |     ) -> List[SearchResult]:
353 |         """Search for similar patterns."""
354 |         # Generate embedding
355 |         vector = await self.embedder.embed(text)
356 |         
357 |         # Create filter if provided (expects Qdrant Filter kwargs, e.g. {"must": [<FieldCondition dicts>]})
358 |         search_filter = None
359 |         if filter_conditions:
360 |             search_filter = rest.Filter(**filter_conditions)
361 |         
362 |         # Search in Qdrant
363 |         results = self.client.query_points(
364 |             collection_name=self.collection_name,
365 |             query=vector,
366 |             query_filter=search_filter,
367 |             limit=limit
368 |         ).points  # QueryResponse wraps the scored hits in .points
369 |         
370 |         # Convert to SearchResult objects
371 |         search_results = []
372 |         
373 |         for result in results:
374 |             # Create default metadata with all required fields
375 |             default_metadata = {
376 |                 "type": "code", 
377 |                 "language": "python",
378 |                 "title": "Test Code",
379 |                 "description": text[:100],
380 |                 "tags": ["test", "vector"],
381 |                 "timestamp": datetime.now().isoformat()
382 |             }
383 |             
384 |             # Handle raw tuple results (defensive: kept for client versions that yielded tuples)
385 |             if isinstance(result, tuple):
386 |                 if len(result) == 2:
387 |                     # Format: (id, score)
388 |                     id_val, score_val = result
389 |                     search_results.append(
390 |                         SearchResult(
391 |                             id=id_val,
392 |                             score=score_val,
393 |                             metadata=default_metadata
394 |                         )
395 |                     )
396 |                 elif len(result) >= 3:
397 |                     # Format: (id, score, payload)
398 |                     id_val, score_val, payload_val = result
399 |                     # If payload is empty, use default metadata
400 |                     metadata = payload_val if payload_val else default_metadata
401 |                     search_results.append(
402 |                         SearchResult(
403 |                             id=id_val,
404 |                             score=score_val,
405 |                             metadata=metadata
406 |                         )
407 |                     )
408 |             elif hasattr(result, 'id') and hasattr(result, 'score'):
409 |                 # ScoredPoint-style object with id/score attributes (the normal case)
410 |                 metadata = getattr(result, 'payload', None) or default_metadata
411 |                 search_results.append(
412 |                     SearchResult(
413 |                         id=result.id,
414 |                         score=result.score,
415 |                         metadata=metadata
416 |                     )
417 |                 )
418 |             else:
419 |                 logger.warning(f"Unrecognized result format: {result}")
420 |         
421 |         return search_results
422 |     
423 |     async def add_vector(self, text: str, metadata: Optional[Dict] = None) -> str:
424 |         """Add vector to the vector store and return ID.
425 |         
426 |         This is a convenience method that automatically generates
427 |         a UUID for the vector.
428 |         
429 |         Args:
430 |             text: Text to add
431 |             metadata: Optional metadata
432 |             
433 |         Returns:
434 |             ID of the created vector
435 |         """
436 |         # Generate ID
437 |         id = str(uuid.uuid4())
438 |         
439 |         # Generate embedding
440 |         embedding = await self.embedder.embed(text)
441 |         
442 |         # Ensure metadata is initialized
443 |         metadata = metadata or {}
444 |         
445 |         # Extract title/description from metadata if available, with defaults
446 |         title = metadata.get("title", "Untitled")
447 |         description = metadata.get("description", text[:100])
448 |         pattern_type = metadata.get("pattern_type", metadata.get("type", "code"))
449 |         tags = metadata.get("tags", [])
450 |         
451 |         # Ensure "type" field always exists (standardized structure)
452 |         if "type" not in metadata:
453 |             metadata["type"] = "code"
454 |         
455 |         # Create payload with all original metadata plus required fields
456 |         payload = {
457 |             "id": id,
458 |             "title": title,
459 |             "description": description,
460 |             "pattern_type": pattern_type,
461 |             "type": metadata.get("type", "code"),
462 |             "tags": tags,
463 |             "timestamp": datetime.now().isoformat(),
464 |             **metadata  # Include all original metadata fields
465 |         }
466 |         
467 |         # Store with complete metadata
468 |         try:
469 |             # Ensure we're initialized
470 |             if not self.initialized:
471 |                 await self.initialize()
472 |                 
473 |             # Validate the collection exists and has the correct vector configuration
474 |             try:
475 |                 collection_info = self.client.get_collection(self.collection_name)
476 |                 # With a non-named vector configuration, we just need to verify the collection exists
477 |                 logger.info(f"Collection {self.collection_name} exists")
478 |             except Exception as e:
479 |                 logger.error(f"Error validating collection: {str(e)}")
480 |                 
481 |             # Debug logs
482 |             logger.info(f"PointStruct data - id: {id}")
483 |             logger.info(f"PointStruct data - vector_name: {self.vector_name}")
484 |             logger.info(f"PointStruct data - embedding length: {len(embedding)}")
485 |             logger.info(f"PointStruct data - payload keys: {payload.keys()}")
486 |             
487 |             # For Qdrant client 1.13.3, use vector parameter
488 |             point = rest.PointStruct(
489 |                 id=id,
490 |                 vector=embedding,  # Use vector parameter for this version of Qdrant client
491 |                 payload=payload
492 |             )
493 |             
494 |             self.client.upsert(
495 |                 collection_name=self.collection_name,
496 |                 points=[point],
497 |                 wait=True
498 |             )
499 |             logger.info(f"Successfully stored vector with id: {id}")
500 |             return id
501 |         except Exception as e:
502 |             logger.error(f"Error storing vector: {str(e)}")
503 |             raise RuntimeError(f"Failed to store vector: {str(e)}")
504 |     
505 |     async def search_similar(
506 |         self,
507 |         query: str,
508 |         filter_conditions: Optional[Dict] = None,
509 |         limit: int = 5
510 |     ) -> List[SearchResult]:
511 |         """Search for similar text.
512 |         
513 |         Args:
514 |             query: Query text to search for
515 |             filter_conditions: Optional filter conditions
516 |             limit: Maximum number of results to return
517 |             
518 |         Returns:
519 |             List of search results
520 |         """
521 |         return await self.search(
522 |             text=query,
523 |             filter_conditions=filter_conditions,
524 |             limit=limit
525 |         )
526 | 
```
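
A minimal usage sketch for the `VectorStore` API above. The constructor signature (`url`, `embedder`, `collection_name`) and the `SentenceTransformerEmbedding` helper are assumed from elsewhere in this codebase and are not shown on this page, so treat the wiring as illustrative rather than authoritative.

```python
import asyncio

from mcp_codebase_insight.core.vector_store import VectorStore
from mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding  # assumed module/name

async def main():
    embedder = SentenceTransformerEmbedding()  # assumed default model
    store = VectorStore(
        url="http://localhost:6333",       # local Qdrant instance
        embedder=embedder,
        collection_name="demo_patterns",   # hypothetical collection name
    )
    await store.initialize()
    try:
        # Calling pattern 1: text + metadata; the embedding is generated internally
        await store.store_pattern(
            id="11111111-2222-3333-4444-555555555555",
            text="def add(x, y):\n    return x + y",
            metadata={"title": "Add function", "tags": ["math"]},
        )
        # Calling pattern 2: explicit fields with a pre-computed embedding
        emb = await embedder.embed("subtract two numbers")
        await store.store_pattern(
            id="66666666-7777-8888-9999-000000000000",
            title="Subtract function",
            description="Subtracts y from x",
            pattern_type="code",
            tags=["math"],
            embedding=emb,
        )
        # filter_conditions must be Qdrant Filter kwargs, not Mongo-style operators
        results = await store.search(
            text="function that adds numbers",
            filter_conditions={"must": [{"key": "type", "match": {"value": "code"}}]},
            limit=3,
        )
        for r in results:
            print(r.id, r.score, r.metadata.get("title"))
    finally:
        await store.close()

if __name__ == "__main__":
    asyncio.run(main())
```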

--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/knowledge.py:
--------------------------------------------------------------------------------

```python
  1 | """Knowledge base for code patterns and insights."""
  2 | 
  3 | from datetime import datetime
  4 | from enum import Enum
  5 | from typing import Dict, List, Optional
  6 | from uuid import UUID, uuid4
  7 | import hashlib
  8 | import json
  9 | from pydantic import BaseModel, Field
 10 | 
 11 | class PatternType(str, Enum):
 12 |     """Pattern type enumeration."""
 13 |     
 14 |     CODE = "code"
 15 |     DESIGN_PATTERN = "design_pattern"
 16 |     ARCHITECTURE = "architecture"
 17 |     BEST_PRACTICE = "best_practice"
 18 |     ANTI_PATTERN = "anti_pattern"
 19 |     FILE_RELATIONSHIP = "file_relationship"  # New type for file relationships
 20 |     WEB_SOURCE = "web_source"  # New type for web sources
 21 | 
 22 | class PatternConfidence(str, Enum):
 23 |     """Pattern confidence level."""
 24 |     
 25 |     HIGH = "high"
 26 |     MEDIUM = "medium"
 27 |     LOW = "low"
 28 |     EXPERIMENTAL = "experimental"
 29 | 
 30 | class Pattern(BaseModel):
 31 |     """Pattern model."""
 32 |     
 33 |     id: UUID
 34 |     name: str
 35 |     type: PatternType
 36 |     description: str
 37 |     content: str
 38 |     confidence: PatternConfidence
 39 |     tags: Optional[List[str]] = None
 40 |     metadata: Optional[Dict[str, str]] = None
 41 |     created_at: datetime
 42 |     updated_at: datetime
 43 |     examples: Optional[List[str]] = None
 44 |     related_patterns: Optional[List[UUID]] = None
 45 | 
 46 | class SearchResult(BaseModel):
 47 |     """Pattern search result model."""
 48 |     
 49 |     pattern: Pattern
 50 |     similarity_score: float
 51 | 
 52 | class FileRelationship(BaseModel):
 53 |     """File relationship model."""
 54 |     
 55 |     source_file: str
 56 |     target_file: str
 57 |     relationship_type: str  # e.g., "imports", "extends", "implements", "uses"
 58 |     description: Optional[str] = None
 59 |     metadata: Optional[Dict[str, str]] = None
 60 |     created_at: datetime = Field(default_factory=datetime.utcnow)
 61 |     updated_at: datetime = Field(default_factory=datetime.utcnow)
 62 | 
 63 | class WebSource(BaseModel):
 64 |     """Web source model."""
 65 |     
 66 |     url: str
 67 |     title: str
 68 |     description: Optional[str] = None
 69 |     content_type: str  # e.g., "documentation", "tutorial", "reference"
 70 |     last_fetched: datetime = Field(default_factory=datetime.utcnow)
 71 |     metadata: Optional[Dict[str, str]] = None
 72 |     related_patterns: Optional[List[UUID]] = None
 73 |     tags: Optional[List[str]] = None
 74 | 
 75 | class KnowledgeBase:
 76 |     """Knowledge base for managing code patterns and insights."""
 77 |     
 78 |     def __init__(self, config, vector_store=None):
 79 |         """Initialize knowledge base.
 80 |         
 81 |         Args:
 82 |             config: Server configuration
 83 |             vector_store: Optional vector store instance
 84 |         """
 85 |         self.config = config
 86 |         self.vector_store = vector_store
 87 |         self.kb_dir = config.kb_storage_dir
 88 |         self.initialized = False
 89 |         self.file_relationships: Dict[str, FileRelationship] = {}
 90 |         self.web_sources: Dict[str, WebSource] = {}
 91 |     
 92 |     async def initialize(self):
 93 |         """Initialize knowledge base components."""
 94 |         if self.initialized:
 95 |             return
 96 |             
 97 |         try:
 98 |             # Create all required directories
 99 |             self.kb_dir.mkdir(parents=True, exist_ok=True)
100 |             (self.kb_dir / "patterns").mkdir(parents=True, exist_ok=True)
101 |             (self.kb_dir / "relationships").mkdir(parents=True, exist_ok=True)  # New directory for relationships
102 |             (self.kb_dir / "web_sources").mkdir(parents=True, exist_ok=True)  # New directory for web sources
103 |             
104 |             # Initialize vector store if available
105 |             if self.vector_store:
106 |                 await self.vector_store.initialize()
107 |                 
108 |             # Load existing relationships and web sources
109 |             await self._load_relationships()
110 |             await self._load_web_sources()
111 |                 
112 |             # Create initial patterns if none exist
113 |             if not list((self.kb_dir / "patterns").glob("*.json")):
114 |                 await self._create_initial_patterns()
115 |                 
116 |             # Update state
117 |             self.config.set_state("kb_initialized", True)
118 |             self.initialized = True
119 |         except Exception as e:
120 |             import traceback
121 |             print(f"Error initializing knowledge base: {str(e)}\n{traceback.format_exc()}")
122 |             self.config.set_state("kb_initialized", False)
123 |             self.config.set_state("kb_error", str(e))
124 |             raise RuntimeError(f"Failed to initialize knowledge base: {str(e)}")
125 |     
126 |     async def _load_relationships(self):
127 |         """Load existing file relationships."""
128 |         relationships_dir = self.kb_dir / "relationships"
129 |         if relationships_dir.exists():
130 |             for file_path in relationships_dir.glob("*.json"):
131 |                 try:
132 |                     with open(file_path) as f:
133 |                         data = json.load(f)
134 |                         relationship = FileRelationship(**data)
135 |                         key = f"{relationship.source_file}:{relationship.target_file}"
136 |                         self.file_relationships[key] = relationship
137 |                 except Exception as e:
138 |                     print(f"Error loading relationship from {file_path}: {e}")
139 |     
140 |     async def _load_web_sources(self):
141 |         """Load existing web sources."""
142 |         web_sources_dir = self.kb_dir / "web_sources"
143 |         if web_sources_dir.exists():
144 |             for file_path in web_sources_dir.glob("*.json"):
145 |                 try:
146 |                     with open(file_path) as f:
147 |                         data = json.load(f)
148 |                         source = WebSource(**data)
149 |                         self.web_sources[source.url] = source
150 |                 except Exception as e:
151 |                     print(f"Error loading web source from {file_path}: {e}")
152 |     
153 |     async def _create_initial_patterns(self):
154 |         """Create initial patterns for testing."""
155 |         await self.add_pattern(
156 |             name="Basic Function",
157 |             type=PatternType.CODE,
158 |             description="A simple function that performs a calculation",
159 |             content="def calculate(x, y):\n    return x + y",
160 |             confidence=PatternConfidence.HIGH,
161 |             tags=["function", "basic"]
162 |         )
163 |     
164 |     async def cleanup(self):
165 |         """Clean up knowledge base components."""
166 |         if not self.initialized:
167 |             return
168 |             
169 |         try:
170 |             if self.vector_store:
171 |                 await self.vector_store.cleanup()
172 |         except Exception as e:
173 |             print(f"Error cleaning up knowledge base: {e}")
174 |         finally:
175 |             self.config.set_state("kb_initialized", False)
176 |             self.initialized = False
177 |     
178 |     async def add_pattern(
179 |         self,
180 |         name: str,
181 |         type: PatternType,
182 |         description: str,
183 |         content: str,
184 |         confidence: PatternConfidence,
185 |         tags: Optional[List[str]] = None,
186 |         metadata: Optional[Dict[str, str]] = None,
187 |         examples: Optional[List[str]] = None,
188 |         related_patterns: Optional[List[UUID]] = None
189 |     ) -> Pattern:
190 |         """Add a new pattern."""
191 |         now = datetime.utcnow()
192 |         pattern = Pattern(
193 |             id=uuid4(),
194 |             name=name,
195 |             type=type,
196 |             description=description,
197 |             content=content,
198 |             confidence=confidence,
199 |             tags=tags,
200 |             metadata=metadata,
201 |             examples=examples,
202 |             related_patterns=related_patterns,
203 |             created_at=now,
204 |             updated_at=now
205 |         )
206 |         
207 |         # Store pattern vector if vector store is available
208 |         if self.vector_store:
209 |             # Generate embedding for the pattern
210 |             combined_text = f"{pattern.name}\n{pattern.description}\n{pattern.content}"
211 |             try:
212 |                 embedding = await self.vector_store.embedder.embed(combined_text)
213 |                 await self.vector_store.store_pattern(
214 |                     id=str(pattern.id),
215 |                     title=pattern.name,
216 |                     description=pattern.description,
217 |                     pattern_type=pattern.type.value,
218 |                     tags=pattern.tags or [],
219 |                     embedding=embedding
220 |                 )
221 |             except Exception as e:
222 |                 print(f"Warning: Failed to store pattern vector: {e}")
223 |         
224 |         # Save pattern to file
225 |         await self._save_pattern(pattern)
226 |         return pattern
227 |     
228 |     async def get_pattern(self, pattern_id: UUID) -> Optional[Pattern]:
229 |         """Get pattern by ID."""
230 |         pattern_path = self.kb_dir / "patterns" / f"{pattern_id}.json"
231 |         if not pattern_path.exists():
232 |             return None
233 |             
234 |         with open(pattern_path) as f:
235 |             data = json.load(f)
236 |             return Pattern(**data)
237 |     
238 |     async def update_pattern(
239 |         self,
240 |         pattern_id: UUID,
241 |         description: Optional[str] = None,
242 |         content: Optional[str] = None,
243 |         confidence: Optional[PatternConfidence] = None,
244 |         tags: Optional[List[str]] = None,
245 |         metadata: Optional[Dict[str, str]] = None,
246 |         examples: Optional[List[str]] = None,
247 |         related_patterns: Optional[List[UUID]] = None
248 |     ) -> Optional[Pattern]:
249 |         """Update pattern details."""
250 |         pattern = await self.get_pattern(pattern_id)
251 |         if not pattern:
252 |             return None
253 |             
254 |         if description:
255 |             pattern.description = description
256 |         if content:
257 |             pattern.content = content
258 |         if confidence:
259 |             pattern.confidence = confidence
260 |         if tags:
261 |             pattern.tags = tags
262 |         if metadata:
263 |             pattern.metadata = {**(pattern.metadata or {}), **metadata}
264 |         if examples:
265 |             pattern.examples = examples
266 |         if related_patterns:
267 |             pattern.related_patterns = related_patterns
268 |             
269 |         pattern.updated_at = datetime.utcnow()
270 |         
271 |         # Update vector store if available
272 |         if self.vector_store:
273 |             # Generate embedding for the updated pattern
274 |             combined_text = f"{pattern.name}\n{pattern.description}\n{pattern.content}"
275 |             try:
276 |                 embedding = await self.vector_store.embedder.embed(combined_text)
277 |                 await self.vector_store.update_pattern(
278 |                     id=str(pattern.id),
279 |                     title=pattern.name,
280 |                     description=pattern.description,
281 |                     pattern_type=pattern.type.value,
282 |                     tags=pattern.tags or [],
283 |                     embedding=embedding
284 |                 )
285 |             except Exception as e:
286 |                 print(f"Warning: Failed to update pattern vector: {e}")
287 |         
288 |         await self._save_pattern(pattern)
289 |         return pattern
290 |     
291 |     async def find_similar_patterns(
292 |         self,
293 |         query: str,
294 |         pattern_type: Optional[PatternType] = None,
295 |         confidence: Optional[PatternConfidence] = None,
296 |         tags: Optional[List[str]] = None,
297 |         limit: int = 5
298 |     ) -> List[SearchResult]:
299 |         """Find similar patterns using vector similarity search."""
300 |         if not self.vector_store:
301 |             return []
302 |             
303 |         # Build Qdrant Filter conditions ("$all" is Mongo syntax; Qdrant filters use must/should/must_not)
304 |         must = []
305 |         if pattern_type:
306 |             must.append({"key": "type", "match": {"value": pattern_type.value}})
307 |         if confidence:
308 |             must.append({"key": "confidence", "match": {"value": confidence.value}})
309 |         if tags:  # one condition per tag preserves the original "all tags" semantics
310 |             must.extend({"key": "tags", "match": {"value": tag}} for tag in tags)
311 |             
312 |         # Search vectors with fallback on error
313 |         try:
314 |             results = await self.vector_store.search(
315 |                 text=query,
316 |                 filter_conditions={"must": must} if must else None,
317 |                 limit=limit
318 |             )
319 |         except Exception as e:
320 |             print(f"Warning: Semantic search failed ({e}), falling back to file-based search")
321 |             file_patterns = await self.list_patterns(pattern_type, confidence, tags)
322 |             return [
323 |                 SearchResult(pattern=p, similarity_score=0.0)
324 |                 for p in file_patterns[:limit]
325 |             ]
326 |         
327 |         # Load full patterns
328 |         search_results = []
329 |         for result in results:
330 |             try:
331 |                 # Handle different ID formats from Qdrant client
332 |                 pattern_id = None
333 |                 if hasattr(result, 'id'):
334 |                     # Try to convert the ID to UUID, handling different formats
335 |                     id_str = str(result.id)
336 |                     # Check if it's a valid UUID format
337 |                     if '-' in id_str and len(id_str.replace('-', '')) == 32:
338 |                         pattern_id = UUID(id_str)
339 |                     else:
340 |                         # Try to extract a UUID from the ID
341 |                         # Look for UUID patterns like xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
342 |                         import re
343 |                         uuid_match = re.search(r'([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})', id_str, re.IGNORECASE)
344 |                         if uuid_match:
345 |                             pattern_id = UUID(uuid_match.group(1))
346 |                 else:
347 |                     # Handle tuple results from newer Qdrant client
348 |                     # Tuple format is typically (id, score, payload)
349 |                     if isinstance(result, tuple) and len(result) >= 1:
350 |                         id_str = str(result[0])
351 |                         # Same UUID validation as above
352 |                         if '-' in id_str and len(id_str.replace('-', '')) == 32:
353 |                             pattern_id = UUID(id_str)
354 |                         else:
355 |                             import re
356 |                             uuid_match = re.search(r'([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})', id_str, re.IGNORECASE)
357 |                             if uuid_match:
358 |                                 pattern_id = UUID(uuid_match.group(1))
359 |                 
360 |                 # Skip if we couldn't extract a valid UUID
361 |                 if pattern_id is None:
362 |                     print(f"Warning: Could not extract valid UUID from result ID: {result}")
363 |                     continue
364 |                 
365 |                 # Get the pattern using the UUID
366 |                 pattern = await self.get_pattern(pattern_id)
367 |                 if pattern:
368 |                     # Get score from result
369 |                     score = result.score if hasattr(result, 'score') else (
370 |                         result[1] if isinstance(result, tuple) and len(result) >= 2 else 0.0
371 |                     )
372 |                     
373 |                     search_results.append(SearchResult(
374 |                         pattern=pattern,
375 |                         similarity_score=score
376 |                     ))
377 |             except (ValueError, AttributeError, IndexError, TypeError) as e:
378 |                 print(f"Warning: Failed to process result {result}: {e}")
379 |                 
380 |         return search_results
381 |     
382 |     async def list_patterns(
383 |         self,
384 |         pattern_type: Optional[PatternType] = None,
385 |         confidence: Optional[PatternConfidence] = None,
386 |         tags: Optional[List[str]] = None
387 |     ) -> List[Pattern]:
388 |         """List all patterns, optionally filtered."""
389 |         patterns = []
390 |         for path in (self.kb_dir / "patterns").glob("*.json"):
391 |             with open(path) as f:
392 |                 data = json.load(f)
393 |                 pattern = Pattern(**data)
394 |                 
395 |                 # Apply filters
396 |                 if pattern_type and pattern.type != pattern_type:
397 |                     continue
398 |                 if confidence and pattern.confidence != confidence:
399 |                     continue
400 |                 if tags and not all(tag in (pattern.tags or []) for tag in tags):
401 |                     continue
402 |                     
403 |                 patterns.append(pattern)
404 |                 
405 |         return sorted(patterns, key=lambda x: x.created_at)
406 |     
407 |     async def analyze_code(self, code: str, context: Optional[Dict[str, str]] = None) -> Dict:
408 |         """Analyze code for patterns and insights.
409 |         
410 |         Args:
411 |             code: The code to analyze.
412 |             context: Optional context about the code, such as language and purpose.
413 |         """
414 |         # Find similar code patterns
415 |         patterns = await self.find_similar_patterns(
416 |             query=code,
417 |             pattern_type=PatternType.CODE,
418 |             limit=5
419 |         )
420 |         
421 |         # Extract insights
422 |         insights = []
423 |         for result in patterns:
424 |             pattern = result.pattern
425 |             insights.append({
426 |                 "pattern_id": str(pattern.id),
427 |                 "name": pattern.name,
428 |                 "description": pattern.description,
429 |                 "confidence": pattern.confidence,
430 |                 "similarity_score": result.similarity_score
431 |             })
432 |             
433 |         return {
434 |             "patterns": [p.pattern.model_dump() for p in patterns],
435 |             "insights": insights,
436 |             "summary": {
437 |                 "total_patterns": len(patterns),
438 |                 "total_insights": len(insights),
439 |                 "context": context or {}
440 |             }
441 |         }
442 |     
443 |     async def _save_pattern(self, pattern: Pattern) -> None:
444 |         """Save pattern to file."""
445 |         pattern_dir = self.kb_dir / "patterns"
446 |         pattern_dir.mkdir(parents=True, exist_ok=True)
447 |         pattern_path = pattern_dir / f"{pattern.id}.json"
448 |         with open(pattern_path, "w") as f:
449 |             json.dump(pattern.model_dump(), f, indent=2, default=str)
450 | 
451 |     async def search_patterns(
452 |         self,
453 |         tags: Optional[List[str]] = None
454 |     ) -> List[Pattern]:
455 |         """Search for patterns by tags."""
456 |         # Delegate to list_patterns for tag-based filtering
457 |         return await self.list_patterns(tags=tags)
458 |     
459 |     async def add_file_relationship(
460 |         self,
461 |         source_file: str,
462 |         target_file: str,
463 |         relationship_type: str,
464 |         description: Optional[str] = None,
465 |         metadata: Optional[Dict[str, str]] = None
466 |     ) -> FileRelationship:
467 |         """Add a new file relationship."""
468 |         relationship = FileRelationship(
469 |             source_file=source_file,
470 |             target_file=target_file,
471 |             relationship_type=relationship_type,
472 |             description=description,
473 |             metadata=metadata
474 |         )
475 |         
476 |         key = f"{source_file}:{target_file}"
477 |         self.file_relationships[key] = relationship
478 |         
479 |         # Save to disk
480 |         await self._save_relationship(relationship)
481 |         return relationship
482 |     
483 |     async def add_web_source(
484 |         self,
485 |         url: str,
486 |         title: str,
487 |         content_type: str,
488 |         description: Optional[str] = None,
489 |         metadata: Optional[Dict[str, str]] = None,
490 |         tags: Optional[List[str]] = None
491 |     ) -> WebSource:
492 |         """Add a new web source."""
493 |         source = WebSource(
494 |             url=url,
495 |             title=title,
496 |             content_type=content_type,
497 |             description=description,
498 |             metadata=metadata,
499 |             tags=tags
500 |         )
501 |         
502 |         self.web_sources[url] = source
503 |         
504 |         # Save to disk
505 |         await self._save_web_source(source)
506 |         return source
507 |     
508 |     async def get_file_relationships(
509 |         self,
510 |         source_file: Optional[str] = None,
511 |         target_file: Optional[str] = None,
512 |         relationship_type: Optional[str] = None
513 |     ) -> List[FileRelationship]:
514 |         """Get file relationships, optionally filtered."""
515 |         relationships = list(self.file_relationships.values())
516 |         
517 |         if source_file:
518 |             relationships = [r for r in relationships if r.source_file == source_file]
519 |         if target_file:
520 |             relationships = [r for r in relationships if r.target_file == target_file]
521 |         if relationship_type:
522 |             relationships = [r for r in relationships if r.relationship_type == relationship_type]
523 |             
524 |         return relationships
525 |     
526 |     async def get_web_sources(
527 |         self,
528 |         content_type: Optional[str] = None,
529 |         tags: Optional[List[str]] = None
530 |     ) -> List[WebSource]:
531 |         """Get web sources, optionally filtered."""
532 |         sources = list(self.web_sources.values())
533 |         
534 |         if content_type:
535 |             sources = [s for s in sources if s.content_type == content_type]
536 |         if tags:
537 |             sources = [s for s in sources if s.tags and all(tag in s.tags for tag in tags)]
538 |             
539 |         return sources
540 |     
541 |     async def _save_relationship(self, relationship: FileRelationship) -> None:
542 |         """Save file relationship to disk."""
543 |         relationships_dir = self.kb_dir / "relationships"
544 |         relationships_dir.mkdir(parents=True, exist_ok=True)
545 |         
546 |         key = f"{relationship.source_file}:{relationship.target_file}"
547 |         file_path = relationships_dir / f"{hashlib.md5(key.encode()).hexdigest()}.json"  # stable name; builtin hash() is salted per process
548 |         
549 |         with open(file_path, "w") as f:
550 |             json.dump(relationship.model_dump(), f, indent=2, default=str)
551 |     
552 |     async def _save_web_source(self, source: WebSource) -> None:
553 |         """Save web source to disk."""
554 |         web_sources_dir = self.kb_dir / "web_sources"
555 |         web_sources_dir.mkdir(parents=True, exist_ok=True)
556 |         
557 |         file_path = web_sources_dir / f"{hashlib.md5(source.url.encode()).hexdigest()}.json"  # stable name across runs
558 |         
559 |         with open(file_path, "w") as f:
560 |             json.dump(source.model_dump(), f, indent=2, default=str)
561 | 
562 |     async def delete_pattern(self, pattern_id: UUID) -> None:
563 |         """Delete a pattern by ID from knowledge base and vector store."""
564 |         # Delete from vector store if available
565 |         if self.vector_store:
566 |             try:
567 |                 await self.vector_store.delete_pattern(str(pattern_id))
568 |             except Exception as e:
569 |                 print(f"Warning: Failed to delete pattern vector: {e}")
570 |         # Delete pattern file
571 |         pattern_path = self.kb_dir / "patterns" / f"{pattern_id}.json"
572 |         if pattern_path.exists():
573 |             try:
574 |                 pattern_path.unlink()
575 |             except Exception as e:
576 |                 print(f"Warning: Failed to delete pattern file: {e}")
577 | 
```
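
A short usage sketch for `KnowledgeBase` above. The real server config lives in `core/config.py` (not shown on this page); the stub below provides only the two members this class actually touches (`kb_storage_dir` and `set_state`), so it is a demonstration assumption, not the project's config API.

```python
import asyncio
from pathlib import Path

from mcp_codebase_insight.core.knowledge import (
    KnowledgeBase, PatternType, PatternConfidence,
)

class StubConfig:
    """Hypothetical stand-in exposing just what KnowledgeBase needs."""
    def __init__(self, root: Path):
        self.kb_storage_dir = root
        self._state = {}
    def set_state(self, key, value):
        self._state[key] = value

async def main():
    # No vector store: the knowledge base falls back to file-backed storage only
    kb = KnowledgeBase(StubConfig(Path("./kb_demo")))
    await kb.initialize()
    pattern = await kb.add_pattern(
        name="Guard clause",
        type=PatternType.BEST_PRACTICE,
        description="Return early to avoid deep nesting",
        content="if not items:\n    return []",
        confidence=PatternConfidence.HIGH,
        tags=["readability"],
    )
    # Without a vector store, find_similar_patterns returns []; tag search
    # still works against the JSON files on disk.
    matches = await kb.search_patterns(tags=["readability"])
    print([p.name for p in matches], pattern.id)
    await kb.cleanup()

asyncio.run(main())
```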

--------------------------------------------------------------------------------
/run_tests.py:
--------------------------------------------------------------------------------
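
A small driver sketch showing how `run_tests.py` might be invoked programmatically (for example, from CI glue). The flag names are taken from `parse_args` in the script below; the two-stage ordering is an illustrative choice, not something the script itself mandates.

```python
import subprocess
import sys

def run(*flags: str) -> int:
    """Invoke run_tests.py with the given flags and return its exit code."""
    cmd = [sys.executable, "run_tests.py", *flags]
    print("Running:", " ".join(cmd))
    return subprocess.run(cmd).returncode

if __name__ == "__main__":
    # First: component tests, each module fully isolated in its own process
    code = run("--component", "--fully-isolated")
    # Then: the full suite with coverage + HTML report, only if components passed
    if code == 0:
        code = run("--all", "--coverage", "--html")
    sys.exit(code)
```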

```python
  1 | #!/usr/bin/env python3
  2 | """
  3 | Test runner script for MCP Codebase Insight.
  4 | 
  5 | This script consolidates all test execution into a single command with various options.
  6 | It can run specific test categories or all tests, with or without coverage reporting.
  7 | """
  8 | 
  9 | import argparse
 10 | import os
 11 | import subprocess
 12 | import sys
 13 | import time
 14 | from typing import List, Optional
 15 | import uuid
 16 | import traceback
 17 | 
 18 | 
 19 | def parse_args():
 20 |     """Parse command line arguments."""
 21 |     parser = argparse.ArgumentParser(description="Run MCP Codebase Insight tests")
 22 |     
 23 |     # Test selection options
 24 |     parser.add_argument("--all", action="store_true", help="Run all tests")
 25 |     parser.add_argument("--component", action="store_true", help="Run component tests")
 26 |     parser.add_argument("--integration", action="store_true", help="Run integration tests")
 27 |     parser.add_argument("--config", action="store_true", help="Run configuration tests")
 28 |     parser.add_argument("--api", action="store_true", help="Run API endpoint tests")
 29 |     parser.add_argument("--sse", action="store_true", help="Run SSE endpoint tests")
 30 |     
 31 |     # Specific test selection
 32 |     parser.add_argument("--test", type=str, help="Run a specific test (e.g., test_health_check)")
 33 |     parser.add_argument("--file", type=str, help="Run tests from a specific file")
 34 |     
 35 |     # Coverage options
 36 |     parser.add_argument("--coverage", action="store_true", help="Generate coverage report")
 37 |     parser.add_argument("--html", action="store_true", help="Generate HTML coverage report")
 38 |     
 39 |     # Additional options
 40 |     parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
 41 |     parser.add_argument("--no-capture", action="store_true", help="Don't capture stdout/stderr")
 42 |     parser.add_argument("--clean", action="store_true", help="Clean .pytest_cache before running tests")
 43 |     parser.add_argument("--isolated", action="store_true", help="Run with PYTHONPATH isolated to ensure clean environment")
 44 |     parser.add_argument("--event-loop-debug", action="store_true", help="Add asyncio debug mode")
 45 |     parser.add_argument("--sequential", action="store_true", help="Run tests sequentially to avoid event loop issues")
 46 |     parser.add_argument("--fully-isolated", action="store_true", 
 47 |                        help="Run each test module in a separate process for complete isolation")
 48 |     
 49 |     return parser.parse_args()
 50 | 
 51 | 
 52 | def build_command(args, module_path=None) -> List[List[str]]:
 53 |     """Build the pytest command based on arguments."""
 54 |     cmd = ["python", "-m", "pytest"]
 55 |     
 56 |     # Add xdist settings for parallel or sequential execution
 57 |     if args.sequential:
 58 |         # Run sequentially to avoid event loop issues
 59 |         os.environ["PYTEST_XDIST_AUTO_NUM_WORKERS"] = "1"
 60 |         cmd.append("-xvs")
 61 |     
 62 |     # Determine test scope
 63 |     test_paths = []
 64 |     
 65 |     # If a specific module path is provided, use it
 66 |     if module_path:
 67 |         test_paths.append(module_path)
 68 |     elif args.all or (not any([args.component, args.integration, args.config, args.api, args.sse, args.test, args.file])):
 69 |         # When running all tests and using fully isolated mode, we'll handle this differently in main()
 70 |         if args.fully_isolated:
 71 |             return []
 72 |         
 73 |         # When running all tests, run integration tests separately from other tests
 74 |         if args.all and not args.sequential:
 75 |             # Run integration tests separately to avoid event loop conflicts
 76 |             integration_cmd = cmd.copy()
 77 |             integration_cmd.append("tests/integration/")
 78 |             non_integration_cmd = cmd.copy()
 79 |             non_integration_cmd.append("tests/")
 80 |             non_integration_cmd.append("--ignore=tests/integration/")
 81 |             return [integration_cmd, non_integration_cmd]
 82 |         else:
 83 |             test_paths.append("tests/")
 84 |     else:
 85 |         if args.integration:
 86 |             test_paths.append("tests/integration/")
 87 |         if args.component:
 88 |             test_paths.append("tests/components/")
 89 |             cmd.append("--asyncio-mode=strict")  # Ensure asyncio strict mode for component tests
 90 |         if args.config:
 91 |             test_paths.append("tests/config/")
 92 |         if args.api:
 93 |             test_paths.append("tests/integration/test_api_endpoints.py")
 94 |         if args.sse:
 95 |             test_paths.append("tests/integration/test_sse.py")
 96 |         if args.file:
 97 |             test_paths.append(args.file)
 98 |         if args.test:
 99 |             if "/" in args.test or "." in args.test:
100 |                 # If it looks like a file path and test name
101 |                 test_paths.append(args.test)
102 |             else:
103 |                 # If it's just a test name, try to find it
104 |                 test_paths.append("tests/integration/test_api_endpoints.py::" + (args.test if args.test.startswith("test_") else f"test_{args.test}"))
105 |     
106 |     # Add test paths to command
107 |     cmd.extend(test_paths)
108 |     
109 |     # Add coverage if requested: the inserts rewrite the command to "python -m coverage run -m pytest ..."
110 |     if args.coverage:
111 |         cmd.insert(1, "-m")
112 |         cmd.insert(2, "coverage")
113 |         cmd.insert(3, "run")
114 |     
115 |     # Add verbosity
116 |     if args.verbose:
117 |         cmd.append("-v")
118 |     
119 |     # Disable output capture if requested
120 |     if args.no_capture:
121 |         cmd.append("-s")
122 |     
123 |     # Add asyncio debug mode if requested
124 |     if args.event_loop_debug:
125 |         cmd.append("--asyncio-mode=strict")
126 |         os.environ["PYTHONASYNCIODEBUG"] = "1"
127 |     else:
128 |         # Always use strict mode to catch issues
129 |         cmd.append("--asyncio-mode=strict")
130 |     
131 |     return [cmd]
132 | 
133 | 
134 | def clean_test_cache():
135 |     """Clean pytest cache directories."""
136 |     print("Cleaning pytest cache...")
137 |     subprocess.run(["rm", "-rf", ".pytest_cache"], check=False)
138 |     
139 |     # Also clear __pycache__ directories in tests
140 |     for root, dirs, _ in os.walk("tests"):
141 |         for d in dirs:
142 |             if d == "__pycache__":
143 |                 cache_dir = os.path.join(root, d)
144 |                 print(f"Removing {cache_dir}")
145 |                 subprocess.run(["rm", "-rf", cache_dir], check=False)
146 | 
147 | 
148 | def setup_isolated_env():
149 |     """Set up an isolated environment for tests."""
150 |     # Make sure we start with the right Python path
151 |     os.environ["PYTHONPATH"] = os.path.abspath(".")
152 |     
153 |     # Clear any previous test-related environment variables
154 |     for key in list(os.environ.keys()):
155 |         if key.startswith(("PYTEST_", "MCP_TEST_")):
156 |             del os.environ[key]
157 |     
158 |     # Set standard test variables
159 |     os.environ["MCP_TEST_MODE"] = "1"
160 |     os.environ["MCP_HOST"] = "localhost"
161 |     os.environ["MCP_PORT"] = "8000"  # Different from default to avoid conflicts
162 |     os.environ["QDRANT_URL"] = "http://localhost:6333"
163 |     
164 |     # Use unique collection names for tests to avoid interference
165 |     test_id = os.urandom(4).hex()
166 |     os.environ["MCP_COLLECTION_NAME"] = f"test_collection_{test_id}"
167 |     
168 |     # Configure asyncio behavior for better isolation
169 |     os.environ["ASYNCIO_WATCHDOG_TIMEOUT"] = "30"
170 |     os.environ["PYTEST_ASYNC_TEST_TIMEOUT"] = "60"
171 |     
172 |     # Force module isolation 
173 |     os.environ["PYTEST_FORCE_ISOLATED_EVENT_LOOP"] = "1"
174 | 
175 | 
176 | def run_tests(cmds: List[List[str]], env=None) -> int:
177 |     """Run the tests with the given commands."""
178 |     exit_code = 0
179 |     
180 |     for cmd in cmds:
181 |         print(f"Running: {' '.join(cmd)}")
182 |         try:
183 |             result = subprocess.run(cmd, env=env)
184 |             if result.returncode != 0:
185 |                 exit_code = result.returncode
186 |         except Exception as e:
187 |             print(f"Error running command: {e}")
188 |             exit_code = 1
189 |     
190 |     return exit_code
191 | 
192 | 
193 | def find_test_modules(directory="tests", filter_pattern=None):
194 |     """Find all Python test files in the given directory."""
195 |     test_modules = []
196 |     
197 |     # Walk through the directory
198 |     for root, _, files in os.walk(directory):
199 |         for file in files:
200 |             if file.startswith("test_") and file.endswith(".py"):
201 |                 module_path = os.path.join(root, file)
202 |                 
203 |                 # Apply filter if provided
204 |                 if filter_pattern and filter_pattern not in module_path:
205 |                     continue
206 |                     
207 |                 test_modules.append(module_path)
208 |     
209 |     return test_modules
210 | 
211 | 
212 | def run_isolated_modules(args) -> int:
213 |     """Run each test module in its own process for complete isolation."""
214 |     # Determine which test modules to run
215 |     test_modules = []
216 |     
217 |     if args.component:
218 |         # For component tests, always run them individually
219 |         test_modules = find_test_modules("tests/components")
220 |     elif args.all:
221 |         # When running all tests, get everything
222 |         test_modules = find_test_modules()
223 |     else:
224 |         # Otherwise, run as specified
225 |         if args.integration:
226 |             integration_modules = find_test_modules("tests/integration")
227 |             test_modules.extend(integration_modules)
228 |         if args.config:
229 |             config_modules = find_test_modules("tests/config")
230 |             test_modules.extend(config_modules)
231 |     
232 |     # Sort modules to run in a specific order: regular tests first,
233 |     # then component tests, and integration tests last
234 |     def module_sort_key(module_path):
235 |         if "integration" in module_path:
236 |             return 3  # Run integration tests last
237 |         elif "components" in module_path:
238 |             return 2  # Run component tests in the middle
239 |         else:
240 |             return 1  # Run other tests first
241 |     
242 |     test_modules.sort(key=module_sort_key)
243 |     
244 |     # If specific test file was specified, only run that one
245 |     if args.file:
246 |         if os.path.exists(args.file):
247 |             test_modules = [args.file]
248 |         else:
249 |             # Try to find the file in the tests directory
250 |             matching_modules = [m for m in test_modules if args.file in m]
251 |             if matching_modules:
252 |                 test_modules = matching_modules
253 |             else:
254 |                 print(f"Error: Test file {args.file} not found")
255 |                 return 1
256 |     
257 |     final_exit_code = 0
258 |     
259 |     # Run each module in a separate process
260 |     for module in test_modules:
261 |         print(f"\n=== Running isolated test module: {module} ===\n")
262 |         
263 |         # Check if this is a component test
264 |         is_component_test = "components" in module
265 |         is_vector_store_test = "test_vector_store.py" in module
266 |         is_knowledge_base_test = "test_knowledge_base.py" in module
267 |         is_task_manager_test = "test_task_manager.py" in module
268 |         
269 |         # Prepare environment for this test module
270 |         env = os.environ.copy()
271 |         
272 |         # Basic environment setup for all tests
273 |         env["PYTEST_FORCE_ISOLATED_EVENT_LOOP"] = "1"
274 |         env["MCP_TEST_MODE"] = "1"
275 |         
276 |         # Add special handling for component tests
277 |         if is_component_test:
278 |             # Ensure component tests run with asyncio strict mode
279 |             env["PYTEST_ASYNCIO_MODE"] = "strict"
280 |             
281 |             # Component tests need test database config
282 |             if "MCP_COLLECTION_NAME" not in env:
283 |                 env["MCP_COLLECTION_NAME"] = f"test_collection_{uuid.uuid4().hex[:8]}"
284 |             
285 |             # Vector store and knowledge base tests need additional time for setup
286 |             if is_vector_store_test or is_knowledge_base_test or is_task_manager_test:
287 |                 env["PYTEST_TIMEOUT"] = "60"  # Allow more time for these tests
288 |         
289 |         # For component tests, use our specialized component test runner
290 |         if is_component_test and args.fully_isolated:
291 |             print(f"Using specialized component test runner for {module}")
292 |             # Extract test names from the module using a simple pattern match
293 |             component_test_results = []
294 |             try:
295 |                 # Use grep to find test functions in the file - more reliable
296 |                 # than pytest --collect-only in this case
297 |                 grep_cmd = ["grep", "-E", "^def test_", module]
298 |                 result = subprocess.run(grep_cmd, capture_output=True, text=True)
299 |                 collected_test_names = []
300 |                 
301 |                 if result.returncode == 0:
302 |                     for line in result.stdout.splitlines():
303 |                         # Extract the test name from "def test_name(...)" or "async def test_name(...)"
304 |                         if "def test_" in line:
305 |                             test_name = line.split("def ")[1].split("(")[0].strip()
306 |                             collected_test_names.append(test_name)
307 |                     print(f"Found {len(collected_test_names)} tests in {module}")
308 |                 else:
309 |                     # Fall back to read the file directly
310 |                     with open(module, 'r') as f:
311 |                         content = f.read()
312 |                         # Use a simple regex to find all test functions
313 |                         import re
314 |                         matches = re.findall(r'def\s+(test_\w+)\s*\(', content)
315 |                         collected_test_names = matches
316 |                         print(f"Found {len(collected_test_names)} tests in {module} (using file read)")
317 |             except Exception as e:
318 |                 print(f"Error extracting tests from {module}: {e}")
319 |                 # Just skip this module and continue with others
320 |                 continue
321 |                 
322 |             # Run each test separately using our component test runner
323 |             if collected_test_names:
324 |                 for test_name in collected_test_names:
325 |                     print(f"Running test: {module}::{test_name}")
326 |                     
327 |                     # Use our specialized component test runner
328 |                     runner_cmd = [
329 |                         "python", 
330 |                         "component_test_runner.py", 
331 |                         module, 
332 |                         test_name
333 |                     ]
334 |                     
335 |                     print(f"Running: {' '.join(runner_cmd)}")
336 |                     test_result = subprocess.run(runner_cmd, env=env)
337 |                     component_test_results.append((test_name, test_result.returncode))
338 |                     
339 |                     # If we have a failure, record it but continue running other tests
340 |                     if test_result.returncode != 0:
341 |                         final_exit_code = test_result.returncode
342 |                     
343 |                     # Short pause between tests to let resources clean up
344 |                     time.sleep(1.0)
345 |                 
346 |                 # Print summary of test results for this module
347 |                 print(f"\n=== Test Results for {module} ===")
348 |                 passed = sum(1 for _, code in component_test_results if code == 0)
349 |                 failed = sum(1 for _, code in component_test_results if code != 0)
350 |                 print(f"Passed: {passed}, Failed: {failed}, Total: {len(component_test_results)}")
351 |                 for name, code in component_test_results:
352 |                     status = "PASSED" if code == 0 else "FAILED"
353 |                     print(f"{name}: {status}")
354 |                 print("=" * 40)
355 |             else:
356 |                 print(f"No tests found in {module}, skipping")
357 |         else:
358 |             # For other tests, use our standard command builder
359 |             cmd_args = argparse.Namespace(**vars(args))
360 |             cmds = build_command(cmd_args, module)
361 |             
362 |             # Run this module's tests with the prepared environment
363 |             module_result = run_tests(cmds, env)
364 |             
365 |             # If we have a failure, record it but continue running other modules
366 |             if module_result != 0:
367 |                 final_exit_code = module_result
368 |         
369 |         # Short pause between modules to let event loops clean up
370 |         # Increase delay for component tests with complex cleanup needs
371 |         if is_component_test:
372 |             time.sleep(1.5)  # Longer pause for component tests
373 |         else:
374 |             time.sleep(0.5)
375 |     
376 |     return final_exit_code
377 | 
378 | 
379 | def run_component_tests_fully_isolated(test_file=None):
380 |     """Run component tests with each test completely isolated using specialized runner."""
381 |     print("\n=== Running component tests in fully isolated mode ===\n")
382 | 
383 |     # Find component test files
384 |     if test_file:
385 |         test_files = [test_file]
386 |     else:
387 |         test_files = find_test_modules("tests/components")
388 |     
389 |     overall_results = {}
390 |     
391 |     for test_file in test_files:
392 |         print(f"\n=== Running isolated test module: {test_file} ===\n")
393 |         print(f"Using specialized component test runner for {test_file}")
394 |         
395 |         try:
396 |             # Use the component_test_runner's discovery mechanism
397 |             from component_test_runner import get_module_tests
398 |             tests = get_module_tests(test_file)
399 |             print(f"Found {len(tests)} tests in {test_file} (using file read)")
400 |             
401 |             # Skip if no tests found
402 |             if not tests:
403 |                 print(f"No tests found in {test_file}")
404 |                 continue
405 |             
406 |             # Track results
407 |             passed_tests = []
408 |             failed_tests = []
409 |             
410 |             for test_name in tests:
411 |                 print(f"Running test: {test_file}::{test_name}")
412 |                 cmd = ["python", "component_test_runner.py", test_file, test_name]
413 |                 print(f"Running: {' '.join(cmd)}")
414 | 
415 |                 result = subprocess.run(cmd)
416 |                 
417 |                 if result.returncode == 0:
418 |                     passed_tests.append(test_name)
419 |                 else:
420 |                     failed_tests.append(test_name)
421 |             
422 |             # Report results for this file
423 |             print(f"\n=== Test Results for {test_file} ===")
424 |             print(f"Passed: {len(passed_tests)}, Failed: {len(failed_tests)}, Total: {len(tests)}")
425 |             
426 |             for test in tests:
427 |                 status = "PASSED" if test in passed_tests else "FAILED"
428 |                 print(f"{test}: {status}")
429 |             
430 |             print("=" * 40)
431 |             
432 |             # Store results
433 |             overall_results[test_file] = {
434 |                 "passed": len(passed_tests),
435 |                 "failed": len(failed_tests),
436 |                 "total": len(tests)
437 |             }
438 |         except Exception as e:
439 |             print(f"Error running tests for {test_file}: {e}")
440 |             traceback.print_exc()
441 |             overall_results[test_file] = {
442 |                 "passed": 0,
443 |                 "failed": 1,
444 |                 "total": 1,
445 |                 "error": str(e)
446 |             }
447 |     
448 |     # Determine if any tests failed
449 |     any_failures = any(result.get("failed", 0) > 0 for result in overall_results.values())
450 |     return 1 if any_failures else 0
451 | 
452 | 
453 | def generate_coverage_report(html: bool = False) -> int:
454 |     """Generate coverage report."""
455 |     if html:
456 |         cmd = ["python", "-m", "coverage", "html"]
457 |         print("Generating HTML coverage report...")
458 |         result = subprocess.run(cmd)
459 |         if result.returncode == 0:
460 |             print(f"HTML coverage report generated in {os.path.abspath('htmlcov')}")
461 |         return result.returncode
462 |     else:
463 |         cmd = ["python", "-m", "coverage", "report", "--show-missing"]
464 |         print("Generating coverage report...")
465 |         return subprocess.run(cmd).returncode
466 | 
467 | 
468 | def run_all_tests(args):
469 |     """Run all tests."""
470 |     cmds = build_command(args)
471 |     print(f"Running: {' '.join(cmds[0])}")
472 |     exit_code = 0
473 |     
474 |     # For regular test runs or when not in fully isolated mode, 
475 |     # first attempt to run everything as a single command
476 |     if args.sequential:
477 |         # Run all tests sequentially
478 |         exit_code = run_tests(cmds)
479 |     else:
480 |         try:
481 |             # First, try to run all tests as one command
482 |             exit_code = run_tests(cmds, os.environ.copy())
483 |         except Exception as e:
484 |             print(f"Error running tests: {e}")
485 |             exit_code = 1
486 |         
487 |         # If the run failed, or fully-isolated mode was requested, run each module individually
488 |         if exit_code != 0 or args.fully_isolated:
489 |             print("\nRunning tests with full module isolation...")
490 |             exit_code = run_isolated_modules(args)
491 |     
492 |     return exit_code
493 | 
494 | 
495 | def main():
496 |     """Main entry point."""
497 |     args = parse_args()
498 |     
499 |     # Clean test cache if requested
500 |     if args.clean:
501 |         clean_test_cache()
502 |     
503 |     # Setup isolated environment if requested
504 |     if args.isolated or args.fully_isolated:
505 |         setup_isolated_env()
506 |     
507 |     # Set up environment variables
508 |     if args.component:
509 |         os.environ["MCP_TEST_MODE"] = "1"
510 |         # Generate a unique collection name for isolated tests
511 |         if args.isolated or args.fully_isolated:
512 |             # Use a unique collection for each test run to ensure isolation
513 |             unique_id = uuid.uuid4().hex[:8]
514 |             os.environ["MCP_COLLECTION_NAME"] = f"test_collection_{unique_id}"
515 |     
516 |     # We need to set this for all async tests to ensure proper event loop handling
517 |     if args.component or args.integration:
518 |         os.environ["PYTEST_FORCE_ISOLATED_EVENT_LOOP"] = "1"
519 |     
520 |     # Print environment info
521 |     if args.verbose:
522 |         print("\nTest environment:")
523 |         print(f"Python: {sys.executable}")
524 |         if args.isolated or args.fully_isolated:
525 |             print(f"PYTHONPATH: {os.environ.get('PYTHONPATH', 'Not set')}")
526 |             print(f"Collection name: {os.environ.get('MCP_COLLECTION_NAME', 'Not set')}")
527 |             print("Asyncio mode: strict")
528 |     
529 |     # We have special handling for component tests in fully-isolated mode
530 |     if args.component and args.fully_isolated:
531 |         # Skip general pytest run and go straight to component test runner
532 |         exit_code = run_component_tests_fully_isolated(args.file)
533 |         sys.exit(exit_code)
534 |     
535 |     # Regular test flow - first try to run all together
536 |     exit_code = run_all_tests(args)
537 |     
538 |     # If not in isolated or component mode, we're done
539 |     if not args.isolated and not args.component:
540 |         # Generate coverage report if needed
541 |         if args.coverage:
542 |             generate_coverage_report(args.html)
543 |         sys.exit(exit_code)
544 |     
545 |     # If tests failed and we're in isolated mode, run each file separately
546 |     if exit_code != 0 and (args.isolated or args.component):
547 |         isolated_exit_code = run_isolated_modules(args)
548 |         
549 |         # Generate coverage report if needed
550 |         if args.coverage:
551 |             generate_coverage_report(args.html)
552 |         
553 |         sys.exit(isolated_exit_code)
554 |     
555 |     # Generate coverage report if needed
556 |     if args.coverage:
557 |         generate_coverage_report(args.html)
558 |     
559 |     sys.exit(exit_code)
560 | 
561 | 
562 | if __name__ == "__main__":
563 |     main()
```
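
The isolation strategy above reduces to one pattern: copy the parent environment, stamp it with per-run identifiers, and launch each test module in a fresh interpreter process. A minimal sketch of that pattern, using the same environment variables the runner sets (the module path `tests/components/test_example.py` is hypothetical):

```python
import os
import subprocess
import sys
import uuid


def run_module_isolated(module: str) -> int:
    """Run one test module in a fresh interpreter with an isolated env (sketch)."""
    env = os.environ.copy()
    env["MCP_TEST_MODE"] = "1"
    env["PYTEST_FORCE_ISOLATED_EVENT_LOOP"] = "1"
    # A unique collection name keeps Qdrant state from leaking between runs.
    env["MCP_COLLECTION_NAME"] = f"test_collection_{uuid.uuid4().hex[:8]}"
    return subprocess.run(
        [sys.executable, "-m", "pytest", module, "-v"], env=env
    ).returncode


if __name__ == "__main__":
    # Hypothetical module path, for illustration only.
    sys.exit(run_module_isolated("tests/components/test_example.py"))
```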

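Test discovery in the runner shells out to `grep` and falls back to a regex over the file contents. The same discovery can be done in pure Python, which avoids the external `grep` dependency; a sketch, assuming the module path is a readable file (`discover_test_names` is an illustrative name, not part of the codebase):

```python
import re


def discover_test_names(path: str) -> list[str]:
    """Find pytest-style test function names in a source file (sketch)."""
    with open(path, "r", encoding="utf-8") as f:
        content = f.read()
    # Matches both "def test_x(" and "async def test_x(".
    return re.findall(r"def\s+(test_\w+)\s*\(", content)
```
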
--------------------------------------------------------------------------------
/tests/components/test_sse_components.py:
--------------------------------------------------------------------------------

```python
  1 | """Unit tests for SSE core components."""
  2 | 
  3 | import sys
  4 | import os
  5 | 
  6 | # Ensure the src directory is in the Python path
  7 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
  8 | 
  9 | import asyncio
 10 | import pytest
 11 | import logging
 12 | from unittest.mock import AsyncMock, MagicMock, patch
 13 | from typing import Dict, Any, List, AsyncGenerator
 14 | 
 15 | from src.mcp_codebase_insight.core.sse import create_sse_server, MCP_CodebaseInsightServer
 16 | from mcp.server.fastmcp import FastMCP
 17 | from mcp.server.sse import SseServerTransport
 18 | 
 19 | # Set up logging for tests
 20 | logger = logging.getLogger(__name__)
 21 | 
 22 | # Mark all tests as asyncio tests
 23 | pytestmark = pytest.mark.asyncio
 24 | 
 25 | 
 26 | class MockState:
 27 |     """Mock server state for testing."""
 28 | 
 29 |     def __init__(self):
 30 |         self.components = {}
 31 | 
 32 |     def get_component(self, name):
 33 |         """Get a component by name."""
 34 |         return self.components.get(name)
 35 | 
 36 |     def get_component_status(self):
 37 |         """Get status of all components."""
 38 |         return {name: {"available": True} for name in self.components}
 39 | 
 40 |     def set_component(self, name, component):
 41 |         """Set a component."""
 42 |         self.components[name] = component
 43 | 
 44 | 
 45 | class MockVectorStore:
 46 |     """Mock vector store component for testing."""
 47 | 
 48 |     async def search(self, text, filter_conditions=None, limit=5):
 49 |         """Mock search method."""
 50 |         return [
 51 |             MagicMock(
 52 |                 id="test-id-1",
 53 |                 score=0.95,
 54 |                 metadata={
 55 |                     "text": "def example_function():\n    return 'example'",
 56 |                     "file_path": "/path/to/file.py",
 57 |                     "line_range": "10-15",
 58 |                     "type": "code",
 59 |                     "language": "python",
 60 |                     "timestamp": "2025-03-26T10:00:00"
 61 |                 }
 62 |             )
 63 |         ]
 64 | 
 65 | 
 66 | class MockKnowledgeBase:
 67 |     """Mock knowledge base component for testing."""
 68 | 
 69 |     async def search_patterns(self, query, pattern_type=None, limit=5):
 70 |         """Mock search_patterns method."""
 71 |         return [
 72 |             MagicMock(
 73 |                 id="pattern-id-1",
 74 |                 pattern="Example pattern",
 75 |                 description="Description of example pattern",
 76 |                 type=pattern_type or "code",
 77 |                 confidence=0.9,
 78 |                 metadata={"source": "test"}
 79 |             )
 80 |         ]
 81 | 
 82 | 
 83 | class MockADRManager:
 84 |     """Mock ADR manager component for testing."""
 85 | 
 86 |     async def list_adrs(self):
 87 |         """Mock list_adrs method."""
 88 |         return [
 89 |             MagicMock(
 90 |                 id="adr-id-1",
 91 |                 title="Example ADR",
 92 |                 status="accepted",
 93 |                 created_at=None,
 94 |                 updated_at=None
 95 |             )
 96 |         ]
 97 | 
 98 | 
 99 | class MockTaskManager:
100 |     """Mock task manager component for testing."""
101 | 
102 |     async def get_task(self, task_id):
103 |         """Mock get_task method."""
104 |         if task_id == "invalid-id":
105 |             return None
106 | 
107 |         return MagicMock(
108 |             id=task_id,
109 |             type="analysis",
110 |             status="running",
111 |             progress=0.5,
112 |             result=None,
113 |             error=None,
114 |             created_at=None,
115 |             updated_at=None
116 |         )
117 | 
118 | 
119 | @pytest.fixture
120 | def mock_server_state():
121 |     """Create a mock server state for testing."""
122 |     state = MockState()
123 | 
124 |     # Add mock components
125 |     state.set_component("vector_store", MockVectorStore())
126 |     state.set_component("knowledge_base", MockKnowledgeBase())
127 |     state.set_component("adr_manager", MockADRManager())
128 |     state.set_component("task_tracker", MockTaskManager())  # Updated component name to match sse.py
129 | 
130 |     return state
131 | 
132 | 
133 | @pytest.fixture
134 | def mcp_server(mock_server_state):
135 |     """Create an MCP server instance for testing."""
136 |     return MCP_CodebaseInsightServer(mock_server_state)
137 | 
138 | 
139 | async def test_mcp_server_initialization(mcp_server):
140 |     """Test MCP server initialization."""
141 |     # Verify the server was initialized correctly
142 |     assert mcp_server.state is not None
143 |     assert mcp_server.mcp_server is not None
144 |     assert mcp_server.mcp_server.name == "MCP-Codebase-Insight"
145 |     assert mcp_server.tools_registered is False
146 | 
147 | 
148 | async def test_register_tools(mcp_server):
149 |     """Test registering tools with the MCP server."""
150 |     # Register tools
151 |     mcp_server.register_tools()
152 | 
153 |     # Verify tools were registered
154 |     assert mcp_server.tools_registered is True
155 | 
156 |     # In MCP v1.5.0, we can't directly access tool_defs
157 |     # Instead we'll just verify registration was successful
158 |     # The individual tool tests will verify specific functionality
159 | 
160 | 
161 | async def test_get_starlette_app(mcp_server):
162 |     """Test getting the Starlette app for the MCP server."""
163 |     # Reset the cached app to force a new creation
164 |     mcp_server._starlette_app = None
165 | 
166 |     # Mock the create_sse_server function directly in the module
167 |     with patch('src.mcp_codebase_insight.core.sse.create_sse_server') as mock_create_sse:
168 |         # Set up the mock
169 |         mock_app = MagicMock()
170 |         mock_create_sse.return_value = mock_app
171 | 
172 |         # Get the Starlette app
173 |         app = mcp_server.get_starlette_app()
174 | 
175 |         # Verify tools were registered
176 |         assert mcp_server.tools_registered is True
177 | 
178 |         # Verify create_sse_server was called with the MCP server
179 |         mock_create_sse.assert_called_once_with(mcp_server.mcp_server)
180 | 
181 |         # Verify the app was returned
182 |         assert app == mock_app
183 | 
184 | 
185 | async def test_create_sse_server():
186 |     """Test creating the SSE server."""
187 |     # Use context managers for patching to ensure proper cleanup
188 |     with patch('src.mcp_codebase_insight.core.sse.CodebaseInsightSseTransport') as mock_transport, \
189 |          patch('src.mcp_codebase_insight.core.sse.Starlette') as mock_starlette:
190 |         # Set up mocks
191 |         mock_mcp = MagicMock(spec=FastMCP)
192 |         mock_transport_instance = MagicMock()
193 |         mock_transport.return_value = mock_transport_instance
194 |         mock_app = MagicMock()
195 |         mock_starlette.return_value = mock_app
196 | 
197 |         # Create the SSE server
198 |         app = create_sse_server(mock_mcp)
199 | 
200 |         # Verify CodebaseInsightSseTransport was initialized correctly
201 |         mock_transport.assert_called_once_with("/sse")
202 | 
203 |         # Verify Starlette was initialized with routes
204 |         mock_starlette.assert_called_once()
205 | 
206 |         # Verify the app was returned
207 |         assert app == mock_app
208 | 
209 | 
210 | async def test_vector_search_tool(mcp_server):
211 |     """Test the vector search tool."""
212 |     # Make sure tools are registered
213 |     if not mcp_server.tools_registered:
214 |         mcp_server.register_tools()
215 | 
216 |     # Mock the FastMCP add_tool method to capture calls
217 |     with patch.object(mcp_server.mcp_server, 'add_tool') as mock_add_tool:
218 |         # Re-register the vector search tool
219 |         mcp_server._register_vector_search()
220 | 
221 |         # Verify tool was registered with correct parameters
222 |         mock_add_tool.assert_called_once()
223 | 
224 |         # Get the arguments from the call
225 |         # The structure might be different depending on how add_tool is implemented
226 |         call_args = mock_add_tool.call_args
227 | 
228 |         # Check if we have positional args
229 |         if call_args[0]:
230 |             # First positional arg should be the tool name
231 |             tool_name = call_args[0][0]
232 |             assert tool_name in ("vector-search", "search-vector", "vector_search")  # Accept possible variants
233 | 
234 |             # If there's a second positional arg, it might be a function or a dict with tool details
235 |             if len(call_args[0]) > 1:
236 |                 second_arg = call_args[0][1]
237 |                 if callable(second_arg):
238 |                     # If it's a function, that's our handler
239 |                     assert callable(second_arg)
240 |                 elif isinstance(second_arg, dict):
241 |                     # If it's a dict, it should have a description and handler
242 |                     assert "description" in second_arg
243 |                     if "handler" in second_arg:
244 |                         assert callable(second_arg["handler"])
245 |                     elif "fn" in second_arg:
246 |                         assert callable(second_arg["fn"])
247 | 
248 |         # Check keyword args
249 |         if call_args[1]:
250 |             kwargs = call_args[1]
251 |             if "description" in kwargs:
252 |                 assert isinstance(kwargs["description"], str)
253 |             if "handler" in kwargs:
254 |                 assert callable(kwargs["handler"])
255 |             if "fn" in kwargs:
256 |                 assert callable(kwargs["fn"])
257 | 
258 | 
259 | async def test_knowledge_search_tool(mcp_server):
260 |     """Test the knowledge search tool."""
261 |     # Make sure tools are registered
262 |     if not mcp_server.tools_registered:
263 |         mcp_server.register_tools()
264 | 
265 |     # Mock the FastMCP add_tool method to capture calls
266 |     with patch.object(mcp_server.mcp_server, 'add_tool') as mock_add_tool:
267 |         # Re-register the knowledge search tool
268 |         mcp_server._register_knowledge()
269 | 
270 |         # Verify tool was registered with correct parameters
271 |         mock_add_tool.assert_called_once()
272 | 
273 |         # Get the arguments from the call
274 |         call_args = mock_add_tool.call_args
275 | 
276 |         # Check if we have positional args
277 |         if call_args[0]:
278 |             # First positional arg should be the tool name
279 |             tool_name = call_args[0][0]
280 |             assert tool_name in ("knowledge-search", "search-knowledge")  # Accept possible variants
281 | 
282 |             # If there's a second positional arg, it might be a function or a dict with tool details
283 |             if len(call_args[0]) > 1:
284 |                 second_arg = call_args[0][1]
285 |                 if callable(second_arg):
286 |                     # If it's a function, that's our handler
287 |                     assert callable(second_arg)
288 |                 elif isinstance(second_arg, dict):
289 |                     # If it's a dict, it should have a description and handler
290 |                     assert "description" in second_arg
291 |                     if "handler" in second_arg:
292 |                         assert callable(second_arg["handler"])
293 |                     elif "fn" in second_arg:
294 |                         assert callable(second_arg["fn"])
295 | 
296 |         # Check keyword args
297 |         if call_args[1]:
298 |             kwargs = call_args[1]
299 |             if "description" in kwargs:
300 |                 assert isinstance(kwargs["description"], str)
301 |             if "handler" in kwargs:
302 |                 assert callable(kwargs["handler"])
303 |             if "fn" in kwargs:
304 |                 assert callable(kwargs["fn"])
305 | 
306 | 
307 | async def test_adr_list_tool(mcp_server):
308 |     """Test the ADR list tool."""
309 |     # Make sure tools are registered
310 |     if not mcp_server.tools_registered:
311 |         mcp_server.register_tools()
312 | 
313 |     # Mock the FastMCP add_tool method to capture calls
314 |     with patch.object(mcp_server.mcp_server, 'add_tool') as mock_add_tool:
315 |         # Re-register the ADR list tool
316 |         mcp_server._register_adr()
317 | 
318 |         # Verify tool was registered with correct parameters
319 |         mock_add_tool.assert_called_once()
320 | 
321 |         # Get the arguments from the call
322 |         call_args = mock_add_tool.call_args
323 | 
324 |         # Check if we have positional args
325 |         if call_args[0]:
326 |             # First positional arg should be the tool name
327 |             tool_name = call_args[0][0]
328 |             assert tool_name in ("list-adrs", "adr-list")  # Accept possible variants
329 | 
330 |             # If there's a second positional arg, it might be a function or a dict with tool details
331 |             if len(call_args[0]) > 1:
332 |                 second_arg = call_args[0][1]
333 |                 if callable(second_arg):
334 |                     # If it's a function, that's our handler
335 |                     assert callable(second_arg)
336 |                 elif isinstance(second_arg, dict):
337 |                     # If it's a dict, it should have a description and handler
338 |                     assert "description" in second_arg
339 |                     if "handler" in second_arg:
340 |                         assert callable(second_arg["handler"])
341 |                     elif "fn" in second_arg:
342 |                         assert callable(second_arg["fn"])
343 | 
344 |         # Check keyword args
345 |         if call_args[1]:
346 |             kwargs = call_args[1]
347 |             if "description" in kwargs:
348 |                 assert isinstance(kwargs["description"], str)
349 |             if "handler" in kwargs:
350 |                 assert callable(kwargs["handler"])
351 |             if "fn" in kwargs:
352 |                 assert callable(kwargs["fn"])
353 | 
354 | 
355 | async def test_task_status_tool(mcp_server):
356 |     """Test the task status tool."""
357 |     # Make sure tools are registered
358 |     if not mcp_server.tools_registered:
359 |         mcp_server.register_tools()
360 | 
361 |     # Mock the FastMCP add_tool method to capture calls
362 |     with patch.object(mcp_server.mcp_server, 'add_tool') as mock_add_tool:
363 |         # Re-register the task status tool
364 |         mcp_server._register_task()
365 | 
366 |         # Verify tool was registered with correct parameters
367 |         mock_add_tool.assert_called_once()
368 | 
369 |         # Get the arguments from the call
370 |         call_args = mock_add_tool.call_args
371 | 
372 |         # Check if we have positional args
373 |         if call_args[0]:
374 |             # First positional arg should be the tool name
375 |             tool_name = call_args[0][0]
376 |             assert tool_name in ("task-status", "get-task-status")  # Accept possible variants
377 | 
378 |             # If there's a second positional arg, it might be a function or a dict with tool details
379 |             if len(call_args[0]) > 1:
380 |                 second_arg = call_args[0][1]
381 |                 if callable(second_arg):
382 |                     # If it's a function, that's our handler
383 |                     assert callable(second_arg)
384 |                 elif isinstance(second_arg, dict):
385 |                     # If it's a dict, it should have a description and handler
386 |                     assert "description" in second_arg
387 |                     if "handler" in second_arg:
388 |                         assert callable(second_arg["handler"])
389 |                     elif "fn" in second_arg:
390 |                         assert callable(second_arg["fn"])
391 | 
392 |         # Check keyword args
393 |         if call_args[1]:
394 |             kwargs = call_args[1]
395 |             if "description" in kwargs:
396 |                 assert isinstance(kwargs["description"], str)
397 |             if "handler" in kwargs:
398 |                 assert callable(kwargs["handler"])
399 |             if "fn" in kwargs:
400 |                 assert callable(kwargs["fn"])
401 | 
402 | 
403 | async def test_sse_handle_connect():
404 |     """Test the SSE connection handling functionality."""
405 |     # Use context managers for patching to ensure proper cleanup
406 |     with patch('src.mcp_codebase_insight.core.sse.CodebaseInsightSseTransport') as mock_transport, \
407 |          patch('src.mcp_codebase_insight.core.sse.Starlette') as mock_starlette:
408 |         # Set up mocks
409 |         mock_transport_instance = MagicMock()
410 |         mock_transport.return_value = mock_transport_instance
411 | 
412 |         mock_mcp = MagicMock(spec=FastMCP)
413 |         # For MCP v1.5.0, create a mock run method instead of initialization options
414 |         mock_mcp.run = AsyncMock()
415 | 
416 |         mock_request = MagicMock()
417 |         mock_request.client = "127.0.0.1"
418 |         mock_request.scope = {"type": "http"}
419 |         mock_request.receive = AsyncMock()
420 |         mock_request._send = AsyncMock()
421 | 
422 |         # Mock the transport's handle_sse method
423 |         mock_transport_instance.handle_sse = AsyncMock()
424 | 
425 |         # Create a mock handler and add it to our mock app instance
426 |         handle_sse = AsyncMock()
427 |         mock_app = MagicMock()
428 |         mock_starlette.return_value = mock_app
429 | 
430 |         # Set up a mock route that we can access
431 |         mock_route = MagicMock()
432 |         mock_route.path = "/sse"
433 |         mock_route.endpoint = handle_sse
434 |         mock_app.routes = [mock_route]
435 | 
436 |         # Create the SSE server
437 |         app = create_sse_server(mock_mcp)
438 | 
439 |         # Since we can't rely on call_args, we'll directly test the mock_transport_instance
440 |         # Verify that handle_sse was set as an endpoint
441 |         mock_transport_instance.handle_sse.assert_not_called()
442 | 
443 |         # Call the mock transport's handle_sse method directly
444 |         await mock_transport_instance.handle_sse(mock_request)
445 | 
446 |         # Verify handle_sse was called with the request
447 |         mock_transport_instance.handle_sse.assert_called_once_with(mock_request)
448 | 
449 | 
450 | async def test_sse_backpressure_handling(mcp_server):
451 |     """Test SSE backpressure handling mechanism."""
452 |     # Set up a mock transport with a slow client
453 |     mock_transport = MagicMock()
454 |     mock_transport.send = AsyncMock()
455 | 
456 |     # Simulate backpressure by making send delay
457 |     async def delayed_send(*args, **kwargs):
458 |         await asyncio.sleep(0.1)  # Simulate slow client
459 |         return True
460 | 
461 |     mock_transport.send.side_effect = delayed_send
462 | 
463 |     # Create a test event generator that produces events faster than they can be sent
464 |     events = []
465 |     start_time = asyncio.get_event_loop().time()
466 | 
467 |     async def fast_event_generator():
468 |         for i in range(10):
469 |             yield f"event_{i}"
470 |             await asyncio.sleep(0.01)  # Generate events faster than they can be sent
471 | 
472 |     # Process events and measure time
473 |     async for event in fast_event_generator():
474 |         await mock_transport.send(event)
475 |         events.append(event)
476 | 
477 |     end_time = asyncio.get_event_loop().time()
478 |     total_time = end_time - start_time
479 | 
480 |     # Verify the backpressure mechanism is working
481 |     # The sends alone take 10 events * 0.1s each, so 1.0s is a hard lower bound
482 |     assert total_time >= 1.0
483 |     assert len(events) == 10  # All events should be processed
484 |     assert events == [f"event_{i}" for i in range(10)]  # Events should be in order
485 | 
486 | 
487 | async def test_sse_connection_management(mcp_server):
488 |     """Test SSE connection lifecycle management."""
489 |     # Set up connection tracking
490 |     active_connections = set()
491 | 
492 |     # Mock connection handler
493 |     async def handle_connection(client_id):
494 |         # Add connection to tracking
495 |         active_connections.add(client_id)
496 |         try:
497 |             # Simulate connection lifetime
498 |             await asyncio.sleep(0.1)
499 |         finally:
500 |             # Ensure connection is removed on disconnect
501 |             active_connections.remove(client_id)
502 | 
503 |     # Test multiple concurrent connections
504 |     async def simulate_connections():
505 |         tasks = []
506 |         for i in range(3):
507 |             client_id = f"client_{i}"
508 |             task = asyncio.create_task(handle_connection(client_id))
509 |             tasks.append(task)
510 | 
511 |         # Verify all connections are active
512 |         await asyncio.sleep(0.05)
513 |         assert len(active_connections) == 3
514 | 
515 |         # Wait for all connections to complete
516 |         await asyncio.gather(*tasks)
517 | 
518 |         # Verify all connections were properly cleaned up
519 |         assert len(active_connections) == 0
520 | 
521 |     await simulate_connections()
522 | 
523 | 
524 | async def test_sse_keep_alive(mcp_server):
525 |     """Test SSE keep-alive mechanism."""
526 |     mock_transport = MagicMock()
527 |     mock_transport.send = AsyncMock()
528 | 
529 |     # Set up keep-alive configuration
530 |     keep_alive_interval = 0.1  # 100ms for testing
531 |     last_keep_alive = 0
532 | 
533 |     # Simulate connection with keep-alive
534 |     async def run_keep_alive():
535 |         nonlocal last_keep_alive
536 |         start_time = asyncio.get_event_loop().time()
537 | 
538 |         # Run for a short period
539 |         while asyncio.get_event_loop().time() - start_time < 0.5:
540 |             current_time = asyncio.get_event_loop().time()
541 | 
542 |             # Send keep-alive if interval has elapsed
543 |             if current_time - last_keep_alive >= keep_alive_interval:
544 |                 await mock_transport.send(": keep-alive\n")
545 |                 last_keep_alive = current_time
546 | 
547 |             await asyncio.sleep(0.01)
548 | 
549 |     await run_keep_alive()
550 | 
551 |     # Verify keep-alive messages were sent
552 |     expected_messages = int(0.5 / keep_alive_interval)  # Expected number of keep-alive messages
553 |     # Allow for slight timing variations in test environments - CI systems and different machines
554 |     # may have different scheduling characteristics that affect precise timing
555 |     assert mock_transport.send.call_count >= expected_messages - 1  # Allow for timing variations
556 |     assert mock_transport.send.call_count <= expected_messages + 1
557 | 
558 | 
559 | async def test_sse_error_handling(mcp_server):
560 |     """Test SSE error handling and recovery."""
561 |     mock_transport = MagicMock()
562 |     mock_transport.send = AsyncMock()
563 | 
564 |     # Simulate various error conditions
565 |     async def simulate_errors():
566 |         # Test network error
567 |         mock_transport.send.side_effect = ConnectionError("Network error")
568 |         with pytest.raises(ConnectionError):
569 |             await mock_transport.send("test_event")
570 | 
571 |         # Test client disconnect
572 |         mock_transport.send.side_effect = asyncio.CancelledError()
573 |         with pytest.raises(asyncio.CancelledError):
574 |             await mock_transport.send("test_event")
575 | 
576 |         # Test recovery after error
577 |         mock_transport.send.side_effect = None
578 |         await mock_transport.send("recovery_event")
579 |         mock_transport.send.assert_called_with("recovery_event")
580 | 
581 |     await simulate_errors()
582 | 
583 | 
584 | async def test_sse_reconnection_handling():
585 |     """Test handling of client reconnection scenarios."""
586 |     mock_transport = MagicMock()
587 |     mock_transport.send = AsyncMock()
588 |     connection_id = "test-client-1"
589 |     connection_states = []
590 |     connection_states.append("connected")
591 |     mock_transport.send.side_effect = ConnectionError("Client disconnected")
592 |     try:
593 |         await mock_transport.send("event")
594 |     except ConnectionError:
595 |         connection_states.append("disconnected")
596 |     mock_transport.send.side_effect = None
597 |     mock_transport.send.reset_mock()
598 |     connection_states.append("reconnected")
599 |     await mock_transport.send("event_after_reconnect")
600 |     assert connection_states == ["connected", "disconnected", "reconnected"]
601 |     mock_transport.send.assert_called_once_with("event_after_reconnect")
602 | 
603 | 
604 | async def test_sse_concurrent_message_processing():
605 |     """Test handling of concurrent message processing in SSE."""
606 |     processed_messages = []
607 |     processing_lock = asyncio.Lock()
608 |     async def process_message(message, delay):
609 |         await asyncio.sleep(delay)
610 |         async with processing_lock:
611 |             processed_messages.append(message)
612 |     tasks = [
613 |         asyncio.create_task(process_message("fast_message", 0.01)),
614 |         asyncio.create_task(process_message("slow_message", 0.05)),
615 |         asyncio.create_task(process_message("medium_message", 0.03))
616 |     ]
617 |     await asyncio.gather(*tasks)
618 |     assert len(processed_messages) == 3
619 |     assert set(processed_messages) == {"fast_message", "medium_message", "slow_message"}
620 | 
621 | 
622 | async def test_sse_timeout_handling():
623 |     """Test SSE behavior when operations timeout."""
624 |     mock_component = MagicMock()
625 |     mock_component.slow_operation = AsyncMock()
626 |     async def slow_operation():
627 |         await asyncio.sleep(0.5)
628 |         return {"result": "success"}
629 |     mock_component.slow_operation.side_effect = slow_operation
630 |     try:
631 |         result = await asyncio.wait_for(mock_component.slow_operation(), timeout=0.1)
632 |         timed_out = False
633 |     except asyncio.TimeoutError:
634 |         timed_out = True
635 |     assert timed_out, "Operation should have timed out"
636 |     mock_component.slow_operation.assert_called_once()
637 | 
```
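
The four tool-registration tests above repeat the same `call_args` inspection almost verbatim. If that duplication becomes a burden, the checks could be factored into a shared helper along these lines (a sketch only; `assert_tool_registered` and its `accepted_names` parameter are illustrative, not part of the test suite):

```python
def assert_tool_registered(mock_add_tool, accepted_names):
    """Shared assertions for a single FastMCP add_tool call (sketch)."""
    mock_add_tool.assert_called_once()
    args, kwargs = mock_add_tool.call_args

    if args:
        # First positional arg is the tool name; accept known variants.
        assert args[0] in accepted_names
        if len(args) > 1 and isinstance(args[1], dict):
            # A dict second arg should carry a description and a callable handler.
            assert "description" in args[1]
            handler = args[1].get("handler") or args[1].get("fn")
            if handler is not None:
                assert callable(handler)

    if "description" in kwargs:
        assert isinstance(kwargs["description"], str)
    for key in ("handler", "fn"):
        if key in kwargs:
            assert callable(kwargs[key])
```

Each test body would then shrink to registering its tool and calling, e.g., `assert_tool_registered(mock_add_tool, {"vector-search", "search-vector", "vector_search"})`.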