This is page 5 of 8. Use http://codebase.md/tosin2013/mcp-codebase-insight?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .bumpversion.cfg
├── .codecov.yml
├── .compile-venv-py3.11
│   ├── bin
│   │   ├── activate
│   │   ├── activate.csh
│   │   ├── activate.fish
│   │   ├── Activate.ps1
│   │   ├── coverage
│   │   ├── coverage-3.11
│   │   ├── coverage3
│   │   ├── pip
│   │   ├── pip-compile
│   │   ├── pip-sync
│   │   ├── pip3
│   │   ├── pip3.11
│   │   ├── py.test
│   │   ├── pyproject-build
│   │   ├── pytest
│   │   ├── python
│   │   ├── python3
│   │   ├── python3.11
│   │   └── wheel
│   └── pyvenv.cfg
├── .env.example
├── .github
│   ├── agents
│   │   ├── DebugAgent.agent.md
│   │   ├── DocAgent.agent.md
│   │   ├── README.md
│   │   ├── TestAgent.agent.md
│   │   └── VectorStoreAgent.agent.md
│   ├── copilot-instructions.md
│   └── workflows
│       ├── build-verification.yml
│       ├── publish.yml
│       └── tdd-verification.yml
├── .gitignore
├── async_fixture_wrapper.py
├── CHANGELOG.md
├── CLAUDE.md
├── codebase_structure.txt
├── component_test_runner.py
├── CONTRIBUTING.md
├── core_workflows.txt
├── create_release_issues.sh
├── debug_tests.md
├── Dockerfile
├── docs
│   ├── adrs
│   │   └── 001_use_docker_for_qdrant.md
│   ├── api.md
│   ├── components
│   │   └── README.md
│   ├── cookbook.md
│   ├── development
│   │   ├── CODE_OF_CONDUCT.md
│   │   ├── CONTRIBUTING.md
│   │   └── README.md
│   ├── documentation_map.md
│   ├── documentation_summary.md
│   ├── features
│   │   ├── adr-management.md
│   │   ├── code-analysis.md
│   │   └── documentation.md
│   ├── getting-started
│   │   ├── configuration.md
│   │   ├── docker-setup.md
│   │   ├── installation.md
│   │   ├── qdrant_setup.md
│   │   └── quickstart.md
│   ├── qdrant_setup.md
│   ├── README.md
│   ├── SSE_INTEGRATION.md
│   ├── system_architecture
│   │   └── README.md
│   ├── templates
│   │   └── adr.md
│   ├── testing_guide.md
│   ├── troubleshooting
│   │   ├── common-issues.md
│   │   └── faq.md
│   ├── vector_store_best_practices.md
│   └── workflows
│       └── README.md
├── error_logs.txt
├── examples
│   └── use_with_claude.py
├── github-actions-documentation.md
├── Makefile
├── module_summaries
│   ├── backend_summary.txt
│   ├── database_summary.txt
│   └── frontend_summary.txt
├── output.txt
├── package-lock.json
├── package.json
├── PLAN.md
├── prepare_codebase.sh
├── PULL_REQUEST.md
├── pyproject.toml
├── pytest.ini
├── README.md
├── requirements-3.11.txt
├── requirements-3.11.txt.backup
├── requirements-dev.txt
├── requirements.in
├── requirements.txt
├── run_build_verification.sh
├── run_fixed_tests.sh
├── run_test_with_path_fix.sh
├── run_tests.py
├── scripts
│   ├── check_qdrant_health.sh
│   ├── compile_requirements.sh
│   ├── load_example_patterns.py
│   ├── macos_install.sh
│   ├── README.md
│   ├── setup_qdrant.sh
│   ├── start_mcp_server.sh
│   ├── store_code_relationships.py
│   ├── store_report_in_mcp.py
│   ├── validate_knowledge_base.py
│   ├── validate_poc.py
│   ├── validate_vector_store.py
│   └── verify_build.py
├── server.py
├── setup_qdrant_collection.py
├── setup.py
├── src
│   └── mcp_codebase_insight
│       ├── __init__.py
│       ├── __main__.py
│       ├── asgi.py
│       ├── core
│       │   ├── __init__.py
│       │   ├── adr.py
│       │   ├── cache.py
│       │   ├── component_status.py
│       │   ├── config.py
│       │   ├── debug.py
│       │   ├── di.py
│       │   ├── documentation.py
│       │   ├── embeddings.py
│       │   ├── errors.py
│       │   ├── health.py
│       │   ├── knowledge.py
│       │   ├── metrics.py
│       │   ├── prompts.py
│       │   ├── sse.py
│       │   ├── state.py
│       │   ├── task_tracker.py
│       │   ├── tasks.py
│       │   └── vector_store.py
│       ├── models.py
│       ├── server_test_isolation.py
│       ├── server.py
│       ├── utils
│       │   ├── __init__.py
│       │   └── logger.py
│       └── version.py
├── start-mcpserver.sh
├── summary_document.txt
├── system-architecture.md
├── system-card.yml
├── test_fix_helper.py
├── test_fixes.md
├── test_function.txt
├── test_imports.py
├── tests
│   ├── components
│   │   ├── conftest.py
│   │   ├── test_core_components.py
│   │   ├── test_embeddings.py
│   │   ├── test_knowledge_base.py
│   │   ├── test_sse_components.py
│   │   ├── test_stdio_components.py
│   │   ├── test_task_manager.py
│   │   └── test_vector_store.py
│   ├── config
│   │   └── test_config_and_env.py
│   ├── conftest.py
│   ├── integration
│   │   ├── fixed_test2.py
│   │   ├── test_api_endpoints.py
│   │   ├── test_api_endpoints.py-e
│   │   ├── test_communication_integration.py
│   │   └── test_server.py
│   ├── README.md
│   ├── README.test.md
│   ├── test_build_verifier.py
│   └── test_file_relationships.py
└── trajectories
    └── tosinakinosho
        ├── anthropic_filemap__claude-3-sonnet-20240229__t-0.00__p-1.00__c-3.00___db62b9
        │   └── db62b9
        │       └── config.yaml
        ├── default__claude-3-5-sonnet-20240620__t-0.00__p-1.00__c-3.00___03565e
        │   └── 03565e
        │       ├── 03565e.traj
        │       └── config.yaml
        └── default__openrouter
            └── anthropic
                └── claude-3.5-sonnet-20240620:beta__t-0.00__p-1.00__c-3.00___03565e
                    └── 03565e
                        ├── 03565e.pred
                        ├── 03565e.traj
                        └── config.yaml
```
# Files
--------------------------------------------------------------------------------
/component_test_runner.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python
2 | """
3 | Component Test Runner
4 |
5 | A specialized runner for executing component tests with proper async fixture handling.
6 | This bypasses the standard pytest fixture mechanisms to handle async fixtures correctly
7 | in isolated execution environments.
8 | """
9 | import os
10 | import sys
11 | import uuid
12 | import asyncio
13 | import importlib
14 | from pathlib import Path
15 | import inspect
16 | import logging
17 | import re
18 | from typing import Dict, Any, List, Callable, Tuple, Optional, Set, Awaitable
19 |
20 | # Configure logging
21 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
22 | logger = logging.getLogger("component-test-runner")
23 |
24 | # Ensure the repository root is on sys.path so the `src` package imports
25 | # below resolve regardless of the current working directory
26 | sys.path.insert(0, '/Users/tosinakinosho/workspaces/mcp-codebase-insight')
27 |
28 | # Import required components directly to avoid fixture resolution issues
29 | from src.mcp_codebase_insight.core.config import ServerConfig
30 | from src.mcp_codebase_insight.core.vector_store import VectorStore
31 | from src.mcp_codebase_insight.core.embeddings import SentenceTransformerEmbedding
32 | from src.mcp_codebase_insight.core.knowledge import KnowledgeBase
33 | from src.mcp_codebase_insight.core.tasks import TaskManager
34 |
35 |
36 | async def create_test_config() -> ServerConfig:
37 | """Create a server configuration for tests."""
38 | # Generate a unique collection name for this test run
39 | collection_name = f"test_collection_{uuid.uuid4().hex[:8]}"
40 |
41 | # Check if MCP_COLLECTION_NAME is set in env, use that instead if available
42 | if "MCP_COLLECTION_NAME" in os.environ:
43 | collection_name = os.environ["MCP_COLLECTION_NAME"]
44 |
45 | logger.info(f"Using test collection: {collection_name}")
46 |
47 | config = ServerConfig(
48 | host="localhost",
49 | port=8000,
50 | log_level="DEBUG",
51 | qdrant_url="http://localhost:6333",
52 | docs_cache_dir=Path(".test_cache") / "docs",
53 | adr_dir=Path(".test_cache") / "docs/adrs",
54 | kb_storage_dir=Path(".test_cache") / "knowledge",
55 | embedding_model="all-MiniLM-L6-v2",
56 | collection_name=collection_name,
57 | debug_mode=True,
58 | metrics_enabled=False,
59 | cache_enabled=True,
60 | memory_cache_size=1000,
61 | disk_cache_dir=Path(".test_cache") / "cache"
62 | )
63 | return config
64 |
65 |
66 | async def create_embedder() -> SentenceTransformerEmbedding:
67 | """Create an embedder for tests."""
68 | logger.info("Initializing the embedder...")
69 | return SentenceTransformerEmbedding()
70 |
71 |
72 | async def create_vector_store(config: ServerConfig, embedder: SentenceTransformerEmbedding) -> VectorStore:
73 | """Create a vector store for tests."""
74 | logger.info("Initializing the vector store...")
75 | store = VectorStore(config.qdrant_url, embedder)
76 | try:
77 | await store.initialize()
78 | logger.info("Vector store initialized successfully")
79 | return store
80 | except Exception as e:
81 | logger.error(f"Failed to initialize vector store: {e}")
82 | raise RuntimeError(f"Failed to initialize vector store: {e}")
83 |
84 |
85 | async def create_knowledge_base(config: ServerConfig, vector_store: VectorStore) -> KnowledgeBase:
86 | """Create a knowledge base for tests."""
87 | logger.info("Initializing the knowledge base...")
88 | kb = KnowledgeBase(config, vector_store)
89 | try:
90 | await kb.initialize()
91 | logger.info("Knowledge base initialized successfully")
92 | return kb
93 | except Exception as e:
94 | logger.error(f"Failed to initialize knowledge base: {e}")
95 | raise RuntimeError(f"Failed to initialize knowledge base: {e}")
96 |
97 |
98 | async def create_task_manager(config: ServerConfig) -> TaskManager:
99 | """Create a task manager for tests."""
100 | logger.info("Initializing the task manager...")
101 | manager = TaskManager(config)
102 | try:
103 | await manager.initialize()
104 | logger.info("Task manager initialized successfully")
105 | return manager
106 | except Exception as e:
107 | logger.error(f"Failed to initialize task manager: {e}")
108 | raise RuntimeError(f"Failed to initialize task manager: {e}")
109 |
110 |
111 | async def create_test_metadata() -> Dict[str, Any]:
112 | """Standard test metadata for consistency across tests."""
113 | return {
114 | "type": "code",
115 | "language": "python",
116 | "title": "Test Code",
117 | "description": "Test code snippet for vector store testing",
118 | "tags": ["test", "vector"]
119 | }
120 |
121 |
122 | def create_test_code() -> str:
123 | """Provide sample code for testing task-related functionality."""
124 | return """
125 | def example_function():
126 | \"\"\"This is a test function for task manager tests.\"\"\"
127 | return "Hello, world!"
128 |
129 | class TestClass:
130 | def __init__(self):
131 | self.value = 42
132 |
133 | def method(self):
134 | return self.value
135 | """
136 |
137 |
138 | async def cleanup_vector_store(vector_store: VectorStore) -> None:
139 | """Cleanup a vector store after tests."""
140 | if vector_store and hasattr(vector_store, 'cleanup'):
141 | logger.info("Cleaning up vector store...")
142 | try:
143 | await vector_store.cleanup()
144 | logger.info("Vector store cleanup completed")
145 | except Exception as e:
146 | logger.error(f"Error during vector store cleanup: {e}")
147 |
148 |
149 | async def cleanup_knowledge_base(kb: KnowledgeBase) -> None:
150 | """Cleanup a knowledge base after tests."""
151 | if kb and hasattr(kb, 'cleanup'):
152 | logger.info("Cleaning up knowledge base...")
153 | try:
154 | await kb.cleanup()
155 | logger.info("Knowledge base cleanup completed")
156 | except Exception as e:
157 | logger.error(f"Error during knowledge base cleanup: {e}")
158 |
159 |
160 | async def cleanup_task_manager(manager: TaskManager) -> None:
161 | """Cleanup a task manager after tests."""
162 | if manager and hasattr(manager, 'cleanup'):
163 | logger.info("Cleaning up task manager...")
164 | try:
165 | await manager.cleanup()
166 | logger.info("Task manager cleanup completed")
167 | except Exception as e:
168 | logger.error(f"Error cleaning up task manager: {e}")
169 |
170 |
171 | def get_module_tests(module_path: str) -> List[str]:
172 | """Get the list of tests in a module."""
173 | logger.info(f"Analyzing module: {module_path}")
174 | with open(module_path, 'r') as file:
175 | content = file.read()
176 |
177 | # Pattern to match async test definitions; fixtures are filtered out below
178 | pattern = r'async\s+def\s+(test_\w+)\s*\('
179 |
180 | # Find test functions that are not fixtures (exclude lines with @pytest.fixture)
181 | lines = content.split('\n')
182 | test_functions = []
183 |
184 | for i, line in enumerate(lines):
185 | if i > 0 and '@pytest.fixture' in lines[i-1]:
186 | continue # Skip this as it's a fixture, not a test
187 |
188 | match = re.search(pattern, line)
189 | if match:
190 | test_functions.append(match.group(1))
191 |
192 | logger.info(f"Found {len(test_functions)} tests in {module_path}")
193 | return test_functions
194 |
195 | def load_test_module(module_path: str):
196 | """Load a test module with proper path handling."""
197 | # Convert file path to module path
198 | if module_path.endswith('.py'):
199 | module_path = module_path[:-3] # Remove .py extension
200 |
201 | # Convert path separators to module separators
202 | module_name = module_path.replace('/', '.').replace('\\', '.')
203 |
204 | # Ensure we use the correct Python path
205 | if not any(p == '.' for p in sys.path):
206 | sys.path.append('.')
207 |
208 | logger.info(f"Attempting to import module: {module_name}")
209 | try:
210 | return importlib.import_module(module_name)
211 | except ImportError as e:
212 | logger.error(f"Failed to import test module {module_name}: {e}")
213 | return None
214 |
215 |
216 | async def run_component_test(module_path: str, test_name: str) -> bool:
217 | """
218 | Dynamically load and run a component test with proper fixture initialization.
219 |
220 | Args:
221 | module_path: Path to the test module
222 | test_name: Name of the test function to run
223 |
224 | Returns:
225 | True if test passed, False if it failed
226 | """
227 | logger.info(f"Running test: {module_path}::{test_name}")
228 |
229 | # Import the test module
230 | test_module = load_test_module(module_path)
231 | if not test_module:
232 | return False
233 |
234 | # Get the test function
235 | if not hasattr(test_module, test_name):
236 | logger.error(f"Test function {test_name} not found in module {module_path}")
237 | return False
238 |
239 | test_func = getattr(test_module, test_name)
240 |
241 | # Determine which fixtures the test needs
242 | required_fixtures = inspect.signature(test_func).parameters
243 | logger.info(f"Test requires fixtures: {list(required_fixtures.keys())}")
244 |
245 | # Initialize the required fixtures
246 | fixture_values = {}
247 | resources_to_cleanup = []
248 |
249 | try:
250 | # Create ServerConfig first since many other fixtures depend on it
251 | if "test_config" in required_fixtures:
252 | logger.info("Setting up test_config fixture")
253 | fixture_values["test_config"] = await create_test_config()
254 |
255 | # Create embedder if needed
256 | if "embedder" in required_fixtures:
257 | logger.info("Setting up embedder fixture")
258 | fixture_values["embedder"] = await create_embedder()
259 |
260 | # Create test metadata if needed
261 | if "test_metadata" in required_fixtures:
262 | logger.info("Setting up test_metadata fixture")
263 | fixture_values["test_metadata"] = await create_test_metadata()
264 |
265 | # Create test code if needed
266 | if "test_code" in required_fixtures:
267 | logger.info("Setting up test_code fixture")
268 | fixture_values["test_code"] = create_test_code()
269 |
270 | # Create vector store if needed
271 | if "vector_store" in required_fixtures:
272 | logger.info("Setting up vector_store fixture")
273 | if "test_config" not in fixture_values:
274 | fixture_values["test_config"] = await create_test_config()
275 | if "embedder" not in fixture_values:
276 | fixture_values["embedder"] = await create_embedder()
277 |
278 | fixture_values["vector_store"] = await create_vector_store(
279 | fixture_values["test_config"],
280 | fixture_values["embedder"]
281 | )
282 | resources_to_cleanup.append(("vector_store", fixture_values["vector_store"]))
283 |
284 | # Create knowledge base if needed
285 | if "knowledge_base" in required_fixtures:
286 | logger.info("Setting up knowledge_base fixture")
287 | if "test_config" not in fixture_values:
288 | fixture_values["test_config"] = await create_test_config()
289 | if "vector_store" not in fixture_values:
290 | if "embedder" not in fixture_values:
291 | fixture_values["embedder"] = await create_embedder()
292 | fixture_values["vector_store"] = await create_vector_store(
293 | fixture_values["test_config"],
294 | fixture_values["embedder"]
295 | )
296 | resources_to_cleanup.append(("vector_store", fixture_values["vector_store"]))
297 |
298 | fixture_values["knowledge_base"] = await create_knowledge_base(
299 | fixture_values["test_config"],
300 | fixture_values["vector_store"]
301 | )
302 | resources_to_cleanup.append(("knowledge_base", fixture_values["knowledge_base"]))
303 |
304 | # Create task manager if needed
305 | if "task_manager" in required_fixtures:
306 | logger.info("Setting up task_manager fixture")
307 | if "test_config" not in fixture_values:
308 | fixture_values["test_config"] = await create_test_config()
309 |
310 | fixture_values["task_manager"] = await create_task_manager(fixture_values["test_config"])
311 | resources_to_cleanup.append(("task_manager", fixture_values["task_manager"]))
312 |
313 | # Ensure all required fixtures are initialized
314 | missing_fixtures = set(required_fixtures.keys()) - set(fixture_values.keys())
315 | if missing_fixtures:
316 | logger.error(f"Missing required fixtures: {missing_fixtures}")
317 | return False
318 |
319 | # Run the actual test
320 | logger.info(f"Executing test with fixtures: {list(fixture_values.keys())}")
321 | test_kwargs = {name: value for name, value in fixture_values.items() if name in required_fixtures}
322 |
323 | # Check if the test function is an async function
324 | if inspect.iscoroutinefunction(test_func):
325 | # For async test functions, await them
326 | logger.info(f"Running async test: {test_name}")
327 | await test_func(**test_kwargs)
328 | else:
329 | # For regular test functions, just call them
330 | logger.info(f"Running synchronous test: {test_name}")
331 | test_func(**test_kwargs)
332 |
333 | logger.info(f"Test {test_name} completed successfully")
334 | return True
335 |
336 | except Exception as e:
337 | logger.error(f"Test {test_name} failed with error: {e}")
338 | import traceback
339 | logger.error(traceback.format_exc())
340 | return False
341 |
342 | finally:
343 | # Clean up resources in reverse order (LIFO)
344 | logger.info("Cleaning up resources...")
345 | for resource_type, resource in reversed(resources_to_cleanup):
346 | try:
347 | if resource_type == "vector_store":
348 | await cleanup_vector_store(resource)
349 | elif resource_type == "knowledge_base":
350 | await cleanup_knowledge_base(resource)
351 | elif resource_type == "task_manager":
352 | await cleanup_task_manager(resource)
353 | except Exception as e:
354 | logger.error(f"Error cleaning up {resource_type}: {e}")
355 |
356 |
357 | def main():
358 | """Run a component test with proper async fixture handling."""
359 | if len(sys.argv) < 2:
360 | print("Usage: python component_test_runner.py <module_path> <test_name>")
361 | sys.exit(1)
362 |
363 | module_path = sys.argv[1]
364 |
365 | # Configure event loop policy for macOS if needed
366 | if sys.platform == 'darwin':
367 | import platform
368 | if int(platform.mac_ver()[0].split('.')[0]) >= 10:
369 | asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
370 |
371 | try:
372 | if len(sys.argv) < 3:
373 | # No specific test provided, use module discovery
374 | tests = get_module_tests(module_path)
375 | if not tests:
376 | logger.error(f"No tests found in {module_path}")
377 | sys.exit(1)
378 |
379 | # Run all tests in the module
380 | successful_tests = 0
381 | for test_name in tests:
382 | loop = asyncio.new_event_loop()
383 | asyncio.set_event_loop(loop)
384 | test_result = loop.run_until_complete(run_component_test(module_path, test_name))
385 | loop.close()
386 | if test_result:
387 | successful_tests += 1
388 |
389 | # Report test results
390 | logger.info(f"Test Results: {successful_tests}/{len(tests)} tests passed")
391 | sys.exit(0 if successful_tests == len(tests) else 1)
392 | else:
393 | # Run a specific test
394 | test_name = sys.argv[2]
395 |
396 | loop = asyncio.new_event_loop()
397 | asyncio.set_event_loop(loop)
398 | result = loop.run_until_complete(run_component_test(module_path, test_name))
399 | loop.close()
400 | sys.exit(0 if result else 1)
401 | except KeyboardInterrupt:
402 | logger.info("Test execution interrupted")
403 | sys.exit(130) # 130 is the standard exit code for SIGINT
404 | except Exception as e:
405 | logger.error(f"Unhandled exception during test execution: {e}")
406 | import traceback
407 | logger.error(traceback.format_exc())
408 | sys.exit(1)
409 |
410 |
411 | if __name__ == "__main__":
412 | main()
413 |
```
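
The `main()` entry point above accepts either a module path alone (to run every discovered test) or a module path plus a single test name. Below is a minimal sketch of driving the runner programmatically instead, assuming the repository root is the current working directory and a Qdrant instance is reachable at `http://localhost:6333` (the same assumptions the runner itself makes); the module path is illustrative.

```python
# Hypothetical driver for component_test_runner.py; the module path is
# illustrative and Qdrant is assumed to be running on localhost:6333.
import asyncio

from component_test_runner import get_module_tests, run_component_test

module = "tests/components/test_vector_store.py"

# Discover tests via the runner's regex scan, then run each one in its
# own event loop, mirroring what main() does per test.
for name in get_module_tests(module):
    passed = asyncio.run(run_component_test(module, name))
    print(f"{name}: {'PASS' if passed else 'FAIL'}")
```

The equivalent CLI invocation would be `python component_test_runner.py tests/components/test_vector_store.py`, with an optional second argument to select a single test.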
--------------------------------------------------------------------------------
/trajectories/tosinakinosho/default__openrouter/anthropic/claude-3.5-sonnet-20240620:beta__t-0.00__p-1.00__c-3.00___03565e/03565e/config.yaml:
--------------------------------------------------------------------------------
```yaml
1 | '{"env":{"deployment":{"image":"python:3.11","port":null,"docker_args":[],"startup_timeout":180.0,"pull":"missing","remove_images":false,"python_standalone_dir":"/root","platform":null,"type":"docker"},"repo":{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight","base_commit":"HEAD","type":"local"},"post_startup_commands":[],"post_startup_command_timeout":500,"name":"main"},"agent":{"name":"main","templates":{"system_template":"SETTING:
2 | You are an autonomous programmer, and you''re working directly in the command line
3 | with a special interface.\n\nThe special interface consists of a file editor that
4 | shows you {{WINDOW}} lines of a file at a time.\nIn addition to typical bash commands,
5 | you can also use specific commands to help you navigate and edit files.\nTo call
6 | a command, you need to invoke it with a function call/tool call.\n\nPlease note
7 | that THE EDIT COMMAND REQUIRES PROPER INDENTATION.\n\nFor example, if you are looking
8 | at this file:\n\ndef fct():\n print(\"Hello world\")\n\nand you want to edit
9 | the file to read:\n\ndef fct():\n print(\"Hello\")\n print(\"world\")\n\nyou
10 | search string should be `Hello world` and your replace string should be `\"Hello\"\\n print(\"world\")`\n(note
11 | the extra spaces before the print statement!).\n\nYou could also get the same result
12 | by search for ` print(\"Hello world\")` and replace with ` print(\"Hello\")\\n print(\"world\")`.\n\nRESPONSE
13 | FORMAT:\nYour shell prompt is formatted as follows:\n(Open file: <path>)\n(Current
14 | directory: <cwd>)\nbash-$\n\nFirst, you should _always_ include a general thought
15 | about what you''re going to do next.\nThen, for every response, you must include
16 | exactly _ONE_ tool call/function call.\n\nRemember, you should always include a
17 | _SINGLE_ tool call/function call and then wait for a response from the shell before
18 | continuing with more discussion and commands. Everything you include in the DISCUSSION
19 | section will be saved for future reference.\nIf you''d like to issue two commands
20 | at once, PLEASE DO NOT DO THAT! Please instead first submit just the first tool
21 | call, and then after receiving a response you''ll be able to issue the second .\nNote
22 | that the environment does NOT support interactive session commands (e.g. python,
23 | vim), so please do not invoke them.","instance_template":"We''re currently solving
24 | the following issue within our repository. Here''s the issue text:\nISSUE:\n{{problem_statement}}\n\nINSTRUCTIONS:\nNow,
25 | you''re going to solve this issue on your own. Your terminal session has started
26 | and you''re in the repository''s root directory. You can use any bash commands or
27 | the special interface to help you. Edit all the files you need to and run any checks
28 | or tests that you want.\nRemember, YOU SHOULD ALWAYS INCLUDE EXACTLY ONE TOOL CALL/FUNCTION
29 | CALL PER RESPONSE.\nWhen you''re satisfied with all of the changes you''ve made,
30 | you can submit your changes to the code base by simply running the submit command.\nNote
31 | however that you cannot use any interactive session commands (e.g. python, vim)
32 | in this environment, but you can write scripts and run them. E.g. you can write
33 | a python script and then run it with the python command.\n\nNOTE ABOUT THE EDIT
34 | COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate
35 | indentation before each line!\n\nGENERAL IMPORTANT TIPS:\n\n1. If you run a command
36 | and it doesn''t work, try running a different command. A command that did not work
37 | once will not work the second time unless you modify it!\n\n2. If you open a file
38 | and need to get to an area around a specific line that is not in the first 100 lines,
39 | say line 583, don''t just use the scroll_down command multiple times. Instead, use
40 | the goto 583 command. It''s much quicker.\n\n3. If the bug reproduction script requires
41 | inputting/reading a specific file, such as buggy-input.png, and you''d like to understand
42 | how to input that file, conduct a search in the existing repo code, to see whether
43 | someone else has already done that. Do this by running the command: find_file \"buggy-input.png\"
44 | If that doesn''t work, use the linux ''find'' command.\n\n4. Always make sure to
45 | look at the currently open file and the current working directory (which appears
46 | right after the currently open file). The currently open file might be in a different
47 | directory than the working directory! Note that some commands, such as ''create'',
48 | open files, so they might change the current open file.\n\n5. When editing files,
49 | it is easy to accidentally to write code with incorrect indentation or make other
50 | mistakes. Always check the code after you issue an edit to make sure that it reflects
51 | what you wanted to accomplish. If it didn''t, issue another command to fix it.\n\n6.
52 | When editing files, first explain the code you want to edit and why it is causing
53 | the problem. Then explain the edit you want to make and how it fixes the problem.
54 | Explain how the edit does not break existing functionality.\n\n7. Do not try to
55 | install any packages with `pip`, `conda`, or any other way. This will usually not
56 | work. If the environment is not set up correctly, try to fix the issue without executing
57 | python code or running any tests that require the package installed.\n\nSTRATEGY:\n\n1.
58 | Always start by trying to replicate the bug that the issues discusses.\n If the
59 | issue includes code for reproducing the bug, we recommend that you re-implement
60 | that in your environment, and run it to make sure you can reproduce the bug.\n Then
61 | start trying to fix it.\n\n If the bug reproduction script does not print anything
62 | when it successfully runs, we recommend adding a print(\"Script completed successfully,
63 | no errors.\") command at the end of the file,\n so that you can be sure that the
64 | script indeed ran fine all the way through.\n\n2. Locate relevant code using the
65 | find and search commands. `open` the file you want to edit.\n\n3. Use the `edit`
66 | command to perform edits.\n\n4. When you think you''ve fixed the bug, re-run the
67 | bug reproduction script to make sure that the bug has indeed been fixed.\n\n5. Create
68 | additional tests to verify the fix in a style similar to the existing reproduction
69 | script. In particular, make sure to test edge cases.\n If you find any issues,
70 | go back to the file you edited and perform further edits.\n\n(Open file: {{open_file}})\n(Current
71 | directory: {{working_dir}})\nbash-$","next_step_template":"{{observation}}\n(Open
72 | file: {{open_file}})\n(Current directory: {{working_dir}})\nbash-$","next_step_truncated_observation_template":"Observation:
73 | {{observation}}<response clipped><NOTE>Observations should not exceeded {{max_observation_length}}
74 | characters. {{elided_chars}} characters were elided. Please try a different command
75 | that produces less output or use head/tail/grep/redirect the output to a file. Do
76 | not use interactive pagers.</NOTE>","max_observation_length":100000,"next_step_no_output_template":"Your
77 | command ran successfully and did not produce any output.\n(Open file: {{open_file}})\n(Current
78 | directory: {{working_dir}})\nbash-$","strategy_template":null,"demonstration_template":"Here
79 | is a demonstration of how to correctly accomplish this task.\nIt is included to
80 | show you how to correctly use the interface.\nYou do not need to follow exactly
81 | what is done in the demonstration.\n--- DEMONSTRATION ---\n{{demonstration}}\n---
82 | END OF DEMONSTRATION ---\n","demonstrations":["/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj"],"put_demos_in_history":true,"shell_check_error_template":"Your
83 | bash command contained syntax errors and was NOT executed. Please fix the syntax
84 | errors and try again. This can be the result of not adhering to the syntax for multi-line
85 | commands. Here is the output of `bash -n`:\n{{bash_stdout}}\n{{bash_stderr}}","command_cancelled_timeout_template":"The
86 | command ''{{command}}'' was cancelled because it took more than {{timeout}} seconds.
87 | Please try a different command that completes more quickly."},"tools":{"filter":{"blocklist_error_template":"Operation
88 | ''{{action}}'' is not supported by this environment.","blocklist":["vim","vi","emacs","nano","nohup","gdb","less","tail
89 | -f","python -m venv","make"],"blocklist_standalone":["python","python3","ipython","bash","sh","/bin/bash","/bin/sh","nohup","vi","vim","emacs","nano","su"],"block_unless_regex":{"radare2":"\\b(?:radare2)\\b.*\\s+-c\\s+.*","r2":"\\b(?:radare2)\\b.*\\s+-c\\s+.*"}},"bundles":[{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/registry","hidden_tools":[]},{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/defaults","hidden_tools":[]},{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/search","hidden_tools":[]},{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/edit_replace","hidden_tools":[]},{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/submit","hidden_tools":[]}],"env_variables":{"WINDOW":100,"OVERLAP":2},"registry_variables":{},"submit_command":"submit","parse_function":{"error_message":"Your
90 | output was not formatted correctly. You must always include one discussion and one
91 | command as part of your response. Make sure you do not have multiple discussion/command
92 | tags.\nPlease make sure your output precisely matches the following format:\nDISCUSSION\nDiscuss
93 | here with yourself about what your planning and what you''re going to do in this
94 | step.\n\n```\ncommand(s) that you''re going to run\n```\n","type":"thought_action"},"enable_bash_tool":true,"format_error_template":"Your
95 | output was not formatted correctly. You must always include one discussion and one
96 | command as part of your response. Make sure you do not have multiple discussion/command
97 | tags.\nPlease make sure your output precisely matches the following format:\nDISCUSSION\nDiscuss
98 | here with yourself about what your planning and what you''re going to do in this
99 | step.\n\n```\ncommand(s) that you''re going to run\n```\n","command_docs":"bash:\n docstring:
100 | runs the given command directly in bash\n signature: <command>\n arguments:\n -
101 | command (string) [required]: The bash command to execute.\n\ngoto:\n docstring:
102 | moves the window to show <line_number>\n signature: goto <line_number>\n arguments:\n -
103 | line_number (integer) [required]: the line number to move the window to\n\nopen:\n docstring:
104 | opens the file at the given path in the editor. If line_number is provided, the
105 | window will be move to include that line\n signature: open \"<path>\" [<line_number>]\n arguments:\n -
106 | path (string) [required]: the path to the file to open\n - line_number (integer)
107 | [optional]: the line number to move the window to (if not provided, the window will
108 | start at the top of the file)\n\ncreate:\n docstring: creates and opens a new file
109 | with the given name\n signature: create <filename>\n arguments:\n - filename
110 | (string) [required]: the name of the file to create\n\nscroll_up:\n docstring:
111 | moves the window up 100 lines\n signature: scroll_up\n\nscroll_down:\n docstring:
112 | moves the window down 100 lines\n signature: scroll_down\n\nfind_file:\n docstring:
113 | finds all files with the given name or pattern in dir. If dir is not provided, searches
114 | in the current directory\n signature: find_file <file_name> [<dir>]\n arguments:\n -
115 | file_name (string) [required]: the name of the file or pattern to search for. supports
116 | shell-style wildcards (e.g. *.py)\n - dir (string) [optional]: the directory
117 | to search in (if not provided, searches in the current directory)\n\nsearch_dir:\n docstring:
118 | searches for search_term in all files in dir. If dir is not provided, searches in
119 | the current directory\n signature: search_dir <search_term> [<dir>]\n arguments:\n -
120 | search_term (string) [required]: the term to search for\n - dir (string) [optional]:
121 | the directory to search in (if not provided, searches in the current directory)\n\nsearch_file:\n docstring:
122 | searches for search_term in file. If file is not provided, searches in the current
123 | open file\n signature: search_file <search_term> [<file>]\n arguments:\n -
124 | search_term (string) [required]: the term to search for\n - file (string) [optional]:
125 | the file to search in (if not provided, searches in the current open file)\n\nedit:\n docstring:
126 | Replace first occurrence of <search> with <replace> in the currently displayed lines.
127 | If replace-all is True , replace all occurrences of <search> with <replace>.\nFor
128 | example, if you are looking at this file:\ndef fct():\n print(\"Hello world\")\n\nand
129 | you want to edit the file to read:\ndef fct():\n print(\"Hello\")\n print(\"world\")\n\nyou
130 | can search for `Hello world` and replace with `\"Hello\"\\n print(\"world\")`
131 | (note the extra spaces before the print statement!).\nTips:\n1. Always include proper
132 | whitespace/indentation 2. When you are adding an if/with/try statement, you need
133 | to INDENT the block that follows, so make sure to include it in both your search
134 | and replace strings! 3. If you are wrapping code in a try statement, make sure to
135 | also add an ''except'' or ''finally'' block.\nBefore every edit, please\n1. Explain
136 | the code you want to edit and why it is causing the problem 2. Explain the edit
137 | you want to make and how it fixes the problem 3. Explain how the edit does not break
138 | existing functionality\n\n signature: edit <search> <replace> [<replace-all>]\n\n arguments:\n -
139 | search (string) [required]: the text to search for (make sure to include proper
140 | whitespace if needed)\n - replace (string) [required]: the text to replace the
141 | search with (make sure to include proper whitespace if needed)\n - replace-all
142 | (boolean) [optional]: replace all occurrences rather than the first occurrence within
143 | the displayed lines\n\ninsert:\n docstring: Insert <text> at the end of the currently
144 | opened file or after <line> if specified.\n\n signature: insert <text> [<line>]\n\n arguments:\n -
145 | text (string) [required]: the text to insert\n - line (integer) [optional]: the
146 | line number to insert the text as new lines after\n\nsubmit:\n docstring: submits
147 | the current file\n signature: submit\n\n","multi_line_command_endings":{},"submit_command_end_name":null,"reset_commands":[],"execution_timeout":30,"install_timeout":300,"total_execution_timeout":1800,"max_consecutive_execution_timeouts":3},"history_processors":[{"n":5,"polling":1,"always_remove_output_for_tags":["remove_output"],"always_keep_output_for_tags":["keep_output"],"type":"last_n_observations"}],"model":{"name":"openrouter/anthropic/claude-3.5-sonnet-20240620:beta","per_instance_cost_limit":3.0,"total_cost_limit":0.0,"per_instance_call_limit":0,"temperature":0.0,"top_p":1.0,"api_base":null,"api_version":null,"api_key":null,"stop":[],"completion_kwargs":{},"convert_system_to_user":false,"retry":{"retries":20,"min_wait":10.0,"max_wait":120.0},"delay":0.0,"fallbacks":[],"choose_api_key_by_thread":true,"max_input_tokens":null,"max_output_tokens":null},"max_requeries":3,"action_sampler":null,"type":"default"},"problem_statement":{"text":"#
148 | Debug MCP Codebase Insight Tests","extra_fields":{},"type":"text","id":"03565e"},"output_dir":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/trajectories/tosinakinosho/default__openrouter/anthropic/claude-3.5-sonnet-20240620:beta__t-0.00__p-1.00__c-3.00___03565e","actions":{"open_pr":false,"pr_config":{"skip_if_commits_reference_issue":true},"apply_patch_locally":false},"env_var_path":null}'
149 |
```
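
Note that this file is a YAML document whose entire content is one single-quoted scalar containing a JSON object (the doubled `''` sequences are YAML's escape for a literal single quote), so reading it takes two decoding passes. A minimal sketch, assuming PyYAML is installed:

```python
# Two-pass decode: YAML quoted scalar -> JSON string -> dict.
import json
import yaml

path = ("trajectories/tosinakinosho/default__openrouter/anthropic/"
        "claude-3.5-sonnet-20240620:beta__t-0.00__p-1.00__c-3.00___03565e/"
        "03565e/config.yaml")

with open(path) as f:
    raw = yaml.safe_load(f)   # pass 1: the whole file is one quoted scalar
config = json.loads(raw)      # pass 2: that scalar is a JSON document

print(config["agent"]["model"]["name"])      # the model identifier
print(config["env"]["deployment"]["image"])  # "python:3.11"
```

The same two-pass read applies to the sibling `config.yaml` shown next, which differs mainly in the model name and its parse-function type.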
--------------------------------------------------------------------------------
/trajectories/tosinakinosho/default__claude-3-5-sonnet-20240620__t-0.00__p-1.00__c-3.00___03565e/03565e/config.yaml:
--------------------------------------------------------------------------------
```yaml
1 | '{"env":{"deployment":{"image":"python:3.11","port":null,"docker_args":[],"startup_timeout":180.0,"pull":"missing","remove_images":false,"python_standalone_dir":"/root","platform":null,"type":"docker"},"repo":null,"post_startup_commands":[],"post_startup_command_timeout":500,"name":"main"},"agent":{"name":"main","templates":{"system_template":"SETTING:
2 | You are an autonomous programmer, and you''re working directly in the command line
3 | with a special interface.\n\nThe special interface consists of a file editor that
4 | shows you {{WINDOW}} lines of a file at a time.\nIn addition to typical bash commands,
5 | you can also use specific commands to help you navigate and edit files.\nTo call
6 | a command, you need to invoke it with a function call/tool call.\n\nPlease note
7 | that THE EDIT COMMAND REQUIRES PROPER INDENTATION.\n\nFor example, if you are looking
8 | at this file:\n\ndef fct():\n print(\"Hello world\")\n\nand you want to edit
9 | the file to read:\n\ndef fct():\n print(\"Hello\")\n print(\"world\")\n\nyou
10 | search string should be `Hello world` and your replace string should be `\"Hello\"\\n print(\"world\")`\n(note
11 | the extra spaces before the print statement!).\n\nYou could also get the same result
12 | by search for ` print(\"Hello world\")` and replace with ` print(\"Hello\")\\n print(\"world\")`.\n\nRESPONSE
13 | FORMAT:\nYour shell prompt is formatted as follows:\n(Open file: <path>)\n(Current
14 | directory: <cwd>)\nbash-$\n\nFirst, you should _always_ include a general thought
15 | about what you''re going to do next.\nThen, for every response, you must include
16 | exactly _ONE_ tool call/function call.\n\nRemember, you should always include a
17 | _SINGLE_ tool call/function call and then wait for a response from the shell before
18 | continuing with more discussion and commands. Everything you include in the DISCUSSION
19 | section will be saved for future reference.\nIf you''d like to issue two commands
20 | at once, PLEASE DO NOT DO THAT! Please instead first submit just the first tool
21 | call, and then after receiving a response you''ll be able to issue the second .\nNote
22 | that the environment does NOT support interactive session commands (e.g. python,
23 | vim), so please do not invoke them.","instance_template":"We''re currently solving
24 | the following issue within our repository. Here''s the issue text:\nISSUE:\n{{problem_statement}}\n\nINSTRUCTIONS:\nNow,
25 | you''re going to solve this issue on your own. Your terminal session has started
26 | and you''re in the repository''s root directory. You can use any bash commands or
27 | the special interface to help you. Edit all the files you need to and run any checks
28 | or tests that you want.\nRemember, YOU SHOULD ALWAYS INCLUDE EXACTLY ONE TOOL CALL/FUNCTION
29 | CALL PER RESPONSE.\nWhen you''re satisfied with all of the changes you''ve made,
30 | you can submit your changes to the code base by simply running the submit command.\nNote
31 | however that you cannot use any interactive session commands (e.g. python, vim)
32 | in this environment, but you can write scripts and run them. E.g. you can write
33 | a python script and then run it with the python command.\n\nNOTE ABOUT THE EDIT
34 | COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate
35 | indentation before each line!\n\nGENERAL IMPORTANT TIPS:\n\n1. If you run a command
36 | and it doesn''t work, try running a different command. A command that did not work
37 | once will not work the second time unless you modify it!\n\n2. If you open a file
38 | and need to get to an area around a specific line that is not in the first 100 lines,
39 | say line 583, don''t just use the scroll_down command multiple times. Instead, use
40 | the goto 583 command. It''s much quicker.\n\n3. If the bug reproduction script requires
41 | inputting/reading a specific file, such as buggy-input.png, and you''d like to understand
42 | how to input that file, conduct a search in the existing repo code, to see whether
43 | someone else has already done that. Do this by running the command: find_file \"buggy-input.png\"
44 | If that doesn''t work, use the linux ''find'' command.\n\n4. Always make sure to
45 | look at the currently open file and the current working directory (which appears
46 | right after the currently open file). The currently open file might be in a different
47 | directory than the working directory! Note that some commands, such as ''create'',
48 | open files, so they might change the current open file.\n\n5. When editing files,
49 | it is easy to accidentally to write code with incorrect indentation or make other
50 | mistakes. Always check the code after you issue an edit to make sure that it reflects
51 | what you wanted to accomplish. If it didn''t, issue another command to fix it.\n\n6.
52 | When editing files, first explain the code you want to edit and why it is causing
53 | the problem. Then explain the edit you want to make and how it fixes the problem.
54 | Explain how the edit does not break existing functionality.\n\n7. Do not try to
55 | install any packages with `pip`, `conda`, or any other way. This will usually not
56 | work. If the environment is not set up correctly, try to fix the issue without executing
57 | python code or running any tests that require the package installed.\n\nSTRATEGY:\n\n1.
58 | Always start by trying to replicate the bug that the issues discusses.\n If the
59 | issue includes code for reproducing the bug, we recommend that you re-implement
60 | that in your environment, and run it to make sure you can reproduce the bug.\n Then
61 | start trying to fix it.\n\n If the bug reproduction script does not print anything
62 | when it successfully runs, we recommend adding a print(\"Script completed successfully,
63 | no errors.\") command at the end of the file,\n so that you can be sure that the
64 | script indeed ran fine all the way through.\n\n2. Locate relevant code using the
65 | find and search commands. `open` the file you want to edit.\n\n3. Use the `edit`
66 | command to perform edits.\n\n4. When you think you''ve fixed the bug, re-run the
67 | bug reproduction script to make sure that the bug has indeed been fixed.\n\n5. Create
68 | additional tests to verify the fix in a style similar to the existing reproduction
69 | script. In particular, make sure to test edge cases.\n If you find any issues,
70 | go back to the file you edited and perform further edits.\n\n(Open file: {{open_file}})\n(Current
71 | directory: {{working_dir}})\nbash-$","next_step_template":"{{observation}}\n(Open
72 | file: {{open_file}})\n(Current directory: {{working_dir}})\nbash-$","next_step_truncated_observation_template":"Observation:
73 | {{observation}}<response clipped><NOTE>Observations should not exceeded {{max_observation_length}}
74 | characters. {{elided_chars}} characters were elided. Please try a different command
75 | that produces less output or use head/tail/grep/redirect the output to a file. Do
76 | not use interactive pagers.</NOTE>","max_observation_length":100000,"next_step_no_output_template":"Your
77 | command ran successfully and did not produce any output.\n(Open file: {{open_file}})\n(Current
78 | directory: {{working_dir}})\nbash-$","strategy_template":null,"demonstration_template":"Here
79 | is a demonstration of how to correctly accomplish this task.\nIt is included to
80 | show you how to correctly use the interface.\nYou do not need to follow exactly
81 | what is done in the demonstration.\n--- DEMONSTRATION ---\n{{demonstration}}\n---
82 | END OF DEMONSTRATION ---\n","demonstrations":["/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/trajectories/demonstrations/replay__marshmallow-code__marshmallow-1867__function_calling_replace__install-1/marshmallow-code__marshmallow-1867.traj"],"put_demos_in_history":true,"shell_check_error_template":"Your
83 | bash command contained syntax errors and was NOT executed. Please fix the syntax
84 | errors and try again. This can be the result of not adhering to the syntax for multi-line
85 | commands. Here is the output of `bash -n`:\n{{bash_stdout}}\n{{bash_stderr}}","command_cancelled_timeout_template":"The
86 | command ''{{command}}'' was cancelled because it took more than {{timeout}} seconds.
87 | Please try a different command that completes more quickly."},"tools":{"filter":{"blocklist_error_template":"Operation
88 | ''{{action}}'' is not supported by this environment.","blocklist":["vim","vi","emacs","nano","nohup","gdb","less","tail
89 | -f","python -m venv","make"],"blocklist_standalone":["python","python3","ipython","bash","sh","/bin/bash","/bin/sh","nohup","vi","vim","emacs","nano","su"],"block_unless_regex":{"radare2":"\\b(?:radare2)\\b.*\\s+-c\\s+.*","r2":"\\b(?:radare2)\\b.*\\s+-c\\s+.*"}},"bundles":[{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/registry","hidden_tools":[]},{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/defaults","hidden_tools":[]},{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/search","hidden_tools":[]},{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/edit_replace","hidden_tools":[]},{"path":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/SWE-agent/tools/submit","hidden_tools":[]}],"env_variables":{"WINDOW":100,"OVERLAP":2},"registry_variables":{},"submit_command":"submit","parse_function":{"error_message":"{%-
90 | if error_code == \"missing\" -%}\nYour last output did not use any tool calls!\nPlease
91 | make sure your output includes exactly _ONE_ function call!\nYou must invoke the
92 | function directly using the function call format.\nYou cannot invoke commands with
93 | ```, you have to use the function call format.\nIf you think you have already resolved
94 | the issue, please submit your changes by running the `submit` command.\nIf you think
95 | you cannot solve the problem, please run `exit_forfeit` (if available) or `submit`.\nElse,
96 | please continue with a new tool call!\n{%- elif error_code == \"multiple\" -%}\nYour
97 | last output included multiple tool calls!\nPlease make sure your output includes
98 | a thought and exactly _ONE_ function call.\n{%- elif error_code == \"unexpected_arg\"
99 | -%}\nYour action could not be parsed properly: {{exception_message}}.\nMake sure
100 | your function call doesn''t include any extra arguments that are not in the allowed
101 | arguments, and only use the allowed commands.\n{%- else -%}\nYour action could not
102 | be parsed properly: {{exception_message}}.\n{% endif %}\n","type":"function_calling"},"enable_bash_tool":true,"format_error_template":"{%-
103 | if error_code == \"missing\" -%}\nYour last output did not use any tool calls!\nPlease
104 | make sure your output includes exactly _ONE_ function call!\nYou must invoke the
105 | function directly using the function call format.\nYou cannot invoke commands with
106 | ```, you have to use the function call format.\nIf you think you have already resolved
107 | the issue, please submit your changes by running the `submit` command.\nIf you think
108 | you cannot solve the problem, please run `exit_forfeit` (if available) or `submit`.\nElse,
109 | please continue with a new tool call!\n{%- elif error_code == \"multiple\" -%}\nYour
110 | last output included multiple tool calls!\nPlease make sure your output includes
111 | a thought and exactly _ONE_ function call.\n{%- elif error_code == \"unexpected_arg\"
112 | -%}\nYour action could not be parsed properly: {{exception_message}}.\nMake sure
113 | your function call doesn''t include any extra arguments that are not in the allowed
114 | arguments, and only use the allowed commands.\n{%- else -%}\nYour action could not
115 | be parsed properly: {{exception_message}}.\n{% endif %}\n","command_docs":"bash:\n docstring:
116 | runs the given command directly in bash\n signature: <command>\n arguments:\n -
117 | command (string) [required]: The bash command to execute.\n\ngoto:\n docstring:
118 | moves the window to show <line_number>\n signature: goto <line_number>\n arguments:\n -
119 | line_number (integer) [required]: the line number to move the window to\n\nopen:\n docstring:
120 | opens the file at the given path in the editor. If line_number is provided, the
121 | window will be move to include that line\n signature: open \"<path>\" [<line_number>]\n arguments:\n -
122 | path (string) [required]: the path to the file to open\n - line_number (integer)
123 | [optional]: the line number to move the window to (if not provided, the window will
124 | start at the top of the file)\n\ncreate:\n docstring: creates and opens a new file
125 | with the given name\n signature: create <filename>\n arguments:\n - filename
126 | (string) [required]: the name of the file to create\n\nscroll_up:\n docstring:
127 | moves the window up 100 lines\n signature: scroll_up\n\nscroll_down:\n docstring:
128 | moves the window down 100 lines\n signature: scroll_down\n\nfind_file:\n docstring:
129 | finds all files with the given name or pattern in dir. If dir is not provided, searches
130 | in the current directory\n signature: find_file <file_name> [<dir>]\n arguments:\n -
131 | file_name (string) [required]: the name of the file or pattern to search for. supports
132 | shell-style wildcards (e.g. *.py)\n - dir (string) [optional]: the directory
133 | to search in (if not provided, searches in the current directory)\n\nsearch_dir:\n docstring:
134 | searches for search_term in all files in dir. If dir is not provided, searches in
135 | the current directory\n signature: search_dir <search_term> [<dir>]\n arguments:\n -
136 | search_term (string) [required]: the term to search for\n - dir (string) [optional]:
137 | the directory to search in (if not provided, searches in the current directory)\n\nsearch_file:\n docstring:
138 | searches for search_term in file. If file is not provided, searches in the current
139 | open file\n signature: search_file <search_term> [<file>]\n arguments:\n -
140 | search_term (string) [required]: the term to search for\n - file (string) [optional]:
141 | the file to search in (if not provided, searches in the current open file)\n\nedit:\n docstring:
142 | Replace first occurrence of <search> with <replace> in the currently displayed lines.
143 | If replace-all is True , replace all occurrences of <search> with <replace>.\nFor
144 | example, if you are looking at this file:\ndef fct():\n print(\"Hello world\")\n\nand
145 | you want to edit the file to read:\ndef fct():\n print(\"Hello\")\n print(\"world\")\n\nyou
146 | can search for `Hello world` and replace with `\"Hello\"\\n print(\"world\")`
147 | (note the extra spaces before the print statement!).\nTips:\n1. Always include proper
148 | whitespace/indentation 2. When you are adding an if/with/try statement, you need
149 | to INDENT the block that follows, so make sure to include it in both your search
150 | and replace strings! 3. If you are wrapping code in a try statement, make sure to
151 | also add an ''except'' or ''finally'' block.\nBefore every edit, please\n1. Explain
152 | the code you want to edit and why it is causing the problem 2. Explain the edit
153 | you want to make and how it fixes the problem 3. Explain how the edit does not break
154 | existing functionality\n\n signature: edit <search> <replace> [<replace-all>]\n\n arguments:\n -
155 | search (string) [required]: the text to search for (make sure to include proper
156 | whitespace if needed)\n - replace (string) [required]: the text to replace the
157 | search with (make sure to include proper whitespace if needed)\n - replace-all
158 | (boolean) [optional]: replace all occurrences rather than the first occurrence within
159 | the displayed lines\n\ninsert:\n docstring: Insert <text> at the end of the currently
160 | opened file or after <line> if specified.\n\n signature: insert <text> [<line>]\n\n arguments:\n -
161 | text (string) [required]: the text to insert\n - line (integer) [optional]: the
162 | line number to insert the text as new lines after\n\nsubmit:\n docstring: submits
163 | the current file\n signature: submit\n\n","multi_line_command_endings":{},"submit_command_end_name":null,"reset_commands":[],"execution_timeout":30,"install_timeout":300,"total_execution_timeout":1800,"max_consecutive_execution_timeouts":3},"history_processors":[{"n":5,"polling":1,"always_remove_output_for_tags":["remove_output"],"always_keep_output_for_tags":["keep_output"],"type":"last_n_observations"}],"model":{"name":"claude-3-5-sonnet-20240620","per_instance_cost_limit":3.0,"total_cost_limit":0.0,"per_instance_call_limit":0,"temperature":0.0,"top_p":1.0,"api_base":null,"api_version":null,"api_key":null,"stop":[],"completion_kwargs":{},"convert_system_to_user":false,"retry":{"retries":20,"min_wait":10.0,"max_wait":120.0},"delay":0.0,"fallbacks":[],"choose_api_key_by_thread":true,"max_input_tokens":null,"max_output_tokens":null},"max_requeries":3,"action_sampler":null,"type":"default"},"problem_statement":{"text":"#
164 | Debug MCP Codebase Insight Tests","extra_fields":{},"type":"text","id":"03565e"},"output_dir":"/Users/tosinakinosho/workspaces/mcp-codebase-insight/trajectories/tosinakinosho/default__claude-3-5-sonnet-20240620__t-0.00__p-1.00__c-3.00___03565e","actions":{"open_pr":false,"pr_config":{"skip_if_commits_reference_issue":true},"apply_patch_locally":false},"env_var_path":null}'
165 |
```
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/sse.py:
--------------------------------------------------------------------------------
```python
1 | """Server-Sent Events (SSE) transport implementation for MCP."""
2 |
3 | import asyncio
4 | import logging
5 | import json
6 | from typing import Any, Callable, Dict, List, Optional, Tuple
7 | from datetime import datetime
8 | from starlette.applications import Starlette
9 | from starlette.routing import Mount, Route
10 | from starlette.requests import Request
11 | from starlette.responses import Response, JSONResponse, RedirectResponse, StreamingResponse
12 | import uuid
13 | from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
14 | from starlette.middleware.cors import CORSMiddleware
15 |
16 | from mcp.server.fastmcp import FastMCP
17 | from mcp.server.sse import SseServerTransport
18 | from ..utils.logger import get_logger
19 |
20 | logger = get_logger(__name__)
21 |
22 | async def send_heartbeats(queue: asyncio.Queue, interval: int = 30):
23 | """Send periodic heartbeat messages to keep the connection alive.
24 |
25 | Args:
26 | queue: The queue to send heartbeats to
27 | interval: Time between heartbeats in seconds
28 | """
29 | while True:
30 | try:
31 | await queue.put({"type": "heartbeat", "timestamp": datetime.utcnow().isoformat()})
32 | await asyncio.sleep(interval)
33 | except asyncio.CancelledError:
34 | break
35 | except Exception as e:
36 | logger.error(f"Error sending heartbeat: {e}")
37 | await asyncio.sleep(1) # Brief pause before retrying
38 |
39 | class CodebaseInsightSseTransport(SseServerTransport):
40 | """Custom SSE transport implementation for Codebase Insight."""
41 |
42 | def __init__(self, endpoint: str):
43 | """Initialize the SSE transport.
44 |
45 | Args:
46 | endpoint: The endpoint path for SSE connections
47 | """
48 | super().__init__(endpoint)
49 | self.connections = {}
50 | self.message_queue = asyncio.Queue()
51 | logger.info(f"Initializing SSE transport with endpoint: {endpoint}")
52 |
53 | async def handle_sse(self, request: Request) -> StreamingResponse:
54 | """Handle incoming SSE connection requests.
55 |
56 | Args:
57 | request: The incoming HTTP request
58 |
59 | Returns:
60 | StreamingResponse for the SSE connection
61 | """
62 | connection_id = str(uuid.uuid4())
63 | queue = asyncio.Queue()
64 | self.connections[connection_id] = queue
65 |
66 | logger.info(f"New SSE connection established: {connection_id}")
67 | logger.debug(f"Request headers: {dict(request.headers)}")
68 | logger.debug(f"Active connections: {len(self.connections)}")
69 |
70 | async def event_generator():
71 | try:
72 | logger.debug(f"Starting event generator for connection {connection_id}")
73 | heartbeat_task = asyncio.create_task(send_heartbeats(queue))
74 | logger.debug(f"Heartbeat task started for connection {connection_id}")
75 |
76 | while True:
77 | try:
78 | message = await queue.get()
79 | logger.debug(f"Connection {connection_id} received message: {message}")
80 |
81 | if isinstance(message, dict):
82 | data = json.dumps(message)
83 | else:
84 | data = str(message)
85 |
86 | yield f"data: {data}\n\n"
87 | logger.debug(f"Sent message to connection {connection_id}")
88 |
89 | except asyncio.CancelledError:
90 | logger.info(f"Event generator cancelled for connection {connection_id}")
91 | break
92 | except Exception as e:
93 | logger.error(f"Error in event generator for connection {connection_id}: {e}")
94 | break
95 |
96 | finally:
97 | heartbeat_task.cancel()
98 | try:
99 | await heartbeat_task
100 | except asyncio.CancelledError:
101 | pass
102 |
103 | if connection_id in self.connections:
104 | del self.connections[connection_id]
105 | logger.info(f"Event generator cleaned up for connection {connection_id}")
106 | logger.debug(f"Remaining active connections: {len(self.connections)}")
107 |
108 | return StreamingResponse(
109 | event_generator(),
110 | media_type="text/event-stream",
111 | headers={
112 | "Cache-Control": "no-cache",
113 | "Connection": "keep-alive",
114 | "X-Accel-Buffering": "no",
115 | "Access-Control-Allow-Origin": "*", # Allow CORS
116 | "Access-Control-Allow-Headers": "Content-Type",
117 | "Access-Control-Allow-Methods": "GET, POST"
118 | }
119 | )
120 |
121 | async def handle_message(self, request: Request) -> Response:
122 | """Handle incoming messages to be broadcast over SSE.
123 |
124 | Args:
125 | request: The incoming HTTP request with the message
126 |
127 | Returns:
128 | HTTP response indicating message handling status
129 | """
130 | try:
131 | message = await request.json()
132 |
133 | # Broadcast to all connections
134 | for queue in self.connections.values():
135 | await queue.put(message)
136 |
137 | return JSONResponse({"status": "message sent"})
138 |
139 | except Exception as e:
140 | logger.error(f"Error handling message: {e}")
141 | return JSONResponse(
142 | {"error": str(e)},
143 | status_code=500
144 | )
145 |
146 | async def send(self, message: Any) -> None:
147 | """Send a message to all connected clients.
148 |
149 | Args:
150 | message: The message to send
151 | """
152 | # Put message in queue for all connections
153 | for queue in self.connections.values():
154 | await queue.put(message)
155 |
156 | async def broadcast(self, message: Any) -> None:
157 | """Broadcast a message to all connected clients.
158 |
159 | Args:
160 | message: The message to broadcast
161 | """
162 | await self.send(message)
163 |
164 | async def connect(self) -> Tuple[MemoryObjectReceiveStream, MemoryObjectSendStream]:
165 | """Create a new SSE connection.
166 |
167 | Returns:
168 | Tuple of receive and send streams for the connection
169 | """
170 |         # anyio memory streams must be created via the factory function,
171 |         # which returns a (send, receive) pair sharing a buffered channel
172 |         send_stream, receive_stream = create_memory_object_stream(max_buffer_size=100)
173 |
174 | # Store the connection
175 | connection_id = str(uuid.uuid4())
176 | self.connections[connection_id] = send_stream
177 |
178 | return receive_stream, send_stream
179 |
180 | async def disconnect(self, connection_id: str) -> None:
181 | """Disconnect a client.
182 |
183 | Args:
184 | connection_id: The ID of the connection to disconnect
185 | """
186 | if connection_id in self.connections:
187 | del self.connections[connection_id]
188 | logger.info(f"Disconnected client: {connection_id}")
189 |
190 | async def verify_routes(app: Starlette) -> Dict[str, List[str]]:
191 | """Verify and log all registered routes in the application.
192 |
193 | Args:
194 | app: The Starlette application to verify
195 |
196 | Returns:
197 | Dictionary mapping route paths to their methods
198 | """
199 | routes = {}
200 | for route in app.routes:
201 | if isinstance(route, Mount):
202 | logger.info(f"Mount point: {route.path}")
203 | # Recursively verify mounted routes
204 | mounted_routes = await verify_routes(route.app)
205 | for path, methods in mounted_routes.items():
206 | full_path = f"{route.path}{path}"
207 | routes[full_path] = methods
208 | else:
209 | routes[route.path] = route.methods
210 | logger.info(f"Route: {route.path}, methods: {route.methods}")
211 | return routes
212 |
213 | def create_sse_server(mcp_server: Optional[FastMCP] = None) -> Starlette:
214 | """Create an SSE server instance.
215 |
216 | Args:
217 | mcp_server: Optional FastMCP instance to use. If not provided, a new one will be created.
218 |
219 | Returns:
220 | Starlette application configured for SSE
221 | """
222 | app = Starlette(debug=True) # Enable debug mode for better error reporting
223 |
224 | # Create SSE transport
225 | transport = CodebaseInsightSseTransport("/sse")
226 |
227 | # Add CORS middleware
228 | app.add_middleware(
229 | CORSMiddleware,
230 | allow_origins=["*"], # Allow all origins
231 | allow_credentials=True,
232 | allow_methods=["GET", "POST", "OPTIONS"],
233 | allow_headers=["*"],
234 | expose_headers=["*"]
235 | )
236 |
237 | async def health_check(request: Request) -> JSONResponse:
238 | """Health check endpoint."""
239 | return JSONResponse({
240 | "status": "ok",
241 | "timestamp": datetime.utcnow().isoformat(),
242 | "connections": len(transport.connections)
243 | })
244 |
245 | # Add routes
246 | app.add_route("/health", health_check, methods=["GET"])
247 | app.add_route("/sse", transport.handle_sse, methods=["GET"])
248 | app.add_route("/message", transport.handle_message, methods=["POST"])
249 |
250 | logger.info("Created SSE server with routes:")
251 | asyncio.create_task(verify_routes(app))
252 |
253 | return app
254 |
255 | class MCP_CodebaseInsightServer:
256 | """MCP server implementation for Codebase Insight.
257 |
258 | This class manages the Model Context Protocol server, connecting it to
259 | the Codebase Insight's core components and exposing them as MCP tools.
260 | """
261 |
262 | def __init__(self, server_state):
263 | """Initialize the MCP server with access to the application state.
264 |
265 | Args:
266 | server_state: The global server state providing access to components
267 | """
268 | self.state = server_state
269 | self.mcp_server = FastMCP(name="MCP-Codebase-Insight")
270 | self.tools_registered = False
271 | self._starlette_app = None # Cache the Starlette app
272 | logger.info("MCP Codebase Insight server initialized")
273 |
274 | async def cleanup(self):
275 | """Clean up resources used by the MCP server.
276 |
277 | This method ensures proper shutdown of the MCP server and
278 | releases any resources it might be holding.
279 | """
280 | logger.info("Cleaning up MCP server resources")
281 | # If the MCP server has a shutdown or cleanup method, call it here
282 | # For now, just log the cleanup attempt
283 | self.tools_registered = False
284 | self._starlette_app = None
285 | logger.info("MCP server cleanup completed")
286 |
287 | def is_initialized(self) -> bool:
288 | """Check if the MCP server is properly initialized.
289 |
290 | Returns:
291 | True if the server is initialized and ready to use, False otherwise
292 | """
293 | return self.tools_registered and self._starlette_app is not None
294 |
295 | def register_tools(self):
296 | """Register all available tools with the MCP server.
297 |
298 | This connects the MCP protocol to the Codebase Insight core components,
299 | exposing their functionality through the MCP interface.
300 | """
301 | if self.tools_registered:
302 | logger.debug("Tools already registered, skipping")
303 | return
304 |
305 | logger.info("Registering tools with MCP server")
306 |
307 | # Check if critical dependencies are available
308 |         critical_dependencies = ["vector_store", "knowledge_base", "task_tracker", "adr_manager"]  # "task_tracker" matches the component fetched in _register_task
309 | missing_dependencies = []
310 |
311 | for dependency in critical_dependencies:
312 | if not self.state.get_component(dependency):
313 | missing_dependencies.append(dependency)
314 |
315 | if missing_dependencies:
316 | logger.warning(f"Some critical dependencies are not available: {', '.join(missing_dependencies)}")
317 | logger.warning("Tools requiring these dependencies will not be registered")
318 | # Don't fail registration completely - continue with available tools
319 |
320 | # Register available tools
321 | try:
322 | self._register_vector_search()
323 | self._register_knowledge()
324 | self._register_adr()
325 | self._register_task()
326 |
327 | # Mark tools as registered even if some failed
328 | self.tools_registered = True
329 | logger.info("MCP tools registration completed")
330 | except Exception as e:
331 | logger.error(f"Error registering MCP tools: {e}", exc_info=True)
332 | # Don't mark as registered if there was an error
333 |
334 | def _register_vector_search(self):
335 | """Register the vector search tool with the MCP server."""
336 | vector_store = self.state.get_component("vector_store")
337 | if not vector_store:
338 | logger.warning("Vector store component not available, skipping tool registration")
339 | return
340 |
341 | # Verify that the vector store is properly initialized
342 | if not hasattr(vector_store, 'search') or not callable(getattr(vector_store, 'search')):
343 | logger.warning("Vector store component does not have a search method, skipping tool registration")
344 | return
345 |
346 | async def vector_search(query: str, limit: int = 5, threshold: float = 0.7,
347 | file_type: Optional[str] = None, path_pattern: Optional[str] = None):
348 | """Search for code snippets semantically similar to the query text."""
349 | logger.debug(f"MCP vector search request: {query=}, {limit=}, {threshold=}")
350 |
351 | # Prepare filters if provided
352 | filter_conditions = {}
353 | if file_type:
354 | filter_conditions["file_type"] = {"$eq": file_type}
355 | if path_pattern:
356 | filter_conditions["path"] = {"$like": path_pattern}
357 |
358 | results = await vector_store.search(
359 | text=query,
360 | filter_conditions=filter_conditions if filter_conditions else None,
361 | limit=limit
362 | )
363 |
364 | # Format results
365 | formatted_results = [
366 | {
367 | "id": result.id,
368 | "score": result.score,
369 | "text": result.metadata.get("text", ""),
370 | "file_path": result.metadata.get("file_path", ""),
371 | "line_range": result.metadata.get("line_range", ""),
372 | "type": result.metadata.get("type", "code"),
373 | "language": result.metadata.get("language", ""),
374 | "timestamp": result.metadata.get("timestamp", "")
375 | }
376 | for result in results
377 | if result.score >= threshold
378 | ]
379 |
380 | return {"results": formatted_results}
381 |
382 | self.mcp_server.add_tool(
383 | name="vector-search",
384 | fn=vector_search,
385 | description="Search for code snippets semantically similar to the query text"
386 | )
387 | logger.debug("Vector search tool registered")
388 |
389 | def _register_knowledge(self):
390 | """Register the knowledge base tool with the MCP server."""
391 | knowledge_base = self.state.get_component("knowledge_base")
392 | if not knowledge_base:
393 | logger.warning("Knowledge base component not available, skipping tool registration")
394 | return
395 |
396 | async def search_knowledge(query: str, pattern_type: str = "code", limit: int = 5):
397 | """Search for patterns in the knowledge base."""
398 | logger.debug(f"MCP knowledge search request: {query=}, {pattern_type=}, {limit=}")
399 |
400 | results = await knowledge_base.search_patterns(
401 | query=query,
402 | pattern_type=pattern_type,
403 | limit=limit
404 | )
405 |
406 | # Format results
407 | formatted_results = [
408 | {
409 | "id": result.id,
410 | "pattern": result.pattern,
411 | "description": result.description,
412 | "type": result.type,
413 | "confidence": result.confidence,
414 | "metadata": result.metadata
415 | }
416 | for result in results
417 | ]
418 |
419 | return {"results": formatted_results}
420 |
421 | self.mcp_server.add_tool(
422 | name="knowledge-search",
423 | fn=search_knowledge,
424 | description="Search for patterns in the knowledge base"
425 | )
426 | logger.debug("Knowledge search tool registered")
427 |
428 | def _register_adr(self):
429 | """Register the ADR management tool with the MCP server."""
430 | adr_manager = self.state.get_component("adr_manager")
431 | if not adr_manager:
432 | logger.warning("ADR manager component not available, skipping tool registration")
433 | return
434 |
435 | async def list_adrs(status: Optional[str] = None, limit: int = 10):
436 | """List architectural decision records."""
437 | logger.debug(f"MCP ADR list request: {status=}, {limit=}")
438 |
439 | try:
440 | adrs = await adr_manager.list_adrs(status=status, limit=limit)
441 |
442 | # Format results
443 | formatted_results = [
444 | {
445 | "id": adr.id,
446 | "title": adr.title,
447 | "status": adr.status,
448 | "date": adr.date.isoformat() if adr.date else None,
449 | "authors": adr.authors,
450 | "summary": adr.summary
451 | }
452 | for adr in adrs
453 | ]
454 |
455 | return {"adrs": formatted_results}
456 | except Exception as e:
457 | logger.error(f"Error listing ADRs: {e}", exc_info=True)
458 | return {"error": str(e), "adrs": []}
459 |
460 | self.mcp_server.add_tool(
461 | name="adr-list",
462 | fn=list_adrs,
463 | description="List architectural decision records"
464 | )
465 | logger.debug("ADR management tool registered")
466 |
467 | def _register_task(self):
468 | """Register the task management tool with the MCP server."""
469 | task_tracker = self.state.get_component("task_tracker")
470 | if not task_tracker:
471 | logger.warning("Task tracker component not available, skipping tool registration")
472 | return
473 |
474 | async def get_task_status(task_id: str):
475 | """Get the status of a specific task."""
476 | logger.debug(f"MCP task status request: {task_id=}")
477 |
478 | try:
479 | status = await task_tracker.get_task_status(task_id)
480 | return status
481 | except Exception as e:
482 | logger.error(f"Error getting task status: {e}", exc_info=True)
483 | return {"error": str(e), "status": "unknown"}
484 |
485 | self.mcp_server.add_tool(
486 | name="task-status",
487 | fn=get_task_status,
488 | description="Get the status of a specific task"
489 | )
490 | logger.debug("Task management tool registered")
491 |
492 | def get_starlette_app(self) -> Starlette:
493 | """Get the Starlette application for the MCP server.
494 |
495 | Returns:
496 | Configured Starlette application
497 | """
498 | # Ensure tools are registered
499 | self.register_tools()
500 |
501 | # Create and return the Starlette app for SSE
502 | if self._starlette_app is None:
503 | self._starlette_app = create_sse_server(self.mcp_server)
504 | return self._starlette_app
505 |
```
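A minimal client sketch for the routes registered by `create_sse_server` (a hypothetical script, not part of the repository; it assumes the app is served at `http://localhost:8000`, and the first event arrives immediately because `send_heartbeats` queues a heartbeat before its first sleep):

```python
# Hypothetical client sketch for the /sse and /message routes above.
import asyncio
import httpx

async def main():
    async with httpx.AsyncClient(base_url="http://localhost:8000", timeout=None) as client:
        # POST /message fans the JSON payload out to every connected queue
        await client.post("/message", json={"type": "ping"})

        # GET /sse streams "data: ..." frames, starting with a heartbeat
        async with client.stream("GET", "/sse") as response:
            async for line in response.aiter_lines():
                if line.startswith("data: "):
                    print(line[len("data: "):])
                    break  # one event is enough for this demo

asyncio.run(main())
```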
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
```python
1 | """Test fixtures for the codebase insight server."""
2 |
3 | import asyncio
4 | import logging
5 | import os
6 | import sys
7 | import threading
8 | import uuid
9 | import warnings
10 | from contextlib import ExitStack
11 | from pathlib import Path
12 | from threading import Lock
13 | from typing import AsyncGenerator, Dict, Generator, Optional, Set
14 | import tracemalloc
15 |
16 | import httpx
17 | import pytest
18 | import pytest_asyncio
19 | from fastapi import FastAPI
20 |
21 | # Ensure the src directory is in the Python path
22 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
23 |
24 | from src.mcp_codebase_insight.core.config import ServerConfig
25 | from src.mcp_codebase_insight.server import CodebaseAnalysisServer
26 | from src.mcp_codebase_insight.server_test_isolation import get_isolated_server_state
27 |
28 | logger = logging.getLogger(__name__)
29 |
30 | # Enable tracemalloc for debugging resource warnings and coroutine tracking
31 | tracemalloc.start(25) # Keep 25 frames to provide good traceback info
32 |
33 | # Track process-specific event loops with mutex protection
34 | _event_loops: Dict[int, asyncio.AbstractEventLoop] = {}
35 | _loops_lock = Lock()
36 | _active_test_ids: Set[str] = set()
37 | _tests_lock = Lock()
38 |
39 | # Configure logging for better debug info
40 | logging.basicConfig(level=logging.INFO)
41 | asyncio_logger = logging.getLogger("asyncio")
42 | asyncio_logger.setLevel(logging.INFO)
43 |
44 | def _get_test_id():
45 | """Get a unique identifier for the current test."""
46 | return f"{os.getpid()}_{threading.get_ident()}"
47 |
48 | # Primary event loop with session scope for compatibility with pytest-asyncio
49 | @pytest.fixture(scope="session")
50 | def event_loop():
51 | """Create a session-scoped event loop for the test session."""
52 | pid = os.getpid()
53 | logger.info(f"Creating session-scoped event loop for process {pid}")
54 |
55 | # Create and set a new loop for this session
56 | policy = asyncio.get_event_loop_policy()
57 | loop = policy.new_event_loop()
58 | asyncio.set_event_loop(loop)
59 |
60 | with _loops_lock:
61 | _event_loops[pid] = loop
62 |
63 | yield loop
64 |
65 | # Final cleanup
66 | with _loops_lock:
67 | if pid in _event_loops:
68 | del _event_loops[pid]
69 |
70 | # Close the loop to prevent asyncio related warnings
71 | try:
72 | if not loop.is_closed():
73 | loop.run_until_complete(loop.shutdown_asyncgens())
74 | loop.close()
75 |     except Exception:
76 | logger.exception("Error closing session event loop")
77 |
78 | # To address the event_loop fixture scope mismatch issue, we'll use a different approach
79 | # We'll have a single session-scoped event loop that's accessible to function-scoped fixtures
80 | @pytest.fixture(scope="function")
81 | def function_event_loop(event_loop):
82 | """
83 | Create a function-scoped event loop proxy for test isolation.
84 |
85 | This approach avoids the ScopeMismatch error by using the session-scoped event_loop
86 | but providing function-level isolation.
87 | """
88 | # Return the session loop, but track the test in our isolation system
89 | test_id = _get_test_id()
90 | logger.debug(f"Using function-level event loop isolation for test {test_id}")
91 |
92 | with _tests_lock:
93 | _active_test_ids.add(test_id)
94 |
95 | yield event_loop
96 |
97 | with _tests_lock:
98 | if test_id in _active_test_ids:
99 | _active_test_ids.remove(test_id)
100 |
101 | @pytest.fixture(scope="session")
102 | def anyio_backend():
103 | """Configure pytest-asyncio to use asyncio backend."""
104 | return "asyncio"
105 |
106 | @pytest.fixture(scope="session")
107 | def test_server_config():
108 | """Create a server configuration for tests."""
109 | # For CI/CD environment, use the environment variables if available
110 | qdrant_url = os.environ.get("QDRANT_URL", "http://localhost:6333")
111 |
112 | # Use the CI/CD collection name if provided, otherwise generate a unique one
113 | collection_name = os.environ.get("COLLECTION_NAME", f"test_collection_{uuid.uuid4().hex[:8]}")
114 |
115 | # Optional: Use a shorter embedding model for tests to save resources
116 | embedding_model = os.environ.get("EMBEDDING_MODEL", "all-MiniLM-L6-v2")
117 |
118 | logger.info(f"Configuring test server with Qdrant URL: {qdrant_url}, collection: {collection_name}")
119 |
120 | config = ServerConfig(
121 | host="localhost",
122 | port=8000,
123 | log_level="DEBUG",
124 | qdrant_url=qdrant_url,
125 | docs_cache_dir=Path(".test_cache") / "docs",
126 | adr_dir=Path(".test_cache") / "docs/adrs",
127 | kb_storage_dir=Path(".test_cache") / "knowledge",
128 | embedding_model=embedding_model,
129 | collection_name=collection_name,
130 | debug_mode=True,
131 | metrics_enabled=False,
132 | cache_enabled=True,
133 | memory_cache_size=1000,
134 | disk_cache_dir=Path(".test_cache") / "cache"
135 | )
136 | return config
137 |
138 | # Make the qdrant_client fixture session-scoped to avoid connection issues
139 | @pytest.fixture(scope="session")
140 | def qdrant_client(test_server_config):
141 | """Create a shared Qdrant client for tests."""
142 | from qdrant_client import QdrantClient
143 | from qdrant_client.http import models
144 |
145 | # Connect to Qdrant
146 | client = QdrantClient(url=test_server_config.qdrant_url)
147 |
148 | # Create the collection if it doesn't exist
149 | try:
150 | collections = client.get_collections().collections
151 | collection_names = [c.name for c in collections]
152 |
153 | # If collection doesn't exist, create it
154 | if test_server_config.collection_name not in collection_names:
155 | logger.info(f"Creating test collection: {test_server_config.collection_name}")
156 | client.create_collection(
157 | collection_name=test_server_config.collection_name,
158 | vectors_config=models.VectorParams(
159 | size=384, # Dimension for all-MiniLM-L6-v2
160 | distance=models.Distance.COSINE,
161 | ),
162 | )
163 | else:
164 | logger.info(f"Collection {test_server_config.collection_name} already exists")
165 | except Exception as e:
166 | logger.warning(f"Error checking/creating Qdrant collection: {e}")
167 |
168 | yield client
169 |
170 | # Cleanup - delete the collection at the end of the session
171 | try:
172 | if test_server_config.collection_name.startswith("test_"):
173 | logger.info(f"Cleaning up test collection: {test_server_config.collection_name}")
174 | client.delete_collection(collection_name=test_server_config.collection_name)
175 | except Exception as e:
176 | logger.warning(f"Error deleting Qdrant collection: {e}")
177 |
178 | # Session-scoped server instance for shared resources
179 | @pytest_asyncio.fixture(scope="session")
180 | async def session_test_server(event_loop, test_server_config):
181 | """Create a session-scoped server instance for shared tests."""
182 |     logger.info("Creating session-scoped test server instance")
183 |
184 | # Create the server instance with the provided test configuration
185 | server = CodebaseAnalysisServer(test_server_config)
186 |
187 | # Initialize the server state
188 | logger.info("Initializing server state...")
189 | await server.state.initialize()
190 | logger.info("Server state initialized successfully")
191 |
192 | # Initialize the server
193 | logger.info("Initializing server...")
194 | await server.initialize()
195 | logger.info("Server initialized successfully")
196 |
197 | # Create and mount MCP server
198 | from src.mcp_codebase_insight.core.sse import MCP_CodebaseInsightServer, create_sse_server
199 | from src.mcp_codebase_insight.core.state import ComponentStatus
200 |
201 | logger.info("Creating and mounting MCP server...")
202 | try:
203 | # Create SSE server
204 | sse_server = create_sse_server()
205 | logger.info("Created SSE server")
206 |
207 | # Mount SSE server
208 | server.app.mount("/mcp", sse_server)
209 | logger.info("Mounted SSE server at /mcp")
210 |
211 | # Create MCP server instance
212 | mcp_server = MCP_CodebaseInsightServer(server.state)
213 | logger.info("Created MCP server instance")
214 |
215 | # Register tools
216 | mcp_server.register_tools()
217 | logger.info("Registered MCP server tools")
218 |
219 | # Update component status
220 | server.state.update_component_status(
221 | "mcp_server",
222 | ComponentStatus.INITIALIZED,
223 | instance=mcp_server
224 | )
225 | logger.info("Updated MCP server component status")
226 |
227 | except Exception as e:
228 | logger.error(f"Failed to create/mount MCP server: {e}", exc_info=True)
229 | raise RuntimeError(f"Failed to create/mount MCP server: {e}")
230 |
231 | # Add test-specific endpoints
232 | @server.app.get("/direct-sse")
233 | async def direct_sse_endpoint():
234 | """Test endpoint for direct SSE connection."""
235 | from starlette.responses import Response
236 | return Response(
237 | content="data: Direct SSE test endpoint\n\n",
238 | media_type="text/event-stream",
239 | headers={
240 | "Cache-Control": "no-cache",
241 | "Connection": "keep-alive",
242 | "X-Accel-Buffering": "no"
243 | }
244 | )
245 |
246 | @server.app.get("/mcp/sse-mock")
247 | async def mock_sse_endpoint():
248 | """Mock SSE endpoint for testing."""
249 | from starlette.responses import Response
250 | return Response(
251 | content="data: Mock SSE endpoint\n\n",
252 | media_type="text/event-stream",
253 | headers={
254 | "Cache-Control": "no-cache",
255 | "Connection": "keep-alive",
256 | "X-Accel-Buffering": "no"
257 | }
258 | )
259 |
260 | @server.app.get("/debug/routes")
261 | async def debug_routes():
262 | """Debug endpoint to list all registered routes."""
263 | from starlette.responses import Response
264 | routes = []
265 | for route in server.app.routes:
266 | route_info = {
267 | "path": getattr(route, "path", str(route)),
268 | "methods": getattr(route, "methods", set()),
269 | "name": getattr(route, "name", None),
270 | "endpoint": str(getattr(route, "endpoint", None))
271 | }
272 | routes.append(route_info)
273 | return {"routes": routes}
274 |
275 | @server.app.get("/health")
276 | async def health_check_test():
277 | """Health check endpoint for testing."""
278 | mcp_server = server.state.get_component("mcp_server")
279 | return {
280 | "status": "ok",
281 | "initialized": server.state.initialized,
282 | "mcp_available": mcp_server is not None,
283 | "instance_id": server.state.instance_id,
284 | "components": server.state.list_components()
285 | }
286 |
287 | # The server is already initialized, no need to start it
288 | logger.info("Test server ready")
289 |
290 | yield server
291 |
292 | # Cleanup
293 | logger.info("Cleaning up test server...")
294 | await server.shutdown()
295 | logger.info("Test server cleanup complete")
296 |
297 | # Function-scoped server instance for isolated tests
298 | @pytest_asyncio.fixture
299 | async def test_server_instance(function_event_loop, test_server_config):
300 | """Create a function-scoped server instance for isolated tests."""
301 | logger.info(f"Creating function-scoped test server instance for test {_get_test_id()}")
302 |
303 | # Create server with isolated state
304 | server = CodebaseAnalysisServer(test_server_config)
305 | instance_id = f"test_server_{uuid.uuid4().hex}"
306 | server.state = get_isolated_server_state(instance_id)
307 |
308 | try:
309 | # Initialize state
310 | if not server.state.initialized:
311 | logger.info("Initializing server state...")
312 | await server.state.initialize()
313 | logger.info("Server state initialized successfully")
314 |
315 | # Initialize server
316 | if not server.is_initialized:
317 | logger.info("Initializing server...")
318 | await server.initialize()
319 | logger.info("Server initialized successfully")
320 |
321 | yield server
322 | finally:
323 | try:
324 | # Clean up server state
325 | logger.info("Starting server cleanup...")
326 |
327 | # Check server.state exists and is initialized
328 | if hasattr(server, 'state') and server.state and hasattr(server.state, 'initialized') and server.state.initialized:
329 | logger.info("Cleaning up server state...")
330 | try:
331 | await server.state.cleanup()
332 | logger.info("Server state cleanup completed")
333 | except Exception as e:
334 | logger.error(f"Error during server state cleanup: {e}")
335 |
336 | # Check server is initialized
337 | if hasattr(server, 'is_initialized') and server.is_initialized:
338 | logger.info("Shutting down server...")
339 | try:
340 | await server.shutdown()
341 | logger.info("Server shutdown completed")
342 | except Exception as e:
343 | logger.error(f"Error during server shutdown: {e}")
344 | except Exception as e:
345 | logger.error(f"Error during overall server cleanup: {e}")
346 |
347 | # Session-scoped httpx client
348 | @pytest_asyncio.fixture(scope="session")
349 | async def session_httpx_client(session_test_server):
350 | """Create a session-scoped httpx client for shared tests."""
351 |     logger.info("Creating session-scoped httpx test client")
352 |
353 | # Configure transport with proper ASGI handling
354 | transport = httpx.ASGITransport(
355 | app=session_test_server.app,
356 | raise_app_exceptions=False,
357 | )
358 |
359 | # Create client
360 | client = httpx.AsyncClient(
361 | transport=transport,
362 | base_url="http://testserver",
363 | follow_redirects=True,
364 | timeout=30.0
365 | )
366 |
367 | logger.info("Session-scoped httpx test client created")
368 |
369 | try:
370 | yield client
371 | finally:
372 | try:
373 | await client.aclose()
374 | logger.info("Session-scoped httpx test client closed")
375 | except Exception as e:
376 | logger.error(f"Error during session client cleanup: {e}")
377 |
378 | # Function-scoped httpx client
379 | @pytest_asyncio.fixture
380 | async def httpx_test_client(test_server_instance):
381 | """Create a function-scoped httpx client for isolated tests."""
382 | logger.info(f"Creating function-scoped httpx test client for test {_get_test_id()}")
383 |
384 | # Configure transport with proper ASGI handling
385 | transport = httpx.ASGITransport(
386 | app=test_server_instance.app,
387 | raise_app_exceptions=False,
388 | )
389 |
390 | # Create client
391 | client = httpx.AsyncClient(
392 | transport=transport,
393 | base_url="http://testserver",
394 | follow_redirects=True,
395 | timeout=30.0
396 | )
397 |
398 | logger.info("Function-scoped httpx test client created")
399 |
400 | try:
401 | yield client
402 | finally:
403 | try:
404 | await client.aclose()
405 | logger.info("Function-scoped httpx test client closed")
406 | except Exception as e:
407 | logger.error(f"Error during client cleanup: {e}")
408 |
409 | # Default client for tests (currently using session-scoped client)
410 | @pytest_asyncio.fixture
411 | async def client(session_httpx_client) -> AsyncGenerator[httpx.AsyncClient, None]:
412 | """Return the current httpx test client.
413 |
414 | This is a function-scoped async fixture that yields the session-scoped client.
415 | Tests can override this to use the function-scoped client if needed.
416 | """
417 | yield session_httpx_client
418 |
419 | # Test data fixtures
420 | @pytest.fixture
421 | def test_code():
422 | """Provide sample code for tests."""
423 | return """
424 | def factorial(n):
425 | if n <= 1:
426 | return 1
427 | return n * factorial(n-1)
428 | """
429 |
430 | @pytest.fixture
431 | def test_issue():
432 | """Provide a sample issue for tests."""
433 | return {
434 | "title": "Test Issue",
435 | "description": "This is a test issue for debugging",
436 | "code": "print('hello world')",
437 | "error": "TypeError: unsupported operand type(s)",
438 | }
439 |
440 | @pytest.fixture
441 | def test_adr():
442 | """Provide a sample ADR for tests."""
443 | return {
444 | "title": "Test ADR",
445 | "status": "proposed",
446 | "context": {
447 | "problem": "This is a test ADR for testing",
448 | "constraints": ["Test constraint"],
449 | "assumptions": ["Test assumption"],
450 | "background": "Test background"
451 | },
452 | "decision": "We decided to test the ADR system",
453 | "consequences": "Testing will be successful",
454 | "options": [
455 | {
456 | "title": "Test Option",
457 | "description": "A test option for the ADR.",
458 | "pros": ["Easy to implement"],
459 | "cons": ["Not production ready"]
460 | }
461 | ]
462 | }
463 |
464 | # Define custom pytest hooks
465 | def pytest_collection_modifyitems(items):
466 | """Add the isolated_event_loop marker to integration tests."""
467 | for item in items:
468 | module_name = item.module.__name__ if hasattr(item, 'module') else ''
469 | if 'integration' in module_name:
470 | # Add our custom marker to all integration tests
471 | item.add_marker(pytest.mark.isolated_event_loop)
472 |
473 | def pytest_configure(config):
474 | """Configure pytest with our specific settings."""
475 | config.addinivalue_line(
476 | "markers", "isolated_event_loop: mark test to use an isolated event loop"
477 | )
478 |
479 | # Suppress event loop warnings
480 | warnings.filterwarnings(
481 | "ignore",
482 | message="There is no current event loop",
483 | category=DeprecationWarning
484 | )
485 | warnings.filterwarnings(
486 | "ignore",
487 | message="The loop argument is deprecated",
488 | category=DeprecationWarning
489 | )
490 |
491 | def pytest_runtest_setup(item):
492 | """Set up for each test."""
493 | # Get the module name for the test
494 | module_name = item.module.__name__ if hasattr(item, 'module') else ''
495 |
496 | # Set an environment variable with the current test module
497 | # This helps with test isolation in the server code
498 | os.environ['CURRENT_TEST_MODULE'] = module_name
499 | os.environ['CURRENT_TEST_NAME'] = item.name if hasattr(item, 'name') else ''
500 |
501 | # For any async test, ensure we have a valid event loop
502 | if 'asyncio' in item.keywords:
503 | try:
504 | loop = asyncio.get_event_loop()
505 | if loop.is_closed():
506 | logger.warning(f"Found closed loop in {module_name}:{item.name}, creating new loop")
507 | loop = asyncio.new_event_loop()
508 | asyncio.set_event_loop(loop)
509 | except RuntimeError:
510 | logger.warning(f"No event loop found in {module_name}:{item.name}, creating new loop")
511 | loop = asyncio.new_event_loop()
512 | asyncio.set_event_loop(loop)
513 |
514 | def pytest_runtest_teardown(item):
515 | """Clean up after each test."""
516 | # Clear the current test environment variables
517 | if 'CURRENT_TEST_MODULE' in os.environ:
518 | del os.environ['CURRENT_TEST_MODULE']
519 | if 'CURRENT_TEST_NAME' in os.environ:
520 | del os.environ['CURRENT_TEST_NAME']
521 |
522 | # Cleanup fixture
523 | @pytest.fixture(autouse=True, scope="session")
524 | def cleanup_server_states(event_loop: asyncio.AbstractEventLoop):
525 | """Clean up any lingering server states."""
526 | from src.mcp_codebase_insight.server_test_isolation import _server_states
527 |
528 | yield
529 |
530 | try:
531 | # Report any unclosed instances
532 | logger.info(f"Found {len(_server_states)} server states at end of session")
533 | for instance_id, state in list(_server_states.items()):
534 | logger.info(f"Cleaning up state for instance: {instance_id}")
535 | try:
536 | if state.initialized:
537 | try:
538 | # Use the event loop for cleanup
539 | if not event_loop.is_closed():
540 | event_loop.run_until_complete(state.cleanup())
541 | except Exception as e:
542 | logger.error(f"Error cleaning up state: {e}")
543 | except Exception as e:
544 | logger.error(f"Error checking state initialized: {e}")
545 | except Exception as e:
546 | logger.error(f"Error during server states cleanup: {e}")
547 |
548 | try:
549 | # Cancel any remaining tasks
550 | for pid, loop in list(_event_loops.items()):
551 | if not loop.is_closed():
552 | for task in asyncio.all_tasks(loop):
553 | if not task.done() and not task.cancelled():
554 | logger.warning(f"Force cancelling task: {task.get_name()}")
555 | task.cancel()
556 | except Exception as e:
557 | logger.error(f"Error cancelling tasks: {e}")
558 |
```
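To show how these fixtures compose, here is a hypothetical test module sketch (it targets the `/health` endpoint registered by `session_test_server`; the isolated variant assumes the application itself also serves `/health`):

```python
# Hypothetical test module exercising the fixtures defined in conftest.py.
import pytest

@pytest.mark.asyncio
async def test_health_via_shared_client(client):
    # `client` yields the session-scoped httpx.AsyncClient bound to the app
    response = await client.get("/health")
    assert response.status_code == 200
    assert response.json()["status"] == "ok"

@pytest.mark.asyncio
async def test_health_with_isolation(httpx_test_client):
    # Requesting httpx_test_client instead gives a function-scoped client
    # backed by an isolated server instance
    response = await httpx_test_client.get("/health")
    assert response.status_code == 200
```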
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/vector_store.py:
--------------------------------------------------------------------------------
```python
1 | """Vector store for pattern similarity search using Qdrant."""
2 |
3 | from typing import Dict, List, Optional
4 | import asyncio
5 | import logging
6 | import uuid
7 | from datetime import datetime
8 |
9 | from qdrant_client import QdrantClient
10 | from qdrant_client.http import models as rest
11 | from qdrant_client.http.models import Distance, VectorParams
12 | from qdrant_client.http.exceptions import UnexpectedResponse
13 |
14 | logger = logging.getLogger(__name__)
15 |
16 | # Note: Parameter changes between Qdrant client versions:
17 | # - In v1.13.3+, the parameter 'query_vector' was renamed to 'query' in the query_points method
18 | # - The store_pattern and update_pattern methods now accept 'id' instead of 'pattern_id'
19 | # For backward compatibility, we support both parameter styles.
20 |
21 | class SearchResult:
22 | """Search result from vector store."""
23 |
24 | def __init__(self, id: str, score: float, metadata: Optional[Dict] = None):
25 | """Initialize search result."""
26 | self.id = id
27 | self.score = score
28 | self.metadata = metadata or {} # Initialize with empty dict or provided metadata
29 |
30 | def __repr__(self):
31 | """String representation of search result."""
32 | return f"SearchResult(id={self.id}, score={self.score}, metadata={self.metadata})"
33 |
34 | class VectorStore:
35 | """Vector store for pattern similarity search."""
36 |
37 | def __init__(
38 | self,
39 | url: str,
40 | embedder,
41 | collection_name: str = "codebase_patterns",
42 | vector_size: int = 384, # Default for all-MiniLM-L6-v2
43 | api_key: Optional[str] = None,
44 | vector_name: str = "default" # Add vector_name parameter with default value
45 | ):
46 | """Initialize vector store."""
47 | self.url = url
48 | self.embedder = embedder
49 | self.collection_name = collection_name
50 | self.vector_size = vector_size
51 | self.api_key = api_key
52 | self.vector_name = vector_name # Store the vector name
53 | self.initialized = False
54 | self.client = None
55 |
56 | async def initialize(self):
57 | """Initialize vector store."""
58 | if self.initialized:
59 | return
60 |
61 | try:
62 | # Initialize embedder first
63 | logger.debug("Initializing embedder")
64 | await self.embedder.initialize()
65 |
66 | # Update vector size from embedder if available
67 | if hasattr(self.embedder, 'vector_size'):
68 | self.vector_size = self.embedder.vector_size
69 | logger.debug(f"Using vector size {self.vector_size} from embedder")
70 |
71 | # Initialize Qdrant client with additional parameters
72 | logger.debug(f"Connecting to Qdrant at {self.url}")
73 | self.client = QdrantClient(
74 | url=self.url,
75 | api_key=self.api_key,
76 | timeout=10.0,
77 | prefer_grpc=False
78 | )
79 |
80 | # Attempt to test connection and set up collection; skip on failure
81 | try:
82 | # Test connection with retry
83 | max_retries = 3
84 | retry_delay = 1
85 | for attempt in range(max_retries):
86 | try:
87 | logger.debug(f"Testing Qdrant connection (attempt {attempt+1}/{max_retries})")
88 | self.client.get_collections()
89 | logger.debug("Connection successful")
90 | break
91 | except Exception as e:
92 | if attempt < max_retries - 1:
93 | logger.warning(f"Connection attempt {attempt+1} failed: {e}, retrying in {retry_delay}s")
94 | await asyncio.sleep(retry_delay)
95 | retry_delay *= 2
96 | else:
97 | raise
98 |
99 | # Create collection if it doesn't exist
100 | logger.debug(f"Checking for collection {self.collection_name}")
101 | collections = self.client.get_collections().collections
102 | if not any(c.name == self.collection_name for c in collections):
103 | logger.debug(f"Creating collection {self.collection_name}")
104 | self.client.create_collection(
105 | collection_name=self.collection_name,
106 | vectors_config=VectorParams(
107 | size=self.vector_size,
108 | distance=Distance.COSINE,
109 | on_disk=True
110 | ),
111 | optimizers_config=rest.OptimizersConfigDiff(
112 | indexing_threshold=0,
113 | memmap_threshold=0
114 | )
115 | )
116 | logger.debug("Vector store collection setup complete")
117 | except Exception as e:
118 | logger.warning(f"Qdrant is unavailable, skipping collection setup: {e}")
119 |
120 | # Finalize initialization regardless of Qdrant availability
121 | self.initialized = True
122 | logger.debug("Vector store initialization complete")
123 |
124 | except Exception as e:
125 | logger.error(f"Vector store initialization failed: {str(e)}")
126 | raise RuntimeError(f"Failed to initialize vector store: {str(e)}")
127 |
128 | async def cleanup(self):
129 | """Clean up vector store resources."""
130 | if not self.initialized:
131 | logger.debug(f"Vector store not initialized, skipping cleanup for {self.collection_name}")
132 | return
133 |
134 | try:
135 | logger.debug(f"Cleaning up collection {self.collection_name}")
136 |
137 | # Check if collection exists first
138 | collections = self.client.get_collections().collections
139 | exists = any(c.name == self.collection_name for c in collections)
140 |
141 | if not exists:
142 | logger.debug(f"Collection {self.collection_name} does not exist, nothing to clean")
143 | return
144 |
145 | # Delete all points in the collection
146 | try:
147 | logger.debug(f"Deleting all points in collection {self.collection_name}")
148 | self.client.delete(
149 | collection_name=self.collection_name,
150 | points_selector=rest.FilterSelector(
151 | filter=rest.Filter() # Empty filter means all points
152 | )
153 | )
154 | logger.debug(f"Successfully deleted all points from {self.collection_name}")
155 | except Exception as e:
156 | logger.warning(f"Error deleting points from collection {self.collection_name}: {e}")
157 |
158 | # Reset initialized state to ensure proper re-initialization if needed
159 | self.initialized = False
160 | logger.debug(f"Reset initialized state for vector store with collection {self.collection_name}")
161 | except Exception as e:
162 | logger.error(f"Error during vector store cleanup: {e}")
163 | # Don't raise the exception to avoid breaking test teardowns
164 |
165 | async def close(self):
166 | """Close vector store connection and clean up resources."""
167 | try:
168 | logger.debug("Starting vector store closure process")
169 | await self.cleanup()
170 | finally:
171 | if self.client:
172 | try:
173 | logger.debug("Closing Qdrant client connection")
174 | self.client.close()
175 | logger.debug("Qdrant client connection closed")
176 | except Exception as e:
177 | logger.error(f"Error closing Qdrant client: {e}")
178 |
179 | # Ensure initialized state is reset
180 | self.initialized = False
181 | logger.debug("Vector store fully closed")
182 |
183 | async def store_pattern(
184 |         self, id: str, text: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None,
185 |         pattern_type: Optional[str] = None, tags: Optional[List[str]] = None, embedding: Optional[List[float]] = None, metadata: Optional[Dict] = None
186 | ) -> bool:
187 | """Store a pattern in the vector store.
188 |
189 | This method supports two calling patterns:
190 | 1. With text and metadata for automatic embedding generation
191 | 2. With explicit title, description, pattern_type, tags, and embedding
192 |
193 | Args:
194 | id: ID for the pattern
195 | text: Text to generate embedding from (if embedding not provided)
196 | title: Title of the pattern
197 | description: Description of the pattern
198 | pattern_type: Type of the pattern
199 | tags: Tags for the pattern
200 | embedding: Pre-computed embedding
201 | metadata: Optional metadata dictionary
202 |
203 | Returns:
204 | True if stored successfully
205 | """
206 | try:
207 | # Ensure we're initialized
208 | if not self.initialized:
209 | await self.initialize()
210 |
211 | # Validate the collection exists and has the correct vector configuration
212 | try:
213 | collection_info = self.client.get_collection(self.collection_name)
214 | # With a non-named vector configuration, we just need to verify the collection exists
215 | logger.info(f"Collection {self.collection_name} exists")
216 | except Exception as e:
217 | logger.error(f"Error validating collection: {str(e)}")
218 |
219 | # Case 1: Using text and metadata
220 | if text is not None and embedding is None:
221 | # Generate embedding from text
222 | embedding = await self.embedder.embed(text)
223 |
224 | # Handle metadata
225 | metadata = metadata or {}
226 |
227 | # Extract or use defaults for required fields
228 | title = metadata.get("title", title) or "Untitled"
229 | description = metadata.get("description", description) or text[:100]
230 | pattern_type = metadata.get("pattern_type", pattern_type) or metadata.get("type", "code")
231 | tags = metadata.get("tags", tags) or []
232 |
233 | # Create payload with all metadata plus required fields
234 | payload = {
235 | "id": id,
236 | "title": title,
237 | "description": description,
238 | "pattern_type": pattern_type,
239 | "type": pattern_type, # Add 'type' field for consistency
240 | "tags": tags,
241 | "timestamp": datetime.now().isoformat(),
242 | **metadata # Include all original metadata fields
243 | }
244 | # Case 2: Using explicit parameters
245 | else:
246 | # Ensure we have all required data
247 | if embedding is None:
248 | raise ValueError("Embedding must be provided if text is not provided")
249 |
250 | title = title or "Untitled"
251 | description = description or ""
252 | pattern_type = pattern_type or "code"
253 | tags = tags or []
254 |
255 | payload = {
256 | "id": id,
257 | "title": title,
258 | "description": description,
259 | "pattern_type": pattern_type,
260 | "type": pattern_type, # Add 'type' field for consistency
261 | "tags": tags,
262 | "timestamp": datetime.now().isoformat(),
263 | }
264 |
265 | # Merge with metadata if provided
266 | if metadata:
267 | payload.update(metadata)
268 |
269 | # Debug logs
270 | logger.info(f"PointStruct data - id: {id}")
271 | logger.info(f"PointStruct data - vector_name: {self.vector_name}")
272 | logger.info(f"PointStruct data - embedding length: {len(embedding)}")
273 | logger.info(f"PointStruct data - payload keys: {payload.keys()}")
274 |
275 | # For Qdrant client 1.13.3, use vector parameter
276 | point = rest.PointStruct(
277 | id=id,
278 | vector=embedding, # Use vector parameter for this version of Qdrant client
279 | payload=payload
280 | )
281 |
282 | self.client.upsert(
283 | collection_name=self.collection_name,
284 | points=[point],
285 | wait=True
286 | )
287 | logger.info(f"Successfully stored pattern with id: {id}")
288 | return True
289 | except Exception as e:
290 | logger.error(f"Error storing pattern: {str(e)}")
291 | raise RuntimeError(f"Failed to store pattern: {str(e)}")
292 |
293 | # Previous version of store_pattern kept as _store_pattern_legacy for backward compatibility
294 | async def _store_pattern_legacy(
295 | self, pattern_id: str, title: str, description: str, pattern_type: str, tags: List[str], embedding: List[float]
296 | ) -> bool:
297 | """Legacy version of store_pattern for backward compatibility."""
298 | return await self.store_pattern(
299 | id=pattern_id,
300 | title=title,
301 | description=description,
302 | pattern_type=pattern_type,
303 | tags=tags,
304 | embedding=embedding
305 | )
306 |
307 | async def update_pattern(
308 | self, id: str, title: str, description: str, pattern_type: str, tags: List[str], embedding: List[float]
309 | ) -> bool:
310 | """Update a pattern in the vector store."""
311 | try:
312 | payload = {
313 | "id": id,
314 | "title": title,
315 | "description": description,
316 | "pattern_type": pattern_type,
317 | "type": pattern_type, # Add 'type' field for consistency
318 | "tags": tags,
319 | "timestamp": datetime.now().isoformat(),
320 | }
321 |
322 | point = rest.PointStruct(
323 | id=id,
324 | vector=embedding, # Use vector parameter for this version of Qdrant client
325 | payload=payload
326 | )
327 |
328 | self.client.upsert(
329 | collection_name=self.collection_name,
330 | points=[point],
331 | wait=True
332 | )
333 | return True
334 | except Exception as e:
335 | logger.error(f"Error updating pattern: {str(e)}")
336 | raise RuntimeError(f"Failed to update pattern: {str(e)}")
337 |
338 | async def delete_pattern(self, id: str) -> None:
339 | """Delete pattern from vector store."""
340 | self.client.delete(
341 | collection_name=self.collection_name,
342 | points_selector=rest.PointIdsList(
343 | points=[id]
344 | )
345 | )
346 |
347 | async def search(
348 | self,
349 | text: str,
350 | filter_conditions: Optional[Dict] = None,
351 | limit: int = 5
352 | ) -> List[SearchResult]:
353 | """Search for similar patterns."""
354 | # Generate embedding
355 | vector = await self.embedder.embed(text)
356 |
357 | # Create filter if provided
358 | search_filter = None
359 | if filter_conditions:
360 | search_filter = rest.Filter(**filter_conditions)
361 |
362 | # Search in Qdrant
363 | results = self.client.query_points(
364 | collection_name=self.collection_name,
365 | query=vector,
366 | query_filter=search_filter,
367 | limit=limit
368 | )
369 |
370 | # Convert to SearchResult objects
371 | search_results = []
372 |
373 | for result in results:
374 | # Create default metadata with all required fields
375 | default_metadata = {
376 | "type": "code",
377 | "language": "python",
378 | "title": "Test Code",
379 | "description": text[:100],
380 | "tags": ["test", "vector"],
381 | "timestamp": datetime.now().isoformat()
382 | }
383 |
384 | # Handle tuples with different length formats
385 | if isinstance(result, tuple):
386 | if len(result) == 2:
387 | # Format: (id, score)
388 | id_val, score_val = result
389 | search_results.append(
390 | SearchResult(
391 | id=id_val,
392 | score=score_val,
393 | metadata=default_metadata
394 | )
395 | )
396 | elif len(result) >= 3:
397 | # Format: (id, score, payload)
398 | id_val, score_val, payload_val = result
399 | # If payload is empty, use default metadata
400 | metadata = payload_val if payload_val else default_metadata
401 | search_results.append(
402 | SearchResult(
403 | id=id_val,
404 | score=score_val,
405 | metadata=metadata
406 | )
407 | )
408 | elif hasattr(result, 'id') and hasattr(result, 'score'):
409 | # Legacy object format
410 | metadata = getattr(result, 'payload', default_metadata)
411 | search_results.append(
412 | SearchResult(
413 | id=result.id,
414 | score=result.score,
415 | metadata=metadata
416 | )
417 | )
418 | else:
419 | logger.warning(f"Unrecognized result format: {result}")
420 |
421 | return search_results
422 |
423 | async def add_vector(self, text: str, metadata: Optional[Dict] = None) -> str:
424 | """Add vector to the vector store and return ID.
425 |
426 | This is a convenience method that automatically generates
427 | a UUID for the vector.
428 |
429 | Args:
430 | text: Text to add
431 | metadata: Optional metadata
432 |
433 | Returns:
434 | ID of the created vector
435 | """
436 | # Generate ID
437 | id = str(uuid.uuid4())
438 |
439 | # Generate embedding
440 | embedding = await self.embedder.embed(text)
441 |
442 | # Ensure metadata is initialized
443 | metadata = metadata or {}
444 |
445 | # Extract title/description from metadata if available, with defaults
446 | title = metadata.get("title", "Untitled")
447 | description = metadata.get("description", text[:100])
448 | pattern_type = metadata.get("pattern_type", metadata.get("type", "code"))
449 | tags = metadata.get("tags", [])
450 |
451 | # Ensure "type" field always exists (standardized structure)
452 | if "type" not in metadata:
453 | metadata["type"] = "code"
454 |
455 | # Create payload with all original metadata plus required fields
456 | payload = {
457 | "id": id,
458 | "title": title,
459 | "description": description,
460 | "pattern_type": pattern_type,
461 | "type": metadata.get("type", "code"),
462 | "tags": tags,
463 | "timestamp": datetime.now().isoformat(),
464 | **metadata # Include all original metadata fields
465 | }
466 |
467 | # Store with complete metadata
468 | try:
469 | # Ensure we're initialized
470 | if not self.initialized:
471 | await self.initialize()
472 |
473 | # Validate the collection exists and has the correct vector configuration
474 | try:
475 | collection_info = self.client.get_collection(self.collection_name)
476 | # With a non-named vector configuration, we just need to verify the collection exists
477 | logger.info(f"Collection {self.collection_name} exists")
478 | except Exception as e:
479 | logger.error(f"Error validating collection: {str(e)}")
480 |
481 | # Debug logs
482 | logger.info(f"PointStruct data - id: {id}")
483 | logger.info(f"PointStruct data - vector_name: {self.vector_name}")
484 | logger.info(f"PointStruct data - embedding length: {len(embedding)}")
485 | logger.info(f"PointStruct data - payload keys: {payload.keys()}")
486 |
487 | # For Qdrant client 1.13.3, use vector parameter
488 | point = rest.PointStruct(
489 | id=id,
490 | vector=embedding, # Use vector parameter for this version of Qdrant client
491 | payload=payload
492 | )
493 |
494 | self.client.upsert(
495 | collection_name=self.collection_name,
496 | points=[point],
497 | wait=True
498 | )
499 | logger.info(f"Successfully stored vector with id: {id}")
500 | return id
501 | except Exception as e:
502 | logger.error(f"Error storing vector: {str(e)}")
503 | raise RuntimeError(f"Failed to store vector: {str(e)}")
504 |
505 | async def search_similar(
506 | self,
507 | query: str,
508 | filter_conditions: Optional[Dict] = None,
509 | limit: int = 5
510 | ) -> List[SearchResult]:
511 | """Search for similar text.
512 |
513 | Args:
514 | query: Query text to search for
515 | filter_conditions: Optional filter conditions
516 | limit: Maximum number of results to return
517 |
518 | Returns:
519 | List of search results
520 | """
521 | return await self.search(
522 | text=query,
523 | filter_conditions=filter_conditions,
524 | limit=limit
525 | )
526 |
```
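As a usage illustration, here is a minimal sketch of the `VectorStore` lifecycle (a hypothetical script; `DummyEmbedder` stands in for the real embedder in `core/embeddings.py`, and a Qdrant instance is assumed at `localhost:6333`). Note that `filter_conditions`, when supplied, is unpacked directly into `rest.Filter(**...)`, so it must use Qdrant filter keys such as `must`/`should` rather than Mongo-style operators:

```python
# Hypothetical usage sketch for VectorStore; not part of the repository.
import asyncio

from src.mcp_codebase_insight.core.vector_store import VectorStore

class DummyEmbedder:
    """Stand-in embedder exposing the interface VectorStore expects."""
    vector_size = 384

    async def initialize(self):
        pass

    async def embed(self, text: str):
        # A real embedder would return a model-generated vector here;
        # a constant non-zero vector keeps cosine distance well defined
        return [0.1] * self.vector_size

async def main():
    store = VectorStore(url="http://localhost:6333", embedder=DummyEmbedder())
    await store.initialize()

    vec_id = await store.add_vector(
        "def add(a, b):\n    return a + b",
        {"type": "code", "language": "python"},
    )
    print(f"stored vector {vec_id}")

    for result in await store.search("addition function", limit=3):
        print(result.id, result.score, result.metadata.get("type"))

    await store.close()

asyncio.run(main())
```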
--------------------------------------------------------------------------------
/src/mcp_codebase_insight/core/knowledge.py:
--------------------------------------------------------------------------------
```python
1 | """Knowledge base for code patterns and insights."""
2 |
3 | from datetime import datetime
4 | from enum import Enum
5 | from typing import Dict, List, Optional
6 | from uuid import UUID, uuid4
7 | import json
8 |
9 | from pydantic import BaseModel, Field
10 |
11 | class PatternType(str, Enum):
12 | """Pattern type enumeration."""
13 |
14 | CODE = "code"
15 | DESIGN_PATTERN = "design_pattern"
16 | ARCHITECTURE = "architecture"
17 | BEST_PRACTICE = "best_practice"
18 | ANTI_PATTERN = "anti_pattern"
19 | FILE_RELATIONSHIP = "file_relationship" # New type for file relationships
20 | WEB_SOURCE = "web_source" # New type for web sources
21 |
22 | class PatternConfidence(str, Enum):
23 | """Pattern confidence level."""
24 |
25 | HIGH = "high"
26 | MEDIUM = "medium"
27 | LOW = "low"
28 | EXPERIMENTAL = "experimental"
29 |
30 | class Pattern(BaseModel):
31 | """Pattern model."""
32 |
33 | id: UUID
34 | name: str
35 | type: PatternType
36 | description: str
37 | content: str
38 | confidence: PatternConfidence
39 | tags: Optional[List[str]] = None
40 | metadata: Optional[Dict[str, str]] = None
41 | created_at: datetime
42 | updated_at: datetime
43 | examples: Optional[List[str]] = None
44 | related_patterns: Optional[List[UUID]] = None
45 |
46 | class SearchResult(BaseModel):
47 | """Pattern search result model."""
48 |
49 | pattern: Pattern
50 | similarity_score: float
51 |
52 | class FileRelationship(BaseModel):
53 | """File relationship model."""
54 |
55 | source_file: str
56 | target_file: str
57 | relationship_type: str # e.g., "imports", "extends", "implements", "uses"
58 | description: Optional[str] = None
59 | metadata: Optional[Dict[str, str]] = None
60 | created_at: datetime = Field(default_factory=datetime.utcnow)
61 | updated_at: datetime = Field(default_factory=datetime.utcnow)
62 |
63 | class WebSource(BaseModel):
64 | """Web source model."""
65 |
66 | url: str
67 | title: str
68 | description: Optional[str] = None
69 | content_type: str # e.g., "documentation", "tutorial", "reference"
70 | last_fetched: datetime = Field(default_factory=datetime.utcnow)
71 | metadata: Optional[Dict[str, str]] = None
72 | related_patterns: Optional[List[UUID]] = None
73 | tags: Optional[List[str]] = None
74 |
75 | class KnowledgeBase:
76 | """Knowledge base for managing code patterns and insights."""
77 |
78 | def __init__(self, config, vector_store=None):
79 | """Initialize knowledge base.
80 |
81 | Args:
82 | config: Server configuration
83 | vector_store: Optional vector store instance
84 | """
85 | self.config = config
86 | self.vector_store = vector_store
87 | self.kb_dir = config.kb_storage_dir
88 | self.initialized = False
89 | self.file_relationships: Dict[str, FileRelationship] = {}
90 | self.web_sources: Dict[str, WebSource] = {}
91 |
92 | async def initialize(self):
93 | """Initialize knowledge base components."""
94 | if self.initialized:
95 | return
96 |
97 | try:
98 | # Create all required directories
99 | self.kb_dir.mkdir(parents=True, exist_ok=True)
100 | (self.kb_dir / "patterns").mkdir(parents=True, exist_ok=True)
101 | (self.kb_dir / "relationships").mkdir(parents=True, exist_ok=True) # New directory for relationships
102 | (self.kb_dir / "web_sources").mkdir(parents=True, exist_ok=True) # New directory for web sources
103 |
104 | # Initialize vector store if available
105 | if self.vector_store:
106 | await self.vector_store.initialize()
107 |
108 | # Load existing relationships and web sources
109 | await self._load_relationships()
110 | await self._load_web_sources()
111 |
112 | # Create initial patterns if none exist
113 | if not list((self.kb_dir / "patterns").glob("*.json")):
114 | await self._create_initial_patterns()
115 |
116 | # Update state
117 | self.config.set_state("kb_initialized", True)
118 | self.initialized = True
119 | except Exception as e:
120 | import traceback
121 | print(f"Error initializing knowledge base: {str(e)}\n{traceback.format_exc()}")
122 | self.config.set_state("kb_initialized", False)
123 | self.config.set_state("kb_error", str(e))
124 | raise RuntimeError(f"Failed to initialize knowledge base: {str(e)}")
125 |
126 | async def _load_relationships(self):
127 | """Load existing file relationships."""
128 | relationships_dir = self.kb_dir / "relationships"
129 | if relationships_dir.exists():
130 | for file_path in relationships_dir.glob("*.json"):
131 | try:
132 | with open(file_path) as f:
133 | data = json.load(f)
134 | relationship = FileRelationship(**data)
135 | key = f"{relationship.source_file}:{relationship.target_file}"
136 | self.file_relationships[key] = relationship
137 | except Exception as e:
138 | print(f"Error loading relationship from {file_path}: {e}")
139 |
140 | async def _load_web_sources(self):
141 | """Load existing web sources."""
142 | web_sources_dir = self.kb_dir / "web_sources"
143 | if web_sources_dir.exists():
144 | for file_path in web_sources_dir.glob("*.json"):
145 | try:
146 | with open(file_path) as f:
147 | data = json.load(f)
148 | source = WebSource(**data)
149 | self.web_sources[source.url] = source
150 | except Exception as e:
151 | print(f"Error loading web source from {file_path}: {e}")
152 |
153 | async def _create_initial_patterns(self):
154 | """Create initial patterns for testing."""
155 | await self.add_pattern(
156 | name="Basic Function",
157 | type=PatternType.CODE,
158 | description="A simple function that performs a calculation",
159 | content="def calculate(x, y):\n return x + y",
160 | confidence=PatternConfidence.HIGH,
161 | tags=["function", "basic"]
162 | )
163 |
164 | async def cleanup(self):
165 | """Clean up knowledge base components."""
166 | if not self.initialized:
167 | return
168 |
169 | try:
170 | if self.vector_store:
171 | await self.vector_store.cleanup()
172 | except Exception as e:
173 | print(f"Error cleaning up knowledge base: {e}")
174 | finally:
175 | self.config.set_state("kb_initialized", False)
176 | self.initialized = False
177 |
178 | async def add_pattern(
179 | self,
180 | name: str,
181 | type: PatternType,
182 | description: str,
183 | content: str,
184 | confidence: PatternConfidence,
185 | tags: Optional[List[str]] = None,
186 | metadata: Optional[Dict[str, str]] = None,
187 | examples: Optional[List[str]] = None,
188 | related_patterns: Optional[List[UUID]] = None
189 | ) -> Pattern:
190 | """Add a new pattern."""
191 | now = datetime.utcnow()
192 | pattern = Pattern(
193 | id=uuid4(),
194 | name=name,
195 | type=type,
196 | description=description,
197 | content=content,
198 | confidence=confidence,
199 | tags=tags,
200 | metadata=metadata,
201 | examples=examples,
202 | related_patterns=related_patterns,
203 | created_at=now,
204 | updated_at=now
205 | )
206 |
207 | # Store pattern vector if vector store is available
208 | if self.vector_store:
209 | # Generate embedding for the pattern
210 | combined_text = f"{pattern.name}\n{pattern.description}\n{pattern.content}"
211 | try:
212 | embedding = await self.vector_store.embedder.embed(combined_text)
213 | await self.vector_store.store_pattern(
214 | id=str(pattern.id),
215 | title=pattern.name,
216 | description=pattern.description,
217 | pattern_type=pattern.type.value,
218 | tags=pattern.tags or [],
219 | embedding=embedding
220 | )
221 | except Exception as e:
222 | print(f"Warning: Failed to store pattern vector: {e}")
223 |
224 | # Save pattern to file
225 | await self._save_pattern(pattern)
226 | return pattern
227 |
228 | async def get_pattern(self, pattern_id: UUID) -> Optional[Pattern]:
229 | """Get pattern by ID."""
230 | pattern_path = self.kb_dir / "patterns" / f"{pattern_id}.json"
231 | if not pattern_path.exists():
232 | return None
233 |
234 | with open(pattern_path) as f:
235 | data = json.load(f)
236 | return Pattern(**data)
237 |
238 | async def update_pattern(
239 | self,
240 | pattern_id: UUID,
241 | description: Optional[str] = None,
242 | content: Optional[str] = None,
243 | confidence: Optional[PatternConfidence] = None,
244 | tags: Optional[List[str]] = None,
245 | metadata: Optional[Dict[str, str]] = None,
246 | examples: Optional[List[str]] = None,
247 | related_patterns: Optional[List[UUID]] = None
248 | ) -> Optional[Pattern]:
249 | """Update pattern details."""
250 | pattern = await self.get_pattern(pattern_id)
251 | if not pattern:
252 | return None
253 |
254 | if description:
255 | pattern.description = description
256 | if content:
257 | pattern.content = content
258 | if confidence:
259 | pattern.confidence = confidence
260 | if tags:
261 | pattern.tags = tags
262 | if metadata:
263 | pattern.metadata = {**(pattern.metadata or {}), **metadata}
264 | if examples:
265 | pattern.examples = examples
266 | if related_patterns:
267 | pattern.related_patterns = related_patterns
268 |
269 | pattern.updated_at = datetime.utcnow()
270 |
271 | # Update vector store if available
272 | if self.vector_store:
273 | # Generate embedding for the updated pattern
274 | combined_text = f"{pattern.name}\n{pattern.description}\n{pattern.content}"
275 | try:
276 | embedding = await self.vector_store.embedder.embed(combined_text)
277 | await self.vector_store.update_pattern(
278 | id=str(pattern.id),
279 | title=pattern.name,
280 | description=pattern.description,
281 | pattern_type=pattern.type.value,
282 | tags=pattern.tags or [],
283 | embedding=embedding
284 | )
285 | except Exception as e:
286 | print(f"Warning: Failed to update pattern vector: {e}")
287 |
288 | await self._save_pattern(pattern)
289 | return pattern
290 |
291 | async def find_similar_patterns(
292 | self,
293 | query: str,
294 | pattern_type: Optional[PatternType] = None,
295 | confidence: Optional[PatternConfidence] = None,
296 | tags: Optional[List[str]] = None,
297 | limit: int = 5
298 | ) -> List[SearchResult]:
299 | """Find similar patterns using vector similarity search."""
300 | if not self.vector_store:
301 | return []
302 |
303 | # Build filter conditions
304 | filter_conditions = {}
305 | if pattern_type:
306 | filter_conditions["type"] = pattern_type
307 | if confidence:
308 | filter_conditions["confidence"] = confidence
309 | if tags:
310 | filter_conditions["tags"] = {"$all": tags}
311 |
312 | # Search vectors with fallback on error
313 | try:
314 | results = await self.vector_store.search(
315 | text=query,
316 | filter_conditions=filter_conditions,
317 | limit=limit
318 | )
319 | except Exception as e:
320 | print(f"Warning: Semantic search failed ({e}), falling back to file-based search")
321 | file_patterns = await self.list_patterns(pattern_type, confidence, tags)
322 | return [
323 | SearchResult(pattern=p, similarity_score=0.0)
324 | for p in file_patterns[:limit]
325 | ]
326 |
327 | # Load full patterns
328 | search_results = []
329 | for result in results:
330 | try:
331 | # Handle different ID formats from Qdrant client
332 | pattern_id = None
333 | if hasattr(result, 'id'):
334 | # Try to convert the ID to UUID, handling different formats
335 | id_str = str(result.id)
336 | # Check if it's a valid UUID format
337 | if '-' in id_str and len(id_str.replace('-', '')) == 32:
338 | pattern_id = UUID(id_str)
339 | else:
340 | # Try to extract a UUID from the ID
341 | # Look for UUID patterns like xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
342 | import re
343 | uuid_match = re.search(r'([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})', id_str, re.IGNORECASE)
344 | if uuid_match:
345 | pattern_id = UUID(uuid_match.group(1))
346 | else:
347 | # Handle tuple results from newer Qdrant client
348 | # Tuple format is typically (id, score, payload)
349 | if isinstance(result, tuple) and len(result) >= 1:
350 | id_str = str(result[0])
351 | # Same UUID validation as above
352 | if '-' in id_str and len(id_str.replace('-', '')) == 32:
353 | pattern_id = UUID(id_str)
354 | else:
355 | import re
356 | uuid_match = re.search(r'([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})', id_str, re.IGNORECASE)
357 | if uuid_match:
358 | pattern_id = UUID(uuid_match.group(1))
359 |
360 | # Skip if we couldn't extract a valid UUID
361 | if pattern_id is None:
362 | print(f"Warning: Could not extract valid UUID from result ID: {result}")
363 | continue
364 |
365 | # Get the pattern using the UUID
366 | pattern = await self.get_pattern(pattern_id)
367 | if pattern:
368 | # Get score from result
369 | score = result.score if hasattr(result, 'score') else (
370 | result[1] if isinstance(result, tuple) and len(result) >= 2 else 0.0
371 | )
372 |
373 | search_results.append(SearchResult(
374 | pattern=pattern,
375 | similarity_score=score
376 | ))
377 | except (ValueError, AttributeError, IndexError, TypeError) as e:
378 | print(f"Warning: Failed to process result {result}: {e}")
379 |
380 | return search_results
381 |
382 | async def list_patterns(
383 | self,
384 | pattern_type: Optional[PatternType] = None,
385 | confidence: Optional[PatternConfidence] = None,
386 | tags: Optional[List[str]] = None
387 | ) -> List[Pattern]:
388 | """List all patterns, optionally filtered."""
389 | patterns = []
390 | for path in (self.kb_dir / "patterns").glob("*.json"):
391 | with open(path) as f:
392 | data = json.load(f)
393 | pattern = Pattern(**data)
394 |
395 | # Apply filters
396 | if pattern_type and pattern.type != pattern_type:
397 | continue
398 | if confidence and pattern.confidence != confidence:
399 | continue
400 | if tags and not all(tag in (pattern.tags or []) for tag in tags):
401 | continue
402 |
403 | patterns.append(pattern)
404 |
405 | return sorted(patterns, key=lambda x: x.created_at)
406 |
407 | async def analyze_code(self, code: str, context: Optional[Dict[str, str]] = None) -> Dict:
408 | """Analyze code for patterns and insights.
409 |
410 | Args:
411 | code: The code to analyze.
412 | context: Optional context about the code, such as language and purpose.
413 | """
414 | # Find similar code patterns
415 | patterns = await self.find_similar_patterns(
416 | query=code,
417 | pattern_type=PatternType.CODE,
418 | limit=5
419 | )
420 |
421 | # Extract insights
422 | insights = []
423 | for result in patterns:
424 | pattern = result.pattern
425 | insights.append({
426 | "pattern_id": str(pattern.id),
427 | "name": pattern.name,
428 | "description": pattern.description,
429 | "confidence": pattern.confidence,
430 | "similarity_score": result.similarity_score
431 | })
432 |
433 | return {
434 |             "patterns": [p.pattern.model_dump() for p in patterns],  # model_dump() for Pydantic v2 consistency
435 | "insights": insights,
436 | "summary": {
437 | "total_patterns": len(patterns),
438 | "total_insights": len(insights),
439 | "context": context or {}
440 | }
441 | }
442 |
443 | async def _save_pattern(self, pattern: Pattern) -> None:
444 | """Save pattern to file."""
445 | pattern_dir = self.kb_dir / "patterns"
446 | pattern_dir.mkdir(parents=True, exist_ok=True)
447 | pattern_path = pattern_dir / f"{pattern.id}.json"
448 | with open(pattern_path, "w") as f:
449 | json.dump(pattern.model_dump(), f, indent=2, default=str)
450 |
451 | async def search_patterns(
452 | self,
453 | tags: Optional[List[str]] = None
454 | ) -> List[Pattern]:
455 | """Search for patterns by tags."""
456 | # Delegate to list_patterns for tag-based filtering
457 | return await self.list_patterns(tags=tags)
458 |
459 | async def add_file_relationship(
460 | self,
461 | source_file: str,
462 | target_file: str,
463 | relationship_type: str,
464 | description: Optional[str] = None,
465 | metadata: Optional[Dict[str, str]] = None
466 | ) -> FileRelationship:
467 | """Add a new file relationship."""
468 | relationship = FileRelationship(
469 | source_file=source_file,
470 | target_file=target_file,
471 | relationship_type=relationship_type,
472 | description=description,
473 | metadata=metadata
474 | )
475 |
476 | key = f"{source_file}:{target_file}"
477 | self.file_relationships[key] = relationship
478 |
479 | # Save to disk
480 | await self._save_relationship(relationship)
481 | return relationship
482 |
483 | async def add_web_source(
484 | self,
485 | url: str,
486 | title: str,
487 | content_type: str,
488 | description: Optional[str] = None,
489 | metadata: Optional[Dict[str, str]] = None,
490 | tags: Optional[List[str]] = None
491 | ) -> WebSource:
492 | """Add a new web source."""
493 | source = WebSource(
494 | url=url,
495 | title=title,
496 | content_type=content_type,
497 | description=description,
498 | metadata=metadata,
499 | tags=tags
500 | )
501 |
502 | self.web_sources[url] = source
503 |
504 | # Save to disk
505 | await self._save_web_source(source)
506 | return source
507 |
508 | async def get_file_relationships(
509 | self,
510 | source_file: Optional[str] = None,
511 | target_file: Optional[str] = None,
512 | relationship_type: Optional[str] = None
513 | ) -> List[FileRelationship]:
514 | """Get file relationships, optionally filtered."""
515 | relationships = list(self.file_relationships.values())
516 |
517 | if source_file:
518 | relationships = [r for r in relationships if r.source_file == source_file]
519 | if target_file:
520 | relationships = [r for r in relationships if r.target_file == target_file]
521 | if relationship_type:
522 | relationships = [r for r in relationships if r.relationship_type == relationship_type]
523 |
524 | return relationships
525 |
526 | async def get_web_sources(
527 | self,
528 | content_type: Optional[str] = None,
529 | tags: Optional[List[str]] = None
530 | ) -> List[WebSource]:
531 | """Get web sources, optionally filtered."""
532 | sources = list(self.web_sources.values())
533 |
534 | if content_type:
535 | sources = [s for s in sources if s.content_type == content_type]
536 | if tags:
537 | sources = [s for s in sources if s.tags and all(tag in s.tags for tag in tags)]
538 |
539 | return sources
540 |
541 |     async def _save_relationship(self, relationship: FileRelationship) -> None:
542 |         """Save file relationship to disk."""
543 |         import hashlib  # stable digest: built-in hash() is salted per process, so names would differ between runs
544 |         relationships_dir = self.kb_dir / "relationships"
545 |         relationships_dir.mkdir(parents=True, exist_ok=True)
546 | 
547 |         key = f"{relationship.source_file}:{relationship.target_file}"
548 |         file_path = relationships_dir / f"{hashlib.sha256(key.encode()).hexdigest()[:16]}.json"
549 | 
550 |         with open(file_path, "w") as f:
551 |             json.dump(relationship.model_dump(), f, indent=2, default=str)
552 | 
553 |     async def _save_web_source(self, source: WebSource) -> None:
554 |         """Save web source to disk."""
555 |         import hashlib  # same stable naming scheme as _save_relationship
556 |         web_sources_dir = self.kb_dir / "web_sources"
557 |         web_sources_dir.mkdir(parents=True, exist_ok=True)
558 | 
559 |         file_path = web_sources_dir / f"{hashlib.sha256(source.url.encode()).hexdigest()[:16]}.json"
560 | 
561 |         with open(file_path, "w") as f:
562 |             json.dump(source.model_dump(), f, indent=2, default=str)
563 | 
564 |     async def delete_pattern(self, pattern_id: UUID) -> None:
565 |         """Delete a pattern by ID from knowledge base and vector store."""
566 |         # Delete from vector store if available
567 |         if self.vector_store:
568 |             try:
569 |                 await self.vector_store.delete_pattern(str(pattern_id))
570 |             except Exception as e:
571 |                 print(f"Warning: Failed to delete pattern vector: {e}")
572 |         # Delete pattern file
573 |         pattern_path = self.kb_dir / "patterns" / f"{pattern_id}.json"
574 |         if pattern_path.exists():
575 |             try:
576 |                 pattern_path.unlink()
577 |             except Exception as e:
578 |                 print(f"Warning: Failed to delete pattern file: {e}")
579 | 
```
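
To see the lifecycle above end to end: construct `KnowledgeBase` with a config exposing `kb_storage_dir` and `set_state`, call `initialize()`, then add and query patterns. A minimal sketch, assuming the package is importable as `mcp_codebase_insight` (e.g. after `pip install -e .`) and using a hypothetical `StubConfig` in place of the real server config:

```python
import asyncio
from pathlib import Path

from mcp_codebase_insight.core.knowledge import (
    KnowledgeBase, PatternConfidence, PatternType,
)

class StubConfig:
    """Hypothetical stand-in for the real server config."""
    def __init__(self, root: Path):
        self.kb_storage_dir = root
        self._state = {}

    def set_state(self, key, value):
        self._state[key] = value

async def main():
    kb = KnowledgeBase(StubConfig(Path("./kb")))
    await kb.initialize()  # creates patterns/, relationships/, web_sources/
    await kb.add_pattern(
        name="Guard Clause",
        type=PatternType.BEST_PRACTICE,
        description="Return early instead of nesting conditionals",
        content="if not valid:\n    return None",
        confidence=PatternConfidence.MEDIUM,
        tags=["control-flow"],
    )
    # Without a vector store, find_similar_patterns() returns [], but the
    # file-backed list_patterns()/search_patterns() paths still work.
    print([p.name for p in await kb.search_patterns(tags=["control-flow"])])
    await kb.cleanup()

asyncio.run(main())
```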
--------------------------------------------------------------------------------
/run_tests.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | """
3 | Test runner script for MCP Codebase Insight.
4 |
5 | This script consolidates all test execution into a single command with various options.
6 | It can run specific test categories or all tests, with or without coverage reporting.
7 | """
8 |
9 | import argparse
10 | import os
11 | import subprocess
12 | import sys
13 | import time
14 | from typing import List, Optional
15 | import uuid
16 | import traceback
17 |
18 |
19 | def parse_args():
20 | """Parse command line arguments."""
21 | parser = argparse.ArgumentParser(description="Run MCP Codebase Insight tests")
22 |
23 | # Test selection options
24 | parser.add_argument("--all", action="store_true", help="Run all tests")
25 | parser.add_argument("--component", action="store_true", help="Run component tests")
26 | parser.add_argument("--integration", action="store_true", help="Run integration tests")
27 | parser.add_argument("--config", action="store_true", help="Run configuration tests")
28 | parser.add_argument("--api", action="store_true", help="Run API endpoint tests")
29 | parser.add_argument("--sse", action="store_true", help="Run SSE endpoint tests")
30 |
31 | # Specific test selection
32 | parser.add_argument("--test", type=str, help="Run a specific test (e.g., test_health_check)")
33 | parser.add_argument("--file", type=str, help="Run tests from a specific file")
34 |
35 | # Coverage options
36 | parser.add_argument("--coverage", action="store_true", help="Generate coverage report")
37 | parser.add_argument("--html", action="store_true", help="Generate HTML coverage report")
38 |
39 | # Additional options
40 | parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
41 | parser.add_argument("--no-capture", action="store_true", help="Don't capture stdout/stderr")
42 | parser.add_argument("--clean", action="store_true", help="Clean .pytest_cache before running tests")
43 | parser.add_argument("--isolated", action="store_true", help="Run with PYTHONPATH isolated to ensure clean environment")
44 | parser.add_argument("--event-loop-debug", action="store_true", help="Add asyncio debug mode")
45 | parser.add_argument("--sequential", action="store_true", help="Run tests sequentially to avoid event loop issues")
46 | parser.add_argument("--fully-isolated", action="store_true",
47 | help="Run each test module in a separate process for complete isolation")
48 |
49 | return parser.parse_args()
50 |
51 |
52 | def build_command(args, module_path=None) -> List[List[str]]:
53 | """Build the pytest command based on arguments."""
54 | cmd = ["python", "-m", "pytest"]
55 |
56 | # Add xdist settings for parallel or sequential execution
57 | if args.sequential:
58 | # Run sequentially to avoid event loop issues
59 | os.environ["PYTEST_XDIST_AUTO_NUM_WORKERS"] = "1"
60 | cmd.append("-xvs")
61 |
62 | # Determine test scope
63 | test_paths = []
64 |
65 | # If a specific module path is provided, use it
66 | if module_path:
67 | test_paths.append(module_path)
68 | elif args.all or (not any([args.component, args.integration, args.config, args.api, args.sse, args.test, args.file])):
69 | # When running all tests and using fully isolated mode, we'll handle this differently in main()
70 | if args.fully_isolated:
71 | return []
72 |
73 | # When running all tests, run integration tests separately from other tests
74 | if args.all and not args.sequential:
75 | # Run integration tests separately to avoid event loop conflicts
76 | integration_cmd = cmd.copy()
77 | integration_cmd.append("tests/integration/")
78 | non_integration_cmd = cmd.copy()
79 | non_integration_cmd.append("tests/")
80 | non_integration_cmd.append("--ignore=tests/integration/")
81 | return [integration_cmd, non_integration_cmd]
82 | else:
83 | test_paths.append("tests/")
84 | else:
85 | if args.integration:
86 | test_paths.append("tests/integration/")
87 | if args.component:
88 | test_paths.append("tests/components/")
89 | cmd.append("--asyncio-mode=strict") # Ensure asyncio strict mode for component tests
90 | if args.config:
91 | test_paths.append("tests/config/")
92 | if args.api:
93 | test_paths.append("tests/integration/test_api_endpoints.py")
94 | if args.sse:
95 | test_paths.append("tests/integration/test_sse.py")
96 | if args.file:
97 | test_paths.append(args.file)
98 | if args.test:
99 | if "/" in args.test or "." in args.test:
100 |             # If it looks like a file path or pytest node ID
101 | test_paths.append(args.test)
102 | else:
103 | # If it's just a test name, try to find it
104 | test_paths.append(f"tests/integration/test_api_endpoints.py::test_{args.test}")
105 |
106 | # Add test paths to command
107 | cmd.extend(test_paths)
108 |
109 | # Add coverage if requested
110 | if args.coverage:
111 | cmd.insert(1, "-m")
112 | cmd.insert(2, "coverage")
113 | cmd.insert(3, "run")
114 |
115 | # Add verbosity
116 | if args.verbose:
117 | cmd.append("-v")
118 |
119 | # Disable output capture if requested
120 | if args.no_capture:
121 | cmd.append("-s")
122 |
123 | # Add asyncio debug mode if requested
124 | if args.event_loop_debug:
125 | cmd.append("--asyncio-mode=strict")
126 | os.environ["PYTHONASYNCIODEBUG"] = "1"
127 | else:
128 | # Always use strict mode to catch issues
129 | cmd.append("--asyncio-mode=strict")
130 |
131 | return [cmd]
132 |
133 |
134 | def clean_test_cache():
135 | """Clean pytest cache directories."""
136 | print("Cleaning pytest cache...")
137 | subprocess.run(["rm", "-rf", ".pytest_cache"], check=False)
138 |
139 | # Also clear __pycache__ directories in tests
140 | for root, dirs, _ in os.walk("tests"):
141 | for d in dirs:
142 | if d == "__pycache__":
143 | cache_dir = os.path.join(root, d)
144 | print(f"Removing {cache_dir}")
145 | subprocess.run(["rm", "-rf", cache_dir], check=False)
146 |
147 |
148 | def setup_isolated_env():
149 | """Set up an isolated environment for tests."""
150 | # Make sure we start with the right Python path
151 | os.environ["PYTHONPATH"] = os.path.abspath(".")
152 |
153 | # Clear any previous test-related environment variables
154 | for key in list(os.environ.keys()):
155 | if key.startswith(("PYTEST_", "MCP_TEST_")):
156 | del os.environ[key]
157 |
158 | # Set standard test variables
159 | os.environ["MCP_TEST_MODE"] = "1"
160 | os.environ["MCP_HOST"] = "localhost"
161 | os.environ["MCP_PORT"] = "8000" # Different from default to avoid conflicts
162 | os.environ["QDRANT_URL"] = "http://localhost:6333"
163 |
164 | # Use unique collection names for tests to avoid interference
165 | test_id = os.urandom(4).hex()
166 | os.environ["MCP_COLLECTION_NAME"] = f"test_collection_{test_id}"
167 |
168 | # Configure asyncio behavior for better isolation
169 | os.environ["ASYNCIO_WATCHDOG_TIMEOUT"] = "30"
170 | os.environ["PYTEST_ASYNC_TEST_TIMEOUT"] = "60"
171 |
172 | # Force module isolation
173 | os.environ["PYTEST_FORCE_ISOLATED_EVENT_LOOP"] = "1"
174 |
175 |
176 | def run_tests(cmds: List[List[str]], env=None) -> int:
177 | """Run the tests with the given commands."""
178 | exit_code = 0
179 |
180 | for cmd in cmds:
181 | print(f"Running: {' '.join(cmd)}")
182 | try:
183 | result = subprocess.run(cmd, env=env)
184 | if result.returncode != 0:
185 | exit_code = result.returncode
186 | except Exception as e:
187 | print(f"Error running command: {e}")
188 | exit_code = 1
189 |
190 | return exit_code
191 |
192 |
193 | def find_test_modules(directory="tests", filter_pattern=None):
194 | """Find all Python test files in the given directory."""
195 | test_modules = []
196 |
197 | # Walk through the directory
198 | for root, _, files in os.walk(directory):
199 | for file in files:
200 | if file.startswith("test_") and file.endswith(".py"):
201 | module_path = os.path.join(root, file)
202 |
203 | # Apply filter if provided
204 | if filter_pattern and filter_pattern not in module_path:
205 | continue
206 |
207 | test_modules.append(module_path)
208 |
209 | return test_modules
210 |
211 |
212 | def run_isolated_modules(args) -> int:
213 | """Run each test module in its own process for complete isolation."""
214 | # Determine which test modules to run
215 | test_modules = []
216 |
217 | if args.component:
218 | # For component tests, always run them individually
219 | test_modules = find_test_modules("tests/components")
220 | elif args.all:
221 | # When running all tests, get everything
222 | test_modules = find_test_modules()
223 | else:
224 | # Otherwise, run as specified
225 | if args.integration:
226 | integration_modules = find_test_modules("tests/integration")
227 | test_modules.extend(integration_modules)
228 | if args.config:
229 | config_modules = find_test_modules("tests/config")
230 | test_modules.extend(config_modules)
231 |
232 | # Sort modules to run in a specific order: regular tests first,
233 | # then component tests, and integration tests last
234 | def module_sort_key(module_path):
235 | if "integration" in module_path:
236 | return 3 # Run integration tests last
237 | elif "components" in module_path:
238 | return 2 # Run component tests in the middle
239 | else:
240 | return 1 # Run other tests first
241 |
242 | test_modules.sort(key=module_sort_key)
243 |
244 | # If specific test file was specified, only run that one
245 | if args.file:
246 | if os.path.exists(args.file):
247 | test_modules = [args.file]
248 | else:
249 | # Try to find the file in the tests directory
250 | matching_modules = [m for m in test_modules if args.file in m]
251 | if matching_modules:
252 | test_modules = matching_modules
253 | else:
254 | print(f"Error: Test file {args.file} not found")
255 | return 1
256 |
257 | final_exit_code = 0
258 |
259 | # Run each module in a separate process
260 | for module in test_modules:
261 | print(f"\n=== Running isolated test module: {module} ===\n")
262 |
263 | # Check if this is a component test
264 | is_component_test = "components" in module
265 | is_vector_store_test = "test_vector_store.py" in module
266 | is_knowledge_base_test = "test_knowledge_base.py" in module
267 | is_task_manager_test = "test_task_manager.py" in module
268 |
269 | # Prepare environment for this test module
270 | env = os.environ.copy()
271 |
272 | # Basic environment setup for all tests
273 | env["PYTEST_FORCE_ISOLATED_EVENT_LOOP"] = "1"
274 | env["MCP_TEST_MODE"] = "1"
275 |
276 | # Add special handling for component tests
277 | if is_component_test:
278 | # Ensure component tests run with asyncio strict mode
279 | env["PYTEST_ASYNCIO_MODE"] = "strict"
280 |
281 | # Component tests need test database config
282 | if "MCP_COLLECTION_NAME" not in env:
283 | env["MCP_COLLECTION_NAME"] = f"test_collection_{uuid.uuid4().hex[:8]}"
284 |
285 | # Vector store and knowledge base tests need additional time for setup
286 | if is_vector_store_test or is_knowledge_base_test or is_task_manager_test:
287 | env["PYTEST_TIMEOUT"] = "60" # Allow more time for these tests
288 |
289 | # For component tests, use our specialized component test runner
290 | if is_component_test and args.fully_isolated:
291 | print(f"Using specialized component test runner for {module}")
292 | # Extract test names from the module using a simple pattern match
293 | component_test_results = []
294 | try:
295 | # Use grep to find test functions in the file - more reliable
296 | # than pytest --collect-only in this case
297 | grep_cmd = ["grep", "-E", "^def test_", module]
298 | result = subprocess.run(grep_cmd, capture_output=True, text=True)
299 | collected_test_names = []
300 |
301 | if result.returncode == 0:
302 | for line in result.stdout.splitlines():
303 | # Extract the test name from "def test_name(...)"
304 | if line.startswith("def test_"):
305 | test_name = line.split("def ")[1].split("(")[0].strip()
306 | collected_test_names.append(test_name)
307 | print(f"Found {len(collected_test_names)} tests in {module}")
308 | else:
309 |                 # Fall back to reading the file directly
310 | with open(module, 'r') as f:
311 | content = f.read()
312 | # Use a simple regex to find all test functions
313 | import re
314 | matches = re.findall(r'def\s+(test_\w+)\s*\(', content)
315 | collected_test_names = matches
316 | print(f"Found {len(collected_test_names)} tests in {module} (using file read)")
317 | except Exception as e:
318 | print(f"Error extracting tests from {module}: {e}")
319 | # Just skip this module and continue with others
320 | continue
321 |
322 | # Run each test separately using our component test runner
323 | if collected_test_names:
324 | for test_name in collected_test_names:
325 | print(f"Running test: {module}::{test_name}")
326 |
327 | # Use our specialized component test runner
328 | runner_cmd = [
329 | "python",
330 | "component_test_runner.py",
331 | module,
332 | test_name
333 | ]
334 |
335 | print(f"Running: {' '.join(runner_cmd)}")
336 | test_result = subprocess.run(runner_cmd, env=env)
337 | component_test_results.append((test_name, test_result.returncode))
338 |
339 | # If we have a failure, record it but continue running other tests
340 | if test_result.returncode != 0:
341 | final_exit_code = test_result.returncode
342 |
343 | # Short pause between tests to let resources clean up
344 | time.sleep(1.0)
345 |
346 | # Print summary of test results for this module
347 | print(f"\n=== Test Results for {module} ===")
348 | passed = sum(1 for _, code in component_test_results if code == 0)
349 | failed = sum(1 for _, code in component_test_results if code != 0)
350 | print(f"Passed: {passed}, Failed: {failed}, Total: {len(component_test_results)}")
351 | for name, code in component_test_results:
352 | status = "PASSED" if code == 0 else "FAILED"
353 | print(f"{name}: {status}")
354 | print("=" * 40)
355 | else:
356 | print(f"No tests found in {module}, skipping")
357 | else:
358 | # For other tests, use our standard command builder
359 | cmd_args = argparse.Namespace(**vars(args))
360 | cmds = build_command(cmd_args, module)
361 |
362 | # Run this module's tests with the prepared environment
363 | module_result = run_tests(cmds, env)
364 |
365 | # If we have a failure, record it but continue running other modules
366 | if module_result != 0:
367 | final_exit_code = module_result
368 |
369 | # Short pause between modules to let event loops clean up
370 | # Increase delay for component tests with complex cleanup needs
371 | if is_component_test:
372 | time.sleep(1.5) # Longer pause for component tests
373 | else:
374 | time.sleep(0.5)
375 |
376 | return final_exit_code
377 |
378 |
379 | def run_component_tests_fully_isolated(test_file=None):
380 | """Run component tests with each test completely isolated using specialized runner."""
381 | print("\n=== Running component tests in fully isolated mode ===\n")
382 |
383 | # Find component test files
384 | if test_file:
385 | test_files = [test_file]
386 | else:
387 | test_files = find_test_modules("tests/components")
388 |
389 | overall_results = {}
390 |
391 | for test_file in test_files:
392 | print(f"\n=== Running isolated test module: {test_file} ===\n")
393 | print(f"Using specialized component test runner for {test_file}")
394 |
395 | try:
396 | # Use the component_test_runner's discovery mechanism
397 | from component_test_runner import get_module_tests
398 | tests = get_module_tests(test_file)
399 |             print(f"Found {len(tests)} tests in {test_file} (via component_test_runner discovery)")
400 |
401 | # Skip if no tests found
402 | if not tests:
403 | print(f"No tests found in {test_file}")
404 | continue
405 |
406 | # Track results
407 | passed_tests = []
408 | failed_tests = []
409 |
410 | for test_name in tests:
411 | print(f"Running test: {test_file}::{test_name}")
412 | cmd = f"python component_test_runner.py {test_file} {test_name}"
413 | print(f"Running: {cmd}")
414 |
415 | result = subprocess.run(cmd, shell=True)
416 |
417 | if result.returncode == 0:
418 | passed_tests.append(test_name)
419 | else:
420 | failed_tests.append(test_name)
421 |
422 | # Report results for this file
423 | print(f"\n=== Test Results for {test_file} ===")
424 | print(f"Passed: {len(passed_tests)}, Failed: {len(failed_tests)}, Total: {len(tests)}")
425 |
426 | for test in tests:
427 | status = "PASSED" if test in passed_tests else "FAILED"
428 | print(f"{test}: {status}")
429 |
430 | print("========================================")
431 |
432 | # Store results
433 | overall_results[test_file] = {
434 | "passed": len(passed_tests),
435 | "failed": len(failed_tests),
436 | "total": len(tests)
437 | }
438 | except Exception as e:
439 | print(f"Error running tests for {test_file}: {e}")
440 | traceback.print_exc()
441 | overall_results[test_file] = {
442 | "passed": 0,
443 | "failed": 1,
444 | "total": 1,
445 | "error": str(e)
446 | }
447 |
448 | # Determine if any tests failed
449 | any_failures = any(result.get("failed", 0) > 0 for result in overall_results.values())
450 | return 1 if any_failures else 0
451 |
452 |
453 | def generate_coverage_report(html: bool = False) -> Optional[int]:
454 | """Generate coverage report."""
455 | if html:
456 | cmd = ["python", "-m", "coverage", "html"]
457 | print("Generating HTML coverage report...")
458 | result = subprocess.run(cmd)
459 | if result.returncode == 0:
460 | print(f"HTML coverage report generated in {os.path.abspath('htmlcov')}")
461 | return result.returncode
462 | else:
463 | cmd = ["python", "-m", "coverage", "report", "--show-missing"]
464 | print("Generating coverage report...")
465 | return subprocess.run(cmd).returncode
466 |
467 |
468 | def run_all_tests(args):
469 | """Run all tests."""
470 | cmds = build_command(args)
471 |     # Note: run_tests() prints each command as it runs; cmds may be empty in fully-isolated mode
472 | exit_code = 0
473 |
474 | # For regular test runs or when not in fully isolated mode,
475 | # first attempt to run everything as a single command
476 | if args.sequential:
477 | # Run all tests sequentially
478 | exit_code = run_tests(cmds)
479 | else:
480 | try:
481 | # First, try to run all tests as one command
482 | exit_code = run_tests(cmds, os.environ.copy())
483 | except Exception as e:
484 | print(f"Error running tests: {e}")
485 | exit_code = 1
486 |
487 |     # If tests failed or full isolation was requested, run each module individually
488 | if exit_code != 0 or args.fully_isolated:
489 | print("\nRunning tests with full module isolation...")
490 | exit_code = run_isolated_modules(args)
491 |
492 | return exit_code
493 |
494 |
495 | def main():
496 | """Main entry point."""
497 | args = parse_args()
498 |
499 | # Clean test cache if requested
500 | if args.clean:
501 | clean_test_cache()
502 |
503 | # Setup isolated environment if requested
504 | if args.isolated or args.fully_isolated:
505 | setup_isolated_env()
506 |
507 | # Set up environment variables
508 | if args.component:
509 | os.environ["MCP_TEST_MODE"] = "1"
510 | # Generate a unique collection name for isolated tests
511 | if args.isolated or args.fully_isolated:
512 | # Use a unique collection for each test run to ensure isolation
513 | unique_id = uuid.uuid4().hex[:8]
514 | os.environ["MCP_COLLECTION_NAME"] = f"test_collection_{unique_id}"
515 |
516 | # We need to set this for all async tests to ensure proper event loop handling
517 | if args.component or args.integration:
518 | os.environ["PYTEST_FORCE_ISOLATED_EVENT_LOOP"] = "1"
519 |
520 | # Print environment info
521 | if args.verbose:
522 | print("\nTest environment:")
523 | print(f"Python: {sys.executable}")
524 | if args.isolated or args.fully_isolated:
525 | print(f"PYTHONPATH: {os.environ.get('PYTHONPATH', 'Not set')}")
526 | print(f"Collection name: {os.environ.get('MCP_COLLECTION_NAME', 'Not set')}")
527 |         print("Asyncio mode: strict")
528 |
529 | # We have special handling for component tests in fully-isolated mode
530 | if args.component and args.fully_isolated:
531 | # Skip general pytest run and go straight to component test runner
532 | exit_code = run_component_tests_fully_isolated(args.file)
533 | sys.exit(exit_code)
534 |
535 | # Regular test flow - first try to run all together
536 | exit_code = run_all_tests(args)
537 |
538 | # If not in isolated mode, we're done
539 | if not args.isolated and not args.component:
540 | # Generate coverage report if needed
541 | if args.coverage:
542 | generate_coverage_report(args.html)
543 | sys.exit(exit_code)
544 |
545 | # If tests failed and we're in isolated mode, run each file separately
546 | if exit_code != 0 and (args.isolated or args.component):
547 | isolated_exit_code = run_isolated_modules(args)
548 |
549 | # Generate coverage report if needed
550 | if args.coverage:
551 | generate_coverage_report(args.html)
552 |
553 | sys.exit(isolated_exit_code)
554 |
555 | # Generate coverage report if needed
556 | if args.coverage:
557 | generate_coverage_report(args.html)
558 |
559 | sys.exit(exit_code)
560 |
561 |
562 | if __name__ == "__main__":
563 | main()
```
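
A quick sanity check of how `build_command` assembles the coverage invocation: the three `insert` calls push `coverage run` in front of the original `-m pytest`, yielding `python -m coverage run -m pytest ...`. A sketch using a hand-built `Namespace` whose fields mirror the argparse defaults, assuming it is run from the repository root so `run_tests` is importable:

```python
from argparse import Namespace

from run_tests import build_command

args = Namespace(
    all=False, component=False, integration=True, config=False, api=False,
    sse=False, test=None, file=None, coverage=True, html=False, verbose=True,
    no_capture=False, clean=False, isolated=False, event_loop_debug=False,
    sequential=False, fully_isolated=False,
)
(cmd,) = build_command(args)
print(" ".join(cmd))
# -> python -m coverage run -m pytest tests/integration/ -v --asyncio-mode=strict
```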
--------------------------------------------------------------------------------
/tests/components/test_sse_components.py:
--------------------------------------------------------------------------------
```python
1 | """Unit tests for SSE core components."""
2 |
3 | import sys
4 | import os
5 |
6 | # Ensure the src directory is in the Python path
7 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
8 |
9 | import asyncio
10 | import pytest
11 | import logging
12 | from unittest.mock import AsyncMock, MagicMock, patch
13 | from typing import Dict, Any, List, AsyncGenerator
14 |
15 | from src.mcp_codebase_insight.core.sse import create_sse_server, MCP_CodebaseInsightServer
16 | from mcp.server.fastmcp import FastMCP
17 | from mcp.server.sse import SseServerTransport
18 |
19 | # Set up logging for tests
20 | logger = logging.getLogger(__name__)
21 |
22 | # Mark all tests as asyncio tests
23 | pytestmark = pytest.mark.asyncio
24 |
25 |
26 | class MockState:
27 | """Mock server state for testing."""
28 |
29 | def __init__(self):
30 | self.components = {}
31 |
32 | def get_component(self, name):
33 | """Get a component by name."""
34 | return self.components.get(name)
35 |
36 | def get_component_status(self):
37 | """Get status of all components."""
38 | return {name: {"available": True} for name in self.components}
39 |
40 | def set_component(self, name, component):
41 | """Set a component."""
42 | self.components[name] = component
43 |
44 |
45 | class MockVectorStore:
46 | """Mock vector store component for testing."""
47 |
48 | async def search(self, text, filter_conditions=None, limit=5):
49 | """Mock search method."""
50 | return [
51 | MagicMock(
52 | id="test-id-1",
53 | score=0.95,
54 | metadata={
55 | "text": "def example_function():\n return 'example'",
56 | "file_path": "/path/to/file.py",
57 | "line_range": "10-15",
58 | "type": "code",
59 | "language": "python",
60 | "timestamp": "2025-03-26T10:00:00"
61 | }
62 | )
63 | ]
64 |
65 |
66 | class MockKnowledgeBase:
67 | """Mock knowledge base component for testing."""
68 |
69 | async def search_patterns(self, query, pattern_type=None, limit=5):
70 | """Mock search_patterns method."""
71 | return [
72 | MagicMock(
73 | id="pattern-id-1",
74 | pattern="Example pattern",
75 | description="Description of example pattern",
76 | type=pattern_type or "code",
77 | confidence=0.9,
78 | metadata={"source": "test"}
79 | )
80 | ]
81 |
82 |
83 | class MockADRManager:
84 | """Mock ADR manager component for testing."""
85 |
86 | async def list_adrs(self):
87 | """Mock list_adrs method."""
88 | return [
89 | MagicMock(
90 | id="adr-id-1",
91 | title="Example ADR",
92 | status="accepted",
93 | created_at=None,
94 | updated_at=None
95 | )
96 | ]
97 |
98 |
99 | class MockTaskManager:
100 | """Mock task manager component for testing."""
101 |
102 | async def get_task(self, task_id):
103 | """Mock get_task method."""
104 | if task_id == "invalid-id":
105 | return None
106 |
107 | return MagicMock(
108 | id=task_id,
109 | type="analysis",
110 | status="running",
111 | progress=0.5,
112 | result=None,
113 | error=None,
114 | created_at=None,
115 | updated_at=None
116 | )
117 |
118 |
119 | @pytest.fixture
120 | def mock_server_state():
121 | """Create a mock server state for testing."""
122 | state = MockState()
123 |
124 | # Add mock components
125 | state.set_component("vector_store", MockVectorStore())
126 | state.set_component("knowledge_base", MockKnowledgeBase())
127 | state.set_component("adr_manager", MockADRManager())
128 | state.set_component("task_tracker", MockTaskManager()) # Updated component name to match sse.py
129 |
130 | return state
131 |
132 |
133 | @pytest.fixture
134 | def mcp_server(mock_server_state):
135 | """Create an MCP server instance for testing."""
136 | return MCP_CodebaseInsightServer(mock_server_state)
137 |
138 |
139 | async def test_mcp_server_initialization(mcp_server):
140 | """Test MCP server initialization."""
141 | # Verify the server was initialized correctly
142 | assert mcp_server.state is not None
143 | assert mcp_server.mcp_server is not None
144 | assert mcp_server.mcp_server.name == "MCP-Codebase-Insight"
145 | assert mcp_server.tools_registered is False
146 |
147 |
148 | async def test_register_tools(mcp_server):
149 | """Test registering tools with the MCP server."""
150 | # Register tools
151 | mcp_server.register_tools()
152 |
153 | # Verify tools were registered
154 | assert mcp_server.tools_registered is True
155 |
156 | # In MCP v1.5.0, we can't directly access tool_defs
157 | # Instead we'll just verify registration was successful
158 | # The individual tool tests will verify specific functionality
159 |
160 |
161 | async def test_get_starlette_app(mcp_server):
162 | """Test getting the Starlette app for the MCP server."""
163 | # Reset the cached app to force a new creation
164 | mcp_server._starlette_app = None
165 |
166 | # Mock the create_sse_server function directly in the module
167 | with patch('src.mcp_codebase_insight.core.sse.create_sse_server') as mock_create_sse:
168 | # Set up the mock
169 | mock_app = MagicMock()
170 | mock_create_sse.return_value = mock_app
171 |
172 | # Get the Starlette app
173 | app = mcp_server.get_starlette_app()
174 |
175 | # Verify tools were registered
176 | assert mcp_server.tools_registered is True
177 |
178 | # Verify create_sse_server was called with the MCP server
179 | mock_create_sse.assert_called_once_with(mcp_server.mcp_server)
180 |
181 | # Verify the app was returned
182 | assert app == mock_app
183 |
184 |
185 | async def test_create_sse_server():
186 | """Test creating the SSE server."""
187 | # Use context managers for patching to ensure proper cleanup
188 | with patch('src.mcp_codebase_insight.core.sse.CodebaseInsightSseTransport') as mock_transport, \
189 | patch('src.mcp_codebase_insight.core.sse.Starlette') as mock_starlette:
190 | # Set up mocks
191 | mock_mcp = MagicMock(spec=FastMCP)
192 | mock_transport_instance = MagicMock()
193 | mock_transport.return_value = mock_transport_instance
194 | mock_app = MagicMock()
195 | mock_starlette.return_value = mock_app
196 |
197 | # Create the SSE server
198 | app = create_sse_server(mock_mcp)
199 |
200 | # Verify CodebaseInsightSseTransport was initialized correctly
201 | mock_transport.assert_called_once_with("/sse")
202 |
203 | # Verify Starlette was initialized with routes
204 | mock_starlette.assert_called_once()
205 |
206 | # Verify the app was returned
207 | assert app == mock_app
208 |
209 |
210 | async def test_vector_search_tool(mcp_server):
211 | """Test the vector search tool."""
212 | # Make sure tools are registered
213 | if not mcp_server.tools_registered:
214 | mcp_server.register_tools()
215 |
216 | # Mock the FastMCP add_tool method to capture calls
217 | with patch.object(mcp_server.mcp_server, 'add_tool') as mock_add_tool:
218 | # Re-register the vector search tool
219 | mcp_server._register_vector_search()
220 |
221 | # Verify tool was registered with correct parameters
222 | mock_add_tool.assert_called_once()
223 |
224 | # Get the arguments from the call
225 | # The structure might be different depending on how add_tool is implemented
226 | call_args = mock_add_tool.call_args
227 |
228 | # Check if we have positional args
229 | if call_args[0]:
230 | # First positional arg should be the tool name
231 | tool_name = call_args[0][0]
232 | assert tool_name in ("vector-search", "search-vector", "vector_search") # Accept possible variants
233 |
234 | # If there's a second positional arg, it might be a function or a dict with tool details
235 | if len(call_args[0]) > 1:
236 | second_arg = call_args[0][1]
237 | if callable(second_arg):
238 | # If it's a function, that's our handler
239 | assert callable(second_arg)
240 | elif isinstance(second_arg, dict):
241 | # If it's a dict, it should have a description and handler
242 | assert "description" in second_arg
243 | if "handler" in second_arg:
244 | assert callable(second_arg["handler"])
245 | elif "fn" in second_arg:
246 | assert callable(second_arg["fn"])
247 |
248 | # Check keyword args
249 | if call_args[1]:
250 | kwargs = call_args[1]
251 | if "description" in kwargs:
252 | assert isinstance(kwargs["description"], str)
253 | if "handler" in kwargs:
254 | assert callable(kwargs["handler"])
255 | if "fn" in kwargs:
256 | assert callable(kwargs["fn"])
257 |
258 |
259 | async def test_knowledge_search_tool(mcp_server):
260 | """Test the knowledge search tool."""
261 | # Make sure tools are registered
262 | if not mcp_server.tools_registered:
263 | mcp_server.register_tools()
264 |
265 | # Mock the FastMCP add_tool method to capture calls
266 | with patch.object(mcp_server.mcp_server, 'add_tool') as mock_add_tool:
267 | # Re-register the knowledge search tool
268 | mcp_server._register_knowledge()
269 |
270 | # Verify tool was registered with correct parameters
271 | mock_add_tool.assert_called_once()
272 |
273 | # Get the arguments from the call
274 | call_args = mock_add_tool.call_args
275 |
276 | # Check if we have positional args
277 | if call_args[0]:
278 | # First positional arg should be the tool name
279 | tool_name = call_args[0][0]
280 | assert tool_name in ("knowledge-search", "search-knowledge") # Accept possible variants
281 |
282 | # If there's a second positional arg, it might be a function or a dict with tool details
283 | if len(call_args[0]) > 1:
284 | second_arg = call_args[0][1]
285 | if callable(second_arg):
286 | # If it's a function, that's our handler
287 | assert callable(second_arg)
288 | elif isinstance(second_arg, dict):
289 | # If it's a dict, it should have a description and handler
290 | assert "description" in second_arg
291 | if "handler" in second_arg:
292 | assert callable(second_arg["handler"])
293 | elif "fn" in second_arg:
294 | assert callable(second_arg["fn"])
295 |
296 | # Check keyword args
297 | if call_args[1]:
298 | kwargs = call_args[1]
299 | if "description" in kwargs:
300 | assert isinstance(kwargs["description"], str)
301 | if "handler" in kwargs:
302 | assert callable(kwargs["handler"])
303 | if "fn" in kwargs:
304 | assert callable(kwargs["fn"])
305 |
306 |
307 | async def test_adr_list_tool(mcp_server):
308 | """Test the ADR list tool."""
309 | # Make sure tools are registered
310 | if not mcp_server.tools_registered:
311 | mcp_server.register_tools()
312 |
313 | # Mock the FastMCP add_tool method to capture calls
314 | with patch.object(mcp_server.mcp_server, 'add_tool') as mock_add_tool:
315 | # Re-register the ADR list tool
316 | mcp_server._register_adr()
317 |
318 | # Verify tool was registered with correct parameters
319 | mock_add_tool.assert_called_once()
320 |
321 | # Get the arguments from the call
322 | call_args = mock_add_tool.call_args
323 |
324 | # Check if we have positional args
325 | if call_args[0]:
326 | # First positional arg should be the tool name
327 | tool_name = call_args[0][0]
328 | assert tool_name in ("list-adrs", "adr-list") # Accept possible variants
329 |
330 | # If there's a second positional arg, it might be a function or a dict with tool details
331 | if len(call_args[0]) > 1:
332 | second_arg = call_args[0][1]
333 | if callable(second_arg):
334 | # If it's a function, that's our handler
335 | assert callable(second_arg)
336 | elif isinstance(second_arg, dict):
337 | # If it's a dict, it should have a description and handler
338 | assert "description" in second_arg
339 | if "handler" in second_arg:
340 | assert callable(second_arg["handler"])
341 | elif "fn" in second_arg:
342 | assert callable(second_arg["fn"])
343 |
344 | # Check keyword args
345 | if call_args[1]:
346 | kwargs = call_args[1]
347 | if "description" in kwargs:
348 | assert isinstance(kwargs["description"], str)
349 | if "handler" in kwargs:
350 | assert callable(kwargs["handler"])
351 | if "fn" in kwargs:
352 | assert callable(kwargs["fn"])
353 |
354 |
355 | async def test_task_status_tool(mcp_server):
356 | """Test the task status tool."""
357 | # Make sure tools are registered
358 | if not mcp_server.tools_registered:
359 | mcp_server.register_tools()
360 |
361 | # Mock the FastMCP add_tool method to capture calls
362 | with patch.object(mcp_server.mcp_server, 'add_tool') as mock_add_tool:
363 | # Re-register the task status tool
364 | mcp_server._register_task()
365 |
366 | # Verify tool was registered with correct parameters
367 | mock_add_tool.assert_called_once()
368 |
369 | # Get the arguments from the call
370 | call_args = mock_add_tool.call_args
371 |
372 | # Check if we have positional args
373 | if call_args[0]:
374 | # First positional arg should be the tool name
375 | tool_name = call_args[0][0]
376 | assert tool_name in ("task-status", "get-task-status") # Accept possible variants
377 |
378 | # If there's a second positional arg, it might be a function or a dict with tool details
379 | if len(call_args[0]) > 1:
380 | second_arg = call_args[0][1]
381 | if callable(second_arg):
382 | # If it's a function, that's our handler
383 | assert callable(second_arg)
384 | elif isinstance(second_arg, dict):
385 | # If it's a dict, it should have a description and handler
386 | assert "description" in second_arg
387 | if "handler" in second_arg:
388 | assert callable(second_arg["handler"])
389 | elif "fn" in second_arg:
390 | assert callable(second_arg["fn"])
391 |
392 | # Check keyword args
393 | if call_args[1]:
394 | kwargs = call_args[1]
395 | if "description" in kwargs:
396 | assert isinstance(kwargs["description"], str)
397 | if "handler" in kwargs:
398 | assert callable(kwargs["handler"])
399 | if "fn" in kwargs:
400 | assert callable(kwargs["fn"])
401 |
402 |
403 | async def test_sse_handle_connect():
404 | """Test the SSE connection handling functionality."""
405 | # Use context managers for patching to ensure proper cleanup
406 | with patch('src.mcp_codebase_insight.core.sse.CodebaseInsightSseTransport') as mock_transport, \
407 | patch('src.mcp_codebase_insight.core.sse.Starlette') as mock_starlette:
408 | # Set up mocks
409 | mock_transport_instance = MagicMock()
410 | mock_transport.return_value = mock_transport_instance
411 |
412 | mock_mcp = MagicMock(spec=FastMCP)
413 | # For MCP v1.5.0, create a mock run method instead of initialization options
414 | mock_mcp.run = AsyncMock()
415 |
416 | mock_request = MagicMock()
417 | mock_request.client = "127.0.0.1"
418 | mock_request.scope = {"type": "http"}
419 | mock_request.receive = AsyncMock()
420 | mock_request._send = AsyncMock()
421 |
422 | # Mock the transport's handle_sse method
423 | mock_transport_instance.handle_sse = AsyncMock()
424 |
425 | # Create a mock handler and add it to our mock app instance
426 | handle_sse = AsyncMock()
427 | mock_app = MagicMock()
428 | mock_starlette.return_value = mock_app
429 |
430 | # Set up a mock route that we can access
431 | mock_route = MagicMock()
432 | mock_route.path = "/sse"
433 | mock_route.endpoint = handle_sse
434 | mock_app.routes = [mock_route]
435 |
436 | # Create the SSE server
437 | app = create_sse_server(mock_mcp)
438 |
439 | # Since we can't rely on call_args, we'll directly test the mock_transport_instance
440 |         # Verify handle_sse was not invoked during server construction
441 | mock_transport_instance.handle_sse.assert_not_called()
442 |
443 | # Call the mock transport's handle_sse method directly
444 | await mock_transport_instance.handle_sse(mock_request)
445 |
446 | # Verify handle_sse was called with the request
447 | mock_transport_instance.handle_sse.assert_called_once_with(mock_request)
448 |
449 |
450 | async def test_sse_backpressure_handling(mcp_server):
451 | """Test SSE backpressure handling mechanism."""
452 | # Set up a mock transport with a slow client
453 | mock_transport = MagicMock()
454 | mock_transport.send = AsyncMock()
455 |
456 | # Simulate backpressure by making send delay
457 | async def delayed_send(*args, **kwargs):
458 | await asyncio.sleep(0.1) # Simulate slow client
459 | return True
460 |
461 | mock_transport.send.side_effect = delayed_send
462 |
463 | # Create a test event generator that produces events faster than they can be sent
464 | events = []
465 | start_time = asyncio.get_event_loop().time()
466 |
467 | async def fast_event_generator():
468 | for i in range(10):
469 | yield f"event_{i}"
470 | await asyncio.sleep(0.01) # Generate events faster than they can be sent
471 |
472 | # Process events and measure time
473 | async for event in fast_event_generator():
474 | await mock_transport.send(event)
475 | events.append(event)
476 |
477 | end_time = asyncio.get_event_loop().time()
478 | total_time = end_time - start_time
479 |
480 | # Verify backpressure mechanism is working
481 | # Total time should be at least the sum of all delays (10 events * 0.1s per event)
482 |     assert total_time >= 1.0  # 10 sends x 0.1s each
483 | assert len(events) == 10 # All events should be processed
484 | assert events == [f"event_{i}" for i in range(10)] # Events should be in order
485 |
486 |
487 | async def test_sse_connection_management(mcp_server):
488 | """Test SSE connection lifecycle management."""
489 | # Set up connection tracking
490 | active_connections = set()
491 |
492 | # Mock connection handler
493 | async def handle_connection(client_id):
494 | # Add connection to tracking
495 | active_connections.add(client_id)
496 | try:
497 | # Simulate connection lifetime
498 | await asyncio.sleep(0.1)
499 | finally:
500 | # Ensure connection is removed on disconnect
501 | active_connections.remove(client_id)
502 |
503 | # Test multiple concurrent connections
504 | async def simulate_connections():
505 | tasks = []
506 | for i in range(3):
507 | client_id = f"client_{i}"
508 | task = asyncio.create_task(handle_connection(client_id))
509 | tasks.append(task)
510 |
511 | # Verify all connections are active
512 | await asyncio.sleep(0.05)
513 | assert len(active_connections) == 3
514 |
515 | # Wait for all connections to complete
516 | await asyncio.gather(*tasks)
517 |
518 | # Verify all connections were properly cleaned up
519 | assert len(active_connections) == 0
520 |
521 | await simulate_connections()
522 |
523 |
524 | async def test_sse_keep_alive(mcp_server):
525 | """Test SSE keep-alive mechanism."""
526 | mock_transport = MagicMock()
527 | mock_transport.send = AsyncMock()
528 |
529 | # Set up keep-alive configuration
530 | keep_alive_interval = 0.1 # 100ms for testing
531 | last_keep_alive = 0
532 |
533 | # Simulate connection with keep-alive
534 | async def run_keep_alive():
535 | nonlocal last_keep_alive
536 | start_time = asyncio.get_event_loop().time()
537 |
538 | # Run for a short period
539 | while asyncio.get_event_loop().time() - start_time < 0.5:
540 | current_time = asyncio.get_event_loop().time()
541 |
542 | # Send keep-alive if interval has elapsed
543 | if current_time - last_keep_alive >= keep_alive_interval:
544 | await mock_transport.send(": keep-alive\n")
545 | last_keep_alive = current_time
546 |
547 | await asyncio.sleep(0.01)
548 |
549 | await run_keep_alive()
550 |
551 | # Verify keep-alive messages were sent
552 | expected_messages = int(0.5 / keep_alive_interval) # Expected number of keep-alive messages
553 | # Allow for slight timing variations in test environments - CI systems and different machines
554 | # may have different scheduling characteristics that affect precise timing
555 | assert mock_transport.send.call_count >= expected_messages - 1 # Allow for timing variations
556 | assert mock_transport.send.call_count <= expected_messages + 1
557 |
558 |
559 | async def test_sse_error_handling(mcp_server):
560 | """Test SSE error handling and recovery."""
561 | mock_transport = MagicMock()
562 | mock_transport.send = AsyncMock()
563 |
564 | # Simulate various error conditions
565 | async def simulate_errors():
566 | # Test network error
567 | mock_transport.send.side_effect = ConnectionError("Network error")
568 | with pytest.raises(ConnectionError):
569 | await mock_transport.send("test_event")
570 |
571 | # Test client disconnect
572 | mock_transport.send.side_effect = asyncio.CancelledError()
573 | with pytest.raises(asyncio.CancelledError):
574 | await mock_transport.send("test_event")
575 |
576 | # Test recovery after error
577 | mock_transport.send.side_effect = None
578 | await mock_transport.send("recovery_event")
579 | mock_transport.send.assert_called_with("recovery_event")
580 |
581 | await simulate_errors()
582 |
583 |
584 | async def test_sse_reconnection_handling():
585 | """Test handling of client reconnection scenarios."""
586 | mock_transport = MagicMock()
587 | mock_transport.send = AsyncMock()
588 | connection_id = "test-client-1"
589 | connection_states = []
590 | connection_states.append("connected")
591 | mock_transport.send.side_effect = ConnectionError("Client disconnected")
592 | try:
593 | await mock_transport.send("event")
594 | except ConnectionError:
595 | connection_states.append("disconnected")
596 | mock_transport.send.side_effect = None
597 | mock_transport.send.reset_mock()
598 | connection_states.append("reconnected")
599 | await mock_transport.send("event_after_reconnect")
600 | assert connection_states == ["connected", "disconnected", "reconnected"]
601 | mock_transport.send.assert_called_once_with("event_after_reconnect")
602 |
603 |
604 | async def test_sse_concurrent_message_processing():
605 | """Test handling of concurrent message processing in SSE."""
606 | processed_messages = []
607 | processing_lock = asyncio.Lock()
608 | async def process_message(message, delay):
609 | await asyncio.sleep(delay)
610 | async with processing_lock:
611 | processed_messages.append(message)
612 | tasks = [
613 | asyncio.create_task(process_message("fast_message", 0.01)),
614 | asyncio.create_task(process_message("slow_message", 0.05)),
615 | asyncio.create_task(process_message("medium_message", 0.03))
616 | ]
617 | await asyncio.gather(*tasks)
618 | assert len(processed_messages) == 3
619 | assert set(processed_messages) == {"fast_message", "medium_message", "slow_message"}
620 |
621 |
622 | async def test_sse_timeout_handling():
623 | """Test SSE behavior when operations timeout."""
624 | mock_component = MagicMock()
625 | mock_component.slow_operation = AsyncMock()
626 | async def slow_operation():
627 | await asyncio.sleep(0.5)
628 | return {"result": "success"}
629 | mock_component.slow_operation.side_effect = slow_operation
630 | try:
631 | result = await asyncio.wait_for(mock_component.slow_operation(), timeout=0.1)
632 | timed_out = False
633 | except asyncio.TimeoutError:
634 | timed_out = True
635 | assert timed_out, "Operation should have timed out"
636 | mock_component.slow_operation.assert_called_once()
637 |
```
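
The keep-alive test above hand-rolls its timing loop inline; the same pattern extracted as a reusable coroutine looks roughly like this. A sketch written against a generic async `send` callable, not the actual transport API:

```python
import asyncio
from typing import Awaitable, Callable

async def keep_alive(
    send: Callable[[str], Awaitable[None]],
    interval: float = 15.0,
) -> None:
    """Periodically emit an SSE comment line so idle proxies keep the connection open."""
    try:
        while True:
            await send(": keep-alive\n")
            await asyncio.sleep(interval)
    except (ConnectionError, asyncio.CancelledError):
        # Client went away or the task was cancelled: stop quietly
        return

# Usage sketch: task = asyncio.create_task(keep_alive(transport.send))
# and task.cancel() once the client disconnects.
```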