This is page 29 of 39. Use http://codebase.md/wshobson/maverick-mcp?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .dockerignore
├── .env.example
├── .github
│ ├── dependabot.yml
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── bug_report.md
│ │ ├── config.yml
│ │ ├── feature_request.md
│ │ ├── question.md
│ │ └── security_report.md
│ ├── pull_request_template.md
│ └── workflows
│ ├── claude-code-review.yml
│ └── claude.yml
├── .gitignore
├── .python-version
├── .vscode
│ ├── launch.json
│ └── settings.json
├── alembic
│ ├── env.py
│ ├── script.py.mako
│ └── versions
│ ├── 001_initial_schema.py
│ ├── 003_add_performance_indexes.py
│ ├── 006_rename_metadata_columns.py
│ ├── 008_performance_optimization_indexes.py
│ ├── 009_rename_to_supply_demand.py
│ ├── 010_self_contained_schema.py
│ ├── 011_remove_proprietary_terms.py
│ ├── 013_add_backtest_persistence_models.py
│ ├── 014_add_portfolio_models.py
│ ├── 08e3945a0c93_merge_heads.py
│ ├── 9374a5c9b679_merge_heads_for_testing.py
│ ├── abf9b9afb134_merge_multiple_heads.py
│ ├── adda6d3fd84b_merge_proprietary_terms_removal_with_.py
│ ├── e0c75b0bdadb_fix_financial_data_precision_only.py
│ ├── f0696e2cac15_add_essential_performance_indexes.py
│ └── fix_database_integrity_issues.py
├── alembic.ini
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── DATABASE_SETUP.md
├── docker-compose.override.yml.example
├── docker-compose.yml
├── Dockerfile
├── docs
│ ├── api
│ │ └── backtesting.md
│ ├── BACKTESTING.md
│ ├── COST_BASIS_SPECIFICATION.md
│ ├── deep_research_agent.md
│ ├── exa_research_testing_strategy.md
│ ├── PORTFOLIO_PERSONALIZATION_PLAN.md
│ ├── PORTFOLIO.md
│ ├── SETUP_SELF_CONTAINED.md
│ └── speed_testing_framework.md
├── examples
│ ├── complete_speed_validation.py
│ ├── deep_research_integration.py
│ ├── llm_optimization_example.py
│ ├── llm_speed_demo.py
│ ├── monitoring_example.py
│ ├── parallel_research_example.py
│ ├── speed_optimization_demo.py
│ └── timeout_fix_demonstration.py
├── LICENSE
├── Makefile
├── MANIFEST.in
├── maverick_mcp
│ ├── __init__.py
│ ├── agents
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── circuit_breaker.py
│ │ ├── deep_research.py
│ │ ├── market_analysis.py
│ │ ├── optimized_research.py
│ │ ├── supervisor.py
│ │ └── technical_analysis.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── api_server.py
│ │ ├── connection_manager.py
│ │ ├── dependencies
│ │ │ ├── __init__.py
│ │ │ ├── stock_analysis.py
│ │ │ └── technical_analysis.py
│ │ ├── error_handling.py
│ │ ├── inspector_compatible_sse.py
│ │ ├── inspector_sse.py
│ │ ├── middleware
│ │ │ ├── error_handling.py
│ │ │ ├── mcp_logging.py
│ │ │ ├── rate_limiting_enhanced.py
│ │ │ └── security.py
│ │ ├── openapi_config.py
│ │ ├── routers
│ │ │ ├── __init__.py
│ │ │ ├── agents.py
│ │ │ ├── backtesting.py
│ │ │ ├── data_enhanced.py
│ │ │ ├── data.py
│ │ │ ├── health_enhanced.py
│ │ │ ├── health_tools.py
│ │ │ ├── health.py
│ │ │ ├── intelligent_backtesting.py
│ │ │ ├── introspection.py
│ │ │ ├── mcp_prompts.py
│ │ │ ├── monitoring.py
│ │ │ ├── news_sentiment_enhanced.py
│ │ │ ├── performance.py
│ │ │ ├── portfolio.py
│ │ │ ├── research.py
│ │ │ ├── screening_ddd.py
│ │ │ ├── screening_parallel.py
│ │ │ ├── screening.py
│ │ │ ├── technical_ddd.py
│ │ │ ├── technical_enhanced.py
│ │ │ ├── technical.py
│ │ │ └── tool_registry.py
│ │ ├── server.py
│ │ ├── services
│ │ │ ├── __init__.py
│ │ │ ├── base_service.py
│ │ │ ├── market_service.py
│ │ │ ├── portfolio_service.py
│ │ │ ├── prompt_service.py
│ │ │ └── resource_service.py
│ │ ├── simple_sse.py
│ │ └── utils
│ │ ├── __init__.py
│ │ ├── insomnia_export.py
│ │ └── postman_export.py
│ ├── application
│ │ ├── __init__.py
│ │ ├── commands
│ │ │ └── __init__.py
│ │ ├── dto
│ │ │ ├── __init__.py
│ │ │ └── technical_analysis_dto.py
│ │ ├── queries
│ │ │ ├── __init__.py
│ │ │ └── get_technical_analysis.py
│ │ └── screening
│ │ ├── __init__.py
│ │ ├── dtos.py
│ │ └── queries.py
│ ├── backtesting
│ │ ├── __init__.py
│ │ ├── ab_testing.py
│ │ ├── analysis.py
│ │ ├── batch_processing_stub.py
│ │ ├── batch_processing.py
│ │ ├── model_manager.py
│ │ ├── optimization.py
│ │ ├── persistence.py
│ │ ├── retraining_pipeline.py
│ │ ├── strategies
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── ml
│ │ │ │ ├── __init__.py
│ │ │ │ ├── adaptive.py
│ │ │ │ ├── ensemble.py
│ │ │ │ ├── feature_engineering.py
│ │ │ │ └── regime_aware.py
│ │ │ ├── ml_strategies.py
│ │ │ ├── parser.py
│ │ │ └── templates.py
│ │ ├── strategy_executor.py
│ │ ├── vectorbt_engine.py
│ │ └── visualization.py
│ ├── config
│ │ ├── __init__.py
│ │ ├── constants.py
│ │ ├── database_self_contained.py
│ │ ├── database.py
│ │ ├── llm_optimization_config.py
│ │ ├── logging_settings.py
│ │ ├── plotly_config.py
│ │ ├── security_utils.py
│ │ ├── security.py
│ │ ├── settings.py
│ │ ├── technical_constants.py
│ │ ├── tool_estimation.py
│ │ └── validation.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── technical_analysis.py
│ │ └── visualization.py
│ ├── data
│ │ ├── __init__.py
│ │ ├── cache_manager.py
│ │ ├── cache.py
│ │ ├── django_adapter.py
│ │ ├── health.py
│ │ ├── models.py
│ │ ├── performance.py
│ │ ├── session_management.py
│ │ └── validation.py
│ ├── database
│ │ ├── __init__.py
│ │ ├── base.py
│ │ └── optimization.py
│ ├── dependencies.py
│ ├── domain
│ │ ├── __init__.py
│ │ ├── entities
│ │ │ ├── __init__.py
│ │ │ └── stock_analysis.py
│ │ ├── events
│ │ │ └── __init__.py
│ │ ├── portfolio.py
│ │ ├── screening
│ │ │ ├── __init__.py
│ │ │ ├── entities.py
│ │ │ ├── services.py
│ │ │ └── value_objects.py
│ │ ├── services
│ │ │ ├── __init__.py
│ │ │ └── technical_analysis_service.py
│ │ ├── stock_analysis
│ │ │ ├── __init__.py
│ │ │ └── stock_analysis_service.py
│ │ └── value_objects
│ │ ├── __init__.py
│ │ └── technical_indicators.py
│ ├── exceptions.py
│ ├── infrastructure
│ │ ├── __init__.py
│ │ ├── cache
│ │ │ └── __init__.py
│ │ ├── caching
│ │ │ ├── __init__.py
│ │ │ └── cache_management_service.py
│ │ ├── connection_manager.py
│ │ ├── data_fetching
│ │ │ ├── __init__.py
│ │ │ └── stock_data_service.py
│ │ ├── health
│ │ │ ├── __init__.py
│ │ │ └── health_checker.py
│ │ ├── persistence
│ │ │ ├── __init__.py
│ │ │ └── stock_repository.py
│ │ ├── providers
│ │ │ └── __init__.py
│ │ ├── screening
│ │ │ ├── __init__.py
│ │ │ └── repositories.py
│ │ └── sse_optimizer.py
│ ├── langchain_tools
│ │ ├── __init__.py
│ │ ├── adapters.py
│ │ └── registry.py
│ ├── logging_config.py
│ ├── memory
│ │ ├── __init__.py
│ │ └── stores.py
│ ├── monitoring
│ │ ├── __init__.py
│ │ ├── health_check.py
│ │ ├── health_monitor.py
│ │ ├── integration_example.py
│ │ ├── metrics.py
│ │ ├── middleware.py
│ │ └── status_dashboard.py
│ ├── providers
│ │ ├── __init__.py
│ │ ├── dependencies.py
│ │ ├── factories
│ │ │ ├── __init__.py
│ │ │ ├── config_factory.py
│ │ │ └── provider_factory.py
│ │ ├── implementations
│ │ │ ├── __init__.py
│ │ │ ├── cache_adapter.py
│ │ │ ├── macro_data_adapter.py
│ │ │ ├── market_data_adapter.py
│ │ │ ├── persistence_adapter.py
│ │ │ └── stock_data_adapter.py
│ │ ├── interfaces
│ │ │ ├── __init__.py
│ │ │ ├── cache.py
│ │ │ ├── config.py
│ │ │ ├── macro_data.py
│ │ │ ├── market_data.py
│ │ │ ├── persistence.py
│ │ │ └── stock_data.py
│ │ ├── llm_factory.py
│ │ ├── macro_data.py
│ │ ├── market_data.py
│ │ ├── mocks
│ │ │ ├── __init__.py
│ │ │ ├── mock_cache.py
│ │ │ ├── mock_config.py
│ │ │ ├── mock_macro_data.py
│ │ │ ├── mock_market_data.py
│ │ │ ├── mock_persistence.py
│ │ │ └── mock_stock_data.py
│ │ ├── openrouter_provider.py
│ │ ├── optimized_screening.py
│ │ ├── optimized_stock_data.py
│ │ └── stock_data.py
│ ├── README.md
│ ├── tests
│ │ ├── __init__.py
│ │ ├── README_INMEMORY_TESTS.md
│ │ ├── test_cache_debug.py
│ │ ├── test_fixes_validation.py
│ │ ├── test_in_memory_routers.py
│ │ ├── test_in_memory_server.py
│ │ ├── test_macro_data_provider.py
│ │ ├── test_mailgun_email.py
│ │ ├── test_market_calendar_caching.py
│ │ ├── test_mcp_tool_fixes_pytest.py
│ │ ├── test_mcp_tool_fixes.py
│ │ ├── test_mcp_tools.py
│ │ ├── test_models_functional.py
│ │ ├── test_server.py
│ │ ├── test_stock_data_enhanced.py
│ │ ├── test_stock_data_provider.py
│ │ └── test_technical_analysis.py
│ ├── tools
│ │ ├── __init__.py
│ │ ├── performance_monitoring.py
│ │ ├── portfolio_manager.py
│ │ ├── risk_management.py
│ │ └── sentiment_analysis.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── agent_errors.py
│ │ ├── batch_processing.py
│ │ ├── cache_warmer.py
│ │ ├── circuit_breaker_decorators.py
│ │ ├── circuit_breaker_services.py
│ │ ├── circuit_breaker.py
│ │ ├── data_chunking.py
│ │ ├── database_monitoring.py
│ │ ├── debug_utils.py
│ │ ├── fallback_strategies.py
│ │ ├── llm_optimization.py
│ │ ├── logging_example.py
│ │ ├── logging_init.py
│ │ ├── logging.py
│ │ ├── mcp_logging.py
│ │ ├── memory_profiler.py
│ │ ├── monitoring_middleware.py
│ │ ├── monitoring.py
│ │ ├── orchestration_logging.py
│ │ ├── parallel_research.py
│ │ ├── parallel_screening.py
│ │ ├── quick_cache.py
│ │ ├── resource_manager.py
│ │ ├── shutdown.py
│ │ ├── stock_helpers.py
│ │ ├── structured_logger.py
│ │ ├── tool_monitoring.py
│ │ ├── tracing.py
│ │ └── yfinance_pool.py
│ ├── validation
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── data.py
│ │ ├── middleware.py
│ │ ├── portfolio.py
│ │ ├── responses.py
│ │ ├── screening.py
│ │ └── technical.py
│ └── workflows
│ ├── __init__.py
│ ├── agents
│ │ ├── __init__.py
│ │ ├── market_analyzer.py
│ │ ├── optimizer_agent.py
│ │ ├── strategy_selector.py
│ │ └── validator_agent.py
│ ├── backtesting_workflow.py
│ └── state.py
├── PLANS.md
├── pyproject.toml
├── pyrightconfig.json
├── README.md
├── scripts
│ ├── dev.sh
│ ├── INSTALLATION_GUIDE.md
│ ├── load_example.py
│ ├── load_market_data.py
│ ├── load_tiingo_data.py
│ ├── migrate_db.py
│ ├── README_TIINGO_LOADER.md
│ ├── requirements_tiingo.txt
│ ├── run_stock_screening.py
│ ├── run-migrations.sh
│ ├── seed_db.py
│ ├── seed_sp500.py
│ ├── setup_database.sh
│ ├── setup_self_contained.py
│ ├── setup_sp500_database.sh
│ ├── test_seeded_data.py
│ ├── test_tiingo_loader.py
│ ├── tiingo_config.py
│ └── validate_setup.py
├── SECURITY.md
├── server.json
├── setup.py
├── tests
│ ├── __init__.py
│ ├── conftest.py
│ ├── core
│ │ └── test_technical_analysis.py
│ ├── data
│ │ └── test_portfolio_models.py
│ ├── domain
│ │ ├── conftest.py
│ │ ├── test_portfolio_entities.py
│ │ └── test_technical_analysis_service.py
│ ├── fixtures
│ │ └── orchestration_fixtures.py
│ ├── integration
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── README.md
│ │ ├── run_integration_tests.sh
│ │ ├── test_api_technical.py
│ │ ├── test_chaos_engineering.py
│ │ ├── test_config_management.py
│ │ ├── test_full_backtest_workflow_advanced.py
│ │ ├── test_full_backtest_workflow.py
│ │ ├── test_high_volume.py
│ │ ├── test_mcp_tools.py
│ │ ├── test_orchestration_complete.py
│ │ ├── test_portfolio_persistence.py
│ │ ├── test_redis_cache.py
│ │ ├── test_security_integration.py.disabled
│ │ └── vcr_setup.py
│ ├── performance
│ │ ├── __init__.py
│ │ ├── test_benchmarks.py
│ │ ├── test_load.py
│ │ ├── test_profiling.py
│ │ └── test_stress.py
│ ├── providers
│ │ └── test_stock_data_simple.py
│ ├── README.md
│ ├── test_agents_router_mcp.py
│ ├── test_backtest_persistence.py
│ ├── test_cache_management_service.py
│ ├── test_cache_serialization.py
│ ├── test_circuit_breaker.py
│ ├── test_database_pool_config_simple.py
│ ├── test_database_pool_config.py
│ ├── test_deep_research_functional.py
│ ├── test_deep_research_integration.py
│ ├── test_deep_research_parallel_execution.py
│ ├── test_error_handling.py
│ ├── test_event_loop_integrity.py
│ ├── test_exa_research_integration.py
│ ├── test_exception_hierarchy.py
│ ├── test_financial_search.py
│ ├── test_graceful_shutdown.py
│ ├── test_integration_simple.py
│ ├── test_langgraph_workflow.py
│ ├── test_market_data_async.py
│ ├── test_market_data_simple.py
│ ├── test_mcp_orchestration_functional.py
│ ├── test_ml_strategies.py
│ ├── test_optimized_research_agent.py
│ ├── test_orchestration_integration.py
│ ├── test_orchestration_logging.py
│ ├── test_orchestration_tools_simple.py
│ ├── test_parallel_research_integration.py
│ ├── test_parallel_research_orchestrator.py
│ ├── test_parallel_research_performance.py
│ ├── test_performance_optimizations.py
│ ├── test_production_validation.py
│ ├── test_provider_architecture.py
│ ├── test_rate_limiting_enhanced.py
│ ├── test_runner_validation.py
│ ├── test_security_comprehensive.py.disabled
│ ├── test_security_cors.py
│ ├── test_security_enhancements.py.disabled
│ ├── test_security_headers.py
│ ├── test_security_penetration.py
│ ├── test_session_management.py
│ ├── test_speed_optimization_validation.py
│ ├── test_stock_analysis_dependencies.py
│ ├── test_stock_analysis_service.py
│ ├── test_stock_data_fetching_service.py
│ ├── test_supervisor_agent.py
│ ├── test_supervisor_functional.py
│ ├── test_tool_estimation_config.py
│ ├── test_visualization.py
│ └── utils
│ ├── test_agent_errors.py
│ ├── test_logging.py
│ ├── test_parallel_screening.py
│ └── test_quick_cache.py
├── tools
│ ├── check_orchestration_config.py
│ ├── experiments
│ │ ├── validation_examples.py
│ │ └── validation_fixed.py
│ ├── fast_dev.sh
│ ├── hot_reload.py
│ ├── quick_test.py
│ └── templates
│ ├── new_router_template.py
│ ├── new_tool_template.py
│ ├── screening_strategy_template.py
│ └── test_template.py
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/tests/test_parallel_research_performance.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Comprehensive Performance Test Suite for Parallel Research System
3 |
4 | This test suite validates parallel research performance against specific thresholds:
5 | - Minimum speedup: 2 agents = 1.3x speedup, 4 agents = 2.0x speedup
6 | - Maximum memory increase: 3x memory usage acceptable for 4x agents
7 | - Test duration: Quick tests for CI (<30s total runtime)
8 |
9 | Features:
10 | - Realistic failure simulation (0%, 10%, 25% failure rates)
11 | - Memory usage monitoring and validation
12 | - Statistical significance testing (3+ runs per test)
13 | - Integration with existing pytest infrastructure
14 | - Performance markers for easy filtering
15 | """
16 |
17 | import asyncio
18 | import gc
19 | import logging
20 | import random
21 | import statistics
22 | import time
23 | import tracemalloc
24 | from datetime import datetime
25 | from typing import Any
26 | from unittest.mock import AsyncMock
27 |
28 | import psutil
29 | import pytest
30 |
31 | from maverick_mcp.agents.deep_research import DeepResearchAgent
32 | from maverick_mcp.utils.parallel_research import (
33 | ParallelResearchConfig,
34 | ParallelResearchOrchestrator,
35 | ResearchResult,
36 | ResearchTask,
37 | )
38 |
39 | logger = logging.getLogger(__name__)
40 |
41 | # Performance thresholds and test configuration
42 | PERFORMANCE_THRESHOLDS = {
43 | "speedup": {
44 | 2: 1.3, # 2 agents should achieve 1.3x speedup minimum
45 | 4: 2.0, # 4 agents should achieve 2.0x speedup minimum
46 | },
47 | "memory_multiplier": 3.0, # 3x memory usage acceptable for 4x agents
48 | "max_test_duration": 30.0, # Total test suite should complete within 30s
49 | }
50 |
51 | # Failure simulation configuration
52 | FAILURE_RATES = [0.0, 0.10, 0.25] # 0%, 10%, 25% failure rates
53 | STATISTICAL_RUNS = 3 # Number of runs for statistical significance
54 |
55 |
56 | class PerformanceMonitor:
57 | """Monitors CPU, memory, and timing performance during test execution."""
58 |
59 | def __init__(self, test_name: str):
60 | self.test_name = test_name
61 | self.start_time: float = 0
62 | self.start_memory: int = 0
63 | self.peak_memory: int = 0
64 | self.cpu_percent_samples: list[float] = []
65 | self.process = psutil.Process()
66 |
67 | def __enter__(self):
68 | """Start performance monitoring."""
69 | # Force garbage collection for accurate memory measurement
70 | gc.collect()
71 |
72 | # Start memory tracing
73 | tracemalloc.start()
74 |
75 | # Record baseline metrics
76 | self.start_time = time.time()
77 | self.start_memory = self.process.memory_info().rss
78 | self.peak_memory = self.start_memory
79 |
80 | logger.info(f"Performance monitoring started for {self.test_name}")
81 | return self
82 |
83 | def sample_cpu(self):
84 | """Sample current CPU usage."""
85 | try:
86 | cpu_percent = self.process.cpu_percent()
87 | self.cpu_percent_samples.append(cpu_percent)
88 |
89 | # Track peak memory
90 | current_memory = self.process.memory_info().rss
91 | self.peak_memory = max(self.peak_memory, current_memory)
92 | except Exception as e:
93 | logger.warning(f"Failed to sample CPU/memory: {e}")
94 |
95 | def __exit__(self, exc_type, exc_val, exc_tb):
96 |         """Complete performance monitoring and record metrics on self.metrics."""
97 | end_time = time.time()
98 | end_memory = self.process.memory_info().rss
99 |
100 | # Get memory tracing results
101 | current, peak_traced = tracemalloc.get_traced_memory()
102 | tracemalloc.stop()
103 |
104 | # Calculate metrics
105 | execution_time = end_time - self.start_time
106 | memory_increase = end_memory - self.start_memory
107 | memory_multiplier = end_memory / max(self.start_memory, 1)
108 | avg_cpu = (
109 | statistics.mean(self.cpu_percent_samples) if self.cpu_percent_samples else 0
110 | )
111 |
112 | self.metrics = {
113 | "execution_time": execution_time,
114 | "start_memory_mb": self.start_memory / (1024 * 1024),
115 | "end_memory_mb": end_memory / (1024 * 1024),
116 | "peak_memory_mb": max(self.peak_memory, peak_traced) / (1024 * 1024),
117 | "memory_increase_mb": memory_increase / (1024 * 1024),
118 | "memory_multiplier": memory_multiplier,
119 | "avg_cpu_percent": avg_cpu,
120 | "cpu_samples": len(self.cpu_percent_samples),
121 | }
122 |
123 | logger.info(
124 | f"Performance metrics for {self.test_name}: "
125 | f"Time: {execution_time:.3f}s, "
126 | f"Memory: {self.metrics['start_memory_mb']:.1f}MB -> "
127 | f"{self.metrics['end_memory_mb']:.1f}MB "
128 | f"({memory_multiplier:.2f}x), "
129 | f"CPU: {avg_cpu:.1f}%"
130 | )
131 |
132 |
133 | class MockResearchExecutor:
134 | """
135 | Realistic mock executor that simulates LLM research operations with:
136 | - Configurable failure rates and timeout scenarios
137 | - Variable response times (100-500ms)
138 | - Different response sizes to test memory patterns
139 | - Structured research data that mirrors real usage
140 | """
141 |
142 | def __init__(
143 | self,
144 | failure_rate: float = 0.0,
145 | base_delay: float = 0.1,
146 | delay_variance: float = 0.4,
147 | include_timeouts: bool = False,
148 | ):
149 | self.failure_rate = failure_rate
150 | self.base_delay = base_delay
151 | self.delay_variance = delay_variance
152 | self.include_timeouts = include_timeouts
153 | self.execution_count = 0
154 | self.execution_log: list[dict[str, Any]] = []
155 |
156 | async def __call__(self, task: ResearchTask) -> dict[str, Any]:
157 | """Execute mock research task with realistic behavior."""
158 | self.execution_count += 1
159 | start_time = time.time()
160 |
161 | # Simulate realistic processing delay (100-500ms)
162 | delay = self.base_delay + random.uniform(0, self.delay_variance)
163 | await asyncio.sleep(delay)
164 |
165 | # Simulate various failure modes
166 | if random.random() < self.failure_rate:
167 | failure_type = random.choice(
168 | ["api_timeout", "rate_limit", "auth_error", "network_error"]
169 | )
170 |
171 | if failure_type == "api_timeout" and self.include_timeouts:
172 | # Simulate timeout by sleeping longer than task timeout
173 | await asyncio.sleep(task.timeout + 1 if task.timeout else 10)
174 |
175 | raise Exception(f"Simulated {failure_type} for task {task.task_id}")
176 |
177 | # Generate realistic research response based on task type
178 | response = self._generate_research_response(task)
179 |
180 | execution_time = time.time() - start_time
181 | self.execution_log.append(
182 | {
183 | "task_id": task.task_id,
184 | "task_type": task.task_type,
185 | "execution_time": execution_time,
186 | "delay_simulated": delay,
187 | "response_size": len(str(response)),
188 | }
189 | )
190 |
191 | return response
192 |
193 | def _generate_research_response(self, task: ResearchTask) -> dict[str, Any]:
194 | """Generate structured research data with realistic content."""
195 | # Vary response size based on research type
196 | insight_counts = {
197 | "fundamental": random.randint(8, 15),
198 | "technical": random.randint(6, 12),
199 | "sentiment": random.randint(10, 20),
200 | "competitive": random.randint(7, 14),
201 | }
202 |
203 | insight_count = insight_counts.get(task.task_type, 10)
204 |
205 | # Generate realistic insights
206 | insights = [
207 | f"{task.task_type} insight {i + 1} for {task.target_topic[:20]}"
208 | for i in range(insight_count)
209 | ]
210 |
211 | # Add context-specific data
212 | sources = [
213 | {
214 | "title": f"Source {i + 1} for {task.task_type} research",
215 | "url": f"https://example.com/research/{task.task_type}/{i}",
216 | "credibility_score": random.uniform(0.6, 0.95),
217 | "published_date": datetime.now().isoformat(),
218 | "content_snippet": f"Content from source {i + 1} "
219 | * random.randint(10, 50),
220 | "relevance_score": random.uniform(0.7, 1.0),
221 | }
222 | for i in range(random.randint(3, 8))
223 | ]
224 |
225 | return {
226 | "research_type": task.task_type,
227 | "insights": insights,
228 | "risk_factors": [
229 | f"{task.task_type} risk factor {i + 1}"
230 | for i in range(random.randint(3, 7))
231 | ],
232 | "opportunities": [
233 | f"{task.task_type} opportunity {i + 1}"
234 | for i in range(random.randint(3, 7))
235 | ],
236 | "sentiment": {
237 | "direction": random.choice(["bullish", "bearish", "neutral"]),
238 | "confidence": random.uniform(0.5, 0.9),
239 | "consensus": random.uniform(0.4, 0.8),
240 | },
241 | "credibility_score": random.uniform(0.6, 0.9),
242 | "sources": sources,
243 | "focus_areas": task.focus_areas,
244 | "metadata": {
245 | "execution_time": random.uniform(0.1, 0.5),
246 | "api_calls_made": random.randint(2, 8),
247 | "cache_hits": random.randint(0, 4),
248 | "cache_misses": random.randint(1, 6),
249 | },
250 | # Add some large data structures to test memory usage
251 | "detailed_analysis": {
252 | f"analysis_point_{i}": f"Detailed analysis content {i} "
253 | * random.randint(50, 200)
254 | for i in range(random.randint(5, 15))
255 | },
256 | }
257 |
258 | def get_execution_stats(self) -> dict[str, Any]:
259 | """Get detailed execution statistics."""
260 | if not self.execution_log:
261 | return {"total_executions": 0}
262 |
263 | execution_times = [log["execution_time"] for log in self.execution_log]
264 | response_sizes = [log["response_size"] for log in self.execution_log]
265 |
266 | return {
267 | "total_executions": len(self.execution_log),
268 | "avg_execution_time": statistics.mean(execution_times),
269 | "median_execution_time": statistics.median(execution_times),
270 | "avg_response_size": statistics.mean(response_sizes),
271 | "total_response_size": sum(response_sizes),
272 | "task_type_distribution": {
273 | task_type: len(
274 | [log for log in self.execution_log if log["task_type"] == task_type]
275 | )
276 | for task_type in [
277 | "fundamental",
278 | "technical",
279 | "sentiment",
280 | "competitive",
281 | ]
282 | },
283 | }
284 |
285 |
286 | class PerformanceTester:
287 | """Manages performance test execution and validation."""
288 |
289 | @staticmethod
290 | def create_test_tasks(
291 | agent_count: int, session_id: str = "perf_test"
292 | ) -> list[ResearchTask]:
293 | """Create realistic research tasks for performance testing."""
294 | topics = [
295 | "Apple Inc financial analysis and market outlook",
296 | "Tesla Inc competitive position and technical analysis",
297 | "Microsoft Corp sentiment analysis and growth prospects",
298 | "NVIDIA Corp fundamental analysis and AI sector outlook",
299 | "Amazon Inc market position and e-commerce trends",
300 | "Google Inc advertising revenue and cloud competition",
301 | ]
302 |
303 | tasks = []
304 | for i in range(agent_count):
305 | topic = topics[i % len(topics)]
306 | task_types = ["fundamental", "technical", "sentiment", "competitive"]
307 | task_type = task_types[i % len(task_types)]
308 |
309 | task = ResearchTask(
310 | task_id=f"{session_id}_{task_type}_{i}",
311 | task_type=task_type,
312 | target_topic=topic,
313 | focus_areas=[f"focus_{i}", f"area_{i % 3}"],
314 | priority=random.randint(5, 9),
315 | timeout=10, # Short timeout for CI
316 | )
317 | tasks.append(task)
318 |
319 | return tasks
320 |
321 | @staticmethod
322 | async def run_sequential_baseline(
323 | tasks: list[ResearchTask], executor: MockResearchExecutor
324 | ) -> dict[str, Any]:
325 | """Run tasks sequentially to establish performance baseline."""
326 | start_time = time.time()
327 | results = []
328 |
329 | for task in tasks:
330 | try:
331 | result = await executor(task)
332 | results.append({"task": task, "result": result, "status": "success"})
333 | except Exception as e:
334 | results.append({"task": task, "error": str(e), "status": "failed"})
335 |
336 | execution_time = time.time() - start_time
337 | successful_results = [r for r in results if r["status"] == "success"]
338 |
339 | return {
340 | "execution_time": execution_time,
341 | "successful_tasks": len(successful_results),
342 | "failed_tasks": len(results) - len(successful_results),
343 | "results": results,
344 | }
345 |
346 | @staticmethod
347 | async def run_parallel_test(
348 | tasks: list[ResearchTask],
349 | config: ParallelResearchConfig,
350 | executor: MockResearchExecutor,
351 | ) -> ResearchResult:
352 | """Run parallel research test with orchestrator."""
353 | orchestrator = ParallelResearchOrchestrator(config)
354 |
355 | # Mock synthesis callback
356 | async def mock_synthesis(
357 | task_results: dict[str, ResearchTask],
358 | ) -> dict[str, Any]:
359 | successful_results = [
360 | task.result
361 | for task in task_results.values()
362 | if task.status == "completed" and task.result
363 | ]
364 |
365 | return {
366 | "synthesis": f"Mock synthesis from {len(successful_results)} results",
367 | "confidence_score": random.uniform(0.7, 0.9),
368 | "key_findings": [
369 | f"Finding {i}" for i in range(min(len(successful_results), 5))
370 | ],
371 | }
372 |
373 | return await orchestrator.execute_parallel_research(
374 | tasks=tasks,
375 | research_executor=executor,
376 | synthesis_callback=mock_synthesis,
377 | )
378 |
379 | @staticmethod
380 | def validate_performance_thresholds(
381 | agent_count: int,
382 | sequential_time: float,
383 | parallel_time: float,
384 | memory_multiplier: float,
385 |         success_threshold: float = 0.8,  # require at least 80% of the expected speedup
386 |     ) -> dict[str, Any]:
387 | """Validate performance against defined thresholds."""
388 | speedup = sequential_time / max(parallel_time, 0.001) # Avoid division by zero
389 | expected_speedup = PERFORMANCE_THRESHOLDS["speedup"].get(agent_count, 1.0)
390 | max_memory_multiplier = PERFORMANCE_THRESHOLDS["memory_multiplier"]
391 |
392 | return {
393 | "speedup_threshold_met": speedup >= expected_speedup * success_threshold,
394 | "memory_threshold_met": memory_multiplier <= max_memory_multiplier,
395 | "speedup_achieved": speedup,
396 | "speedup_expected": expected_speedup,
397 | "memory_multiplier": memory_multiplier,
398 | "memory_limit": max_memory_multiplier,
399 | }
400 |
401 |
402 | # Test fixtures
403 | @pytest.fixture
404 | def performance_monitor():
405 | """Create performance monitor for tests."""
406 |
407 | def _create_monitor(test_name: str):
408 | return PerformanceMonitor(test_name)
409 |
410 | return _create_monitor
411 |
412 |
413 | @pytest.fixture
414 | def mock_executor_factory():
415 | """Factory for creating mock research executors with different configurations."""
416 |
417 | def _create_executor(**kwargs):
418 | return MockResearchExecutor(**kwargs)
419 |
420 | return _create_executor
421 |
422 |
423 | @pytest.fixture
424 | def performance_tester():
425 | """Provide PerformanceTester utility."""
426 | return PerformanceTester()
427 |
428 |
429 | # Unit Performance Tests
430 | @pytest.mark.unit
431 | class TestParallelResearchPerformance:
432 | """Core performance tests for parallel research system."""
433 |
434 | @pytest.mark.parametrize("agent_count", [2, 4])
435 | @pytest.mark.parametrize(
436 | "failure_rate", FAILURE_RATES[:2]
437 | ) # 0% and 10% for unit tests
438 | async def test_parallel_speedup_thresholds(
439 | self,
440 | agent_count: int,
441 | failure_rate: float,
442 | performance_monitor,
443 | mock_executor_factory,
444 | performance_tester,
445 | ):
446 | """Test that parallel execution meets minimum speedup thresholds."""
447 | test_name = f"speedup_{agent_count}agents_{int(failure_rate * 100)}pct_failure"
448 |
449 | # Run multiple iterations for statistical significance
450 | speedup_results = []
451 | memory_results = []
452 |
453 | for run in range(STATISTICAL_RUNS):
454 | with performance_monitor(f"{test_name}_run{run}") as monitor:
455 | # Create test configuration
456 | config = ParallelResearchConfig(
457 | max_concurrent_agents=agent_count,
458 | timeout_per_agent=5,
459 | enable_fallbacks=True,
460 | rate_limit_delay=0.05, # Fast for testing
461 | )
462 |
463 | # Create tasks and executor
464 | tasks = performance_tester.create_test_tasks(
465 | agent_count, f"speedup_test_{run}"
466 | )
467 | executor = mock_executor_factory(
468 | failure_rate=failure_rate,
469 | base_delay=0.1,
470 | delay_variance=0.1,
471 | )
472 |
473 | # Sample CPU/memory during test
474 | monitor.sample_cpu()
475 |
476 | # Run sequential baseline
477 | sequential_start = time.time()
478 | await performance_tester.run_sequential_baseline(tasks, executor)
479 | sequential_time = time.time() - sequential_start
480 |
481 | monitor.sample_cpu()
482 |
483 | # Reset executor for parallel test
484 | executor.execution_count = 0
485 | executor.execution_log.clear()
486 |
487 | # Run parallel test
488 | parallel_start = time.time()
489 | await performance_tester.run_parallel_test(tasks, config, executor)
490 | parallel_time = time.time() - parallel_start
491 |
492 | monitor.sample_cpu()
493 |
494 | # Calculate metrics
495 | speedup = sequential_time / max(parallel_time, 0.001)
496 | speedup_results.append(speedup)
497 | memory_results.append(monitor.metrics["memory_multiplier"])
498 |
499 | logger.info(
500 | f"Run {run + 1}: Sequential: {sequential_time:.3f}s, "
501 | f"Parallel: {parallel_time:.3f}s, Speedup: {speedup:.2f}x"
502 | )
503 |
504 | # Statistical analysis
505 | avg_speedup = statistics.mean(speedup_results)
506 | median_speedup = statistics.median(speedup_results)
507 | avg_memory_multiplier = statistics.mean(memory_results)
508 |
509 | # Validate against thresholds
510 | expected_speedup = PERFORMANCE_THRESHOLDS["speedup"][agent_count]
511 | validation = performance_tester.validate_performance_thresholds(
512 | agent_count=agent_count,
513 | sequential_time=1.0, # Normalized
514 | parallel_time=1.0 / avg_speedup, # Normalized
515 | memory_multiplier=avg_memory_multiplier,
516 | )
517 |
518 | logger.info(
519 | f"Performance summary for {agent_count} agents, {failure_rate * 100}% failure rate: "
520 | f"Avg speedup: {avg_speedup:.2f}x (expected: {expected_speedup:.2f}x), "
521 | f"Memory multiplier: {avg_memory_multiplier:.2f}x"
522 | )
523 |
524 | # Assertions with clear failure messages
525 | assert validation["speedup_threshold_met"], (
526 | f"Speedup threshold not met: achieved {avg_speedup:.2f}x, "
527 |             f"expected {expected_speedup:.2f}x (within the 80% tolerance factor)"
528 | )
529 |
530 | assert validation["memory_threshold_met"], (
531 | f"Memory threshold exceeded: {avg_memory_multiplier:.2f}x > "
532 | f"{PERFORMANCE_THRESHOLDS['memory_multiplier']}x limit"
533 | )
534 |
535 | # Performance characteristics validation
536 | assert median_speedup > 1.0, "Parallel execution should show some speedup"
537 | assert all(m < 10.0 for m in memory_results), (
538 | "Memory usage should be reasonable"
539 | )
540 |
541 | async def test_performance_under_failures(
542 | self, performance_monitor, mock_executor_factory, performance_tester
543 | ):
544 | """Test performance degradation under different failure scenarios."""
545 | agent_count = 4
546 | test_name = "failure_resilience_test"
547 |
548 | results = {}
549 |
550 | for failure_rate in FAILURE_RATES:
551 | with performance_monitor(
552 | f"{test_name}_{int(failure_rate * 100)}pct"
553 | ) as monitor:
554 | config = ParallelResearchConfig(
555 | max_concurrent_agents=agent_count,
556 | timeout_per_agent=3,
557 | enable_fallbacks=True,
558 | rate_limit_delay=0.02,
559 | )
560 |
561 | tasks = performance_tester.create_test_tasks(
562 | agent_count, f"failure_test_{int(failure_rate * 100)}"
563 | )
564 | executor = mock_executor_factory(
565 | failure_rate=failure_rate,
566 | base_delay=0.05,
567 | include_timeouts=False, # No timeouts for this test
568 | )
569 |
570 | monitor.sample_cpu()
571 | parallel_result = await performance_tester.run_parallel_test(
572 | tasks, config, executor
573 | )
574 | monitor.sample_cpu()
575 |
576 | results[failure_rate] = {
577 | "successful_tasks": parallel_result.successful_tasks,
578 | "failed_tasks": parallel_result.failed_tasks,
579 | "execution_time": monitor.metrics["execution_time"],
580 | "memory_multiplier": monitor.metrics["memory_multiplier"],
581 | "success_rate": parallel_result.successful_tasks
582 | / (parallel_result.successful_tasks + parallel_result.failed_tasks),
583 | }
584 |
585 | # Validate failure handling - adjusted for realistic expectations
586 | assert results[0.0]["success_rate"] == 1.0, (
587 | "Zero failure rate should achieve 100% success"
588 | )
589 | assert results[0.10]["success_rate"] >= 0.7, (
590 | "10% failure rate should maintain >70% success"
591 | )
592 | assert results[0.25]["success_rate"] >= 0.5, (
593 | "25% failure rate should maintain >50% success"
594 | )
595 |
596 | # Validate performance doesn't degrade drastically with failures
597 | baseline_time = results[0.0]["execution_time"]
598 | assert results[0.10]["execution_time"] <= baseline_time * 1.5, (
599 | "10% failure shouldn't increase time by >50%"
600 | )
601 | assert results[0.25]["execution_time"] <= baseline_time * 2.0, (
602 | "25% failure shouldn't double execution time"
603 | )
604 |
605 | logger.info("Failure resilience test completed successfully")
606 |
607 | async def test_memory_usage_patterns(
608 | self, performance_monitor, mock_executor_factory, performance_tester
609 | ):
610 | """Test memory usage patterns across different agent counts."""
611 | memory_results = {}
612 |
613 | for agent_count in [1, 2, 4]:
614 | with performance_monitor(f"memory_test_{agent_count}_agents") as monitor:
615 | config = ParallelResearchConfig(
616 | max_concurrent_agents=agent_count,
617 | timeout_per_agent=5,
618 | enable_fallbacks=True,
619 | rate_limit_delay=0.01,
620 | )
621 |
622 | # Create larger dataset to test memory scaling
623 | tasks = performance_tester.create_test_tasks(
624 | agent_count * 2, f"memory_test_{agent_count}"
625 | )
626 | executor = mock_executor_factory(
627 | failure_rate=0.0,
628 | base_delay=0.05, # Short delay to focus on memory
629 | )
630 |
631 | # Force garbage collection before test
632 | gc.collect()
633 | monitor.sample_cpu()
634 |
635 | # Run test with memory monitoring
636 | result = await performance_tester.run_parallel_test(
637 | tasks, config, executor
638 | )
639 |
640 | # Sample memory again
641 | monitor.sample_cpu()
642 |
643 | # Force another GC to see post-test memory
644 | gc.collect()
645 | await asyncio.sleep(0.1) # Allow cleanup
646 |
647 | memory_results[agent_count] = {
648 | "peak_memory_mb": monitor.metrics["peak_memory_mb"],
649 | "memory_increase_mb": monitor.metrics["memory_increase_mb"],
650 | "memory_multiplier": monitor.metrics["memory_multiplier"],
651 | "successful_tasks": result.successful_tasks,
652 | }
653 |
654 | # Validate memory scaling is reasonable
655 | baseline_memory = memory_results[1]["peak_memory_mb"]
656 | memory_4x = memory_results[4]["peak_memory_mb"]
657 |
658 | # 4x agents should not use more than 3x memory
659 | memory_scaling = memory_4x / baseline_memory
660 | assert memory_scaling <= PERFORMANCE_THRESHOLDS["memory_multiplier"], (
661 | f"Memory scaling too high: {memory_scaling:.2f}x > "
662 | f"{PERFORMANCE_THRESHOLDS['memory_multiplier']}x limit"
663 | )
664 |
665 | # Memory usage should scale sub-linearly (better than linear)
666 | assert memory_scaling < 4.0, "Memory should scale sub-linearly with agent count"
667 |
668 | logger.info(f"Memory scaling from 1 to 4 agents: {memory_scaling:.2f}x")
669 |
670 |
671 | # Slow/Integration Performance Tests
672 | @pytest.mark.slow
673 | class TestParallelResearchIntegrationPerformance:
674 | """Integration performance tests with more realistic scenarios."""
675 |
676 | async def test_deep_research_agent_parallel_integration(
677 | self, performance_monitor, mock_executor_factory
678 | ):
679 | """Test DeepResearchAgent with parallel execution enabled."""
680 | with performance_monitor("deep_research_agent_parallel") as monitor:
681 | # Mock LLM for DeepResearchAgent
682 | mock_llm = AsyncMock()
683 | mock_llm.ainvoke.return_value.content = '{"analysis": "test analysis"}'
684 |
685 | # Create agent with parallel execution enabled
686 | agent = DeepResearchAgent(
687 | llm=mock_llm,
688 | persona="moderate",
689 | enable_parallel_execution=True,
690 | parallel_config=ParallelResearchConfig(
691 | max_concurrent_agents=3,
692 | timeout_per_agent=5,
693 | enable_fallbacks=True,
694 | ),
695 | )
696 |
697 | monitor.sample_cpu()
698 |
699 | # Mock the subagent execution to avoid real API calls
700 | async def mock_subagent_execution(task: ResearchTask) -> dict[str, Any]:
701 | await asyncio.sleep(0.1) # Simulate work
702 | return {
703 | "research_type": task.task_type,
704 | "insights": [f"Mock insight for {task.task_type}"],
705 | "sentiment": {"direction": "neutral", "confidence": 0.7},
706 | "credibility_score": 0.8,
707 | "sources": [{"title": "Mock source", "url": "http://example.com"}],
708 | }
709 |
710 | # Override the subagent execution method
711 | agent._execute_subagent_task = mock_subagent_execution
712 |
713 | # Run comprehensive research
714 | result = await agent.research_comprehensive(
715 | topic="Apple Inc comprehensive analysis",
716 | session_id="integration_test",
717 | depth="standard",
718 | use_parallel_execution=True,
719 | )
720 |
721 | monitor.sample_cpu()
722 |
723 | # Validate integration results
724 | assert result["status"] == "success"
725 | assert result["execution_mode"] == "parallel"
726 | assert "parallel_execution_stats" in result
727 | assert result["parallel_execution_stats"]["total_tasks"] > 0
728 | assert result["execution_time_ms"] > 0
729 |
730 | # Performance validation
731 | execution_time_seconds = result["execution_time_ms"] / 1000
732 | assert execution_time_seconds < 10.0, (
733 | "Integration test should complete within 10s"
734 | )
735 | assert monitor.metrics["memory_multiplier"] < 5.0, (
736 | "Memory usage should be reasonable"
737 | )
738 |
739 | logger.info("Deep research agent integration test passed")
740 |
741 | @pytest.mark.parametrize(
742 | "failure_rate", [0.25]
743 | ) # Only high failure rate for slow tests
744 | async def test_high_failure_rate_resilience(
745 | self,
746 | failure_rate: float,
747 | performance_monitor,
748 | mock_executor_factory,
749 | performance_tester,
750 | ):
751 | """Test system resilience under high failure rates."""
752 | agent_count = 6 # More agents for integration testing
753 | test_name = f"high_failure_resilience_{int(failure_rate * 100)}pct"
754 |
755 | resilience_results = []
756 |
757 | for run in range(STATISTICAL_RUNS):
758 | with performance_monitor(f"{test_name}_run{run}") as monitor:
759 | config = ParallelResearchConfig(
760 | max_concurrent_agents=4, # Limit concurrency
761 | timeout_per_agent=8, # Longer timeout for resilience
762 | enable_fallbacks=True,
763 | rate_limit_delay=0.1,
764 | )
765 |
766 | tasks = performance_tester.create_test_tasks(
767 | agent_count, f"resilience_test_{run}"
768 | )
769 | executor = mock_executor_factory(
770 | failure_rate=failure_rate,
771 | base_delay=0.2, # Longer delays to simulate real API calls
772 | delay_variance=0.3,
773 | include_timeouts=True, # Include timeout scenarios
774 | )
775 |
776 | monitor.sample_cpu()
777 |
778 | try:
779 | result = await performance_tester.run_parallel_test(
780 | tasks, config, executor
781 | )
782 |
783 | success_rate = result.successful_tasks / (
784 | result.successful_tasks + result.failed_tasks
785 | )
786 | resilience_results.append(
787 | {
788 | "success_rate": success_rate,
789 | "execution_time": monitor.metrics["execution_time"],
790 | "memory_multiplier": monitor.metrics["memory_multiplier"],
791 | "parallel_efficiency": result.parallel_efficiency,
792 | }
793 | )
794 |
795 | except Exception as e:
796 | logger.warning(f"Resilience test run {run} failed: {e}")
797 | resilience_results.append(
798 | {
799 | "success_rate": 0.0,
800 | "execution_time": monitor.metrics["execution_time"],
801 | "memory_multiplier": monitor.metrics["memory_multiplier"],
802 | "parallel_efficiency": 0.0,
803 | }
804 | )
805 |
806 | monitor.sample_cpu()
807 |
808 | # Analyze resilience results
809 | avg_success_rate = statistics.mean(
810 | [r["success_rate"] for r in resilience_results]
811 | )
812 | avg_execution_time = statistics.mean(
813 | [r["execution_time"] for r in resilience_results]
814 | )
815 |
816 | # Validate system maintained reasonable performance under stress
817 | min_acceptable_success = 0.5 # 50% success rate under 25% failure
818 | assert avg_success_rate >= min_acceptable_success, (
819 | f"System not resilient enough: {avg_success_rate:.2f} < {min_acceptable_success}"
820 | )
821 |
822 | assert avg_execution_time < 20.0, (
823 | "High failure rate tests should still complete reasonably"
824 | )
825 |
826 | logger.info(
827 | f"High failure rate resilience test: {avg_success_rate:.2f} average success rate, "
828 | f"{avg_execution_time:.2f}s average execution time"
829 | )
830 |
831 |
832 | # Test Suite Performance Validation
833 | @pytest.mark.unit
834 | class TestSuitePerformance:
835 | """Validate overall test suite performance characteristics."""
836 |
837 | async def test_total_test_duration_under_threshold(self, performance_monitor):
838 | """Validate that core performance tests complete within time budget."""
839 | with performance_monitor("test_suite_duration") as monitor:
840 | # Simulate running the key performance tests
841 | await asyncio.sleep(0.1) # Placeholder for actual test execution
842 | monitor.sample_cpu()
843 |
844 | # This is a meta-test that would be updated based on actual suite performance
845 | # For now, we validate the monitoring infrastructure works
846 | assert monitor.metrics["execution_time"] < 1.0, (
847 | "Meta-test should complete quickly"
848 | )
849 | assert monitor.metrics["memory_multiplier"] < 2.0, (
850 | "Meta-test should use minimal memory"
851 | )
852 |
853 | def test_performance_threshold_configuration(self):
854 | """Validate performance threshold configuration is reasonable."""
855 | # Test threshold sanity checks
856 | assert PERFORMANCE_THRESHOLDS["speedup"][2] > 1.0, (
857 | "2-agent speedup should exceed 1x"
858 | )
859 | assert (
860 | PERFORMANCE_THRESHOLDS["speedup"][4] > PERFORMANCE_THRESHOLDS["speedup"][2]
861 | ), "4-agent speedup should exceed 2-agent"
862 | assert PERFORMANCE_THRESHOLDS["memory_multiplier"] > 1.0, (
863 | "Memory multiplier should allow some increase"
864 | )
865 | assert PERFORMANCE_THRESHOLDS["memory_multiplier"] < 10.0, (
866 | "Memory multiplier should be reasonable"
867 | )
868 | assert PERFORMANCE_THRESHOLDS["max_test_duration"] > 10.0, (
869 | "Test duration budget should be reasonable"
870 | )
871 |
872 | # Test failure rate configuration
873 | assert all(0.0 <= rate <= 1.0 for rate in FAILURE_RATES), (
874 | "Failure rates should be valid percentages"
875 | )
876 | assert len(set(FAILURE_RATES)) == len(FAILURE_RATES), (
877 | "Failure rates should be unique"
878 | )
879 |
880 | # Test statistical significance configuration
881 | assert STATISTICAL_RUNS >= 3, (
882 | "Statistical runs should provide reasonable sample size"
883 | )
884 | assert STATISTICAL_RUNS <= 10, "Statistical runs should not be excessive for CI"
885 |
886 |
887 | if __name__ == "__main__":
888 | # Allow running individual performance tests for development
889 | import sys
890 |
891 | if len(sys.argv) > 1:
892 | pytest.main([sys.argv[1], "-v", "-s", "--tb=short"])
893 | else:
894 | # Run unit performance tests by default
895 | pytest.main([__file__, "-v", "-s", "-m", "unit", "--tb=short"])
896 |
```
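The threshold logic documented at the top of this file reduces to two comparisons: the measured speedup (sequential time divided by parallel time) must reach at least 80% of the per-agent-count minimum, and the end-to-start memory ratio must stay at or below 3x. A minimal standalone sketch of that arithmetic, reusing the same constants; the `check_thresholds` helper below is illustrative and not part of the repository:

```python
# Illustrative sketch of the check performed by
# PerformanceTester.validate_performance_thresholds (not repository code).

THRESHOLDS = {"speedup": {2: 1.3, 4: 2.0}, "memory_multiplier": 3.0}


def check_thresholds(
    agent_count: int,
    sequential_time: float,
    parallel_time: float,
    memory_multiplier: float,
    tolerance: float = 0.8,  # accept 80% of the expected speedup
) -> dict[str, bool]:
    """Return pass/fail flags for the speedup and memory thresholds."""
    speedup = sequential_time / max(parallel_time, 0.001)  # avoid division by zero
    expected = THRESHOLDS["speedup"].get(agent_count, 1.0)
    return {
        "speedup_ok": speedup >= expected * tolerance,
        "memory_ok": memory_multiplier <= THRESHOLDS["memory_multiplier"],
    }


# Example: 4 agents, 2.0s sequential vs 1.1s parallel, 2.4x memory growth.
print(check_thresholds(4, 2.0, 1.1, 2.4))
# -> {'speedup_ok': True, 'memory_ok': True}  (1.82x >= 2.0x * 0.8; 2.4x <= 3.0x)
```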
--------------------------------------------------------------------------------
/tests/integration/test_mcp_tools.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | MCP Tools Integration Tests for Claude Desktop Interaction.
3 |
4 | This test suite covers:
5 | - All MCP tool registrations and functionality
6 | - Tool parameter validation and error handling
7 | - Tool response formats and data integrity
8 | - Claude Desktop simulation and interaction patterns
9 | - Real-world usage scenarios
10 | - Performance and timeout handling
11 | """
12 |
13 | import asyncio
14 | import logging
15 | from unittest.mock import Mock, patch
16 |
17 | import numpy as np
18 | import pytest
19 | from fastmcp import Context
20 |
21 | from maverick_mcp.api.routers.backtesting import setup_backtesting_tools
22 |
23 | logger = logging.getLogger(__name__)
24 |
25 |
26 | class MockFastMCP:
27 | """Mock FastMCP instance for testing tool registration."""
28 |
29 | def __init__(self):
30 | self.tools = {}
31 | self.tool_functions = {}
32 |
33 |     def tool(self, name: str | None = None):
34 | """Mock tool decorator."""
35 |
36 | def decorator(func):
37 | tool_name = name or func.__name__
38 | self.tools[tool_name] = {
39 | "function": func,
40 | "name": tool_name,
41 | "signature": self._get_function_signature(func),
42 | }
43 | self.tool_functions[tool_name] = func
44 | return func
45 |
46 | return decorator
47 |
48 | def _get_function_signature(self, func):
49 | """Extract function signature for validation."""
50 | import inspect
51 |
52 | sig = inspect.signature(func)
53 | return {
54 | "parameters": list(sig.parameters.keys()),
55 | "annotations": {k: str(v.annotation) for k, v in sig.parameters.items()},
56 | }
57 |
58 |
59 | class TestMCPToolsIntegration:
60 | """Integration tests for MCP tools and Claude Desktop interaction."""
61 |
62 | @pytest.fixture
63 | def mock_mcp(self):
64 | """Create mock FastMCP instance."""
65 | return MockFastMCP()
66 |
67 | @pytest.fixture
68 | def mock_context(self):
69 | """Create mock MCP context."""
70 | context = Mock(spec=Context)
71 | context.session = {}
72 | return context
73 |
74 | @pytest.fixture
75 | async def setup_tools(self, mock_mcp):
76 | """Set up all backtesting tools for testing."""
77 | setup_backtesting_tools(mock_mcp)
78 | return mock_mcp
79 |
80 | async def test_all_mcp_tools_registration(self, setup_tools):
81 | """Test that all MCP tools are properly registered."""
82 | mcp = setup_tools
83 |
84 | # Expected tools from backtesting router
85 | expected_tools = [
86 | "run_backtest",
87 | "optimize_strategy",
88 | "walk_forward_analysis",
89 | "monte_carlo_simulation",
90 | "compare_strategies",
91 | "list_strategies",
92 | "parse_strategy",
93 | "backtest_portfolio",
94 | "generate_backtest_charts",
95 | "generate_optimization_charts",
96 | "run_ml_strategy_backtest",
97 | "train_ml_predictor",
98 | "analyze_market_regimes",
99 | "create_strategy_ensemble",
100 | ]
101 |
102 | # Check all tools are registered
103 | registered_tools = set(mcp.tools.keys())
104 | expected_set = set(expected_tools)
105 |
106 | missing_tools = expected_set - registered_tools
107 | extra_tools = registered_tools - expected_set
108 |
109 | assert len(missing_tools) == 0, f"Missing tools: {missing_tools}"
110 |
111 | logger.info(f"✓ All {len(registered_tools)} MCP tools registered successfully")
112 | if extra_tools:
113 | logger.info(f"Additional tools found: {extra_tools}")
114 |
115 | # Validate each tool has proper signature
116 | for tool_name, tool_info in mcp.tools.items():
117 | assert callable(tool_info["function"]), f"Tool {tool_name} is not callable"
118 | assert "signature" in tool_info, f"Tool {tool_name} missing signature"
119 |
120 | return {
121 | "registered_tools": list(registered_tools),
122 | "tool_count": len(registered_tools),
123 | }
124 |
125 | async def test_run_backtest_tool_comprehensive(self, setup_tools, mock_context):
126 | """Test run_backtest tool with comprehensive parameter validation."""
127 | mcp = setup_tools
128 | tool_func = mcp.tool_functions["run_backtest"]
129 |
130 | # Test cases with different parameter combinations
131 | test_cases = [
132 | {
133 | "name": "basic_sma_cross",
134 | "params": {
135 | "symbol": "AAPL",
136 | "strategy": "sma_cross",
137 | "fast_period": "10",
138 | "slow_period": "20",
139 | },
140 | "should_succeed": True,
141 | },
142 | {
143 | "name": "rsi_strategy",
144 | "params": {
145 | "symbol": "GOOGL",
146 | "strategy": "rsi",
147 | "period": "14",
148 | "oversold": "30",
149 | "overbought": "70",
150 | },
151 | "should_succeed": True,
152 | },
153 | {
154 | "name": "invalid_symbol",
155 | "params": {
156 | "symbol": "", # Empty symbol
157 | "strategy": "sma_cross",
158 | },
159 | "should_succeed": False,
160 | },
161 | {
162 | "name": "invalid_strategy",
163 | "params": {
164 | "symbol": "AAPL",
165 | "strategy": "nonexistent_strategy",
166 | },
167 | "should_succeed": False,
168 | },
169 | {
170 | "name": "invalid_numeric_params",
171 | "params": {
172 | "symbol": "AAPL",
173 | "strategy": "sma_cross",
174 | "fast_period": "invalid_number",
175 | },
176 | "should_succeed": False,
177 | },
178 | ]
179 |
180 | results = {}
181 |
182 | for test_case in test_cases:
183 | try:
184 | # Mock the VectorBT engine to avoid actual data fetching
185 | with patch("maverick_mcp.backtesting.VectorBTEngine") as mock_engine:
186 | mock_instance = Mock()
187 | mock_engine.return_value = mock_instance
188 |
189 | # Mock successful backtest result
190 | mock_result = {
191 | "symbol": test_case["params"]["symbol"],
192 | "strategy_type": test_case["params"]["strategy"],
193 | "metrics": {
194 | "total_return": 0.15,
195 | "sharpe_ratio": 1.2,
196 | "max_drawdown": -0.12,
197 | "total_trades": 25,
198 | },
199 | "trades": [],
200 | "equity_curve": [10000, 10100, 10200, 10300],
201 | "drawdown_series": [0, -0.01, -0.02, 0],
202 | }
203 | mock_instance.run_backtest.return_value = mock_result
204 |
205 | # Execute tool
206 | result = await tool_func(mock_context, **test_case["params"])
207 |
208 | if test_case["should_succeed"]:
209 | assert isinstance(result, dict), (
210 | f"Result should be dict for {test_case['name']}"
211 | )
212 | assert "symbol" in result, (
213 | f"Missing symbol in result for {test_case['name']}"
214 | )
215 | assert "metrics" in result, (
216 | f"Missing metrics in result for {test_case['name']}"
217 | )
218 | results[test_case["name"]] = {"success": True, "result": result}
219 | logger.info(f"✓ {test_case['name']} succeeded as expected")
220 | else:
221 | # If we got here, it didn't fail as expected
222 | results[test_case["name"]] = {
223 | "success": False,
224 | "unexpected_success": True,
225 | }
226 | logger.warning(
227 | f"⚠ {test_case['name']} succeeded but was expected to fail"
228 | )
229 |
230 | except Exception as e:
231 | if test_case["should_succeed"]:
232 | results[test_case["name"]] = {"success": False, "error": str(e)}
233 | logger.error(f"✗ {test_case['name']} failed unexpectedly: {e}")
234 | else:
235 | results[test_case["name"]] = {
236 | "success": True,
237 | "expected_error": str(e),
238 | }
239 | logger.info(f"✓ {test_case['name']} failed as expected: {e}")
240 |
241 | # Calculate success rate
242 | total_tests = len(test_cases)
243 | successful_tests = sum(1 for r in results.values() if r.get("success", False))
244 | success_rate = successful_tests / total_tests
245 |
246 | assert success_rate >= 0.8, f"Success rate too low: {success_rate:.1%}"
247 |
248 | return {"test_results": results, "success_rate": success_rate}
249 |
250 | async def test_strategy_tools_integration(self, setup_tools, mock_context):
251 | """Test strategy-related tools integration."""
252 | mcp = setup_tools
253 |
254 | # Test list_strategies tool
255 | list_func = mcp.tool_functions["list_strategies"]
256 | strategies_result = await list_func(mock_context)
257 |
258 | assert isinstance(strategies_result, dict), "list_strategies should return dict"
259 | assert "available_strategies" in strategies_result, (
260 | "Missing available_strategies"
261 | )
262 | assert "total_count" in strategies_result, "Missing total_count"
263 | assert strategies_result["total_count"] > 0, "Should have strategies available"
264 |
265 | logger.info(f"✓ Found {strategies_result['total_count']} available strategies")
266 |
267 | # Test parse_strategy tool
268 | parse_func = mcp.tool_functions["parse_strategy"]
269 |
270 | parse_test_cases = [
271 | "Buy when RSI is below 30 and sell when above 70",
272 | "Use 10-day and 20-day moving average crossover",
273 | "MACD strategy with standard parameters",
274 | "Invalid strategy description that makes no sense",
275 | ]
276 |
277 | parse_results = {}
278 | for description in parse_test_cases:
279 | try:
280 | result = await parse_func(mock_context, description=description)
281 | assert isinstance(result, dict), "parse_strategy should return dict"
282 | assert "success" in result, "Missing success field"
283 | assert "strategy" in result, "Missing strategy field"
284 |
285 | parse_results[description] = result
286 | status = "✓" if result["success"] else "⚠"
287 | logger.info(
288 | f"{status} Parsed: '{description}' -> {result['strategy'].get('strategy_type', 'unknown')}"
289 | )
290 |
291 | except Exception as e:
292 | parse_results[description] = {"error": str(e)}
293 | logger.error(f"✗ Parse failed for: '{description}' - {e}")
294 |
295 | return {
296 | "strategies_list": strategies_result,
297 | "parse_results": parse_results,
298 | }
299 |
300 | async def test_optimization_tools_integration(self, setup_tools, mock_context):
301 | """Test optimization-related tools integration."""
302 | mcp = setup_tools
303 |
304 | # Mock VectorBT engine for optimization tests
305 | with patch("maverick_mcp.backtesting.VectorBTEngine") as mock_engine_class:
306 | mock_engine = Mock()
307 | mock_engine_class.return_value = mock_engine
308 |
309 | # Mock optimization results
310 | mock_optimization_result = {
311 | "best_parameters": {"fast_period": 12, "slow_period": 26},
312 | "best_performance": {
313 | "total_return": 0.25,
314 | "sharpe_ratio": 1.8,
315 | "max_drawdown": -0.08,
316 | },
317 | "optimization_results": [
318 | {
319 | "parameters": {"fast_period": 10, "slow_period": 20},
320 | "metrics": {"sharpe_ratio": 1.2},
321 | },
322 | {
323 | "parameters": {"fast_period": 12, "slow_period": 26},
324 | "metrics": {"sharpe_ratio": 1.8},
325 | },
326 | ],
327 | }
328 | mock_engine.optimize_parameters.return_value = mock_optimization_result
329 |
330 | # Test optimize_strategy tool
331 | optimize_func = mcp.tool_functions["optimize_strategy"]
332 |
333 | result = await optimize_func(
334 | mock_context,
335 | symbol="AAPL",
336 | strategy="sma_cross",
337 | optimization_level="medium",
338 | top_n=5,
339 | )
340 |
341 | assert isinstance(result, dict), "optimize_strategy should return dict"
342 | logger.info("✓ optimize_strategy tool executed successfully")
343 |
344 | # Test walk_forward_analysis tool
345 | walk_forward_func = mcp.tool_functions["walk_forward_analysis"]
346 |
347 | # Mock walk-forward analysis
348 | with patch(
349 | "maverick_mcp.backtesting.StrategyOptimizer"
350 | ) as mock_optimizer_class:
351 | mock_optimizer = Mock()
352 | mock_optimizer_class.return_value = mock_optimizer
353 |
354 | mock_walk_forward_result = {
355 | "out_of_sample_performance": {
356 | "total_return": 0.18,
357 | "sharpe_ratio": 1.5,
358 | "win_rate": 0.65,
359 | },
360 | "windows_tested": 4,
361 | "average_window_performance": 0.15,
362 | }
363 | mock_optimizer.walk_forward_analysis.return_value = (
364 | mock_walk_forward_result
365 | )
366 |
367 | result = await walk_forward_func(
368 | mock_context,
369 | symbol="AAPL",
370 | strategy="sma_cross",
371 | window_size=252,
372 | step_size=63,
373 | )
374 |
375 | assert isinstance(result, dict), (
376 | "walk_forward_analysis should return dict"
377 | )
378 | logger.info("✓ walk_forward_analysis tool executed successfully")
379 |
380 | return {"optimization_tests": "completed"}
381 |
382 | async def test_ml_tools_integration(self, setup_tools, mock_context):
383 | """Test ML-enhanced tools integration."""
384 | mcp = setup_tools
385 |
386 | # Test ML strategy tools
387 | ml_tools = [
388 | "run_ml_strategy_backtest",
389 | "train_ml_predictor",
390 | "analyze_market_regimes",
391 | "create_strategy_ensemble",
392 | ]
393 |
394 | ml_results = {}
395 |
396 | for tool_name in ml_tools:
397 | if tool_name in mcp.tool_functions:
398 | try:
399 | tool_func = mcp.tool_functions[tool_name]
400 |
401 | # Mock ML dependencies
402 | with patch(
403 | "maverick_mcp.backtesting.VectorBTEngine"
404 | ) as mock_engine:
405 | mock_instance = Mock()
406 | mock_engine.return_value = mock_instance
407 |
408 | # Mock historical data
409 | import numpy as np
410 | import pandas as pd
411 |
412 | dates = pd.date_range(
413 | start="2022-01-01", end="2023-12-31", freq="D"
414 | )
415 | mock_data = pd.DataFrame(
416 | {
417 | "open": np.random.uniform(100, 200, len(dates)),
418 | "high": np.random.uniform(100, 200, len(dates)),
419 | "low": np.random.uniform(100, 200, len(dates)),
420 | "close": np.random.uniform(100, 200, len(dates)),
421 | "volume": np.random.randint(
422 | 1000000, 10000000, len(dates)
423 | ),
424 | },
425 | index=dates,
426 | )
427 | mock_instance.get_historical_data.return_value = mock_data
428 |
429 | # Test specific ML tools
430 | if tool_name == "run_ml_strategy_backtest":
431 | result = await tool_func(
432 | mock_context,
433 | symbol="AAPL",
434 | strategy_type="ml_predictor",
435 | model_type="random_forest",
436 | )
437 | elif tool_name == "train_ml_predictor":
438 | result = await tool_func(
439 | mock_context,
440 | symbol="AAPL",
441 | model_type="random_forest",
442 | n_estimators=100,
443 | )
444 | elif tool_name == "analyze_market_regimes":
445 | result = await tool_func(
446 | mock_context,
447 | symbol="AAPL",
448 | method="hmm",
449 | n_regimes=3,
450 | )
451 | elif tool_name == "create_strategy_ensemble":
452 | result = await tool_func(
453 | mock_context,
454 | symbols=["AAPL", "GOOGL"],
455 | base_strategies=["sma_cross", "rsi"],
456 | )
457 |
458 | ml_results[tool_name] = {
459 | "success": True,
460 | "type": type(result).__name__,
461 | }
462 | logger.info(f"✓ {tool_name} executed successfully")
463 |
464 | except Exception as e:
465 | ml_results[tool_name] = {"success": False, "error": str(e)}
466 | logger.error(f"✗ {tool_name} failed: {e}")
467 | else:
468 | ml_results[tool_name] = {"success": False, "error": "Tool not found"}
469 |
470 | return ml_results
471 |
472 | async def test_visualization_tools_integration(self, setup_tools, mock_context):
473 | """Test visualization tools integration."""
474 | mcp = setup_tools
475 |
476 | visualization_tools = [
477 | "generate_backtest_charts",
478 | "generate_optimization_charts",
479 | ]
480 |
481 | viz_results = {}
482 |
483 | for tool_name in visualization_tools:
484 | if tool_name in mcp.tool_functions:
485 | try:
486 | tool_func = mcp.tool_functions[tool_name]
487 |
488 | # Mock VectorBT engine and visualization dependencies
489 | with patch(
490 | "maverick_mcp.backtesting.VectorBTEngine"
491 | ) as mock_engine:
492 | mock_instance = Mock()
493 | mock_engine.return_value = mock_instance
494 |
495 | # Mock backtest result for charts
496 | mock_result = {
497 | "symbol": "AAPL",
498 | "equity_curve": [10000, 10100, 10200, 10300, 10250],
499 | "drawdown_series": [0, -0.01, -0.02, 0, -0.005],
500 | "trades": [
501 | {
502 | "entry_time": "2023-01-01",
503 | "exit_time": "2023-02-01",
504 | "pnl": 100,
505 | },
506 | {
507 | "entry_time": "2023-03-01",
508 | "exit_time": "2023-04-01",
509 | "pnl": -50,
510 | },
511 | ],
512 | "metrics": {
513 | "total_return": 0.15,
514 | "sharpe_ratio": 1.2,
515 | "max_drawdown": -0.08,
516 | "total_trades": 10,
517 | },
518 | }
519 | mock_instance.run_backtest.return_value = mock_result
520 |
521 | # Mock visualization functions
522 | with patch(
523 | "maverick_mcp.backtesting.visualization.generate_equity_curve"
524 | ) as mock_equity:
525 | with patch(
526 | "maverick_mcp.backtesting.visualization.generate_performance_dashboard"
527 | ) as mock_dashboard:
528 | with patch(
529 | "maverick_mcp.backtesting.visualization.generate_trade_scatter"
530 | ) as mock_scatter:
531 | with patch(
532 | "maverick_mcp.backtesting.visualization.generate_optimization_heatmap"
533 | ) as mock_heatmap:
534 | # Mock chart returns (base64 strings)
535 | mock_chart_data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
536 | mock_equity.return_value = mock_chart_data
537 | mock_dashboard.return_value = mock_chart_data
538 | mock_scatter.return_value = mock_chart_data
539 | mock_heatmap.return_value = mock_chart_data
540 |
541 | # Execute visualization tool
542 | result = await tool_func(
543 | mock_context,
544 | symbol="AAPL",
545 | strategy="sma_cross",
546 | theme="light",
547 | )
548 |
549 | assert isinstance(result, dict), (
550 | f"{tool_name} should return dict"
551 | )
552 |
553 | # Validate chart data
554 | for chart_name, chart_data in result.items():
555 | assert isinstance(chart_data, str), (
556 | f"Chart {chart_name} should be string"
557 | )
558 | assert len(chart_data) > 0, (
559 | f"Chart {chart_name} should have data"
560 | )
561 |
562 | viz_results[tool_name] = {
563 | "success": True,
564 | "charts_generated": list(result.keys()),
565 | "chart_count": len(result),
566 | }
567 | logger.info(
568 | f"✓ {tool_name} generated {len(result)} charts successfully"
569 | )
570 |
571 | except Exception as e:
572 | viz_results[tool_name] = {"success": False, "error": str(e)}
573 | logger.error(f"✗ {tool_name} failed: {e}")
574 | else:
575 | viz_results[tool_name] = {"success": False, "error": "Tool not found"}
576 |
577 | return viz_results
578 |
579 | async def test_claude_desktop_simulation(self, setup_tools, mock_context):
580 | """Simulate realistic Claude Desktop usage patterns."""
581 | mcp = setup_tools
582 |
583 | # Simulate a typical Claude Desktop session
584 | session_commands = [
585 | {
586 | "command": "List available strategies",
587 | "tool": "list_strategies",
588 | "params": {},
589 | },
590 | {
591 | "command": "Run backtest for AAPL with SMA crossover",
592 | "tool": "run_backtest",
593 | "params": {
594 | "symbol": "AAPL",
595 | "strategy": "sma_cross",
596 | "fast_period": "10",
597 | "slow_period": "20",
598 | },
599 | },
600 | {
601 | "command": "Compare multiple strategies",
602 | "tool": "compare_strategies",
603 | "params": {
604 | "symbol": "AAPL",
605 | "strategies": ["sma_cross", "rsi", "macd"],
606 | },
607 | },
608 | {
609 | "command": "Generate charts for backtest",
610 | "tool": "generate_backtest_charts",
611 | "params": {
612 | "symbol": "AAPL",
613 | "strategy": "sma_cross",
614 | },
615 | },
616 | ]
617 |
618 | session_results = []
619 |
620 | # Mock all necessary dependencies for simulation
621 | with patch("maverick_mcp.backtesting.VectorBTEngine") as mock_engine_class:
622 | mock_engine = Mock()
623 | mock_engine_class.return_value = mock_engine
624 |
625 | # Mock results for different tools
626 | mock_backtest_result = {
627 | "symbol": "AAPL",
628 | "strategy_type": "sma_cross",
629 | "metrics": {"total_return": 0.15, "sharpe_ratio": 1.2},
630 | "trades": [],
631 | "equity_curve": [10000, 10150],
632 | "drawdown_series": [0, -0.02],
633 | }
634 | mock_engine.run_backtest.return_value = mock_backtest_result
635 |
636 | # Mock comparison results
637 | with patch(
638 | "maverick_mcp.backtesting.BacktestAnalyzer"
639 | ) as mock_analyzer_class:
640 | mock_analyzer = Mock()
641 | mock_analyzer_class.return_value = mock_analyzer
642 |
643 | mock_comparison = {
644 | "strategy_rankings": [
645 | {"strategy": "sma_cross", "rank": 1, "sharpe_ratio": 1.2},
646 | {"strategy": "rsi", "rank": 2, "sharpe_ratio": 1.1},
647 | {"strategy": "macd", "rank": 3, "sharpe_ratio": 0.9},
648 | ],
649 | "best_strategy": "sma_cross",
650 | }
651 | mock_analyzer.compare_strategies.return_value = mock_comparison
652 |
653 | # Mock visualization
654 | with patch(
655 | "maverick_mcp.backtesting.visualization.generate_equity_curve"
656 | ) as mock_viz:
657 | mock_viz.return_value = "mock_chart_data"
658 |
659 | # Execute session commands
660 | for command_info in session_commands:
661 | try:
662 | start_time = asyncio.get_event_loop().time()
663 |
664 | tool_func = mcp.tool_functions[command_info["tool"]]
665 | result = await tool_func(
666 | mock_context, **command_info["params"]
667 | )
668 |
669 | execution_time = (
670 | asyncio.get_event_loop().time() - start_time
671 | )
672 |
673 | session_results.append(
674 | {
675 | "command": command_info["command"],
676 | "tool": command_info["tool"],
677 | "success": True,
678 | "execution_time": execution_time,
679 | "result_type": type(result).__name__,
680 | }
681 | )
682 |
683 | logger.info(
684 | f"✓ '{command_info['command']}' completed in {execution_time:.3f}s"
685 | )
686 |
687 | except Exception as e:
688 | session_results.append(
689 | {
690 | "command": command_info["command"],
691 | "tool": command_info["tool"],
692 | "success": False,
693 | "error": str(e),
694 | }
695 | )
696 | logger.error(f"✗ '{command_info['command']}' failed: {e}")
697 |
698 | # Analyze session results
699 | total_commands = len(session_commands)
700 | successful_commands = sum(1 for r in session_results if r.get("success", False))
701 | success_rate = successful_commands / total_commands
702 | avg_execution_time = np.mean(
703 | [r.get("execution_time", 0) for r in session_results if r.get("success")]
704 | )
705 |
706 | assert success_rate >= 0.75, f"Session success rate too low: {success_rate:.1%}"
707 | assert avg_execution_time < 5.0, (
708 | f"Average execution time too high: {avg_execution_time:.3f}s"
709 | )
710 |
711 | logger.info(
712 | f"Claude Desktop Simulation Results:\n"
713 | f" • Commands Executed: {total_commands}\n"
714 | f" • Successful: {successful_commands}\n"
715 | f" • Success Rate: {success_rate:.1%}\n"
716 | f" • Avg Execution Time: {avg_execution_time:.3f}s"
717 | )
718 |
719 | return {
720 | "session_results": session_results,
721 | "success_rate": success_rate,
722 | "avg_execution_time": avg_execution_time,
723 | }
724 |
725 | async def test_tool_parameter_validation_comprehensive(
726 | self, setup_tools, mock_context
727 | ):
728 | """Test comprehensive parameter validation across all tools."""
729 | mcp = setup_tools
730 |
731 | validation_tests = []
732 |
733 | # Test parameter validation for key tools
734 | test_cases = [
735 | {
736 | "tool": "run_backtest",
737 | "valid_params": {"symbol": "AAPL", "strategy": "sma_cross"},
738 | "invalid_params": [
739 | {"symbol": "", "strategy": "sma_cross"}, # Empty symbol
740 | {"symbol": "AAPL", "strategy": ""}, # Empty strategy
741 | {
742 | "symbol": "AAPL",
743 | "strategy": "sma_cross",
744 | "fast_period": "not_a_number",
745 | }, # Invalid number
746 | ],
747 | },
748 | {
749 | "tool": "optimize_strategy",
750 | "valid_params": {"symbol": "AAPL", "strategy": "sma_cross"},
751 | "invalid_params": [
752 | {
753 | "symbol": "AAPL",
754 | "strategy": "invalid_strategy",
755 | }, # Invalid strategy
756 | {
757 | "symbol": "AAPL",
758 | "strategy": "sma_cross",
759 | "top_n": -1,
760 | }, # Negative top_n
761 | ],
762 | },
763 | ]
764 |
765 | for test_case in test_cases:
766 | tool_name = test_case["tool"]
767 | if tool_name in mcp.tool_functions:
768 | tool_func = mcp.tool_functions[tool_name]
769 |
770 | # Test valid parameters
771 | try:
772 | with patch("maverick_mcp.backtesting.VectorBTEngine"):
773 | await tool_func(mock_context, **test_case["valid_params"])
774 | validation_tests.append(
775 | {
776 | "tool": tool_name,
777 | "test": "valid_params",
778 | "success": True,
779 | }
780 | )
781 | except Exception as e:
782 | validation_tests.append(
783 | {
784 | "tool": tool_name,
785 | "test": "valid_params",
786 | "success": False,
787 | "error": str(e),
788 | }
789 | )
790 |
791 | # Test invalid parameters
792 | for invalid_params in test_case["invalid_params"]:
793 | try:
794 | with patch("maverick_mcp.backtesting.VectorBTEngine"):
795 | await tool_func(mock_context, **invalid_params)
796 | # If we got here, validation didn't catch the error
797 | validation_tests.append(
798 | {
799 | "tool": tool_name,
800 | "test": f"invalid_params_{invalid_params}",
801 | "success": False,
802 | "error": "Validation should have failed but didn't",
803 | }
804 | )
805 | except Exception as e:
806 | # Expected to fail
807 | validation_tests.append(
808 | {
809 | "tool": tool_name,
810 | "test": f"invalid_params_{invalid_params}",
811 | "success": True,
812 | "expected_error": str(e),
813 | }
814 | )
815 |
816 | # Calculate validation success rate
817 | total_validation_tests = len(validation_tests)
818 | successful_validations = sum(
819 | 1 for t in validation_tests if t.get("success", False)
820 | )
821 | validation_success_rate = (
822 | successful_validations / total_validation_tests
823 | if total_validation_tests > 0
824 | else 0
825 | )
826 |
827 | logger.info(
828 | f"Parameter Validation Results:\n"
829 | f" • Total Validation Tests: {total_validation_tests}\n"
830 | f" • Successful Validations: {successful_validations}\n"
831 | f" • Validation Success Rate: {validation_success_rate:.1%}"
832 | )
833 |
834 | return {
835 | "validation_tests": validation_tests,
836 | "validation_success_rate": validation_success_rate,
837 | }
838 |
839 |
840 | if __name__ == "__main__":
841 | # Run MCP tools integration tests
842 | pytest.main(
843 | [
844 | __file__,
845 | "-v",
846 | "--tb=short",
847 | "--asyncio-mode=auto",
848 | "--timeout=300", # 5 minute timeout
849 | "--durations=10",
850 | ]
851 | )
852 |
```
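For reference, a minimal standalone sketch of the patch-and-mock pattern the suite above relies on: `maverick_mcp.backtesting.VectorBTEngine` is patched so a registered tool can be exercised without touching real market data. The fixture names (`setup_tools`, `mock_context`) and the `run_backtest` tool mirror the usage in the file above; the exact shape of the returned dict depends on the tool implementation elsewhere in the repo, so only the coarse assertion is made here.

```python
import pytest
from unittest.mock import Mock, patch


@pytest.mark.asyncio
async def test_run_backtest_minimal(setup_tools, mock_context):
    """Smoke-test a single registered MCP tool with a mocked backtest engine."""
    mcp = setup_tools
    tool_func = mcp.tool_functions["run_backtest"]

    # Patch the engine so no data provider or VectorBT computation is required.
    with patch("maverick_mcp.backtesting.VectorBTEngine") as mock_engine_class:
        mock_engine = Mock()
        mock_engine_class.return_value = mock_engine
        mock_engine.run_backtest.return_value = {
            "symbol": "AAPL",
            "strategy_type": "sma_cross",
            "metrics": {"total_return": 0.10, "sharpe_ratio": 1.0},
        }

        result = await tool_func(mock_context, symbol="AAPL", strategy="sma_cross")

    assert isinstance(result, dict)
```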
--------------------------------------------------------------------------------
/maverick_mcp/agents/supervisor.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | SupervisorAgent implementation using 2025 LangGraph patterns.
3 |
4 | Orchestrates multiple specialized agents with intelligent routing, result synthesis,
5 | and conflict resolution for comprehensive financial analysis.
6 | """
7 |
8 | import logging
9 | from datetime import datetime
10 | from typing import Any
11 |
12 | from langchain_core.language_models import BaseChatModel
13 | from langchain_core.messages import HumanMessage, SystemMessage
14 | from langchain_core.tools import BaseTool
15 | from langgraph.checkpoint.memory import MemorySaver
16 | from langgraph.graph import END, START, StateGraph
17 | from langgraph.types import Command
18 |
19 | from maverick_mcp.agents.base import INVESTOR_PERSONAS, PersonaAwareAgent
20 | from maverick_mcp.config.settings import get_settings
21 | from maverick_mcp.exceptions import AgentInitializationError
22 | from maverick_mcp.memory.stores import ConversationStore
23 | from maverick_mcp.workflows.state import SupervisorState
24 |
25 | logger = logging.getLogger(__name__)
26 | settings = get_settings()
27 |
28 | # Query routing matrix for intelligent agent selection
29 | ROUTING_MATRIX = {
30 | "market_screening": {
31 | "agents": ["market"],
32 | "primary": "market",
33 | "parallel": False,
34 | "confidence_threshold": 0.7,
35 | "synthesis_required": False,
36 | },
37 | "technical_analysis": {
38 | "agents": ["technical"],
39 | "primary": "technical",
40 | "parallel": False,
41 | "confidence_threshold": 0.8,
42 | "synthesis_required": False,
43 | },
44 | "stock_investment_decision": {
45 | "agents": ["market", "technical"],
46 | "primary": "technical",
47 | "parallel": True,
48 | "confidence_threshold": 0.85,
49 | "synthesis_required": True,
50 | },
51 | "portfolio_analysis": {
52 | "agents": ["market", "technical"],
53 | "primary": "market",
54 | "parallel": True,
55 | "confidence_threshold": 0.75,
56 | "synthesis_required": True,
57 | },
58 | "deep_research": {
59 | "agents": ["research"], # Research agent handles comprehensive analysis
60 | "primary": "research",
61 | "parallel": False,
62 | "confidence_threshold": 0.9,
63 | "synthesis_required": False, # Research agent provides complete analysis
64 | },
65 | "company_research": {
66 | "agents": ["research"], # Dedicated company research
67 | "primary": "research",
68 | "parallel": False,
69 | "confidence_threshold": 0.85,
70 | "synthesis_required": False,
71 | },
72 | "sentiment_analysis": {
73 | "agents": ["research"], # Market sentiment analysis
74 | "primary": "research",
75 | "parallel": False,
76 | "confidence_threshold": 0.8,
77 | "synthesis_required": False,
78 | },
79 | "risk_assessment": {
80 | "agents": ["market", "technical"], # Future risk agent integration
81 | "primary": "market",
82 | "parallel": True,
83 | "confidence_threshold": 0.8,
84 | "synthesis_required": True,
85 | },
86 | }
87 |
88 |
89 | class QueryClassifier:
90 | """LLM-powered query classification with rule-based fallback."""
91 |
92 | def __init__(self, llm: BaseChatModel):
93 | self.llm = llm
94 |
95 | async def classify_query(self, query: str, persona: str) -> dict[str, Any]:
96 | """Classify query using LLM with structured output."""
97 |
98 | classification_prompt = f"""
99 | Analyze this financial query and classify it for multi-agent routing.
100 |
101 | Query: "{query}"
102 | Investor Persona: {persona}
103 |
104 | Classify into one of these categories:
105 | 1. market_screening - Finding stocks, sector analysis, market breadth
106 | 2. technical_analysis - Chart patterns, indicators, entry/exit points
107 | 3. stock_investment_decision - Complete analysis of specific stock(s)
108 | 4. portfolio_analysis - Portfolio optimization, risk assessment
109 | 5. deep_research - Fundamental analysis, company research, news analysis
110 | 6. risk_assessment - Position sizing, risk management, portfolio risk
111 |
112 | Consider the complexity and return classification with confidence.
113 |
114 | Return ONLY valid JSON in this exact format:
115 | {{
116 | "category": "category_name",
117 | "confidence": 0.85,
118 | "required_agents": ["agent1", "agent2"],
119 | "complexity": "simple",
120 | "estimated_execution_time_ms": 30000,
121 | "parallel_capable": true,
122 | "reasoning": "Brief explanation of classification"
123 | }}
124 | """
125 |
126 | try:
127 | response = await self.llm.ainvoke(
128 | [
129 | SystemMessage(
130 | content="You are a financial query classifier. Return only valid JSON."
131 | ),
132 | HumanMessage(content=classification_prompt),
133 | ]
134 | )
135 |
136 | # Parse LLM response
137 | import json
138 |
139 | classification = json.loads(response.content.strip())
140 |
141 | # Validate and enhance with routing matrix
142 | category = classification.get("category", "stock_investment_decision")
143 | routing_config = ROUTING_MATRIX.get(
144 | category, ROUTING_MATRIX["stock_investment_decision"]
145 | )
146 |
147 | return {
148 | **classification,
149 | "routing_config": routing_config,
150 | "timestamp": datetime.now(),
151 | }
152 |
153 | except Exception as e:
154 | logger.warning(f"LLM classification failed: {e}, using rule-based fallback")
155 | return self._rule_based_fallback(query, persona)
156 |
157 | def _rule_based_fallback(self, query: str, persona: str) -> dict[str, Any]:
158 | """Rule-based classification fallback."""
159 | query_lower = query.lower()
160 |
161 | # Simple keyword-based classification
162 | if any(
163 | word in query_lower for word in ["screen", "find stocks", "scan", "search"]
164 | ):
165 | category = "market_screening"
166 | elif any(
167 | word in query_lower
168 | for word in ["chart", "technical", "rsi", "macd", "pattern"]
169 | ):
170 | category = "technical_analysis"
171 | elif any(
172 | word in query_lower for word in ["portfolio", "allocation", "diversif"]
173 | ):
174 | category = "portfolio_analysis"
175 | elif any(
176 | word in query_lower
177 | for word in ["research", "fundamental", "news", "earnings"]
178 | ):
179 | category = "deep_research"
180 | elif any(
181 | word in query_lower
182 | for word in ["company", "business", "competitive", "industry"]
183 | ):
184 | category = "company_research"
185 | elif any(
186 | word in query_lower for word in ["sentiment", "opinion", "mood", "feeling"]
187 | ):
188 | category = "sentiment_analysis"
189 | elif any(
190 | word in query_lower for word in ["risk", "position size", "stop loss"]
191 | ):
192 | category = "risk_assessment"
193 | else:
194 | category = "stock_investment_decision"
195 |
196 | routing_config = ROUTING_MATRIX[category]
197 |
198 | return {
199 | "category": category,
200 | "confidence": 0.6,
201 | "required_agents": routing_config["agents"],
202 | "complexity": "moderate",
203 | "estimated_execution_time_ms": 60000,
204 | "parallel_capable": routing_config["parallel"],
205 | "reasoning": "Rule-based classification fallback",
206 | "routing_config": routing_config,
207 | "timestamp": datetime.now(),
208 | }
209 |
210 |
211 | class ResultSynthesizer:
212 | """Synthesize results from multiple agents with conflict resolution."""
213 |
214 | def __init__(self, llm: BaseChatModel, persona):
215 | self.llm = llm
216 | self.persona = persona
217 |
218 | async def synthesize_results(
219 | self,
220 | agent_results: dict[str, Any],
221 | query_type: str,
222 | conflicts: list[dict[str, Any]],
223 | ) -> dict[str, Any]:
224 | """Synthesize final recommendation from agent results."""
225 |
226 | # Calculate agent weights based on query type and persona
227 | weights = self._calculate_agent_weights(query_type, agent_results)
228 |
229 | # Create synthesis prompt
230 | synthesis_prompt = self._build_synthesis_prompt(
231 | agent_results, weights, query_type, conflicts
232 | )
233 |
234 | # Use LLM to synthesize coherent response
235 | synthesis_response = await self.llm.ainvoke(
236 | [
237 | SystemMessage(content="You are a financial analysis synthesizer."),
238 | HumanMessage(content=synthesis_prompt),
239 | ]
240 | )
241 |
242 | return {
243 | "synthesis": synthesis_response.content,
244 | "weights_applied": weights,
245 | "conflicts_resolved": len(conflicts),
246 | "confidence_score": self._calculate_overall_confidence(
247 | agent_results, weights
248 | ),
249 | "contributing_agents": list(agent_results.keys()),
250 | "persona_alignment": self._assess_persona_alignment(
251 | synthesis_response.content
252 | ),
253 | }
254 |
255 | def _calculate_agent_weights(
256 | self, query_type: str, agent_results: dict
257 | ) -> dict[str, float]:
258 | """Calculate weights for agent results based on context."""
259 | base_weights = {
260 | "market_screening": {"market": 0.9, "technical": 0.1},
261 | "technical_analysis": {"market": 0.2, "technical": 0.8},
262 | "stock_investment_decision": {"market": 0.4, "technical": 0.6},
263 | "portfolio_analysis": {"market": 0.6, "technical": 0.4},
264 | "deep_research": {"research": 1.0},
265 | "company_research": {"research": 1.0},
266 | "sentiment_analysis": {"research": 1.0},
267 | "risk_assessment": {"market": 0.3, "technical": 0.3, "risk": 0.4},
268 | }
269 |
270 | weights = base_weights.get(query_type, {"market": 0.5, "technical": 0.5})
271 |
272 | # Adjust weights based on agent confidence scores
273 | for agent, base_weight in weights.items():
274 | if agent in agent_results:
275 | confidence = agent_results[agent].get("confidence_score", 0.5)
276 | weights[agent] = base_weight * (0.5 + confidence * 0.5)
277 |
278 | # Normalize weights to sum to 1.0
279 | total_weight = sum(weights.values())
280 | if total_weight > 0:
281 | weights = {k: v / total_weight for k, v in weights.items()}
282 |
283 | return weights
284 |
285 | def _build_synthesis_prompt(
286 | self,
287 | agent_results: dict[str, Any],
288 | weights: dict[str, float],
289 | query_type: str,
290 | conflicts: list[dict[str, Any]],
291 | ) -> str:
292 | """Build synthesis prompt for LLM."""
293 |
294 | prompt = f"""
295 | Synthesize a comprehensive financial analysis response from multiple specialized agents.
296 |
297 | Query Type: {query_type}
298 | Investor Persona: {self.persona.name} - {", ".join(self.persona.characteristics)}
299 |
300 | Agent Results:
301 | """
302 |
303 | for agent, result in agent_results.items():
304 | weight = weights.get(agent, 0.0)
305 | prompt += f"\n{agent.upper()} Agent (Weight: {weight:.2f}):\n"
306 | prompt += f" - Confidence: {result.get('confidence_score', 0.5)}\n"
307 | prompt += (
308 | f" - Analysis: {result.get('analysis', 'No analysis provided')}\n"
309 | )
310 | if "recommendations" in result:
311 | prompt += f" - Recommendations: {result['recommendations']}\n"
312 |
313 | if conflicts:
314 | prompt += f"\nConflicts Detected ({len(conflicts)}):\n"
315 | for i, conflict in enumerate(conflicts, 1):
316 | prompt += f"{i}. {conflict}\n"
317 |
318 | prompt += f"""
319 |
320 | Please synthesize these results into a coherent, actionable response that:
321 | 1. Weighs agent inputs according to their weights and confidence scores
322 | 2. Resolves any conflicts using the {self.persona.name} investor perspective
323 | 3. Provides clear, actionable recommendations aligned with {self.persona.name} characteristics
324 | 4. Includes appropriate risk disclaimers
325 | 5. Maintains professional, confident tone
326 |
327 | Focus on actionable insights for the {self.persona.name} investor profile.
328 | """
329 |
330 | return prompt
331 |
332 | def _calculate_overall_confidence(
333 | self, agent_results: dict, weights: dict[str, float]
334 | ) -> float:
335 | """Calculate weighted overall confidence score."""
336 | total_confidence = 0.0
337 | total_weight = 0.0
338 |
339 | for agent, weight in weights.items():
340 | if agent in agent_results:
341 | confidence = agent_results[agent].get("confidence_score", 0.5)
342 | total_confidence += confidence * weight
343 | total_weight += weight
344 |
345 | return total_confidence / total_weight if total_weight > 0 else 0.5
346 |
347 | def _assess_persona_alignment(self, synthesis_content: str) -> float:
348 | """Assess how well synthesis aligns with investor persona."""
349 | # Simple keyword-based alignment scoring
350 | persona_keywords = {
351 | "conservative": ["stable", "dividend", "low-risk", "preservation"],
352 | "moderate": ["balanced", "diversified", "moderate", "growth"],
353 | "aggressive": ["growth", "momentum", "high-return", "opportunity"],
354 | }
355 |
356 | keywords = persona_keywords.get(self.persona.name.lower(), [])
357 | content_lower = synthesis_content.lower()
358 |
359 | alignment_score = sum(1 for keyword in keywords if keyword in content_lower)
360 | return min(alignment_score / len(keywords) if keywords else 0.5, 1.0)
361 |
362 |
363 | class SupervisorAgent(PersonaAwareAgent):
364 | """
365 | Multi-agent supervisor using 2025 LangGraph patterns.
366 |
367 | Orchestrates MarketAnalysisAgent, TechnicalAnalysisAgent, and future DeepResearchAgent
368 | with intelligent routing, result synthesis, and conflict resolution.
369 | """
370 |
371 | def __init__(
372 | self,
373 | llm: BaseChatModel,
374 | agents: dict[str, PersonaAwareAgent],
375 | persona: str = "moderate",
376 | checkpointer: MemorySaver | None = None,
377 | ttl_hours: int = 1,
378 | routing_strategy: str = "llm_powered",
379 | synthesis_mode: str = "weighted",
380 | conflict_resolution: str = "confidence_based",
381 | max_iterations: int = 5,
382 | ):
383 | """Initialize supervisor with existing agent instances."""
384 |
385 | if not agents:
386 | raise AgentInitializationError(
387 | agent_type="SupervisorAgent",
388 | reason="No agents provided for supervision",
389 | )
390 |
391 | # Store agent references
392 | self.agents = agents
393 | self.market_agent = agents.get("market")
394 | self.technical_agent = agents.get("technical")
395 | self.research_agent = agents.get("research") # DeepResearchAgent integration
396 |
397 | # Configuration
398 | self.routing_strategy = routing_strategy
399 | self.synthesis_mode = synthesis_mode
400 | self.conflict_resolution = conflict_resolution
401 | self.max_iterations = max_iterations
402 |
403 | # Ensure all agents use the same persona
404 | persona_obj = INVESTOR_PERSONAS.get(persona, INVESTOR_PERSONAS["moderate"])
405 | for agent in agents.values():
406 | if hasattr(agent, "persona"):
407 | agent.persona = persona_obj
408 |
409 | # Get supervisor-specific tools
410 | supervisor_tools = self._get_supervisor_tools()
411 |
412 | # Initialize base class
413 | super().__init__(
414 | llm=llm,
415 | tools=supervisor_tools,
416 | persona=persona,
417 | checkpointer=checkpointer or MemorySaver(),
418 | ttl_hours=ttl_hours,
419 | )
420 |
421 | # Initialize components
422 | self.conversation_store = ConversationStore(ttl_hours=ttl_hours)
423 | self.query_classifier = QueryClassifier(llm)
424 | self.result_synthesizer = ResultSynthesizer(llm, self.persona)
425 |
426 | logger.info(
427 | f"SupervisorAgent initialized with {len(agents)} agents: {list(agents.keys())}"
428 | )
429 |
430 | def get_state_schema(self) -> type:
431 | """Return SupervisorState schema."""
432 | return SupervisorState
433 |
434 | def _get_supervisor_tools(self) -> list[BaseTool]:
435 | """Get tools specific to supervision and coordination."""
436 | from langchain_core.tools import tool
437 |
438 | tools = []
439 |
440 | if self.market_agent:
441 |
442 | @tool
443 | async def query_market_agent(
444 | query: str,
445 | session_id: str,
446 | screening_strategy: str = "momentum",
447 | max_results: int = 20,
448 | ) -> dict[str, Any]:
449 | """Query the market analysis agent for stock screening and market analysis."""
450 | try:
451 | return await self.market_agent.analyze_market(
452 | query=query,
453 | session_id=session_id,
454 | screening_strategy=screening_strategy,
455 | max_results=max_results,
456 | )
457 | except Exception as e:
458 | return {"error": f"Market agent error: {str(e)}"}
459 |
460 | tools.append(query_market_agent)
461 |
462 | if self.technical_agent:
463 |
464 | @tool
465 | async def query_technical_agent(
466 | symbol: str, timeframe: str = "1d", indicators: list[str] | None = None
467 | ) -> dict[str, Any]:
468 | """Query the technical analysis agent for chart analysis and indicators."""
469 | try:
470 | if indicators is None:
471 | indicators = ["sma_20", "rsi", "macd"]
472 |
473 | return await self.technical_agent.analyze_stock(
474 | symbol=symbol, timeframe=timeframe, indicators=indicators
475 | )
476 | except Exception as e:
477 | return {"error": f"Technical agent error: {str(e)}"}
478 |
479 | tools.append(query_technical_agent)
480 |
481 | if self.research_agent:
482 |
483 | @tool
484 | async def query_research_agent(
485 | query: str,
486 | session_id: str,
487 | research_scope: str = "comprehensive",
488 | max_sources: int = 50,
489 | timeframe: str = "1m",
490 | ) -> dict[str, Any]:
491 | """Query the deep research agent for comprehensive research and analysis."""
492 | try:
493 | return await self.research_agent.research_topic(
494 | query=query,
495 | session_id=session_id,
496 | research_scope=research_scope,
497 | max_sources=max_sources,
498 | timeframe=timeframe,
499 | )
500 | except Exception as e:
501 | return {"error": f"Research agent error: {str(e)}"}
502 |
503 | @tool
504 | async def analyze_company_research(
505 | symbol: str, session_id: str, include_competitive: bool = True
506 | ) -> dict[str, Any]:
507 | """Perform comprehensive company research and fundamental analysis."""
508 | try:
509 | return await self.research_agent.research_company_comprehensive(
510 | symbol=symbol,
511 | session_id=session_id,
512 | include_competitive_analysis=include_competitive,
513 | )
514 | except Exception as e:
515 | return {"error": f"Company research error: {str(e)}"}
516 |
517 | @tool
518 | async def analyze_market_sentiment_research(
519 | topic: str, session_id: str, timeframe: str = "1w"
520 | ) -> dict[str, Any]:
521 | """Analyze market sentiment using deep research capabilities."""
522 | try:
523 | return await self.research_agent.analyze_market_sentiment(
524 | topic=topic, session_id=session_id, timeframe=timeframe
525 | )
526 | except Exception as e:
527 | return {"error": f"Sentiment analysis error: {str(e)}"}
528 |
529 | tools.extend(
530 | [
531 | query_research_agent,
532 | analyze_company_research,
533 | analyze_market_sentiment_research,
534 | ]
535 | )
536 |
537 | return tools
538 |
539 | def _build_graph(self):
540 | """Build supervisor graph with multi-agent coordination."""
541 | workflow = StateGraph(SupervisorState)
542 |
543 | # Core supervisor nodes
544 | workflow.add_node("analyze_query", self._analyze_query)
545 | workflow.add_node("create_execution_plan", self._create_execution_plan)
546 | workflow.add_node("route_to_agents", self._route_to_agents)
547 | workflow.add_node("aggregate_results", self._aggregate_results)
548 | workflow.add_node("resolve_conflicts", self._resolve_conflicts)
549 | workflow.add_node("synthesize_response", self._synthesize_response)
550 |
551 | # Agent invocation nodes
552 | if self.market_agent:
553 | workflow.add_node("invoke_market_agent", self._invoke_market_agent)
554 | if self.technical_agent:
555 | workflow.add_node("invoke_technical_agent", self._invoke_technical_agent)
556 | if self.research_agent:
557 | workflow.add_node("invoke_research_agent", self._invoke_research_agent)
558 |
559 | # Coordination nodes
560 | workflow.add_node("parallel_coordinator", self._parallel_coordinator)
561 |
562 | # Tool node
563 | if self.tools:
564 | from langgraph.prebuilt import ToolNode
565 |
566 | tool_node = ToolNode(self.tools)
567 | workflow.add_node("tools", tool_node)
568 |
569 | # Define workflow edges
570 | workflow.add_edge(START, "analyze_query")
571 | workflow.add_edge("analyze_query", "create_execution_plan")
572 | workflow.add_edge("create_execution_plan", "route_to_agents")
573 |
574 | # Conditional routing based on execution plan
575 | workflow.add_conditional_edges(
576 | "route_to_agents",
577 | self._route_decision,
578 | {
579 | "market_only": "invoke_market_agent"
580 | if self.market_agent
581 | else "synthesize_response",
582 | "technical_only": "invoke_technical_agent"
583 | if self.technical_agent
584 | else "synthesize_response",
585 | "research_only": "invoke_research_agent"
586 | if self.research_agent
587 | else "synthesize_response",
588 | "parallel_execution": "parallel_coordinator",
589 | "use_tools": "tools" if self.tools else "synthesize_response",
590 | "synthesize": "synthesize_response",
591 | },
592 | )
593 |
594 | # Agent result collection
595 | if self.market_agent:
596 | workflow.add_edge("invoke_market_agent", "aggregate_results")
597 | if self.technical_agent:
598 | workflow.add_edge("invoke_technical_agent", "aggregate_results")
599 | if self.research_agent:
600 | workflow.add_edge("invoke_research_agent", "aggregate_results")
601 |
602 | workflow.add_edge("parallel_coordinator", "aggregate_results")
603 |
604 | if self.tools:
605 | workflow.add_edge("tools", "aggregate_results")
606 |
607 | # Conflict detection and resolution
608 | workflow.add_conditional_edges(
609 | "aggregate_results",
610 | self._check_conflicts,
611 | {"resolve": "resolve_conflicts", "synthesize": "synthesize_response"},
612 | )
613 |
614 | workflow.add_edge("resolve_conflicts", "synthesize_response")
615 | workflow.add_edge("synthesize_response", END)
616 |
617 | return workflow.compile(checkpointer=self.checkpointer)
618 |
619 | # Workflow node implementations will continue...
620 | # (The rest of the implementation follows the same pattern)
621 |
622 | async def coordinate_agents(
623 | self, query: str, session_id: str, **kwargs
624 | ) -> dict[str, Any]:
625 | """
626 | Main entry point for multi-agent coordination.
627 |
628 | Args:
629 | query: User query requiring multiple agents
630 | session_id: Session identifier
631 | **kwargs: Additional parameters
632 |
633 | Returns:
634 | Coordinated response from multiple agents
635 | """
636 | start_time = datetime.now()
637 |
638 | # Initialize supervisor state
639 | initial_state = {
640 | "messages": [HumanMessage(content=query)],
641 | "persona": self.persona.name,
642 | "session_id": session_id,
643 | "timestamp": datetime.now(),
644 | "query_classification": {},
645 | "execution_plan": [],
646 | "current_subtask_index": 0,
647 | "routing_strategy": self.routing_strategy,
648 | "active_agents": [],
649 | "agent_results": {},
650 | "agent_confidence": {},
651 | "agent_execution_times": {},
652 | "agent_errors": {},
653 | "workflow_status": "planning",
654 | "parallel_execution": False,
655 | "dependency_graph": {},
656 | "max_iterations": self.max_iterations,
657 | "current_iteration": 0,
658 | "conflicts_detected": [],
659 | "conflict_resolution": {},
660 | "synthesis_weights": {},
661 | "final_recommendation_confidence": 0.0,
662 | "synthesis_mode": self.synthesis_mode,
663 | "total_execution_time_ms": 0.0,
664 | "agent_coordination_overhead_ms": 0.0,
665 | "synthesis_time_ms": 0.0,
666 | "cache_utilization": {},
667 | "api_calls_made": 0,
668 | "cache_hits": 0,
669 | "cache_misses": 0,
670 | # Legacy fields initialized as None for backward compatibility
671 | "query_type": None,
672 | "subtasks": None,
673 | "current_subtask": None,
674 | "workflow_plan": None,
675 | "completed_steps": None,
676 | "pending_steps": None,
677 | "final_recommendations": None,
678 | "confidence_scores": None,
679 | "risk_warnings": None,
680 | }
681 |
682 | # Add any additional parameters
683 | initial_state.update(kwargs)
684 |
685 | # Execute supervision workflow
686 | try:
687 | result = await self.graph.ainvoke(
688 | initial_state,
689 | config={
690 | "configurable": {
691 | "thread_id": session_id,
692 | "checkpoint_ns": "supervisor",
693 | }
694 | },
695 | )
696 |
697 | # Calculate total execution time
698 | execution_time = (datetime.now() - start_time).total_seconds() * 1000
699 | result["total_execution_time_ms"] = execution_time
700 |
701 | return self._format_supervisor_response(result)
702 |
703 | except Exception as e:
704 | logger.error(f"Error in supervisor coordination: {e}")
705 | return {
706 | "status": "error",
707 | "error": str(e),
708 | "total_execution_time_ms": (datetime.now() - start_time).total_seconds()
709 | * 1000,
710 | "agent_type": "supervisor",
711 | }
712 |
713 | def _format_supervisor_response(self, result: dict[str, Any]) -> dict[str, Any]:
714 | """Format supervisor response for consistent output."""
715 | return {
716 | "status": "success",
717 | "agent_type": "supervisor",
718 | "persona": result.get("persona"),
719 | "query_classification": result.get("query_classification", {}),
720 | "agents_used": result.get("active_agents", []),
721 | "synthesis": result.get("messages", [])[-1].content
722 | if result.get("messages")
723 | else "No synthesis available",
724 | "confidence_score": result.get("final_recommendation_confidence", 0.0),
725 | "execution_time_ms": result.get("total_execution_time_ms", 0.0),
726 | "conflicts_resolved": len(result.get("conflicts_detected", [])),
727 | "workflow_status": result.get("workflow_status", "completed"),
728 | }
729 |
730 | # Placeholder implementations for workflow nodes
731 | # These will be implemented based on the specific node logic
732 |
733 | async def _analyze_query(self, state: SupervisorState) -> Command:
734 | """Analyze query to determine routing strategy and requirements."""
735 | query = state["messages"][-1].content if state["messages"] else ""
736 |
737 | # Classify the query
738 | classification = await self.query_classifier.classify_query(
739 | query, state["persona"]
740 | )
741 |
742 | return Command(
743 | goto="create_execution_plan",
744 | update={
745 | "query_classification": classification,
746 | "workflow_status": "analyzing",
747 | },
748 | )
749 |
750 | async def _create_execution_plan(self, state: SupervisorState) -> Command:
751 | """Create execution plan based on query classification."""
752 | classification = state["query_classification"]
753 |
754 | # Create execution plan based on classification
755 | execution_plan = [
756 | {
757 | "task_id": "main_analysis",
758 | "agents": classification.get("required_agents", ["market"]),
759 | "parallel": classification.get("parallel_capable", False),
760 | "priority": 1,
761 | }
762 | ]
763 |
764 | return Command(
765 | goto="route_to_agents",
766 | update={"execution_plan": execution_plan, "workflow_status": "planning"},
767 | )
768 |
769 | async def _route_to_agents(self, state: SupervisorState) -> Command:
770 | """Route query to appropriate agents based on execution plan."""
771 | return Command(
772 | goto="parallel_execution", update={"workflow_status": "executing"}
773 | )
774 |
775 | async def _route_decision(self, state: SupervisorState) -> str:
776 | """Decide routing strategy based on state."""
777 | classification = state.get("query_classification", {})
778 | required_agents = classification.get("required_agents", ["market"])
779 | parallel = classification.get("parallel_capable", False)
780 |
781 | if len(required_agents) == 1:
782 | agent = required_agents[0]
783 | if agent == "market" and self.market_agent:
784 | return "market_only"
785 | elif agent == "technical" and self.technical_agent:
786 | return "technical_only"
787 | elif agent == "research" and self.research_agent:
788 | return "research_only"
789 | elif len(required_agents) > 1 and parallel:
790 | return "parallel_execution"
791 |
792 | return "synthesize"
793 |
794 | async def _parallel_coordinator(self, state: SupervisorState) -> Command:
795 | """Coordinate parallel execution of multiple agents."""
796 | # This would implement parallel agent coordination
797 | # For now, return to aggregation
798 | return Command(
799 | goto="aggregate_results", update={"workflow_status": "aggregating"}
800 | )
801 |
802 | async def _invoke_market_agent(self, state: SupervisorState) -> Command:
803 | """Invoke market analysis agent."""
804 | if not self.market_agent:
805 | return Command(
806 | goto="aggregate_results",
807 | update={"agent_errors": {"market": "Market agent not available"}},
808 | )
809 |
810 | try:
811 | query = state["messages"][-1].content if state["messages"] else ""
812 | result = await self.market_agent.analyze_market(
813 | query=query, session_id=state["session_id"]
814 | )
815 |
816 | return Command(
817 | goto="aggregate_results",
818 | update={
819 | "agent_results": {"market": result},
820 | "active_agents": ["market"],
821 | },
822 | )
823 |
824 | except Exception as e:
825 | return Command(
826 | goto="aggregate_results",
827 | update={
828 | "agent_errors": {"market": str(e)},
829 | "active_agents": ["market"],
830 | },
831 | )
832 |
833 | async def _invoke_technical_agent(self, state: SupervisorState) -> Command:
834 | """Invoke technical analysis agent."""
835 | if not self.technical_agent:
836 | return Command(
837 | goto="aggregate_results",
838 | update={"agent_errors": {"technical": "Technical agent not available"}},
839 | )
840 |
841 | # This would implement technical agent invocation
842 | return Command(
843 | goto="aggregate_results", update={"active_agents": ["technical"]}
844 | )
845 |
846 | async def _invoke_research_agent(self, state: SupervisorState) -> Command:
847 | """Invoke deep research agent (future implementation)."""
848 | if not self.research_agent:
849 | return Command(
850 | goto="aggregate_results",
851 | update={"agent_errors": {"research": "Research agent not available"}},
852 | )
853 |
854 | # Future implementation
855 | return Command(goto="aggregate_results", update={"active_agents": ["research"]})
856 |
857 | async def _aggregate_results(self, state: SupervisorState) -> Command:
858 | """Aggregate results from all agents."""
859 | return Command(
860 | goto="synthesize_response", update={"workflow_status": "synthesizing"}
861 | )
862 |
863 | def _check_conflicts(self, state: SupervisorState) -> str:
864 | """Check if there are conflicts between agent results."""
865 | conflicts = state.get("conflicts_detected", [])
866 | return "resolve" if conflicts else "synthesize"
867 |
868 | async def _resolve_conflicts(self, state: SupervisorState) -> Command:
869 | """Resolve conflicts between agent recommendations."""
870 | return Command(
871 | goto="synthesize_response",
872 | update={"conflict_resolution": {"strategy": "confidence_based"}},
873 | )
874 |
875 | async def _synthesize_response(self, state: SupervisorState) -> Command:
876 | """Synthesize final response from agent results."""
877 | agent_results = state.get("agent_results", {})
878 | conflicts = state.get("conflicts_detected", [])
879 | classification = state.get("query_classification", {})
880 |
881 | if agent_results:
882 | synthesis = await self.result_synthesizer.synthesize_results(
883 | agent_results=agent_results,
884 | query_type=classification.get("category", "stock_investment_decision"),
885 | conflicts=conflicts,
886 | )
887 |
888 | return Command(
889 | goto="__end__",
890 | update={
891 | "final_recommendation_confidence": synthesis["confidence_score"],
892 | "synthesis_weights": synthesis["weights_applied"],
893 | "workflow_status": "completed",
894 | "messages": state["messages"]
895 | + [HumanMessage(content=synthesis["synthesis"])],
896 | },
897 | )
898 | else:
899 | return Command(
900 | goto="__end__",
901 | update={
902 | "workflow_status": "completed",
903 | "messages": state["messages"]
904 | + [
905 | HumanMessage(content="No agent results available for synthesis")
906 | ],
907 | },
908 | )
909 |
```
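A minimal usage sketch for the supervisor above, under stated assumptions: `llm` is any LangChain `BaseChatModel`, and `market_agent` / `technical_agent` are already-constructed `PersonaAwareAgent` instances (for example the market and technical analysis agents in `maverick_mcp.agents`). The `agents` dict keys, the `persona` and `routing_strategy` values, and the `synthesis` / `confidence_score` response keys (success path of `_format_supervisor_response`) all come from the file above; nothing else is assumed.

```python
from maverick_mcp.agents.supervisor import SupervisorAgent


async def run_coordinated_analysis(llm, market_agent, technical_agent):
    """Route one query through the supervisor and return the synthesized answer."""
    supervisor = SupervisorAgent(
        llm=llm,
        agents={"market": market_agent, "technical": technical_agent},
        persona="moderate",
        routing_strategy="llm_powered",
    )

    # A query like this typically classifies as "stock_investment_decision", so the
    # routing matrix runs both agents in parallel and weights their results during
    # synthesis.
    response = await supervisor.coordinate_agents(
        query="Should I buy AAPL for a 6-month horizon?",
        session_id="demo-session-1",
    )
    return response["synthesis"], response["confidence_score"]
```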
--------------------------------------------------------------------------------
/maverick_mcp/backtesting/strategies/ml/regime_aware.py:
--------------------------------------------------------------------------------
```python
1 | """Market regime-aware trading strategies with automatic strategy switching."""
2 |
3 | import logging
4 | from typing import Any
5 |
6 | import numpy as np
7 | import pandas as pd
8 | from pandas import DataFrame, Series
9 | from sklearn.cluster import KMeans
10 | from sklearn.mixture import GaussianMixture
11 | from sklearn.preprocessing import StandardScaler
12 |
13 | from maverick_mcp.backtesting.strategies.base import Strategy
14 |
15 | logger = logging.getLogger(__name__)
16 |
17 |
18 | class MarketRegimeDetector:
19 | """Detect market regimes using various statistical methods."""
20 |
21 | def __init__(
22 | self, method: str = "hmm", n_regimes: int = 3, lookback_period: int = 50
23 | ):
24 | """Initialize regime detector.
25 |
26 | Args:
27 | method: Detection method ('hmm', 'kmeans', 'threshold')
28 | n_regimes: Number of market regimes to detect
29 | lookback_period: Period for regime detection
30 | """
31 | self.method = method
32 | self.n_regimes = n_regimes
33 | self.lookback_period = lookback_period
34 | self.scaler = StandardScaler()
35 |
36 | # Initialize detection model
37 | self.model = None
38 | self.is_fitted = False
39 | self._initialize_model()
40 |
41 | def _initialize_model(self):
42 |         """Initialize the regime detection model with numerically stable defaults."""
43 | if self.method == "hmm":
44 | # Use GaussianMixture with more stable configuration
45 | self.model = GaussianMixture(
46 | n_components=self.n_regimes,
47 | covariance_type="diag", # Use diagonal covariance for stability
48 | random_state=42,
49 | max_iter=200,
50 | tol=1e-6,
51 | reg_covar=1e-6, # Regularization for numerical stability
52 | init_params="kmeans", # Better initialization
53 | warm_start=False,
54 | )
55 | elif self.method == "kmeans":
56 | self.model = KMeans(
57 | n_clusters=self.n_regimes,
58 | random_state=42,
59 | n_init=10,
60 | max_iter=500,
61 | tol=1e-6,
62 | algorithm="lloyd", # More stable algorithm
63 | )
64 | elif self.method == "threshold":
65 | # Threshold-based regime detection
66 | self.model = None
67 | else:
68 | raise ValueError(f"Unsupported regime detection method: {self.method}")
69 |
70 | def extract_regime_features(self, data: DataFrame) -> np.ndarray:
71 | """Extract robust features for regime detection.
72 |
73 | Args:
74 | data: Price data
75 |
76 | Returns:
77 |             Feature array with consistent dimensionality and finite values
78 | """
79 | try:
80 | # Validate input data
81 | if data is None or data.empty or len(data) < 10:
82 | logger.debug("Insufficient data for regime feature extraction")
83 | return np.array([])
84 |
85 | if "close" not in data.columns:
86 | logger.warning("Close price data not available for regime features")
87 | return np.array([])
88 |
89 | features = []
90 | returns = data["close"].pct_change().dropna()
91 |
92 | if len(returns) == 0:
93 | logger.debug("No valid returns data for regime features")
94 | return np.array([])
95 |
96 | # Rolling statistics with robust error handling
97 | for window in [5, 10, 20]:
98 | if len(returns) >= window:
99 | window_returns = returns.rolling(window)
100 |
101 | mean_return = window_returns.mean().iloc[-1]
102 | std_return = window_returns.std().iloc[-1]
103 |
104 | # Robust skewness and kurtosis
105 | if window >= 5:
106 | skew_return = window_returns.skew().iloc[-1]
107 | kurt_return = window_returns.kurt().iloc[-1]
108 | else:
109 | skew_return = 0.0
110 | kurt_return = 0.0
111 |
112 | # Replace NaN/inf values with sensible defaults
113 | features.extend(
114 | [
115 | mean_return if np.isfinite(mean_return) else 0.0,
116 | std_return if np.isfinite(std_return) else 0.01,
117 | skew_return if np.isfinite(skew_return) else 0.0,
118 | kurt_return if np.isfinite(kurt_return) else 0.0,
119 | ]
120 | )
121 | else:
122 | # Default values for insufficient data
123 | features.extend([0.0, 0.01, 0.0, 0.0])
124 |
125 | # Enhanced technical indicators for regime detection
126 | current_price = data["close"].iloc[-1]
127 |
128 | # Multiple timeframe trend strength
129 | if len(data) >= 20:
130 | # Short-term trend (20-day)
131 | sma_20 = data["close"].rolling(20).mean()
132 | sma_20_value = (
133 | float(sma_20.iloc[-1]) if not pd.isna(sma_20.iloc[-1]) else 0.0
134 | )
135 | if sma_20_value != 0.0:
136 | trend_strength_20 = (current_price - sma_20_value) / sma_20_value
137 | else:
138 | trend_strength_20 = 0.0
139 | features.append(
140 | trend_strength_20 if np.isfinite(trend_strength_20) else 0.0
141 | )
142 |
143 | # Price momentum (rate of change)
144 | prev_price = (
145 | float(data["close"].iloc[-20])
146 | if not pd.isna(data["close"].iloc[-20])
147 | else current_price
148 | )
149 | if prev_price != 0.0:
150 | momentum_20 = (current_price - prev_price) / prev_price
151 | else:
152 | momentum_20 = 0.0
153 | features.append(momentum_20 if np.isfinite(momentum_20) else 0.0)
154 | else:
155 | features.extend([0.0, 0.0])
156 |
157 | # Multi-timeframe volatility regime detection
158 | if len(returns) >= 20:
159 | vol_short = returns.rolling(20).std().iloc[-1] * np.sqrt(
160 | 252
161 | ) # Annualized
162 | vol_medium = (
163 | returns.rolling(60).std().iloc[-1] * np.sqrt(252)
164 | if len(returns) >= 60
165 | else vol_short
166 | )
167 |
168 | # Volatility regime indicator
169 | vol_regime = vol_short / vol_medium if vol_medium > 0 else 1.0
170 | features.append(vol_regime if np.isfinite(vol_regime) else 1.0)
171 |
172 | # Absolute volatility level (normalized)
173 |                 vol_level = min(vol_short / 0.3, 3.0)  # Normalize against a 30% volatility baseline, capped at 3x
174 | features.append(vol_level if np.isfinite(vol_level) else 1.0)
175 | else:
176 | features.extend([1.0, 1.0])
177 |
178 | # Market structure and volume features (if available)
179 | if "volume" in data.columns and len(data) >= 10:
180 | current_volume = data["volume"].iloc[-1]
181 |
182 | # Volume trend
183 | if len(data) >= 20:
184 | volume_ma_short = data["volume"].rolling(10).mean().iloc[-1]
185 | volume_ma_long = data["volume"].rolling(20).mean().iloc[-1]
186 |
187 | volume_trend = (
188 | volume_ma_short / volume_ma_long if volume_ma_long > 0 else 1.0
189 | )
190 | features.append(volume_trend if np.isfinite(volume_trend) else 1.0)
191 |
192 | # Volume surge indicator
193 | volume_surge = (
194 | current_volume / volume_ma_long if volume_ma_long > 0 else 1.0
195 | )
196 | features.append(
197 | min(volume_surge, 10.0) if np.isfinite(volume_surge) else 1.0
198 | )
199 | else:
200 | features.extend([1.0, 1.0])
201 | else:
202 | features.extend([1.0, 1.0])
203 |
204 | # Price dispersion (high-low range analysis)
205 | if "high" in data.columns and "low" in data.columns and len(data) >= 10:
206 | hl_range = (data["high"] - data["low"]) / data["close"]
207 | avg_range = (
208 | hl_range.rolling(20).mean().iloc[-1]
209 | if len(data) >= 20
210 | else hl_range.mean()
211 | )
212 | current_range = hl_range.iloc[-1]
213 |
214 | range_regime = current_range / avg_range if avg_range > 0 else 1.0
215 | features.append(range_regime if np.isfinite(range_regime) else 1.0)
216 | else:
217 | features.append(1.0)
218 |
219 | feature_array = np.array(features)
220 |
221 | # Final validation and cleaning
222 | if len(feature_array) == 0:
223 | return np.array([])
224 |
225 | # Replace any remaining NaN/inf values
226 | feature_array = np.nan_to_num(
227 | feature_array, nan=0.0, posinf=1.0, neginf=-1.0
228 | )
229 |
230 | return feature_array
231 |
232 | except Exception as e:
233 | logger.error(f"Error extracting regime features: {e}")
234 | return np.array([])
235 |
236 | def detect_regime_threshold(self, data: DataFrame) -> int:
237 | """Detect regime using threshold-based method.
238 |
239 | Args:
240 | data: Price data
241 |
242 | Returns:
243 | Regime label (0: bear/declining, 1: sideways, 2: bull/trending)
244 | """
245 | if len(data) < 20:
246 | return 1 # Default to sideways
247 |
248 | # Calculate trend and volatility measures
249 | returns = data["close"].pct_change()
250 |
251 | # Trend measure (20-day slope)
252 | x = np.arange(20)
253 | y = data["close"].iloc[-20:].values
254 | trend_slope = np.polyfit(x, y, 1)[0] / y[-1] # Normalized slope
255 |
256 | # Volatility measure
257 | vol_20 = returns.rolling(20).std().iloc[-1] * np.sqrt(252)
258 |
259 | # Define regime thresholds
260 | trend_threshold = 0.001 # 0.1% daily trend threshold
261 | vol_threshold = 0.25 # 25% annual volatility threshold
262 |
263 | # Classify regime
264 | if trend_slope > trend_threshold and vol_20 < vol_threshold:
265 | return 2 # Bull/trending market
266 | elif trend_slope < -trend_threshold and vol_20 > vol_threshold:
267 | return 0 # Bear/declining market
268 | else:
269 | return 1 # Sideways/uncertain market
270 |
271 | def fit_regimes(self, data: DataFrame) -> None:
272 | """Fit regime detection model to historical data with enhanced robustness.
273 |
274 | Args:
275 | data: Historical price data
276 | """
277 | if self.method == "threshold":
278 | self.is_fitted = True
279 | return
280 |
281 | try:
282 | # Need sufficient data for stable regime detection
283 | min_required_samples = max(50, self.n_regimes * 20)
284 | if len(data) < min_required_samples + self.lookback_period:
285 | logger.warning(
286 | f"Insufficient data for regime fitting: {len(data)} < {min_required_samples + self.lookback_period}"
287 | )
288 | self.is_fitted = True
289 | return
290 |
291 | # Extract features for regime detection with temporal consistency
292 | feature_list = []
293 | feature_consistency_count = None
294 |
295 | # Use overlapping windows for more stable regime detection
296 | step_size = max(1, self.lookback_period // 10)
297 |
298 | for i in range(self.lookback_period, len(data), step_size):
299 | window_data = data.iloc[max(0, i - self.lookback_period) : i + 1]
300 | features = self.extract_regime_features(window_data)
301 |
302 | if len(features) > 0 and np.all(np.isfinite(features)):
303 | # Check feature consistency
304 | if feature_consistency_count is None:
305 | feature_consistency_count = len(features)
306 | elif len(features) != feature_consistency_count:
307 | logger.warning(
308 | f"Feature dimension mismatch: expected {feature_consistency_count}, got {len(features)}"
309 | )
310 | continue
311 |
312 | feature_list.append(features)
313 |
314 | if len(feature_list) < min_required_samples:
315 | logger.warning(
316 | f"Insufficient valid samples for regime fitting: {len(feature_list)} < {min_required_samples}"
317 | )
318 | self.is_fitted = True
319 | return
320 |
321 | # Ensure we have valid feature_list before creating array
322 | if len(feature_list) == 0:
323 | logger.warning(
324 | "Empty feature list after filtering, cannot create feature matrix"
325 | )
326 | self.is_fitted = True
327 | return
328 |
329 | X = np.array(feature_list)
330 |
331 | # Additional data quality checks
332 | if X.size == 0:
333 | logger.warning("Empty feature matrix, cannot fit regime detector")
334 | self.is_fitted = True
335 | return
336 | elif np.any(np.isnan(X)) or np.any(np.isinf(X)):
337 | logger.warning("Found NaN or inf values in feature matrix, cleaning...")
338 | X = np.nan_to_num(X, nan=0.0, posinf=1.0, neginf=-1.0)
339 |
340 | # Check for zero variance features
341 | feature_std = np.std(X, axis=0)
342 | zero_variance_features = np.where(feature_std < 1e-8)[0]
343 | if len(zero_variance_features) > 0:
344 | logger.debug(
345 | f"Found {len(zero_variance_features)} zero-variance features"
346 | )
347 | # Add small noise to zero-variance features
348 | for idx in zero_variance_features:
349 | X[:, idx] += np.random.normal(0, 1e-6, X.shape[0])
350 |
351 | # Scale features with robust scaler
352 | X_scaled = self.scaler.fit_transform(X)
353 |
354 | # Fit model with better error handling
355 | try:
356 | if self.method == "hmm":
357 | # For GaussianMixture, ensure numerical stability
358 | self.model.fit(X_scaled)
359 |
360 | # Validate fitted model
361 | if (
362 | not hasattr(self.model, "weights_")
363 | or len(self.model.weights_) != self.n_regimes
364 | ):
365 | raise ValueError("Model fitting failed - invalid weights")
366 |
367 | # Check convergence
368 | if not self.model.converged_:
369 | logger.warning(
370 | "GaussianMixture did not converge, but will proceed"
371 | )
372 |
373 | elif self.method == "kmeans":
374 | self.model.fit(X_scaled)
375 |
376 | # Validate fitted model
377 | if (
378 | not hasattr(self.model, "cluster_centers_")
379 | or len(self.model.cluster_centers_) != self.n_regimes
380 | ):
381 | raise ValueError(
382 | "KMeans fitting failed - invalid cluster centers"
383 | )
384 |
385 | self.is_fitted = True
386 |
387 | # Log fitting success with model diagnostics
388 | if self.method == "hmm":
389 |                     avg_log_likelihood = self.model.score(X_scaled)  # score() is already the per-sample average
390 | logger.info(
391 | f"Fitted {self.method} regime detector with {len(X)} samples, avg log-likelihood: {avg_log_likelihood:.4f}"
392 | )
393 | else:
394 | inertia = (
395 | self.model.inertia_
396 | if hasattr(self.model, "inertia_")
397 | else "N/A"
398 | )
399 | logger.info(
400 | f"Fitted {self.method} regime detector with {len(X)} samples, inertia: {inertia}"
401 | )
402 |
403 | except Exception as model_error:
404 | logger.error(f"Model fitting failed: {model_error}")
405 | logger.info("Falling back to threshold method")
406 | self.method = "threshold" # Fallback to threshold method
407 | self.is_fitted = True
408 |
409 | except Exception as e:
410 | logger.error(f"Error fitting regime detector: {e}")
411 | self.is_fitted = True # Allow fallback to threshold method
412 |
413 | def detect_current_regime(self, data: DataFrame) -> int:
414 | """Detect current market regime with enhanced error handling.
415 |
416 | Args:
417 | data: Recent price data
418 |
419 | Returns:
420 | Regime label (0: bear, 1: sideways, 2: bull)
421 | """
422 | if not self.is_fitted:
423 | logger.debug("Regime detector not fitted, using threshold method")
424 | return self.detect_regime_threshold(data)
425 |
426 | try:
427 | if self.method == "threshold":
428 | return self.detect_regime_threshold(data)
429 |
430 | # Extract features for current regime
431 | features = self.extract_regime_features(data)
432 |
433 | if len(features) == 0:
434 | logger.debug("No features extracted, falling back to threshold method")
435 | return self.detect_regime_threshold(data)
436 |
437 | # Check for non-finite features only if features array is not empty
438 | if features.size > 0 and np.any(~np.isfinite(features)):
439 | logger.debug("Non-finite features detected, cleaning and proceeding")
440 | features = np.nan_to_num(features, nan=0.0, posinf=1.0, neginf=-1.0)
441 |
442 | # Validate feature consistency with training
443 | expected_features = (
444 | self.scaler.n_features_in_
445 | if hasattr(self.scaler, "n_features_in_")
446 | else None
447 | )
448 | if expected_features is not None and len(features) != expected_features:
449 | logger.warning(
450 | f"Feature count mismatch in prediction: expected {expected_features}, got {len(features)}"
451 | )
452 | return self.detect_regime_threshold(data)
453 |
454 | # Scale features and predict regime
455 | try:
456 | X = self.scaler.transform([features])
457 | regime = self.model.predict(X)[0]
458 |
459 | # Validate regime prediction
460 | if regime < 0 or regime >= self.n_regimes:
461 | logger.warning(
462 | f"Invalid regime prediction: {regime}, using threshold method"
463 | )
464 | return self.detect_regime_threshold(data)
465 |
466 | return int(regime)
467 |
468 | except Exception as pred_error:
469 | logger.debug(
470 | f"Prediction error: {pred_error}, falling back to threshold method"
471 | )
472 | return self.detect_regime_threshold(data)
473 |
474 | except Exception as e:
475 | logger.error(f"Error detecting current regime: {e}")
476 | return self.detect_regime_threshold(data) # Always fallback to threshold
477 |
478 | def get_regime_probabilities(self, data: DataFrame) -> np.ndarray:
479 | """Get probabilities for each regime.
480 |
481 | Args:
482 | data: Recent price data
483 |
484 | Returns:
485 | Array of regime probabilities
486 | """
487 | if not self.is_fitted or self.method == "threshold":
488 | # For threshold method, return deterministic probabilities
489 | regime = self.detect_current_regime(data)
490 | probs = np.zeros(self.n_regimes)
491 | probs[regime] = 1.0
492 | return probs
493 |
494 | try:
495 | features = self.extract_regime_features(data)
496 |
497 | if len(features) == 0:
498 | return np.ones(self.n_regimes) / self.n_regimes
499 | elif features.size > 0 and np.any(np.isnan(features)):
500 | return np.ones(self.n_regimes) / self.n_regimes
501 |
502 | X = self.scaler.transform([features])
503 |
504 | if hasattr(self.model, "predict_proba"):
505 | return self.model.predict_proba(X)[0]
506 | else:
507 | # For methods without probabilities, return one-hot encoding
508 | regime = self.model.predict(X)[0]
509 | probs = np.zeros(self.n_regimes)
510 | probs[regime] = 1.0
511 | return probs
512 |
513 | except Exception as e:
514 | logger.error(f"Error getting regime probabilities: {e}")
515 | return np.ones(self.n_regimes) / self.n_regimes
516 |
517 |
518 | class RegimeAwareStrategy(Strategy):
519 | """Strategy that switches between different strategies based on market regime."""
520 |
521 | def __init__(
522 | self,
523 | regime_strategies: dict[int, Strategy],
524 |         regime_detector: MarketRegimeDetector | None = None,
525 |         regime_names: dict[int, str] | None = None,
526 |         switch_threshold: float = 0.7,
527 |         min_regime_duration: int = 5,
528 |         parameters: dict[str, Any] | None = None,
529 | ):
530 | """Initialize regime-aware strategy.
531 |
532 | Args:
533 | regime_strategies: Dictionary mapping regime labels to strategies
534 | regime_detector: Market regime detector instance
535 | regime_names: Names for each regime
536 | switch_threshold: Probability threshold for regime switching
537 | min_regime_duration: Minimum duration before switching regimes
538 | parameters: Additional parameters
539 | """
540 | super().__init__(parameters)
541 | self.regime_strategies = regime_strategies
542 | self.regime_detector = regime_detector or MarketRegimeDetector()
543 | self.regime_names = regime_names or {0: "Bear", 1: "Sideways", 2: "Bull"}
544 | self.switch_threshold = switch_threshold
545 | self.min_regime_duration = min_regime_duration
546 |
547 | # Regime tracking
548 | self.current_regime = 1 # Start with sideways
549 | self.regime_history = []
550 | self.regime_duration = 0
551 | self.regime_switches = 0
552 |
553 | @property
554 | def name(self) -> str:
555 | """Get strategy name."""
556 | strategy_names = [s.name for s in self.regime_strategies.values()]
557 | return f"RegimeAware({','.join(strategy_names)})"
558 |
559 | @property
560 | def description(self) -> str:
561 | """Get strategy description."""
562 | return f"Regime-aware strategy switching between {len(self.regime_strategies)} strategies based on market conditions"
563 |
564 | def fit_regime_detector(self, data: DataFrame) -> None:
565 | """Fit regime detector to historical data.
566 |
567 | Args:
568 | data: Historical price data
569 | """
570 | self.regime_detector.fit_regimes(data)
571 |
572 | def update_current_regime(self, data: DataFrame, current_idx: int) -> bool:
573 | """Update current market regime.
574 |
575 | Args:
576 | data: Price data
577 | current_idx: Current index in data
578 |
579 | Returns:
580 | True if regime changed, False otherwise
581 | """
582 | # Get regime probabilities
583 | window_data = data.iloc[
584 | max(0, current_idx - self.regime_detector.lookback_period) : current_idx + 1
585 | ]
586 | regime_probs = self.regime_detector.get_regime_probabilities(window_data)
587 |
588 | # Find most likely regime
589 | most_likely_regime = np.argmax(regime_probs)
590 | max_prob = regime_probs[most_likely_regime]
591 |
592 | # Check if we should switch regimes
593 | regime_changed = False
594 |
595 | if (
596 | most_likely_regime != self.current_regime
597 | and max_prob >= self.switch_threshold
598 | and self.regime_duration >= self.min_regime_duration
599 | ):
600 | old_regime = self.current_regime
601 | self.current_regime = most_likely_regime
602 | self.regime_duration = 0
603 | self.regime_switches += 1
604 | regime_changed = True
605 |
606 | logger.info(
607 | f"Regime switch: {self.regime_names.get(old_regime, old_regime)} -> "
608 | f"{self.regime_names.get(self.current_regime, self.current_regime)} "
609 | f"(prob: {max_prob:.3f})"
610 | )
611 | else:
612 | self.regime_duration += 1
613 |
614 | # Track regime history
615 | self.regime_history.append(
616 | {
617 | "index": current_idx,
618 | "regime": self.current_regime,
619 | "probabilities": regime_probs.tolist(),
620 | "duration": self.regime_duration,
621 | "switched": regime_changed,
622 | }
623 | )
624 |
625 | return regime_changed
626 |
627 | def get_active_strategy(self) -> Strategy:
628 | """Get currently active strategy based on regime.
629 |
630 | Returns:
631 | Active strategy for current regime
632 | """
633 | if self.current_regime in self.regime_strategies:
634 | return self.regime_strategies[self.current_regime]
635 | else:
636 | # Fallback to first available strategy
637 | return next(iter(self.regime_strategies.values()))
638 |
639 | def generate_signals(self, data: DataFrame) -> tuple[Series, Series]:
640 | """Generate regime-aware trading signals.
641 |
642 | Args:
643 | data: Price data with OHLCV columns
644 |
645 | Returns:
646 | Tuple of (entry_signals, exit_signals) as boolean Series
647 | """
648 | try:
649 | # Validate input data
650 | if data is None or len(data) == 0:
651 | logger.warning("Empty or invalid data provided to generate_signals")
652 | # Create empty Series with a dummy index to avoid empty array issues
653 | dummy_index = pd.DatetimeIndex([pd.Timestamp.now()])
654 | return pd.Series(False, index=dummy_index), pd.Series(
655 | False, index=dummy_index
656 | )
657 |
658 | # Ensure minimum data requirements
659 | min_required_data = max(50, self.regime_detector.lookback_period)
660 | if len(data) < min_required_data:
661 | logger.warning(
662 | f"Insufficient data for regime-aware strategy: {len(data)} < {min_required_data}"
663 | )
664 | # Return all False signals but with valid data index
665 | return pd.Series(False, index=data.index), pd.Series(
666 | False, index=data.index
667 | )
668 |
669 | # Fit regime detector if not already done
670 | if not self.regime_detector.is_fitted:
671 | try:
672 | self.fit_regime_detector(data)
673 | except Exception as e:
674 | logger.error(
675 | f"Failed to fit regime detector: {e}, falling back to single strategy"
676 | )
677 | # Fallback to using first available strategy without regime switching
678 | fallback_strategy = next(iter(self.regime_strategies.values()))
679 | return fallback_strategy.generate_signals(data)
680 |
681 | entry_signals = pd.Series(False, index=data.index)
682 | exit_signals = pd.Series(False, index=data.index)
683 |
684 | # Generate signals with regime awareness
685 | current_strategy = None
686 |
687 | for idx in range(len(data)):
688 | # Update regime
689 | regime_changed = self.update_current_regime(data, idx)
690 |
691 | # Get active strategy
692 | active_strategy = self.get_active_strategy()
693 |
694 | # If regime changed, regenerate signals from new strategy
695 | if regime_changed or current_strategy != active_strategy:
696 | current_strategy = active_strategy
697 |
698 | # Generate signals for remaining data
699 | remaining_data = data.iloc[idx:]
700 | if len(remaining_data) > 0:
701 | strategy_entry, strategy_exit = (
702 | current_strategy.generate_signals(remaining_data)
703 | )
704 |
705 | # Update signals for remaining period
706 | end_idx = min(idx + len(strategy_entry), len(data))
707 | entry_signals.iloc[idx:end_idx] = strategy_entry.iloc[
708 | : end_idx - idx
709 | ]
710 | exit_signals.iloc[idx:end_idx] = strategy_exit.iloc[
711 | : end_idx - idx
712 | ]
713 |
714 | logger.info(
715 | f"Generated regime-aware signals with {self.regime_switches} regime switches"
716 | )
717 |
718 | return entry_signals, exit_signals
719 |
720 | except Exception as e:
721 | logger.error(f"Error generating regime-aware signals: {e}")
722 | # Ensure we always return valid series even on error
723 | if data is not None and len(data) > 0:
724 | return pd.Series(False, index=data.index), pd.Series(
725 | False, index=data.index
726 | )
727 | else:
728 | # Create dummy index to avoid empty array issues
729 | dummy_index = pd.DatetimeIndex([pd.Timestamp.now()])
730 | return pd.Series(False, index=dummy_index), pd.Series(
731 | False, index=dummy_index
732 | )
733 |
734 | def get_regime_analysis(self) -> dict[str, Any]:
735 | """Get analysis of regime detection and switching.
736 |
737 | Returns:
738 | Dictionary with regime analysis
739 | """
740 | if not self.regime_history:
741 | return {}
742 |
743 | regime_counts = {}
744 | regime_durations = {}
745 |
746 | for record in self.regime_history:
747 | regime = record["regime"]
748 | regime_name = self.regime_names.get(regime, f"Regime_{regime}")
749 |
750 | if regime_name not in regime_counts:
751 | regime_counts[regime_name] = 0
752 | regime_durations[regime_name] = []
753 |
754 | regime_counts[regime_name] += 1
755 |
756 | # Track regime durations
757 | if record["switched"] and len(self.regime_history) > 1:
758 | # Find duration of previous regime
759 | prev_regime_start = 0
760 | for i in range(len(self.regime_history) - 2, -1, -1):
761 | if (
762 | self.regime_history[i]["regime"]
763 | != self.regime_history[-1]["regime"]
764 | ):
765 | prev_regime_start = i + 1
766 | break
767 |
768 | duration = len(self.regime_history) - prev_regime_start - 1
769 | prev_regime = self.regime_history[prev_regime_start]["regime"]
770 | prev_regime_name = self.regime_names.get(
771 | prev_regime, f"Regime_{prev_regime}"
772 | )
773 |
774 | if prev_regime_name in regime_durations:
775 | regime_durations[prev_regime_name].append(duration)
776 |
777 | # Calculate average durations
778 | avg_durations = {}
779 | for regime_name, durations in regime_durations.items():
780 | if durations:
781 | avg_durations[regime_name] = np.mean(durations)
782 | else:
783 | avg_durations[regime_name] = 0
784 |
785 | return {
786 | "current_regime": self.regime_names.get(
787 | self.current_regime, self.current_regime
788 | ),
789 | "total_switches": self.regime_switches,
790 | "regime_counts": regime_counts,
791 | "average_regime_durations": avg_durations,
792 | "regime_history": self.regime_history[-50:], # Last 50 records
793 | "active_strategy": self.get_active_strategy().name,
794 | }
795 |
796 | def validate_parameters(self) -> bool:
797 | """Validate regime-aware strategy parameters.
798 |
799 | Returns:
800 | True if parameters are valid
801 | """
802 | if not self.regime_strategies:
803 | return False
804 |
805 | if self.switch_threshold < 0 or self.switch_threshold > 1:
806 | return False
807 |
808 | if self.min_regime_duration < 0:
809 | return False
810 |
811 | # Validate individual strategies
812 | for strategy in self.regime_strategies.values():
813 | if not strategy.validate_parameters():
814 | return False
815 |
816 | return True
817 |
818 | def get_default_parameters(self) -> dict[str, Any]:
819 | """Get default parameters for regime-aware strategy.
820 |
821 | Returns:
822 | Dictionary of default parameters
823 | """
824 | return {
825 | "switch_threshold": 0.7,
826 | "min_regime_duration": 5,
827 | "regime_detection_method": "hmm",
828 | "n_regimes": 3,
829 | "lookback_period": 50,
830 | }
831 |
832 |
833 | class AdaptiveRegimeStrategy(RegimeAwareStrategy):
834 | """Advanced regime-aware strategy with adaptive regime detection."""
835 |
836 | def __init__(
837 | self,
838 | regime_strategies: dict[int, Strategy],
839 | adaptation_frequency: int = 100,
840 | regime_confidence_threshold: float = 0.6,
841 | **kwargs,
842 | ):
843 | """Initialize adaptive regime strategy.
844 |
845 | Args:
846 | regime_strategies: Dictionary mapping regime labels to strategies
847 | adaptation_frequency: How often to re-fit regime detector
848 | regime_confidence_threshold: Minimum confidence for regime detection
849 | **kwargs: Additional parameters for RegimeAwareStrategy
850 | """
851 | super().__init__(regime_strategies, **kwargs)
852 | self.adaptation_frequency = adaptation_frequency
853 | self.regime_confidence_threshold = regime_confidence_threshold
854 | self.last_adaptation = 0
855 |
856 | @property
857 | def name(self) -> str:
858 | """Get strategy name."""
859 | return f"Adaptive{super().name}"
860 |
861 | def adapt_regime_detector(self, data: DataFrame, current_idx: int) -> None:
862 | """Re-fit regime detector with recent data.
863 |
864 | Args:
865 | data: Price data
866 | current_idx: Current index
867 | """
868 | if current_idx - self.last_adaptation < self.adaptation_frequency:
869 | return
870 |
871 | try:
872 | # Use recent data for adaptation
873 | adaptation_data = data.iloc[max(0, current_idx - 500) : current_idx]
874 |
875 | if len(adaptation_data) >= self.regime_detector.lookback_period:
876 | logger.info(f"Adapting regime detector at index {current_idx}")
877 | self.regime_detector.fit_regimes(adaptation_data)
878 | self.last_adaptation = current_idx
879 |
880 | except Exception as e:
881 | logger.error(f"Error adapting regime detector: {e}")
882 |
883 | def generate_signals(self, data: DataFrame) -> tuple[Series, Series]:
884 | """Generate adaptive regime-aware signals.
885 |
886 | Args:
887 | data: Price data with OHLCV columns
888 |
889 | Returns:
890 | Tuple of (entry_signals, exit_signals) as boolean Series
891 | """
892 | # Periodically adapt regime detector
893 | for idx in range(
894 | self.adaptation_frequency, len(data), self.adaptation_frequency
895 | ):
896 | self.adapt_regime_detector(data, idx)
897 |
898 | # Generate signals using parent method
899 | return super().generate_signals(data)
900 |
```
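The regime-aware classes above are designed to wrap existing `Strategy` implementations and switch between them as the detector's view of the market changes. The sketch below shows one plausible way to wire them together; `DummyTrendStrategy`, its moving-average rule, and the assumption that the base `Strategy` constructor accepts a parameters dict are illustrative guesses, not part of the module.

```python
# Illustrative sketch only. The placeholder strategy and the base-class
# constructor signature are assumptions made for demonstration purposes.
import pandas as pd


class DummyTrendStrategy(Strategy):  # assumes `Strategy` is the base class used above
    def __init__(self, fast: int = 10, slow: int = 30):
        super().__init__({"fast": fast, "slow": slow})  # assumes base takes a parameters dict
        self.fast = fast
        self.slow = slow

    @property
    def name(self) -> str:
        return f"DummyTrend({self.fast},{self.slow})"

    @property
    def description(self) -> str:
        return "Toy moving-average crossover used only to illustrate wiring."

    def generate_signals(self, data: pd.DataFrame) -> tuple[pd.Series, pd.Series]:
        fast_ma = data["close"].rolling(self.fast).mean()
        slow_ma = data["close"].rolling(self.slow).mean()
        return fast_ma > slow_ma, fast_ma < slow_ma

    def validate_parameters(self) -> bool:
        return self.fast < self.slow

    def get_default_parameters(self) -> dict:
        return {"fast": 10, "slow": 30}


# Map regime labels (0: bear, 1: sideways, 2: bull) to strategies. In practice each
# regime would get a genuinely different strategy; here one toy class stands in.
regime_strategy = RegimeAwareStrategy(
    regime_strategies={
        0: DummyTrendStrategy(20, 60),
        1: DummyTrendStrategy(10, 30),
        2: DummyTrendStrategy(5, 20),
    },
    regime_detector=MarketRegimeDetector(),  # default detector, as used in the module above
    switch_threshold=0.7,
    min_regime_duration=5,
)

# `prices` is assumed to be an OHLCV DataFrame with a DatetimeIndex and a "close" column.
# regime_strategy.fit_regime_detector(prices)
# entries, exits = regime_strategy.generate_signals(prices)
```

As the code above shows, calling `fit_regime_detector` explicitly is optional: `RegimeAwareStrategy.generate_signals` fits the detector itself on first use if it has not been fitted yet, and falls back to a single strategy if fitting fails.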
--------------------------------------------------------------------------------
/tests/integration/test_orchestration_complete.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 | """
3 | Comprehensive Integration Test Suite for MaverickMCP Orchestration Capabilities
4 |
5 | This test suite thoroughly validates all orchestration features including:
6 | - agents_orchestrated_analysis with different personas and routing strategies
7 | - agents_deep_research_financial with various research depths and focus areas
8 | - agents_compare_multi_agent_analysis with different agent combinations
9 |
10 | The tests simulate real Claude Desktop usage patterns and validate end-to-end
11 | functionality with comprehensive error handling and performance monitoring.
12 | """
13 |
14 | import asyncio
15 | import json
16 | import logging
17 | import os
18 | import sys
19 | import time
20 | import tracemalloc
21 | from datetime import datetime
22 | from typing import Any
23 |
24 | # Add project root to Python path
25 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
26 |
27 | # Test configuration
28 | TEST_CONFIG = {
29 | "timeout_seconds": 300, # 5 minutes max per test
30 | "concurrent_limit": 3, # Maximum concurrent tests
31 | "performance_monitoring": True,
32 | "detailed_validation": True,
33 | "save_results": True,
34 | }
35 |
36 | # Test scenarios for each orchestration tool
37 | ORCHESTRATED_ANALYSIS_SCENARIOS = [
38 | {
39 | "name": "Conservative_LLM_Powered_Routing",
40 | "query": "Analyze AAPL for long-term investment",
41 | "persona": "conservative",
42 | "routing_strategy": "llm_powered",
43 | "max_agents": 2,
44 | "parallel_execution": False,
45 | },
46 | {
47 | "name": "Aggressive_Rule_Based_Routing",
48 | "query": "Find high momentum growth stocks in tech",
49 | "persona": "aggressive",
50 | "routing_strategy": "rule_based",
51 | "max_agents": 3,
52 | "parallel_execution": True,
53 | },
54 | {
55 | "name": "Moderate_Hybrid_Routing",
56 | "query": "Portfolio analysis for MSFT GOOGL NVDA",
57 | "persona": "moderate",
58 | "routing_strategy": "hybrid",
59 | "max_agents": 2,
60 | "parallel_execution": True,
61 | },
62 | {
63 | "name": "Day_Trader_Fast_Execution",
64 | "query": "Quick technical analysis on SPY options",
65 | "persona": "day_trader",
66 | "routing_strategy": "rule_based",
67 | "max_agents": 1,
68 | "parallel_execution": False,
69 | },
70 | ]
71 |
72 | DEEP_RESEARCH_SCENARIOS = [
73 | {
74 | "name": "Basic_Company_Research",
75 | "research_topic": "Tesla Inc stock analysis",
76 | "persona": "moderate",
77 | "research_depth": "basic",
78 | "focus_areas": ["fundamentals"],
79 | "timeframe": "7d",
80 | },
81 | {
82 | "name": "Standard_Sector_Research",
83 | "research_topic": "renewable energy sector trends",
84 | "persona": "conservative",
85 | "research_depth": "standard",
86 | "focus_areas": ["market_sentiment", "competitive_landscape"],
87 | "timeframe": "30d",
88 | },
89 | {
90 | "name": "Comprehensive_Market_Research",
91 | "research_topic": "AI and machine learning investment opportunities",
92 | "persona": "aggressive",
93 | "research_depth": "comprehensive",
94 | "focus_areas": ["fundamentals", "technicals", "market_sentiment"],
95 | "timeframe": "90d",
96 | },
97 | {
98 | "name": "Exhaustive_Crypto_Research",
99 | "research_topic": "Bitcoin and cryptocurrency market analysis",
100 | "persona": "day_trader",
101 | "research_depth": "exhaustive",
102 | "focus_areas": ["technicals", "market_sentiment", "competitive_landscape"],
103 | "timeframe": "1y",
104 | },
105 | ]
106 |
107 | MULTI_AGENT_COMPARISON_SCENARIOS = [
108 | {
109 | "name": "Market_vs_Supervisor_Stock_Analysis",
110 | "query": "Should I invest in Apple stock now?",
111 | "agent_types": ["market", "supervisor"],
112 | "persona": "moderate",
113 | },
114 | {
115 | "name": "Conservative_Multi_Agent_Portfolio",
116 | "query": "Build a balanced portfolio for retirement",
117 | "agent_types": ["market", "supervisor"],
118 | "persona": "conservative",
119 | },
120 | {
121 | "name": "Aggressive_Growth_Strategy",
122 | "query": "Find the best growth stocks for 2025",
123 | "agent_types": ["market", "supervisor"],
124 | "persona": "aggressive",
125 | },
126 | ]
127 |
128 | ERROR_HANDLING_SCENARIOS = [
129 | {
130 | "tool": "orchestrated_analysis",
131 | "params": {
132 | "query": "", # Empty query
133 | "persona": "invalid_persona",
134 | "routing_strategy": "unknown_strategy",
135 | },
136 | },
137 | {
138 | "tool": "deep_research_financial",
139 | "params": {
140 | "research_topic": "XYZ",
141 | "research_depth": "invalid_depth",
142 | "focus_areas": ["invalid_area"],
143 | },
144 | },
145 | {
146 | "tool": "compare_multi_agent_analysis",
147 | "params": {
148 | "query": "test",
149 | "agent_types": ["nonexistent_agent"],
150 | "persona": "unknown",
151 | },
152 | },
153 | ]
154 |
155 |
156 | class TestResult:
157 | """Container for individual test results."""
158 |
159 | def __init__(self, test_name: str, tool_name: str):
160 | self.test_name = test_name
161 | self.tool_name = tool_name
162 | self.start_time = time.time()
163 | self.end_time: float | None = None
164 | self.success = False
165 | self.error: str | None = None
166 | self.response: dict[str, Any] | None = None
167 | self.execution_time_ms: float | None = None
168 | self.memory_usage_mb: float | None = None
169 | self.validation_results: dict[str, bool] = {}
170 |
171 | def mark_completed(
172 | self, success: bool, response: dict | None = None, error: str | None = None
173 | ):
174 | """Mark test as completed with results."""
175 | self.end_time = time.time()
176 | self.execution_time_ms = (self.end_time - self.start_time) * 1000
177 | self.success = success
178 | self.response = response
179 | self.error = error
180 |
181 | def to_dict(self) -> dict[str, Any]:
182 | """Convert to dictionary for serialization."""
183 | return {
184 | "test_name": self.test_name,
185 | "tool_name": self.tool_name,
186 | "success": self.success,
187 | "execution_time_ms": self.execution_time_ms,
188 | "memory_usage_mb": self.memory_usage_mb,
189 | "error": self.error,
190 | "validation_results": self.validation_results,
191 | "response_summary": self._summarize_response() if self.response else None,
192 | }
193 |
194 | def _summarize_response(self) -> dict[str, Any]:
195 | """Create summary of response for reporting."""
196 | if not self.response:
197 | return {}
198 |
199 | summary = {
200 | "status": self.response.get("status"),
201 | "agent_type": self.response.get("agent_type"),
202 | "persona": self.response.get("persona"),
203 | }
204 |
205 | # Add tool-specific summary fields
206 | if "agents_used" in self.response:
207 | summary["agents_used"] = self.response["agents_used"]
208 | if "sources_analyzed" in self.response:
209 | summary["sources_analyzed"] = self.response["sources_analyzed"]
210 | if "agents_compared" in self.response:
211 | summary["agents_compared"] = self.response["agents_compared"]
212 |
213 | return summary
214 |
215 |
216 | class IntegrationTestSuite:
217 | """Comprehensive integration test suite for MCP orchestration tools."""
218 |
219 | def __init__(self):
220 | self.setup_logging()
221 | self.results: list[TestResult] = []
222 | self.start_time = time.time()
223 | self.session_id = f"test_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
224 |
225 | # Import MCP tools
226 | self._import_tools()
227 |
228 | def setup_logging(self):
229 | """Configure logging for test execution."""
230 | logging.basicConfig(
231 | level=logging.INFO,
232 | format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
233 | handlers=[
234 | logging.FileHandler(
235 | f"integration_test_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
236 | ),
237 | logging.StreamHandler(),
238 | ],
239 | )
240 | self.logger = logging.getLogger(__name__)
241 |
242 | def _import_tools(self):
243 | """Import and initialize MCP tools."""
244 | try:
245 | from maverick_mcp.api.routers.agents import (
246 | compare_multi_agent_analysis,
247 | deep_research_financial,
248 | orchestrated_analysis,
249 | )
250 |
251 | self.orchestrated_analysis = orchestrated_analysis
252 | self.deep_research_financial = deep_research_financial
253 | self.compare_multi_agent_analysis = compare_multi_agent_analysis
254 |
255 | self.logger.info("Successfully imported MCP orchestration tools")
256 |
257 | except ImportError as e:
258 | self.logger.error(f"Failed to import MCP tools: {e}")
259 |             raise RuntimeError(f"Cannot run tests without MCP tools: {e}") from e
260 |
261 | def print_header(self, title: str):
262 | """Print formatted test section header."""
263 | print(f"\n{'=' * 80}")
264 | print(f" {title}")
265 | print(f"{'=' * 80}")
266 |
267 | def print_progress(self, current: int, total: int, test_name: str):
268 | """Print progress indicator."""
269 | percentage = (current / total) * 100
270 | print(f"[{current:2d}/{total}] ({percentage:5.1f}%) {test_name}")
271 |
272 | async def test_orchestrated_analysis(self) -> list[TestResult]:
273 | """Test agents_orchestrated_analysis with various scenarios."""
274 | self.print_header("Testing Orchestrated Analysis Tool")
275 | results = []
276 |
277 | for i, scenario in enumerate(ORCHESTRATED_ANALYSIS_SCENARIOS):
278 | test_name = f"orchestrated_analysis_{scenario['name']}"
279 | self.print_progress(i + 1, len(ORCHESTRATED_ANALYSIS_SCENARIOS), test_name)
280 |
281 | result = TestResult(test_name, "agents_orchestrated_analysis")
282 |
283 | try:
284 | if TEST_CONFIG["performance_monitoring"]:
285 | tracemalloc.start()
286 |
287 | # Add unique session ID for each test
288 | scenario_params = scenario.copy()
289 | scenario_params["session_id"] = f"{self.session_id}_{test_name}"
290 |
291 | # Execute with timeout
292 | response = await asyncio.wait_for(
293 | self.orchestrated_analysis(**scenario_params),
294 | timeout=TEST_CONFIG["timeout_seconds"],
295 | )
296 |
297 | # Validate response
298 | validation_results = self._validate_orchestrated_response(response)
299 | result.validation_results = validation_results
300 |
301 | success = (
302 | all(validation_results.values())
303 | and response.get("status") == "success"
304 | )
305 | result.mark_completed(success, response)
306 |
307 | if TEST_CONFIG["performance_monitoring"]:
308 | current, peak = tracemalloc.get_traced_memory()
309 | result.memory_usage_mb = peak / 1024 / 1024
310 | tracemalloc.stop()
311 |
312 | self.logger.info(
313 | f"✓ {test_name}: {'PASS' if success else 'FAIL'} "
314 | f"({result.execution_time_ms:.0f}ms)"
315 | )
316 |
317 | except TimeoutError:
318 | result.mark_completed(False, error="Test timeout")
319 | self.logger.warning(f"✗ {test_name}: TIMEOUT")
320 |
321 | except Exception as e:
322 | result.mark_completed(False, error=str(e))
323 | self.logger.error(f"✗ {test_name}: ERROR - {e}")
324 |
325 | results.append(result)
326 |
327 | return results
328 |
329 | async def test_deep_research_financial(self) -> list[TestResult]:
330 | """Test agents_deep_research_financial with various scenarios."""
331 | self.print_header("Testing Deep Research Financial Tool")
332 | results = []
333 |
334 | for i, scenario in enumerate(DEEP_RESEARCH_SCENARIOS):
335 | test_name = f"deep_research_{scenario['name']}"
336 | self.print_progress(i + 1, len(DEEP_RESEARCH_SCENARIOS), test_name)
337 |
338 | result = TestResult(test_name, "agents_deep_research_financial")
339 |
340 | try:
341 | if TEST_CONFIG["performance_monitoring"]:
342 | tracemalloc.start()
343 |
344 | # Add unique session ID
345 | scenario_params = scenario.copy()
346 | scenario_params["session_id"] = f"{self.session_id}_{test_name}"
347 |
348 | response = await asyncio.wait_for(
349 | self.deep_research_financial(**scenario_params),
350 | timeout=TEST_CONFIG["timeout_seconds"],
351 | )
352 |
353 | validation_results = self._validate_research_response(response)
354 | result.validation_results = validation_results
355 |
356 | success = (
357 | all(validation_results.values())
358 | and response.get("status") == "success"
359 | )
360 | result.mark_completed(success, response)
361 |
362 | if TEST_CONFIG["performance_monitoring"]:
363 | current, peak = tracemalloc.get_traced_memory()
364 | result.memory_usage_mb = peak / 1024 / 1024
365 | tracemalloc.stop()
366 |
367 | self.logger.info(
368 | f"✓ {test_name}: {'PASS' if success else 'FAIL'} "
369 | f"({result.execution_time_ms:.0f}ms)"
370 | )
371 |
372 | except TimeoutError:
373 | result.mark_completed(False, error="Test timeout")
374 | self.logger.warning(f"✗ {test_name}: TIMEOUT")
375 |
376 | except Exception as e:
377 | result.mark_completed(False, error=str(e))
378 | self.logger.error(f"✗ {test_name}: ERROR - {e}")
379 |
380 | results.append(result)
381 |
382 | return results
383 |
384 | async def test_compare_multi_agent_analysis(self) -> list[TestResult]:
385 | """Test agents_compare_multi_agent_analysis with various scenarios."""
386 | self.print_header("Testing Multi-Agent Comparison Tool")
387 | results = []
388 |
389 | for i, scenario in enumerate(MULTI_AGENT_COMPARISON_SCENARIOS):
390 | test_name = f"multi_agent_{scenario['name']}"
391 | self.print_progress(i + 1, len(MULTI_AGENT_COMPARISON_SCENARIOS), test_name)
392 |
393 | result = TestResult(test_name, "agents_compare_multi_agent_analysis")
394 |
395 | try:
396 | if TEST_CONFIG["performance_monitoring"]:
397 | tracemalloc.start()
398 |
399 | scenario_params = scenario.copy()
400 | scenario_params["session_id"] = f"{self.session_id}_{test_name}"
401 |
402 | response = await asyncio.wait_for(
403 | self.compare_multi_agent_analysis(**scenario_params),
404 | timeout=TEST_CONFIG["timeout_seconds"],
405 | )
406 |
407 | validation_results = self._validate_comparison_response(response)
408 | result.validation_results = validation_results
409 |
410 | success = (
411 | all(validation_results.values())
412 | and response.get("status") == "success"
413 | )
414 | result.mark_completed(success, response)
415 |
416 | if TEST_CONFIG["performance_monitoring"]:
417 | current, peak = tracemalloc.get_traced_memory()
418 | result.memory_usage_mb = peak / 1024 / 1024
419 | tracemalloc.stop()
420 |
421 | self.logger.info(
422 | f"✓ {test_name}: {'PASS' if success else 'FAIL'} "
423 | f"({result.execution_time_ms:.0f}ms)"
424 | )
425 |
426 | except TimeoutError:
427 | result.mark_completed(False, error="Test timeout")
428 | self.logger.warning(f"✗ {test_name}: TIMEOUT")
429 |
430 | except Exception as e:
431 | result.mark_completed(False, error=str(e))
432 | self.logger.error(f"✗ {test_name}: ERROR - {e}")
433 |
434 | results.append(result)
435 |
436 | return results
437 |
438 | async def test_error_handling(self) -> list[TestResult]:
439 | """Test error handling with invalid inputs."""
440 | self.print_header("Testing Error Handling")
441 | results = []
442 |
443 | for i, scenario in enumerate(ERROR_HANDLING_SCENARIOS):
444 | test_name = f"error_handling_{scenario['tool']}"
445 | self.print_progress(i + 1, len(ERROR_HANDLING_SCENARIOS), test_name)
446 |
447 | result = TestResult(test_name, scenario["tool"])
448 |
449 | try:
450 | # Get the tool function
451 | tool_func = getattr(self, scenario["tool"])
452 |
453 | # Add session ID
454 | params = scenario["params"].copy()
455 | params["session_id"] = f"{self.session_id}_{test_name}"
456 |
457 | response = await asyncio.wait_for(
458 | tool_func(**params),
459 | timeout=60, # Shorter timeout for error cases
460 | )
461 |
462 | # For error handling tests, we expect graceful error handling
463 | # Success means the tool returned an error response without crashing
464 | has_error_field = (
465 | "error" in response or response.get("status") == "error"
466 | )
467 | success = has_error_field and isinstance(response, dict)
468 |
469 | result.validation_results = {"graceful_error_handling": success}
470 | result.mark_completed(success, response)
471 |
472 | self.logger.info(
473 | f"✓ {test_name}: {'PASS' if success else 'FAIL'} - "
474 | f"Graceful error handling: {has_error_field}"
475 | )
476 |
477 | except TimeoutError:
478 | result.mark_completed(False, error="Test timeout")
479 | self.logger.warning(f"✗ {test_name}: TIMEOUT")
480 |
481 | except Exception as e:
482 | # For error handling tests, exceptions are actually failures
483 | result.mark_completed(False, error=f"Unhandled exception: {str(e)}")
484 | self.logger.error(f"✗ {test_name}: UNHANDLED EXCEPTION - {e}")
485 |
486 | results.append(result)
487 |
488 | return results
489 |
490 | async def test_concurrent_execution(self) -> list[TestResult]:
491 | """Test concurrent execution of multiple tools."""
492 | self.print_header("Testing Concurrent Execution")
493 | results = []
494 |
495 | # Create concurrent test scenarios
496 | concurrent_tasks = [
497 | (
498 | "concurrent_orchestrated",
499 | self.orchestrated_analysis,
500 | {
501 | "query": "Analyze MSFT for investment",
502 | "persona": "moderate",
503 | "routing_strategy": "llm_powered",
504 | "session_id": f"{self.session_id}_concurrent_1",
505 | },
506 | ),
507 | (
508 | "concurrent_research",
509 | self.deep_research_financial,
510 | {
511 | "research_topic": "Amazon business model",
512 | "persona": "conservative",
513 | "research_depth": "standard",
514 | "session_id": f"{self.session_id}_concurrent_2",
515 | },
516 | ),
517 | (
518 | "concurrent_comparison",
519 | self.compare_multi_agent_analysis,
520 | {
521 | "query": "Best tech stocks for portfolio",
522 | "persona": "aggressive",
523 | "session_id": f"{self.session_id}_concurrent_3",
524 | },
525 | ),
526 | ]
527 |
528 | start_time = time.time()
529 |
530 | try:
531 | # Execute all tasks concurrently
532 | concurrent_results = await asyncio.gather(
533 | *[
534 | task_func(**task_params)
535 | for _, task_func, task_params in concurrent_tasks
536 | ],
537 | return_exceptions=True,
538 | )
539 |
540 | execution_time = (time.time() - start_time) * 1000
541 |
542 | # Process results
543 | for i, (task_name, _, _) in enumerate(concurrent_tasks):
544 | result = TestResult(task_name, "concurrent_execution")
545 |
546 | if i < len(concurrent_results):
547 | response = concurrent_results[i]
548 |
549 | if isinstance(response, Exception):
550 | result.mark_completed(False, error=str(response))
551 | success = False
552 | else:
553 | success = (
554 | isinstance(response, dict)
555 | and response.get("status") != "error"
556 | )
557 | result.mark_completed(success, response)
558 |
559 | result.validation_results = {"concurrent_execution": success}
560 | self.logger.info(f"✓ {task_name}: {'PASS' if success else 'FAIL'}")
561 | else:
562 | result.mark_completed(False, error="No result returned")
563 | self.logger.error(f"✗ {task_name}: No result returned")
564 |
565 | results.append(result)
566 |
567 | # Add overall concurrent test result
568 | concurrent_summary = TestResult(
569 | "concurrent_execution_summary", "performance"
570 | )
571 | concurrent_summary.execution_time_ms = execution_time
572 | concurrent_summary.validation_results = {
573 | "all_completed": len(concurrent_results) == len(concurrent_tasks),
574 | "no_crashes": all(
575 | not isinstance(r, Exception) for r in concurrent_results
576 | ),
577 | "reasonable_time": execution_time < 180000, # 3 minutes
578 | }
579 | concurrent_summary.mark_completed(
580 | all(concurrent_summary.validation_results.values()),
581 | {
582 | "concurrent_tasks": len(concurrent_tasks),
583 | "total_time_ms": execution_time,
584 | },
585 | )
586 | results.append(concurrent_summary)
587 |
588 | self.logger.info(
589 | f"Concurrent execution completed in {execution_time:.0f}ms"
590 | )
591 |
592 | except Exception as e:
593 | error_result = TestResult("concurrent_execution_error", "performance")
594 | error_result.mark_completed(False, error=str(e))
595 | results.append(error_result)
596 | self.logger.error(f"✗ Concurrent execution failed: {e}")
597 |
598 | return results
599 |
600 | def _validate_orchestrated_response(
601 | self, response: dict[str, Any]
602 | ) -> dict[str, bool]:
603 | """Validate orchestrated analysis response format."""
604 | validations = {
605 | "has_status": "status" in response,
606 | "has_agent_type": "agent_type" in response,
607 | "has_persona": "persona" in response,
608 | "has_session_id": "session_id" in response,
609 | "status_is_success": response.get("status") == "success",
610 | "has_routing_strategy": "routing_strategy" in response,
611 | "has_execution_time": "execution_time_ms" in response
612 | and isinstance(response.get("execution_time_ms"), int | float),
613 | }
614 |
615 | # Additional validations for successful responses
616 | if response.get("status") == "success":
617 | validations.update(
618 | {
619 | "has_agents_used": "agents_used" in response,
620 | "agents_used_is_list": isinstance(
621 | response.get("agents_used"), list
622 | ),
623 | "has_synthesis_confidence": "synthesis_confidence" in response,
624 | }
625 | )
626 |
627 | return validations
628 |
629 | def _validate_research_response(self, response: dict[str, Any]) -> dict[str, bool]:
630 | """Validate deep research response format."""
631 | validations = {
632 | "has_status": "status" in response,
633 | "has_agent_type": "agent_type" in response,
634 | "has_persona": "persona" in response,
635 | "has_research_topic": "research_topic" in response,
636 | "status_is_success": response.get("status") == "success",
637 | "has_research_depth": "research_depth" in response,
638 | "has_focus_areas": "focus_areas" in response,
639 | }
640 |
641 | if response.get("status") == "success":
642 | validations.update(
643 | {
644 | "has_sources_analyzed": "sources_analyzed" in response,
645 | "sources_analyzed_is_numeric": isinstance(
646 | response.get("sources_analyzed"), int | float
647 | ),
648 | "has_research_confidence": "research_confidence" in response,
649 | "has_validation_checks": "validation_checks_passed" in response,
650 | }
651 | )
652 |
653 | return validations
654 |
655 | def _validate_comparison_response(
656 | self, response: dict[str, Any]
657 | ) -> dict[str, bool]:
658 | """Validate multi-agent comparison response format."""
659 | validations = {
660 | "has_status": "status" in response,
661 | "has_query": "query" in response,
662 | "has_persona": "persona" in response,
663 | "status_is_success": response.get("status") == "success",
664 | "has_agents_compared": "agents_compared" in response,
665 | }
666 |
667 | if response.get("status") == "success":
668 | validations.update(
669 | {
670 | "agents_compared_is_list": isinstance(
671 | response.get("agents_compared"), list
672 | ),
673 | "has_comparison": "comparison" in response,
674 | "comparison_is_dict": isinstance(response.get("comparison"), dict),
675 | "has_execution_times": "execution_times_ms" in response,
676 | "has_insights": "insights" in response,
677 | }
678 | )
679 |
680 | return validations
681 |
682 | async def run_performance_benchmark(self):
683 | """Run performance benchmarks for all tools."""
684 | self.print_header("Performance Benchmarking")
685 |
686 | benchmark_scenarios = [
687 | (
688 | "orchestrated_fast",
689 | self.orchestrated_analysis,
690 | {
691 | "query": "Quick AAPL analysis",
692 | "persona": "moderate",
693 | "routing_strategy": "rule_based",
694 | "max_agents": 1,
695 | "parallel_execution": False,
696 | },
697 | ),
698 | (
699 | "research_basic",
700 | self.deep_research_financial,
701 | {
702 | "research_topic": "Microsoft",
703 | "research_depth": "basic",
704 | "persona": "moderate",
705 | },
706 | ),
707 | (
708 | "comparison_minimal",
709 | self.compare_multi_agent_analysis,
710 | {
711 | "query": "Compare AAPL vs MSFT",
712 | "agent_types": ["market"],
713 | "persona": "moderate",
714 | },
715 | ),
716 | ]
717 |
718 | performance_results = []
719 |
720 | for test_name, tool_func, params in benchmark_scenarios:
721 | print(f"Benchmarking {test_name}...")
722 |
723 | # Add session ID
724 | params["session_id"] = f"{self.session_id}_benchmark_{test_name}"
725 |
726 | # Run multiple iterations for average performance
727 | times = []
728 |
729 | for i in range(3): # 3 iterations for average
730 | start_time = time.time()
731 |
732 | try:
733 | await tool_func(**params)
734 | end_time = time.time()
735 | execution_time = (end_time - start_time) * 1000
736 | times.append(execution_time)
737 |
738 | except Exception as e:
739 | self.logger.error(
740 | f"Benchmark {test_name} iteration {i + 1} failed: {e}"
741 | )
742 | times.append(float("inf"))
743 |
744 | # Calculate performance metrics
745 | valid_times = [t for t in times if t != float("inf")]
746 | if valid_times:
747 | avg_time = sum(valid_times) / len(valid_times)
748 | min_time = min(valid_times)
749 | max_time = max(valid_times)
750 |
751 | performance_results.append(
752 | {
753 | "test": test_name,
754 | "avg_time_ms": avg_time,
755 | "min_time_ms": min_time,
756 | "max_time_ms": max_time,
757 | "successful_runs": len(valid_times),
758 | }
759 | )
760 |
761 | print(
762 | f" {test_name}: Avg={avg_time:.0f}ms, Min={min_time:.0f}ms, Max={max_time:.0f}ms"
763 | )
764 | else:
765 | print(f" {test_name}: All iterations failed")
766 |
767 | return performance_results
768 |
769 | def generate_test_report(self):
770 | """Generate comprehensive test report."""
771 | self.print_header("Test Results Summary")
772 |
773 | total_tests = len(self.results)
774 | passed_tests = sum(1 for r in self.results if r.success)
775 | failed_tests = total_tests - passed_tests
776 |
777 | total_time = time.time() - self.start_time
778 |
779 | print(f"Total Tests: {total_tests}")
780 | print(f"Passed: {passed_tests} ({passed_tests / total_tests * 100:.1f}%)")
781 | print(f"Failed: {failed_tests} ({failed_tests / total_tests * 100:.1f}%)")
782 | print(f"Total Execution Time: {total_time:.2f}s")
783 |
784 | # Group results by tool
785 | by_tool = {}
786 | for result in self.results:
787 | if result.tool_name not in by_tool:
788 | by_tool[result.tool_name] = []
789 | by_tool[result.tool_name].append(result)
790 |
791 | print("\nResults by Tool:")
792 | for tool_name, tool_results in by_tool.items():
793 | tool_passed = sum(1 for r in tool_results if r.success)
794 | tool_total = len(tool_results)
795 | print(
796 | f" {tool_name}: {tool_passed}/{tool_total} passed "
797 | f"({tool_passed / tool_total * 100:.1f}%)"
798 | )
799 |
800 | # Performance summary
801 | execution_times = [
802 | r.execution_time_ms for r in self.results if r.execution_time_ms
803 | ]
804 | if execution_times:
805 | avg_time = sum(execution_times) / len(execution_times)
806 | print("\nPerformance Summary:")
807 | print(f" Average execution time: {avg_time:.0f}ms")
808 | print(f" Fastest test: {min(execution_times):.0f}ms")
809 | print(f" Slowest test: {max(execution_times):.0f}ms")
810 |
811 | # Failed tests details
812 | failed_results = [r for r in self.results if not r.success]
813 | if failed_results:
814 | print("\nFailed Tests:")
815 | for result in failed_results:
816 | print(f" ✗ {result.test_name}: {result.error}")
817 |
818 | return {
819 | "summary": {
820 | "total_tests": total_tests,
821 | "passed_tests": passed_tests,
822 | "failed_tests": failed_tests,
823 | "pass_rate": passed_tests / total_tests if total_tests > 0 else 0,
824 | "total_execution_time_s": total_time,
825 | },
826 | "by_tool": {
827 | tool: {
828 | "total": len(results),
829 | "passed": sum(1 for r in results if r.success),
830 | "pass_rate": sum(1 for r in results if r.success) / len(results),
831 | }
832 | for tool, results in by_tool.items()
833 | },
834 | "performance": {
835 | "avg_execution_time_ms": avg_time if execution_times else None,
836 | "min_execution_time_ms": min(execution_times)
837 | if execution_times
838 | else None,
839 | "max_execution_time_ms": max(execution_times)
840 | if execution_times
841 | else None,
842 | },
843 | "failed_tests": [r.to_dict() for r in failed_results],
844 | }
845 |
846 | def save_results(self, report: dict[str, Any], performance_data: list[dict]):
847 | """Save detailed results to files."""
848 | if not TEST_CONFIG["save_results"]:
849 | return
850 |
851 | timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
852 |
853 | # Save detailed results
854 | detailed_results = {
855 | "test_session": self.session_id,
856 | "timestamp": timestamp,
857 | "config": TEST_CONFIG,
858 | "summary": report,
859 | "detailed_results": [r.to_dict() for r in self.results],
860 | "performance_benchmarks": performance_data,
861 | }
862 |
863 | with open(f"integration_test_results_{timestamp}.json", "w") as f:
864 | json.dump(detailed_results, f, indent=2, default=str)
865 |
866 | print(f"\nDetailed results saved to: integration_test_results_{timestamp}.json")
867 |
868 | async def run_all_tests(self):
869 | """Run the complete test suite."""
870 | self.print_header(
871 | f"MaverickMCP Orchestration Integration Test Suite - {self.session_id}"
872 | )
873 |
874 | print("Test Configuration:")
875 | for key, value in TEST_CONFIG.items():
876 | print(f" {key}: {value}")
877 |
878 | try:
879 | # Run all test categories
880 | orchestrated_results = await self.test_orchestrated_analysis()
881 | self.results.extend(orchestrated_results)
882 |
883 | research_results = await self.test_deep_research_financial()
884 | self.results.extend(research_results)
885 |
886 | comparison_results = await self.test_compare_multi_agent_analysis()
887 | self.results.extend(comparison_results)
888 |
889 | error_handling_results = await self.test_error_handling()
890 | self.results.extend(error_handling_results)
891 |
892 | concurrent_results = await self.test_concurrent_execution()
893 | self.results.extend(concurrent_results)
894 |
895 | # Performance benchmarks
896 | performance_data = await self.run_performance_benchmark()
897 |
898 | # Generate and save report
899 | report = self.generate_test_report()
900 | self.save_results(report, performance_data)
901 |
902 | # Final status
903 | total_passed = sum(1 for r in self.results if r.success)
904 | total_tests = len(self.results)
905 |
906 | if total_passed == total_tests:
907 | print(f"\n🎉 ALL TESTS PASSED! ({total_passed}/{total_tests})")
908 | return 0
909 | else:
910 | print(f"\n⚠️ SOME TESTS FAILED ({total_passed}/{total_tests} passed)")
911 | return 1
912 |
913 | except Exception as e:
914 | self.logger.error(f"Test suite execution failed: {e}")
915 | print(f"\n💥 TEST SUITE EXECUTION FAILED: {e}")
916 | return 2
917 |
918 |
919 | async def main():
920 | """Main test execution function."""
921 | # Set environment variables for testing if needed
922 | if not os.getenv("OPENAI_API_KEY"):
923 | print("⚠️ Warning: OPENAI_API_KEY not set - tests will use mock responses")
924 |
925 | test_suite = IntegrationTestSuite()
926 | exit_code = await test_suite.run_all_tests()
927 |
928 | return exit_code
929 |
930 |
931 | if __name__ == "__main__":
932 |     # Allow running this test suite directly as a script
933 | import sys
934 |
935 | try:
936 | exit_code = asyncio.run(main())
937 | sys.exit(exit_code)
938 | except KeyboardInterrupt:
939 | print("\n🛑 Tests interrupted by user")
940 | sys.exit(130) # SIGINT exit code
941 | except Exception as e:
942 | print(f"\n💥 Unexpected error: {e}")
943 | sys.exit(1)
944 |
```
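Because the suite defines its own `main()` entry point, it can be run directly with `python tests/integration/test_orchestration_complete.py`. For quicker iteration, a single test category can also be driven programmatically; the snippet below is a sketch under the assumption that the module is importable from the repository root using the path shown in its header.

```python
# Sketch: run only the orchestrated-analysis scenarios and print a one-line
# summary per test. Assumes the module path above resolves as a package import.
import asyncio

from tests.integration.test_orchestration_complete import IntegrationTestSuite


async def run_orchestrated_only() -> None:
    suite = IntegrationTestSuite()  # sets up logging and imports the MCP tools
    results = await suite.test_orchestrated_analysis()
    for result in results:
        status = "PASS" if result.success else f"FAIL ({result.error})"
        print(f"{result.test_name}: {status} in {result.execution_time_ms:.0f}ms")


if __name__ == "__main__":
    asyncio.run(run_orchestrated_only())
```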