
# Directory Structure

```
├── .dockerignore
├── .env.example
├── .github
│   ├── dependabot.yml
│   ├── FUNDING.yml
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.md
│   │   ├── config.yml
│   │   ├── feature_request.md
│   │   ├── question.md
│   │   └── security_report.md
│   ├── pull_request_template.md
│   └── workflows
│       ├── claude-code-review.yml
│       └── claude.yml
├── .gitignore
├── .jules
│   └── bolt.md
├── .python-version
├── .vscode
│   ├── launch.json
│   └── settings.json
├── alembic
│   ├── env.py
│   ├── script.py.mako
│   └── versions
│       ├── 001_initial_schema.py
│       ├── 003_add_performance_indexes.py
│       ├── 006_rename_metadata_columns.py
│       ├── 008_performance_optimization_indexes.py
│       ├── 009_rename_to_supply_demand.py
│       ├── 010_self_contained_schema.py
│       ├── 011_remove_proprietary_terms.py
│       ├── 013_add_backtest_persistence_models.py
│       ├── 014_add_portfolio_models.py
│       ├── 08e3945a0c93_merge_heads.py
│       ├── 9374a5c9b679_merge_heads_for_testing.py
│       ├── abf9b9afb134_merge_multiple_heads.py
│       ├── adda6d3fd84b_merge_proprietary_terms_removal_with_.py
│       ├── e0c75b0bdadb_fix_financial_data_precision_only.py
│       ├── f0696e2cac15_add_essential_performance_indexes.py
│       └── fix_database_integrity_issues.py
├── alembic.ini
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── DATABASE_SETUP.md
├── docker-compose.override.yml.example
├── docker-compose.yml
├── Dockerfile
├── docs
│   ├── api
│   │   └── backtesting.md
│   ├── BACKTESTING.md
│   ├── COST_BASIS_SPECIFICATION.md
│   ├── deep_research_agent.md
│   ├── exa_research_testing_strategy.md
│   ├── PORTFOLIO_PERSONALIZATION_PLAN.md
│   ├── PORTFOLIO.md
│   ├── SETUP_SELF_CONTAINED.md
│   └── speed_testing_framework.md
├── examples
│   ├── complete_speed_validation.py
│   ├── deep_research_integration.py
│   ├── llm_optimization_example.py
│   ├── llm_speed_demo.py
│   ├── monitoring_example.py
│   ├── parallel_research_example.py
│   ├── speed_optimization_demo.py
│   └── timeout_fix_demonstration.py
├── LICENSE
├── Makefile
├── MANIFEST.in
├── maverick_mcp
│   ├── __init__.py
│   ├── agents
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── circuit_breaker.py
│   │   ├── deep_research.py
│   │   ├── market_analysis.py
│   │   ├── optimized_research.py
│   │   ├── supervisor.py
│   │   └── technical_analysis.py
│   ├── api
│   │   ├── __init__.py
│   │   ├── api_server.py
│   │   ├── connection_manager.py
│   │   ├── dependencies
│   │   │   ├── __init__.py
│   │   │   ├── stock_analysis.py
│   │   │   └── technical_analysis.py
│   │   ├── error_handling.py
│   │   ├── inspector_compatible_sse.py
│   │   ├── inspector_sse.py
│   │   ├── middleware
│   │   │   ├── error_handling.py
│   │   │   ├── mcp_logging.py
│   │   │   ├── rate_limiting_enhanced.py
│   │   │   └── security.py
│   │   ├── openapi_config.py
│   │   ├── routers
│   │   │   ├── __init__.py
│   │   │   ├── agents.py
│   │   │   ├── backtesting.py
│   │   │   ├── data_enhanced.py
│   │   │   ├── data.py
│   │   │   ├── health_enhanced.py
│   │   │   ├── health_tools.py
│   │   │   ├── health.py
│   │   │   ├── intelligent_backtesting.py
│   │   │   ├── introspection.py
│   │   │   ├── mcp_prompts.py
│   │   │   ├── monitoring.py
│   │   │   ├── news_sentiment_enhanced.py
│   │   │   ├── performance.py
│   │   │   ├── portfolio.py
│   │   │   ├── research.py
│   │   │   ├── screening_ddd.py
│   │   │   ├── screening_parallel.py
│   │   │   ├── screening.py
│   │   │   ├── technical_ddd.py
│   │   │   ├── technical_enhanced.py
│   │   │   ├── technical.py
│   │   │   └── tool_registry.py
│   │   ├── server.py
│   │   ├── services
│   │   │   ├── __init__.py
│   │   │   ├── base_service.py
│   │   │   ├── market_service.py
│   │   │   ├── portfolio_service.py
│   │   │   ├── prompt_service.py
│   │   │   └── resource_service.py
│   │   ├── simple_sse.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── insomnia_export.py
│   │       └── postman_export.py
│   ├── application
│   │   ├── __init__.py
│   │   ├── commands
│   │   │   └── __init__.py
│   │   ├── dto
│   │   │   ├── __init__.py
│   │   │   └── technical_analysis_dto.py
│   │   ├── queries
│   │   │   ├── __init__.py
│   │   │   └── get_technical_analysis.py
│   │   └── screening
│   │       ├── __init__.py
│   │       ├── dtos.py
│   │       └── queries.py
│   ├── backtesting
│   │   ├── __init__.py
│   │   ├── ab_testing.py
│   │   ├── analysis.py
│   │   ├── batch_processing_stub.py
│   │   ├── batch_processing.py
│   │   ├── model_manager.py
│   │   ├── optimization.py
│   │   ├── persistence.py
│   │   ├── retraining_pipeline.py
│   │   ├── strategies
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   ├── ml
│   │   │   │   ├── __init__.py
│   │   │   │   ├── adaptive.py
│   │   │   │   ├── ensemble.py
│   │   │   │   ├── feature_engineering.py
│   │   │   │   └── regime_aware.py
│   │   │   ├── ml_strategies.py
│   │   │   ├── parser.py
│   │   │   └── templates.py
│   │   ├── strategy_executor.py
│   │   ├── vectorbt_engine.py
│   │   └── visualization.py
│   ├── config
│   │   ├── __init__.py
│   │   ├── constants.py
│   │   ├── database_self_contained.py
│   │   ├── database.py
│   │   ├── llm_optimization_config.py
│   │   ├── logging_settings.py
│   │   ├── plotly_config.py
│   │   ├── security_utils.py
│   │   ├── security.py
│   │   ├── settings.py
│   │   ├── technical_constants.py
│   │   ├── tool_estimation.py
│   │   └── validation.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── technical_analysis.py
│   │   └── visualization.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── cache_manager.py
│   │   ├── cache.py
│   │   ├── django_adapter.py
│   │   ├── health.py
│   │   ├── models.py
│   │   ├── performance.py
│   │   ├── session_management.py
│   │   └── validation.py
│   ├── database
│   │   ├── __init__.py
│   │   ├── base.py
│   │   └── optimization.py
│   ├── dependencies.py
│   ├── domain
│   │   ├── __init__.py
│   │   ├── entities
│   │   │   ├── __init__.py
│   │   │   └── stock_analysis.py
│   │   ├── events
│   │   │   └── __init__.py
│   │   ├── portfolio.py
│   │   ├── screening
│   │   │   ├── __init__.py
│   │   │   ├── entities.py
│   │   │   ├── services.py
│   │   │   └── value_objects.py
│   │   ├── services
│   │   │   ├── __init__.py
│   │   │   └── technical_analysis_service.py
│   │   ├── stock_analysis
│   │   │   ├── __init__.py
│   │   │   └── stock_analysis_service.py
│   │   └── value_objects
│   │       ├── __init__.py
│   │       └── technical_indicators.py
│   ├── exceptions.py
│   ├── infrastructure
│   │   ├── __init__.py
│   │   ├── cache
│   │   │   └── __init__.py
│   │   ├── caching
│   │   │   ├── __init__.py
│   │   │   └── cache_management_service.py
│   │   ├── connection_manager.py
│   │   ├── data_fetching
│   │   │   ├── __init__.py
│   │   │   └── stock_data_service.py
│   │   ├── health
│   │   │   ├── __init__.py
│   │   │   └── health_checker.py
│   │   ├── persistence
│   │   │   ├── __init__.py
│   │   │   └── stock_repository.py
│   │   ├── providers
│   │   │   └── __init__.py
│   │   ├── screening
│   │   │   ├── __init__.py
│   │   │   └── repositories.py
│   │   └── sse_optimizer.py
│   ├── langchain_tools
│   │   ├── __init__.py
│   │   ├── adapters.py
│   │   └── registry.py
│   ├── logging_config.py
│   ├── memory
│   │   ├── __init__.py
│   │   └── stores.py
│   ├── monitoring
│   │   ├── __init__.py
│   │   ├── health_check.py
│   │   ├── health_monitor.py
│   │   ├── integration_example.py
│   │   ├── metrics.py
│   │   ├── middleware.py
│   │   └── status_dashboard.py
│   ├── providers
│   │   ├── __init__.py
│   │   ├── dependencies.py
│   │   ├── factories
│   │   │   ├── __init__.py
│   │   │   ├── config_factory.py
│   │   │   └── provider_factory.py
│   │   ├── implementations
│   │   │   ├── __init__.py
│   │   │   ├── cache_adapter.py
│   │   │   ├── macro_data_adapter.py
│   │   │   ├── market_data_adapter.py
│   │   │   ├── persistence_adapter.py
│   │   │   └── stock_data_adapter.py
│   │   ├── interfaces
│   │   │   ├── __init__.py
│   │   │   ├── cache.py
│   │   │   ├── config.py
│   │   │   ├── macro_data.py
│   │   │   ├── market_data.py
│   │   │   ├── persistence.py
│   │   │   └── stock_data.py
│   │   ├── llm_factory.py
│   │   ├── macro_data.py
│   │   ├── market_data.py
│   │   ├── mocks
│   │   │   ├── __init__.py
│   │   │   ├── mock_cache.py
│   │   │   ├── mock_config.py
│   │   │   ├── mock_macro_data.py
│   │   │   ├── mock_market_data.py
│   │   │   ├── mock_persistence.py
│   │   │   └── mock_stock_data.py
│   │   ├── openrouter_provider.py
│   │   ├── optimized_screening.py
│   │   ├── optimized_stock_data.py
│   │   └── stock_data.py
│   ├── README.md
│   ├── tests
│   │   ├── __init__.py
│   │   ├── README_INMEMORY_TESTS.md
│   │   ├── test_cache_debug.py
│   │   ├── test_fixes_validation.py
│   │   ├── test_in_memory_routers.py
│   │   ├── test_in_memory_server.py
│   │   ├── test_macro_data_provider.py
│   │   ├── test_mailgun_email.py
│   │   ├── test_market_calendar_caching.py
│   │   ├── test_mcp_tool_fixes_pytest.py
│   │   ├── test_mcp_tool_fixes.py
│   │   ├── test_mcp_tools.py
│   │   ├── test_models_functional.py
│   │   ├── test_server.py
│   │   ├── test_stock_data_enhanced.py
│   │   ├── test_stock_data_provider.py
│   │   └── test_technical_analysis.py
│   ├── tools
│   │   ├── __init__.py
│   │   ├── performance_monitoring.py
│   │   ├── portfolio_manager.py
│   │   ├── risk_management.py
│   │   └── sentiment_analysis.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── agent_errors.py
│   │   ├── batch_processing.py
│   │   ├── cache_warmer.py
│   │   ├── circuit_breaker_decorators.py
│   │   ├── circuit_breaker_services.py
│   │   ├── circuit_breaker.py
│   │   ├── data_chunking.py
│   │   ├── database_monitoring.py
│   │   ├── debug_utils.py
│   │   ├── fallback_strategies.py
│   │   ├── llm_optimization.py
│   │   ├── logging_example.py
│   │   ├── logging_init.py
│   │   ├── logging.py
│   │   ├── mcp_logging.py
│   │   ├── memory_profiler.py
│   │   ├── monitoring_middleware.py
│   │   ├── monitoring.py
│   │   ├── orchestration_logging.py
│   │   ├── parallel_research.py
│   │   ├── parallel_screening.py
│   │   ├── quick_cache.py
│   │   ├── resource_manager.py
│   │   ├── shutdown.py
│   │   ├── stock_helpers.py
│   │   ├── structured_logger.py
│   │   ├── tool_monitoring.py
│   │   ├── tracing.py
│   │   └── yfinance_pool.py
│   ├── validation
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── data.py
│   │   ├── middleware.py
│   │   ├── portfolio.py
│   │   ├── responses.py
│   │   ├── screening.py
│   │   └── technical.py
│   └── workflows
│       ├── __init__.py
│       ├── agents
│       │   ├── __init__.py
│       │   ├── market_analyzer.py
│       │   ├── optimizer_agent.py
│       │   ├── strategy_selector.py
│       │   └── validator_agent.py
│       ├── backtesting_workflow.py
│       └── state.py
├── PLANS.md
├── pyproject.toml
├── pyrightconfig.json
├── README.md
├── scripts
│   ├── dev.sh
│   ├── INSTALLATION_GUIDE.md
│   ├── load_example.py
│   ├── load_market_data.py
│   ├── load_tiingo_data.py
│   ├── migrate_db.py
│   ├── README_TIINGO_LOADER.md
│   ├── requirements_tiingo.txt
│   ├── run_stock_screening.py
│   ├── run-migrations.sh
│   ├── seed_db.py
│   ├── seed_sp500.py
│   ├── setup_database.sh
│   ├── setup_self_contained.py
│   ├── setup_sp500_database.sh
│   ├── test_seeded_data.py
│   ├── test_tiingo_loader.py
│   ├── tiingo_config.py
│   └── validate_setup.py
├── SECURITY.md
├── server.json
├── setup.py
├── tests
│   ├── __init__.py
│   ├── conftest.py
│   ├── core
│   │   └── test_technical_analysis.py
│   ├── data
│   │   └── test_portfolio_models.py
│   ├── domain
│   │   ├── conftest.py
│   │   ├── test_portfolio_entities.py
│   │   └── test_technical_analysis_service.py
│   ├── fixtures
│   │   └── orchestration_fixtures.py
│   ├── integration
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── README.md
│   │   ├── run_integration_tests.sh
│   │   ├── test_api_technical.py
│   │   ├── test_chaos_engineering.py
│   │   ├── test_config_management.py
│   │   ├── test_full_backtest_workflow_advanced.py
│   │   ├── test_full_backtest_workflow.py
│   │   ├── test_high_volume.py
│   │   ├── test_mcp_tools.py
│   │   ├── test_orchestration_complete.py
│   │   ├── test_portfolio_persistence.py
│   │   ├── test_redis_cache.py
│   │   ├── test_security_integration.py.disabled
│   │   └── vcr_setup.py
│   ├── performance
│   │   ├── __init__.py
│   │   ├── test_benchmarks.py
│   │   ├── test_load.py
│   │   ├── test_profiling.py
│   │   └── test_stress.py
│   ├── providers
│   │   └── test_stock_data_simple.py
│   ├── README.md
│   ├── test_agents_router_mcp.py
│   ├── test_backtest_persistence.py
│   ├── test_cache_management_service.py
│   ├── test_cache_serialization.py
│   ├── test_circuit_breaker.py
│   ├── test_database_pool_config_simple.py
│   ├── test_database_pool_config.py
│   ├── test_deep_research_functional.py
│   ├── test_deep_research_integration.py
│   ├── test_deep_research_parallel_execution.py
│   ├── test_error_handling.py
│   ├── test_event_loop_integrity.py
│   ├── test_exa_research_integration.py
│   ├── test_exception_hierarchy.py
│   ├── test_financial_search.py
│   ├── test_graceful_shutdown.py
│   ├── test_integration_simple.py
│   ├── test_langgraph_workflow.py
│   ├── test_market_data_async.py
│   ├── test_market_data_simple.py
│   ├── test_mcp_orchestration_functional.py
│   ├── test_ml_strategies.py
│   ├── test_optimized_research_agent.py
│   ├── test_orchestration_integration.py
│   ├── test_orchestration_logging.py
│   ├── test_orchestration_tools_simple.py
│   ├── test_parallel_research_integration.py
│   ├── test_parallel_research_orchestrator.py
│   ├── test_parallel_research_performance.py
│   ├── test_performance_optimizations.py
│   ├── test_production_validation.py
│   ├── test_provider_architecture.py
│   ├── test_rate_limiting_enhanced.py
│   ├── test_runner_validation.py
│   ├── test_security_comprehensive.py.disabled
│   ├── test_security_cors.py
│   ├── test_security_enhancements.py.disabled
│   ├── test_security_headers.py
│   ├── test_security_penetration.py
│   ├── test_session_management.py
│   ├── test_speed_optimization_validation.py
│   ├── test_stock_analysis_dependencies.py
│   ├── test_stock_analysis_service.py
│   ├── test_stock_data_fetching_service.py
│   ├── test_supervisor_agent.py
│   ├── test_supervisor_functional.py
│   ├── test_tool_estimation_config.py
│   ├── test_visualization.py
│   ├── unit
│   │   └── test_stock_repository_adapter.py
│   └── utils
│       ├── test_agent_errors.py
│       ├── test_logging.py
│       ├── test_parallel_screening.py
│       └── test_quick_cache.py
├── tools
│   ├── check_orchestration_config.py
│   ├── experiments
│   │   ├── validation_examples.py
│   │   └── validation_fixed.py
│   ├── fast_dev.sh
│   ├── hot_reload.py
│   ├── quick_test.py
│   └── templates
│       ├── new_router_template.py
│       ├── new_tool_template.py
│       ├── screening_strategy_template.py
│       └── test_template.py
└── uv.lock
```

# Files

--------------------------------------------------------------------------------
/maverick_mcp/backtesting/vectorbt_engine.py:
--------------------------------------------------------------------------------

```python
   1 | """VectorBT backtesting engine implementation with memory management and structured logging."""
   2 | 
   3 | import gc
   4 | from typing import Any
   5 | 
   6 | import numpy as np
   7 | import pandas as pd
   8 | import vectorbt as vbt
   9 | from pandas import DataFrame, Series
  10 | 
  11 | from maverick_mcp.backtesting.batch_processing import BatchProcessingMixin
  12 | from maverick_mcp.data.cache import (
  13 |     CacheManager,
  14 |     ensure_timezone_naive,
  15 |     generate_cache_key,
  16 | )
  17 | from maverick_mcp.providers.stock_data import EnhancedStockDataProvider
  18 | from maverick_mcp.utils.cache_warmer import CacheWarmer
  19 | from maverick_mcp.utils.data_chunking import DataChunker, optimize_dataframe_dtypes
  20 | from maverick_mcp.utils.memory_profiler import (
  21 |     check_memory_leak,
  22 |     cleanup_dataframes,
  23 |     get_memory_stats,
  24 |     memory_context,
  25 |     profile_memory,
  26 | )
  27 | from maverick_mcp.utils.structured_logger import (
  28 |     get_performance_logger,
  29 |     get_structured_logger,
  30 |     with_structured_logging,
  31 | )
  32 | 
  33 | logger = get_structured_logger(__name__)
  34 | performance_logger = get_performance_logger("vectorbt_engine")
  35 | 
  36 | 
  37 | class VectorBTEngine(BatchProcessingMixin):
  38 |     """High-performance backtesting engine using VectorBT with memory management."""
  39 | 
  40 |     def __init__(
  41 |         self,
  42 |         data_provider: EnhancedStockDataProvider | None = None,
  43 |         cache_service=None,
  44 |         enable_memory_profiling: bool = True,
  45 |         chunk_size_mb: float = 100.0,
  46 |     ):
  47 |         """Initialize VectorBT engine.
  48 | 
  49 |         Args:
  50 |             data_provider: Stock data provider instance
  51 |             cache_service: Cache service for data persistence
  52 |             enable_memory_profiling: Enable memory profiling and optimization
  53 |             chunk_size_mb: Chunk size for large dataset processing
  54 |         """
  55 |         self.data_provider = data_provider or EnhancedStockDataProvider()
  56 |         self.cache = cache_service or CacheManager()
  57 |         self.cache_warmer = CacheWarmer(
  58 |             data_provider=self.data_provider, cache_manager=self.cache
  59 |         )
  60 | 
  61 |         # Memory management configuration
  62 |         self.enable_memory_profiling = enable_memory_profiling
  63 |         self.chunker = DataChunker(
  64 |             chunk_size_mb=chunk_size_mb, optimize_chunks=True, auto_gc=True
  65 |         )
  66 | 
  67 |         # Configure VectorBT settings for optimal performance and memory usage
  68 |         try:
  69 |             vbt.settings.array_wrapper["freq"] = "D"
  70 |             vbt.settings.caching["enabled"] = True  # Enable VectorBT's internal caching
  71 |             # Don't set whitelist to avoid cache condition issues
        except Exception as e:
            logger.warning(f"Could not configure VectorBT settings: {e}")

        logger.info(
            f"VectorBT engine initialized with memory profiling: {enable_memory_profiling}"
        )

        # Initialize memory tracking
        if self.enable_memory_profiling:
            initial_stats = get_memory_stats()
            logger.debug(f"Initial memory stats: {initial_stats}")

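    # Illustrative usage sketch (not from the original source; the symbol, dates,
    # and parameters are placeholder values for the run_backtest() API below):
    #
    #   engine = VectorBTEngine()
    #   result = await engine.run_backtest(
    #       symbol="AAPL",
    #       strategy_type="sma_cross",
    #       parameters={"fast_period": 10, "slow_period": 20},
    #       start_date="2023-01-01",
    #       end_date="2023-12-31",
    #   )
    #   result["metrics"]["sharpe_ratio"]
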
    @with_structured_logging(
        "get_historical_data", include_performance=True, log_params=True
    )
    @profile_memory(log_results=True, threshold_mb=50.0)
    async def get_historical_data(
        self, symbol: str, start_date: str, end_date: str, interval: str = "1d"
    ) -> DataFrame:
        """Fetch historical data for backtesting with memory optimization.

        Args:
            symbol: Stock symbol
            start_date: Start date (YYYY-MM-DD)
            end_date: End date (YYYY-MM-DD)
            interval: Data interval (1d, 1h, etc.)

        Returns:
            Memory-optimized DataFrame with OHLCV data
        """
        # Generate versioned cache key
        cache_key = generate_cache_key(
            "backtest_data",
            symbol=symbol,
            start_date=start_date,
            end_date=end_date,
            interval=interval,
        )

        # Try cache first with improved deserialization
        cached_data = await self.cache.get(cache_key)
        if cached_data is not None:
            if isinstance(cached_data, pd.DataFrame):
                # Already a DataFrame - ensure timezone-naive
                df = ensure_timezone_naive(cached_data)
            else:
                # Restore DataFrame from dict (legacy JSON cache)
                df = pd.DataFrame.from_dict(cached_data, orient="index")
                # Convert index back to datetime
                df.index = pd.to_datetime(df.index)
                df = ensure_timezone_naive(df)

            # Ensure column names are lowercase
            df.columns = [col.lower() for col in df.columns]
            return df

        # Fetch from provider - try async method first, fallback to sync
        try:
            data = await self._get_data_async(symbol, start_date, end_date, interval)
        except AttributeError:
            # Fallback to sync method if async not available
            data = self.data_provider.get_stock_data(
                symbol=symbol,
                start_date=start_date,
                end_date=end_date,
                interval=interval,
            )

        if data is None or data.empty:
            raise ValueError(f"No data available for {symbol}")

        # Normalize column names to lowercase for consistency
        data.columns = [col.lower() for col in data.columns]

        # Ensure timezone-naive index and fix any timezone comparison issues
        data = ensure_timezone_naive(data)

        # Optimize DataFrame memory usage
        if self.enable_memory_profiling:
            data = optimize_dataframe_dtypes(data, aggressive=False)
            logger.debug(f"Optimized {symbol} data memory usage")

        # Cache with adaptive TTL - longer for older data
        from datetime import datetime

        end_dt = datetime.strptime(end_date, "%Y-%m-%d")
        days_old = (datetime.now() - end_dt).days
        ttl = 86400 if days_old > 7 else 3600  # 24h for older data, 1h for recent

        await self.cache.set(cache_key, data, ttl=ttl)

        return data

    async def _get_data_async(
        self, symbol: str, start_date: str, end_date: str, interval: str
    ) -> DataFrame:
        """Get data using async method if available."""
        if hasattr(self.data_provider, "get_stock_data_async"):
            return await self.data_provider.get_stock_data_async(
                symbol=symbol,
                start_date=start_date,
                end_date=end_date,
                interval=interval,
            )
        else:
            # Fallback to sync method
            return self.data_provider.get_stock_data(
                symbol=symbol,
                start_date=start_date,
                end_date=end_date,
                interval=interval,
            )

    @with_structured_logging(
        "run_backtest", include_performance=True, log_params=True, log_result=False
    )
    @profile_memory(log_results=True, threshold_mb=200.0)
    async def run_backtest(
        self,
        symbol: str,
        strategy_type: str,
        parameters: dict[str, Any],
        start_date: str,
        end_date: str,
        initial_capital: float = 10000.0,
        fees: float = 0.001,
        slippage: float = 0.001,
    ) -> dict[str, Any]:
        """Run a vectorized backtest with memory optimization.

        Args:
            symbol: Stock symbol
            strategy_type: Type of strategy (sma_cross, rsi, macd, etc.)
            parameters: Strategy parameters
            start_date: Start date
            end_date: End date
            initial_capital: Starting capital
            fees: Trading fees as a fraction of trade value (0.001 = 0.1%)
            slippage: Slippage as a fraction of price (0.001 = 0.1%)

        Returns:
            Dictionary with backtest results
        """
        with memory_context("backtest_execution"):
            # Fetch data
            data = await self.get_historical_data(symbol, start_date, end_date)

            # Check for large datasets and warn
            data_memory_mb = data.memory_usage(deep=True).sum() / (1024**2)
            if data_memory_mb > 100:
                logger.warning(f"Large dataset detected: {data_memory_mb:.2f}MB")

            # Log business metrics
            performance_logger.log_business_metric(
                "dataset_size_mb",
                data_memory_mb,
                symbol=symbol,
                date_range_days=(
                    pd.to_datetime(end_date) - pd.to_datetime(start_date)
                ).days,
            )

            # Generate signals based on strategy
            entries, exits = self._generate_signals(data, strategy_type, parameters)

            # Optimize memory usage - use efficient data types
            with memory_context("data_optimization"):
                close_prices = data["close"].astype(np.float32)
                entries = entries.astype(bool)
                exits = exits.astype(bool)

                # Clean up original data to free memory
                if self.enable_memory_profiling:
                    cleanup_dataframes(data)
                    del data  # Explicit deletion
                    gc.collect()  # Force garbage collection

            # Run VectorBT portfolio simulation with memory optimizations
            with memory_context("portfolio_simulation"):
                portfolio = vbt.Portfolio.from_signals(
                    close=close_prices,
                    entries=entries,
                    exits=exits,
                    init_cash=initial_capital,
                    fees=fees,
                    slippage=slippage,
                    freq="D",
                    cash_sharing=False,  # Disable cash sharing for single asset
                    call_seq="auto",  # Optimize call sequence
                    group_by=False,  # Disable grouping for memory efficiency
                    broadcast_kwargs={"wrapper_kwargs": {"freq": "D"}},
                )

        # Extract comprehensive metrics with memory tracking
        with memory_context("results_extraction"):
            metrics = self._extract_metrics(portfolio)
            trades = self._extract_trades(portfolio)

            # Get equity curve - convert to a plain dict of floats for smaller cache size
            equity_curve = {
                str(k): float(v) for k, v in portfolio.value().to_dict().items()
            }
            drawdown_series = {
                str(k): float(v) for k, v in portfolio.drawdown().to_dict().items()
            }

            # Clean up portfolio object to free memory
            if self.enable_memory_profiling:
                del portfolio
                if hasattr(close_prices, "_mgr"):
                    cleanup_dataframes(close_prices)
                del close_prices, entries, exits
                gc.collect()

        # Add memory statistics to results if profiling enabled
        result = {
            "symbol": symbol,
            "strategy": strategy_type,
            "parameters": parameters,
            "metrics": metrics,
            "trades": trades,
            "equity_curve": equity_curve,
            "drawdown_series": drawdown_series,
            "start_date": start_date,
            "end_date": end_date,
            "initial_capital": initial_capital,
        }

        if self.enable_memory_profiling:
            result["memory_stats"] = get_memory_stats()
            # Check for potential memory leaks
            if check_memory_leak(threshold_mb=50.0):
                logger.warning("Potential memory leak detected during backtesting")

        # Log business metrics for backtesting results
        performance_logger.log_business_metric(
            "backtest_total_return",
            metrics.get("total_return", 0),
            symbol=symbol,
            strategy=strategy_type,
            trade_count=metrics.get("total_trades", 0),
        )
        performance_logger.log_business_metric(
            "backtest_sharpe_ratio",
            metrics.get("sharpe_ratio", 0),
            symbol=symbol,
            strategy=strategy_type,
        )

        return result

    def _generate_signals(
        self, data: DataFrame, strategy_type: str, parameters: dict[str, Any]
    ) -> tuple[Series, Series]:
        """Generate entry and exit signals based on strategy.

        Args:
            data: Price data
            strategy_type: Strategy type
            parameters: Strategy parameters

        Returns:
            Tuple of (entry_signals, exit_signals)
        """
        # Ensure we have the required price data
        if "close" not in data.columns:
            raise ValueError(
                f"Missing 'close' column in price data. Available columns: {list(data.columns)}"
            )

        close = data["close"]

        if strategy_type in ["sma_cross", "sma_crossover"]:
            return self._sma_crossover_signals(close, parameters)
        elif strategy_type == "rsi":
            return self._rsi_signals(close, parameters)
        elif strategy_type == "macd":
            return self._macd_signals(close, parameters)
        elif strategy_type == "bollinger":
            return self._bollinger_bands_signals(close, parameters)
        elif strategy_type == "momentum":
            return self._momentum_signals(close, parameters)
        elif strategy_type == "ema_cross":
            return self._ema_crossover_signals(close, parameters)
        elif strategy_type == "mean_reversion":
            return self._mean_reversion_signals(close, parameters)
        elif strategy_type == "breakout":
            return self._breakout_signals(close, parameters)
        elif strategy_type == "volume_momentum":
            return self._volume_momentum_signals(data, parameters)
        elif strategy_type == "online_learning":
            return self._online_learning_signals(data, parameters)
        elif strategy_type == "regime_aware":
            return self._regime_aware_signals(data, parameters)
        elif strategy_type == "ensemble":
            return self._ensemble_signals(data, parameters)
        else:
            raise ValueError(f"Unknown strategy type: {strategy_type}")

    def _sma_crossover_signals(
        self, close: Series, params: dict[str, Any]
    ) -> tuple[Series, Series]:
        """Generate SMA crossover signals."""
        # Support both parameter naming conventions
        fast_period = params.get("fast_period", params.get("fast_window", 10))
        slow_period = params.get("slow_period", params.get("slow_window", 20))

        fast_sma = vbt.MA.run(close, fast_period, short_name="fast").ma.squeeze()
        slow_sma = vbt.MA.run(close, slow_period, short_name="slow").ma.squeeze()

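        # Cross-above detection: fast SMA is above the slow SMA on this bar but
        # was at or below it on the prior bar, so each signal fires once per cross.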
        entries = (fast_sma > slow_sma) & (fast_sma.shift(1) <= slow_sma.shift(1))
        exits = (fast_sma < slow_sma) & (fast_sma.shift(1) >= slow_sma.shift(1))

        return entries, exits

    def _rsi_signals(
        self, close: Series, params: dict[str, Any]
    ) -> tuple[Series, Series]:
        """Generate RSI-based signals."""
        period = params.get("period", 14)
        oversold = params.get("oversold", 30)
        overbought = params.get("overbought", 70)

        rsi = vbt.RSI.run(close, period).rsi.squeeze()

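        # Mean-reversion style entries: buy on the bar RSI crosses down through
        # the oversold level, exit on the bar it crosses up through overbought.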
        entries = (rsi < oversold) & (rsi.shift(1) >= oversold)
        exits = (rsi > overbought) & (rsi.shift(1) <= overbought)

        return entries, exits

    def _macd_signals(
        self, close: Series, params: dict[str, Any]
    ) -> tuple[Series, Series]:
        """Generate MACD signals."""
        fast_period = params.get("fast_period", 12)
        slow_period = params.get("slow_period", 26)
        signal_period = params.get("signal_period", 9)

        macd = vbt.MACD.run(
            close,
            fast_window=fast_period,
            slow_window=slow_period,
            signal_window=signal_period,
        )

        macd_line = macd.macd.squeeze()
        signal_line = macd.signal.squeeze()

        entries = (macd_line > signal_line) & (
            macd_line.shift(1) <= signal_line.shift(1)
        )
        exits = (macd_line < signal_line) & (macd_line.shift(1) >= signal_line.shift(1))

        return entries, exits

    def _bollinger_bands_signals(
        self, close: Series, params: dict[str, Any]
    ) -> tuple[Series, Series]:
        """Generate Bollinger Bands signals."""
        period = params.get("period", 20)
        std_dev = params.get("std_dev", 2)

        bb = vbt.BBANDS.run(close, window=period, alpha=std_dev)
        upper = bb.upper.squeeze()
        lower = bb.lower.squeeze()

        # Buy when price touches lower band, sell when touches upper
        entries = (close <= lower) & (close.shift(1) > lower.shift(1))
        exits = (close >= upper) & (close.shift(1) < upper.shift(1))

        return entries, exits

    def _momentum_signals(
        self, close: Series, params: dict[str, Any]
    ) -> tuple[Series, Series]:
        """Generate momentum-based signals."""
        lookback = params.get("lookback", 20)
        threshold = params.get("threshold", 0.05)

        returns = close.pct_change(lookback)

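        # Note: unlike the crossover strategies, these are level-based signals
        # that stay True while momentum persists; with from_signals' default
        # (non-accumulating) behavior, repeated True entries while already in a
        # position have no additional effect.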
        entries = returns > threshold
        exits = returns < -threshold

        return entries, exits

    def _ema_crossover_signals(
        self, close: Series, params: dict[str, Any]
    ) -> tuple[Series, Series]:
        """Generate EMA crossover signals."""
        fast_period = params.get("fast_period", 12)
        slow_period = params.get("slow_period", 26)

        fast_ema = vbt.MA.run(close, fast_period, ewm=True).ma.squeeze()
        slow_ema = vbt.MA.run(close, slow_period, ewm=True).ma.squeeze()

        entries = (fast_ema > slow_ema) & (fast_ema.shift(1) <= slow_ema.shift(1))
        exits = (fast_ema < slow_ema) & (fast_ema.shift(1) >= slow_ema.shift(1))

        return entries, exits

    def _mean_reversion_signals(
        self, close: Series, params: dict[str, Any]
    ) -> tuple[Series, Series]:
        """Generate mean reversion signals."""
        ma_period = params.get("ma_period", 20)
        entry_threshold = params.get("entry_threshold", 0.02)
        exit_threshold = params.get("exit_threshold", 0.01)

        ma = vbt.MA.run(close, ma_period).ma.squeeze()

        # Avoid division by zero in deviation calculation
        with np.errstate(divide="ignore", invalid="ignore"):
            # Wrap in a Series so the declared tuple[Series, Series] return type holds
            deviation = pd.Series(
                np.where(ma != 0, (close - ma) / ma, 0), index=close.index
            )

        entries = deviation < -entry_threshold
        exits = deviation > exit_threshold

        return entries, exits

    def _breakout_signals(
        self, close: Series, params: dict[str, Any]
    ) -> tuple[Series, Series]:
        """Generate channel breakout signals."""
        lookback = params.get("lookback", 20)
        exit_lookback = params.get("exit_lookback", 10)

        upper_channel = close.rolling(lookback).max()
        lower_channel = close.rolling(exit_lookback).min()

        entries = close > upper_channel.shift(1)
        exits = close < lower_channel.shift(1)

        return entries, exits

    def _volume_momentum_signals(
        self, data: DataFrame, params: dict[str, Any]
    ) -> tuple[Series, Series]:
        """Generate volume-weighted momentum signals."""
        momentum_period = params.get("momentum_period", 20)
        volume_period = params.get("volume_period", 20)
        momentum_threshold = params.get("momentum_threshold", 0.05)
        volume_multiplier = params.get("volume_multiplier", 1.5)

        close = data["close"]
        volume = data.get("volume")

        if volume is None:
            # Fallback to pure momentum if no volume data
            returns = close.pct_change(momentum_period)
            entries = returns > momentum_threshold
            exits = returns < -momentum_threshold
            return entries, exits

        returns = close.pct_change(momentum_period)
        avg_volume = volume.rolling(volume_period).mean()
        volume_surge = volume > (avg_volume * volume_multiplier)

        # Entry: positive momentum with volume surge
        entries = (returns > momentum_threshold) & volume_surge

        # Exit: negative momentum or volume dry up
        exits = (returns < -momentum_threshold) | (volume < avg_volume * 0.8)

        return entries, exits

    def _extract_metrics(self, portfolio: vbt.Portfolio) -> dict[str, Any]:
        """Extract comprehensive metrics from portfolio."""

        def safe_float_metric(metric_func, default=0.0):
            """Safely extract float metrics, handling None and NaN values."""
            try:
                value = metric_func()
                if value is None or np.isnan(value) or np.isinf(value):
                    return default
                return float(value)
            except (ZeroDivisionError, ValueError, TypeError):
                return default

        return {
            "total_return": safe_float_metric(portfolio.total_return),
            "annual_return": safe_float_metric(portfolio.annualized_return),
            "sharpe_ratio": safe_float_metric(portfolio.sharpe_ratio),
            "sortino_ratio": safe_float_metric(portfolio.sortino_ratio),
            "calmar_ratio": safe_float_metric(portfolio.calmar_ratio),
            "max_drawdown": safe_float_metric(portfolio.max_drawdown),
            "win_rate": safe_float_metric(lambda: portfolio.trades.win_rate()),
            "profit_factor": safe_float_metric(
                lambda: portfolio.trades.profit_factor()
            ),
            "expectancy": safe_float_metric(lambda: portfolio.trades.expectancy()),
            "total_trades": int(portfolio.trades.count()),
            "winning_trades": int(portfolio.trades.winning.count())
            if hasattr(portfolio.trades, "winning")
            else 0,
            "losing_trades": int(portfolio.trades.losing.count())
            if hasattr(portfolio.trades, "losing")
            else 0,
            "avg_win": safe_float_metric(
                lambda: portfolio.trades.winning.pnl.mean()
                if hasattr(portfolio.trades, "winning")
                and portfolio.trades.winning.count() > 0
                else None
            ),
            "avg_loss": safe_float_metric(
                lambda: portfolio.trades.losing.pnl.mean()
                if hasattr(portfolio.trades, "losing")
                and portfolio.trades.losing.count() > 0
                else None
            ),
            "best_trade": safe_float_metric(
                lambda: portfolio.trades.pnl.max()
                if portfolio.trades.count() > 0
                else None
            ),
            "worst_trade": safe_float_metric(
                lambda: portfolio.trades.pnl.min()
                if portfolio.trades.count() > 0
                else None
            ),
            "avg_duration": safe_float_metric(lambda: portfolio.trades.duration.mean()),
            "kelly_criterion": self._calculate_kelly(portfolio),
            "recovery_factor": self._calculate_recovery_factor(portfolio),
            "risk_reward_ratio": self._calculate_risk_reward(portfolio),
        }

    def _extract_trades(self, portfolio: vbt.Portfolio) -> list:
        """Extract trade records from portfolio."""
        if portfolio.trades.count() == 0:
            return []

        trades = portfolio.trades.records_readable

        # Convert each human-readable trade record into a plain dict
        trade_list = [
            {
                "entry_date": str(trade.get("Entry Timestamp", "")),
                "exit_date": str(trade.get("Exit Timestamp", "")),
                "entry_price": float(trade.get("Avg Entry Price", 0)),
                "exit_price": float(trade.get("Avg Exit Price", 0)),
                "size": float(trade.get("Size", 0)),
                "pnl": float(trade.get("PnL", 0)),
                "return": float(trade.get("Return", 0)),
                "duration": str(trade.get("Duration", "")),
            }
            for _, trade in trades.iterrows()
        ]

        return trade_list

    def _calculate_kelly(self, portfolio: vbt.Portfolio) -> float:
        """Calculate Kelly Criterion."""
        if portfolio.trades.count() == 0:
            return 0.0

        try:
            win_rate = portfolio.trades.win_rate()
            if win_rate is None or np.isnan(win_rate):
                return 0.0

            avg_win = (
                abs(portfolio.trades.winning.returns.mean() or 0)
                if hasattr(portfolio.trades, "winning")
                and portfolio.trades.winning.count() > 0
                else 0
            )
            avg_loss = (
                abs(portfolio.trades.losing.returns.mean() or 0)
                if hasattr(portfolio.trades, "losing")
                and portfolio.trades.losing.count() > 0
                else 0
            )

            # Check for division by zero and invalid values
            if avg_loss == 0 or avg_win == 0 or np.isnan(avg_win) or np.isnan(avg_loss):
                return 0.0

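            # Kelly fraction in payoff-ratio form:
            #   f* = (p * W - (1 - p) * L) / W
            # with p = win rate, W = average winning return, and L = average
            # losing return (both taken as absolute values above).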
            # Calculate Kelly with safe division
            with np.errstate(divide="ignore", invalid="ignore"):
                kelly = (win_rate * avg_win - (1 - win_rate) * avg_loss) / avg_win

            # Check if result is valid
            if np.isnan(kelly) or np.isinf(kelly):
                return 0.0

            return float(
                min(max(kelly, -1.0), 0.25)
            )  # Cap between -100% and 25% for safety

        except (ZeroDivisionError, ValueError, TypeError):
            return 0.0

    def get_memory_report(self) -> dict[str, Any]:
        """Get comprehensive memory usage report."""
        if not self.enable_memory_profiling:
            return {"message": "Memory profiling disabled"}

        return get_memory_stats()

    def clear_memory_cache(self) -> None:
        """Clear internal memory caches and force garbage collection."""
        if hasattr(vbt.settings, "caching"):
            vbt.settings.caching.clear()

        gc.collect()
        logger.info("Memory cache cleared and garbage collection performed")

    def optimize_for_memory(self, aggressive: bool = False) -> None:
        """Optimize VectorBT settings for memory efficiency.

        Args:
            aggressive: Use aggressive memory optimizations
        """
        if aggressive:
            # Aggressive memory settings
            vbt.settings.caching["enabled"] = False  # Disable caching
            vbt.settings.array_wrapper["dtype"] = np.float32  # Use float32
            logger.info("Applied aggressive memory optimizations")
        else:
            # Conservative memory settings
            vbt.settings.caching["enabled"] = True
            vbt.settings.caching["max_size"] = 100  # Limit cache size
            logger.info("Applied conservative memory optimizations")

    async def run_memory_efficient_backtest(
        self,
        symbol: str,
        strategy_type: str,
        parameters: dict[str, Any],
        start_date: str,
        end_date: str,
        initial_capital: float = 10000.0,
        fees: float = 0.001,
        slippage: float = 0.001,
        chunk_data: bool = False,
    ) -> dict[str, Any]:
        """Run backtest with maximum memory efficiency.

        Args:
            symbol: Stock symbol
            strategy_type: Strategy type
            parameters: Strategy parameters
            start_date: Start date
            end_date: End date
            initial_capital: Starting capital
            fees: Trading fees
            slippage: Slippage
            chunk_data: Whether to process data in chunks

        Returns:
            Backtest results with memory statistics
        """
        # Temporarily optimize for memory
        original_settings = {
            "caching_enabled": vbt.settings.caching.get("enabled", True),
            "array_dtype": vbt.settings.array_wrapper.get("dtype", np.float64),
        }

        try:
            self.optimize_for_memory(aggressive=True)

            if chunk_data:
                # Use chunked processing for very large datasets
                return await self._run_chunked_backtest(
                    symbol,
                    strategy_type,
                    parameters,
                    start_date,
                    end_date,
                    initial_capital,
                    fees,
                    slippage,
                )
            else:
                return await self.run_backtest(
                    symbol,
                    strategy_type,
                    parameters,
                    start_date,
                    end_date,
                    initial_capital,
                    fees,
                    slippage,
                )

        finally:
            # Restore original settings
            vbt.settings.caching["enabled"] = original_settings["caching_enabled"]
            vbt.settings.array_wrapper["dtype"] = original_settings["array_dtype"]

    async def _run_chunked_backtest(
        self,
        symbol: str,
        strategy_type: str,
        parameters: dict[str, Any],
        start_date: str,
        end_date: str,
        initial_capital: float,
        fees: float,
        slippage: float,
    ) -> dict[str, Any]:
        """Run backtest using data chunking for very large datasets."""
        from datetime import datetime, timedelta

        # Walk the date range in three-month chunks
        start_dt = datetime.strptime(start_date, "%Y-%m-%d")
        end_dt = datetime.strptime(end_date, "%Y-%m-%d")

        results = []
        current_capital = initial_capital
        current_date = start_dt

        while current_date < end_dt:
            chunk_end = min(current_date + timedelta(days=90), end_dt)  # 3-month chunks

            chunk_start_str = current_date.strftime("%Y-%m-%d")
            chunk_end_str = chunk_end.strftime("%Y-%m-%d")

            logger.debug(f"Processing chunk: {chunk_start_str} to {chunk_end_str}")

            # Run backtest for chunk
            chunk_result = await self.run_backtest(
                symbol,
                strategy_type,
                parameters,
                chunk_start_str,
                chunk_end_str,
                current_capital,
                fees,
                slippage,
            )

            results.append(chunk_result)

            # Compound this chunk's return into the starting capital for the next chunk
            chunk_return = chunk_result.get("metrics", {}).get("total_return", 0)
            current_capital = current_capital * (1 + chunk_return)

            current_date = chunk_end

        # Combine results
        return self._combine_chunked_results(results, symbol, strategy_type, parameters)

    def _combine_chunked_results(
        self,
        chunk_results: list[dict],
        symbol: str,
        strategy_type: str,
        parameters: dict[str, Any],
    ) -> dict[str, Any]:
        """Combine results from chunked backtesting."""
        if not chunk_results:
            return {}

        # Combine trades
        all_trades = []
        for chunk in chunk_results:
            all_trades.extend(chunk.get("trades", []))

        # Combine equity curves
        combined_equity = {}
        combined_drawdown = {}

        for chunk in chunk_results:
            combined_equity.update(chunk.get("equity_curve", {}))
            combined_drawdown.update(chunk.get("drawdown_series", {}))

        # Calculate combined metrics
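        # Per-chunk returns compound geometrically:
        #   total = (1 + r_1) * (1 + r_2) * ... * (1 + r_n) - 1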
        total_return = 1.0
        for chunk in chunk_results:
            chunk_return = chunk.get("metrics", {}).get("total_return", 0)
            total_return *= 1 + chunk_return
        total_return -= 1.0

        combined_metrics = {
            "total_return": total_return,
            "total_trades": len(all_trades),
            "chunks_processed": len(chunk_results),
        }

        return {
            "symbol": symbol,
            "strategy": strategy_type,
            "parameters": parameters,
            "metrics": combined_metrics,
            "trades": all_trades,
            "equity_curve": combined_equity,
            "drawdown_series": combined_drawdown,
            "processing_method": "chunked",
            "memory_stats": get_memory_stats()
            if self.enable_memory_profiling
            else None,
        }

    def _calculate_recovery_factor(self, portfolio: vbt.Portfolio) -> float:
        """Calculate recovery factor (total return / max drawdown)."""
        try:
            max_dd = portfolio.max_drawdown()
            total_return = portfolio.total_return()

            # Check for invalid values
            if (
                max_dd is None
                or np.isnan(max_dd)
                or max_dd == 0
                or total_return is None
                or np.isnan(total_return)
            ):
                return 0.0

            # Calculate with safe division
            with np.errstate(divide="ignore", invalid="ignore"):
                recovery_factor = total_return / abs(max_dd)

            # Check if result is valid
            if np.isnan(recovery_factor) or np.isinf(recovery_factor):
                return 0.0

            return float(recovery_factor)

        except (ZeroDivisionError, ValueError, TypeError):
            return 0.0

    def _calculate_risk_reward(self, portfolio: vbt.Portfolio) -> float:
        """Calculate risk-reward ratio."""
        if portfolio.trades.count() == 0:
            return 0.0

        try:
            avg_win = (
                abs(portfolio.trades.winning.pnl.mean() or 0)
                if hasattr(portfolio.trades, "winning")
                and portfolio.trades.winning.count() > 0
                else 0
            )
            avg_loss = (
                abs(portfolio.trades.losing.pnl.mean() or 0)
                if hasattr(portfolio.trades, "losing")
                and portfolio.trades.losing.count() > 0
                else 0
            )

            # Check for division by zero and invalid values
            if (
                avg_loss == 0
                or avg_win == 0
                or np.isnan(avg_win)
                or np.isnan(avg_loss)
                or np.isinf(avg_win)
                or np.isinf(avg_loss)
            ):
                return 0.0

            # Calculate with safe division
            with np.errstate(divide="ignore", invalid="ignore"):
                risk_reward = avg_win / avg_loss

            # Check if result is valid
            if np.isnan(risk_reward) or np.isinf(risk_reward):
                return 0.0

            return float(risk_reward)

        except (ZeroDivisionError, ValueError, TypeError):
            return 0.0

    @with_structured_logging(
        "optimize_parameters",
        include_performance=True,
        log_params=True,
        log_result=False,
    )
    @profile_memory(log_results=True, threshold_mb=500.0)
    async def optimize_parameters(
        self,
        symbol: str,
        strategy_type: str,
        param_grid: dict[str, list],
        start_date: str,
        end_date: str,
        optimization_metric: str = "sharpe_ratio",
        initial_capital: float = 10000.0,
        top_n: int = 10,
        use_chunking: bool = True,
    ) -> dict[str, Any]:
        """Optimize strategy parameters using memory-efficient grid search.

        Args:
            symbol: Stock symbol
            strategy_type: Strategy type
            param_grid: Parameter grid for optimization
            start_date: Start date
            end_date: End date
            optimization_metric: Metric to optimize
            initial_capital: Starting capital
            top_n: Number of top results to return
            use_chunking: Use chunking for large parameter grids

        Returns:
            Optimization results with best parameters
        """
 974 |         with memory_context("parameter_optimization"):
 975 |             # Fetch data once
 976 |             data = await self.get_historical_data(symbol, start_date, end_date)
 977 | 
 978 |             # Create parameter combinations
 979 |             param_combos = vbt.utils.params.create_param_combs(param_grid)
 980 |             total_combos = len(param_combos)
 981 | 
 982 |             logger.info(
 983 |                 f"Optimizing {total_combos} parameter combinations for {symbol}"
 984 |             )
 985 | 
 986 |             # Pre-convert data for optimization with memory efficiency
 987 |             close_prices = data["close"].astype(np.float32)
 988 | 
 989 |             # Check if we should use chunking for large parameter grids
 990 |             if use_chunking and total_combos > 100:
 991 |                 logger.info(f"Using chunked processing for {total_combos} combinations")
 992 |                 chunk_size = min(50, max(10, total_combos // 10))  # Adaptive chunk size
 993 |                 results = self._optimize_parameters_chunked(
 994 |                     data,
 995 |                     close_prices,
 996 |                     strategy_type,
 997 |                     param_combos,
 998 |                     optimization_metric,
 999 |                     initial_capital,
1000 |                     chunk_size,
1001 |                 )
1002 |             else:
1003 |                 results = []
1004 |                 for i, params in enumerate(param_combos):
1005 |                     try:
1006 |                         with memory_context(f"param_combo_{i}"):
1007 |                             # Generate signals for this parameter set
1008 |                             entries, exits = self._generate_signals(
1009 |                                 data, strategy_type, params
1010 |                             )
1011 | 
1012 |                             # Convert to boolean arrays for memory efficiency
1013 |                             entries = entries.astype(bool)
1014 |                             exits = exits.astype(bool)
1015 | 
1016 |                             # Run backtest with optimizations
1017 |                             portfolio = vbt.Portfolio.from_signals(
1018 |                                 close=close_prices,
1019 |                                 entries=entries,
1020 |                                 exits=exits,
1021 |                                 init_cash=initial_capital,
1022 |                                 fees=0.001,
1023 |                                 freq="D",
1024 |                                 cash_sharing=False,
1025 |                                 call_seq="auto",
1026 |                                 group_by=False,  # Memory optimization
1027 |                             )
1028 | 
1029 |                             # Get optimization metric
1030 |                             metric_value = self._get_metric_value(
1031 |                                 portfolio, optimization_metric
1032 |                             )
1033 | 
1034 |                             results.append(
1035 |                                 {
1036 |                                     "parameters": params,
1037 |                                     optimization_metric: metric_value,
1038 |                                     "total_return": float(portfolio.total_return()),
1039 |                                     "max_drawdown": float(portfolio.max_drawdown()),
1040 |                                     "total_trades": int(portfolio.trades.count()),
1041 |                                 }
1042 |                             )
1043 | 
1044 |                             # Clean up intermediate objects
1045 |                             del portfolio, entries, exits
1046 |                             if i % 20 == 0:  # Periodic cleanup
1047 |                                 gc.collect()
1048 | 
1049 |                     except Exception as e:
1050 |                         logger.debug(f"Skipping invalid parameter combination {i}: {e}")
1051 |                         continue
1052 | 
1053 |             # Clean up data objects
1054 |             if self.enable_memory_profiling:
1055 |                 if hasattr(data, "_mgr"):
1056 |                     cleanup_dataframes(data, close_prices)
1057 |                 # release references before forcing collection
1058 |                 del data, close_prices
1059 |                 gc.collect()
1060 | 
1061 |         # Sort by optimization metric
1062 |         results.sort(key=lambda x: x[optimization_metric], reverse=True)
1063 | 
1064 |         # Get top N results
1065 |         top_results = results[:top_n]
1066 | 
1067 |         result = {
1068 |             "symbol": symbol,
1069 |             "strategy": strategy_type,
1070 |             "optimization_metric": optimization_metric,
1071 |             "best_parameters": top_results[0]["parameters"] if top_results else {},
1072 |             "best_metric_value": top_results[0][optimization_metric]
1073 |             if top_results
1074 |             else 0,
1075 |             "top_results": top_results,
1076 |             "total_combinations_tested": total_combos,
1077 |             "valid_combinations": len(results),
1078 |         }
1079 | 
1080 |         if self.enable_memory_profiling:
1081 |             result["memory_stats"] = get_memory_stats()
1082 | 
1083 |         return result
1084 | 
1085 |     def _optimize_parameters_chunked(
1086 |         self,
1087 |         data: DataFrame,
1088 |         close_prices: Series,
1089 |         strategy_type: str,
1090 |         param_combos: list,
1091 |         optimization_metric: str,
1092 |         initial_capital: float,
1093 |         chunk_size: int,
1094 |     ) -> list[dict]:
1095 |         """Optimize parameters using chunked processing for memory efficiency."""
1096 |         results = []
1097 |         total_chunks = len(param_combos) // chunk_size + (
1098 |             1 if len(param_combos) % chunk_size else 0
1099 |         )
1100 | 
1101 |         for chunk_idx in range(0, len(param_combos), chunk_size):
1102 |             chunk_params = param_combos[chunk_idx : chunk_idx + chunk_size]
1103 |             logger.debug(
1104 |                 f"Processing chunk {chunk_idx // chunk_size + 1}/{total_chunks}"
1105 |             )
1106 | 
1107 |             with memory_context(f"param_chunk_{chunk_idx // chunk_size}"):
1108 |                 for params in chunk_params:
1109 |                     try:
1110 |                         # Generate signals for this parameter set
1111 |                         entries, exits = self._generate_signals(
1112 |                             data, strategy_type, params
1113 |                         )
1114 | 
1115 |                         # Convert to boolean arrays for memory efficiency
1116 |                         entries = entries.astype(bool)
1117 |                         exits = exits.astype(bool)
1118 | 
1119 |                         # Run backtest with optimizations
1120 |                         portfolio = vbt.Portfolio.from_signals(
1121 |                             close=close_prices,
1122 |                             entries=entries,
1123 |                             exits=exits,
1124 |                             init_cash=initial_capital,
1125 |                             fees=0.001,
1126 |                             freq="D",
1127 |                             cash_sharing=False,
1128 |                             call_seq="auto",
1129 |                             group_by=False,
1130 |                         )
1131 | 
1132 |                         # Get optimization metric
1133 |                         metric_value = self._get_metric_value(
1134 |                             portfolio, optimization_metric
1135 |                         )
1136 | 
1137 |                         results.append(
1138 |                             {
1139 |                                 "parameters": params,
1140 |                                 optimization_metric: metric_value,
1141 |                                 "total_return": float(portfolio.total_return()),
1142 |                                 "max_drawdown": float(portfolio.max_drawdown()),
1143 |                                 "total_trades": int(portfolio.trades.count()),
1144 |                             }
1145 |                         )
1146 | 
1147 |                         # Clean up intermediate objects
1148 |                         del portfolio, entries, exits
1149 | 
1150 |                     except Exception as e:
1151 |                         logger.debug(f"Skipping invalid parameter combination: {e}")
1152 |                         continue
1153 | 
1154 |             # Force garbage collection after each chunk
1155 |             gc.collect()
1156 | 
1157 |         return results
1158 | 
1159 |     def _get_metric_value(self, portfolio: vbt.Portfolio, metric_name: str) -> float:
1160 |         """Get specific metric value from portfolio."""
1161 |         metric_map = {
1162 |             "total_return": portfolio.total_return,
1163 |             "sharpe_ratio": portfolio.sharpe_ratio,
1164 |             "sortino_ratio": portfolio.sortino_ratio,
1165 |             "calmar_ratio": portfolio.calmar_ratio,
1166 |             "max_drawdown": lambda: -portfolio.max_drawdown(),
1167 |             "win_rate": lambda: portfolio.trades.win_rate() or 0,
1168 |             "profit_factor": lambda: portfolio.trades.profit_factor() or 0,
1169 |         }
1170 | 
1171 |         if metric_name not in metric_map:
1172 |             raise ValueError(f"Unknown metric: {metric_name}")
1173 | 
1174 |         try:
1175 |             value = metric_map[metric_name]()
1176 | 
1177 |             # Check for invalid values
1178 |             if value is None or np.isnan(value) or np.isinf(value):
1179 |                 return 0.0
1180 | 
1181 |             return float(value)
1182 | 
1183 |         except (ZeroDivisionError, ValueError, TypeError):
1184 |             return 0.0
1185 | 
1186 |     def _online_learning_signals(
1187 |         self, data: DataFrame, params: dict[str, Any]
1188 |     ) -> tuple[Series, Series]:
1189 |         """Generate online learning ML strategy signals.
1190 | 
1191 |         Simple implementation using momentum with adaptive thresholds.
1192 |         """
1193 |         lookback = params.get("lookback", 20)
1194 |         learning_rate = params.get("learning_rate", 0.01)
1195 | 
1196 |         close = data["close"]
1197 |         returns = close.pct_change(lookback)
1198 | 
1199 |         # Adaptive threshold based on rolling statistics
1200 |         rolling_mean = returns.rolling(window=lookback).mean()
1201 |         rolling_std = returns.rolling(window=lookback).std()
1202 | 
1203 |         # Dynamic entry/exit thresholds
1204 |         entry_threshold = rolling_mean + learning_rate * rolling_std
1205 |         exit_threshold = rolling_mean - learning_rate * rolling_std
1206 | 
1207 |         # Generate signals
1208 |         entries = returns > entry_threshold
1209 |         exits = returns < exit_threshold
1210 | 
1211 |         # Fill NaN values
1212 |         entries = entries.fillna(False)
1213 |         exits = exits.fillna(False)
1214 | 
1215 |         return entries, exits
1216 | 
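    # Numeric example of the adaptive band above: with lookback=20 and
    # learning_rate=0.01, an entry fires when the 20-bar return exceeds its
    # own 20-bar rolling mean by 0.01 rolling standard deviations; the exit
    # mirrors this on the downside. A larger learning_rate widens the band
    # and therefore trades less often.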
1217 |     def _regime_aware_signals(
1218 |         self, data: DataFrame, params: dict[str, Any]
1219 |     ) -> tuple[Series, Series]:
1220 |         """Generate regime-aware strategy signals.
1221 | 
1222 |         Detects market regime and applies appropriate strategy.
1223 |         """
1224 |         regime_window = params.get("regime_window", 50)
1225 |         threshold = params.get("threshold", 0.02)
1226 | 
1227 |         close = data["close"]
1228 | 
1229 |         # Calculate regime indicators
1230 |         returns = close.pct_change()
1231 |         volatility = returns.rolling(window=regime_window).std()
1232 |         trend_strength = close.rolling(window=regime_window).apply(
1233 |             lambda x: (x[-1] - x[0]) / x[0] if x[0] != 0 else 0, raw=True
1234 |         )  # raw=True: the lambda indexes positionally, so it needs ndarrays
1235 | 
1236 |         # Determine regime: trending vs ranging
1237 |         is_trending = abs(trend_strength) > threshold
1238 | 
1239 |         # Trend following signals
1240 |         sma_short = close.rolling(window=regime_window // 2).mean()
1241 |         sma_long = close.rolling(window=regime_window).mean()
1242 |         trend_entries = (close > sma_long) & (sma_short > sma_long)
1243 |         trend_exits = (close < sma_long) & (sma_short < sma_long)
1244 | 
1245 |         # Mean reversion signals
1246 |         bb_upper = sma_long + 2 * volatility
1247 |         bb_lower = sma_long - 2 * volatility
1248 |         reversion_entries = close < bb_lower
1249 |         reversion_exits = close > bb_upper
1250 | 
1251 |         # Combine based on regime
1252 |         entries = (is_trending & trend_entries) | (~is_trending & reversion_entries)
1253 |         exits = (is_trending & trend_exits) | (~is_trending & reversion_exits)
1254 | 
1255 |         # Fill NaN values
1256 |         entries = entries.fillna(False)
1257 |         exits = exits.fillna(False)
1258 | 
1259 |         return entries, exits
1260 | 
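    # Numeric example of the regime switch above: with regime_window=50 and
    # threshold=0.02, a net move of +3% across the 50-bar window gives
    # |trend_strength| = 0.03 > 0.02, so the SMA trend rules apply; a flat
    # window routes to the 2-sigma band mean-reversion rules instead.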
1261 |     def _ensemble_signals(
1262 |         self, data: DataFrame, params: dict[str, Any]
1263 |     ) -> tuple[Series, Series]:
1264 |         """Generate ensemble strategy signals.
1265 | 
1266 |         Combines multiple strategies with voting.
1267 |         """
1268 |         fast_period = params.get("fast_period", 10)
1269 |         slow_period = params.get("slow_period", 20)
1270 |         rsi_period = params.get("rsi_period", 14)
1271 | 
1272 |         close = data["close"]
1273 | 
1274 |         # Strategy 1: SMA Crossover
1275 |         fast_sma = close.rolling(window=fast_period).mean()
1276 |         slow_sma = close.rolling(window=slow_period).mean()
1277 |         sma_entries = (fast_sma > slow_sma) & (fast_sma.shift(1) <= slow_sma.shift(1))
1278 |         sma_exits = (fast_sma < slow_sma) & (fast_sma.shift(1) >= slow_sma.shift(1))
1279 | 
1280 |         # Strategy 2: RSI
1281 |         delta = close.diff()
1282 |         gain = (delta.where(delta > 0, 0)).rolling(window=rsi_period).mean()
1283 |         loss = (-delta.where(delta < 0, 0)).rolling(window=rsi_period).mean()
1284 |         rs = gain / loss.replace(0, 1e-10)
1285 |         rsi = 100 - (100 / (1 + rs))
1286 |         rsi_entries = (rsi < 30) & (rsi.shift(1) >= 30)
1287 |         rsi_exits = (rsi > 70) & (rsi.shift(1) <= 70)
1288 | 
1289 |         # Strategy 3: Momentum
1290 |         momentum = close.pct_change(20)
1291 |         mom_entries = momentum > 0.05
1292 |         mom_exits = momentum < -0.05
1293 | 
1294 |         # Ensemble voting - at least 2 out of 3 strategies agree
1295 |         entry_votes = (
1296 |             sma_entries.astype(int) + rsi_entries.astype(int) + mom_entries.astype(int)
1297 |         )
1298 |         exit_votes = (
1299 |             sma_exits.astype(int) + rsi_exits.astype(int) + mom_exits.astype(int)
1300 |         )
1301 | 
1302 |         entries = entry_votes >= 2
1303 |         exits = exit_votes >= 2
1304 | 
1305 |         # Fill NaN values
1306 |         entries = entries.fillna(False)
1307 |         exits = exits.fillna(False)
1308 | 
1309 |         return entries, exits
1310 | 
```
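
For orientation, here is a minimal usage sketch of the grid-search optimizer above. The engine class name (`VectorBTEngine`) and the `"sma_cross"` strategy key are illustrative assumptions; `optimize_parameters`, its signature, and the result keys come from the listing.

```python
import asyncio


async def main() -> None:
    # Hypothetical: whatever engine class hosts optimize_parameters() above.
    engine = VectorBTEngine()

    result = await engine.optimize_parameters(
        symbol="AAPL",
        strategy_type="sma_cross",  # assumed strategy key
        param_grid={
            "fast_period": [5, 10, 20],
            "slow_period": [50, 100, 200],
        },
        start_date="2023-01-01",
        end_date="2024-01-01",
        optimization_metric="sharpe_ratio",
        top_n=5,
    )
    # 9 combinations, so the sequential path runs; grids past 100 combinations
    # switch to chunked processing with chunk_size = min(50, max(10, total // 10)).
    print(result["best_parameters"], result["best_metric_value"])


asyncio.run(main())
```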

--------------------------------------------------------------------------------
/maverick_mcp/providers/stock_data.py:
--------------------------------------------------------------------------------

```python
   1 | """
   2 | Enhanced stock data provider with SQLAlchemy integration and screening recommendations.
   3 | Provides comprehensive stock data retrieval with database caching and maverick screening.
   4 | """
   5 | 
   6 | # Suppress specific pyright warnings for pandas operations
   7 | # pyright: reportOperatorIssue=false
   8 | 
   9 | import logging
  10 | from datetime import UTC, datetime, timedelta
  11 | 
  12 | import pandas as pd
  13 | import pandas_market_calendars as mcal
  14 | import pytz
  15 | import yfinance as yf
  16 | from dotenv import load_dotenv
  17 | from sqlalchemy import text
  18 | from sqlalchemy.orm import Session
  19 | 
  20 | from maverick_mcp.data.models import (
  21 |     MaverickBearStocks,
  22 |     MaverickStocks,
  23 |     PriceCache,
  24 |     SessionLocal,
  25 |     Stock,
  26 |     SupplyDemandBreakoutStocks,
  27 |     bulk_insert_price_data,
  28 |     get_latest_maverick_screening,
  29 | )
  30 | from maverick_mcp.data.session_management import get_db_session_read_only
  31 | from maverick_mcp.utils.circuit_breaker_decorators import (
  32 |     with_stock_data_circuit_breaker,
  33 | )
  34 | from maverick_mcp.utils.yfinance_pool import get_yfinance_pool
  35 | 
  36 | # Load environment variables
  37 | load_dotenv()
  38 | 
  39 | # Configure logging
  40 | logging.basicConfig(
  41 |     level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  42 | )
  43 | logger = logging.getLogger("maverick_mcp.stock_data")
  44 | 
  45 | 
  46 | class EnhancedStockDataProvider:
  47 |     """
  48 |     Enhanced provider for stock data with database caching and screening recommendations.
  49 |     """
  50 | 
  51 |     def __init__(self, db_session: Session | None = None):
  52 |         """
  53 |         Initialize the stock data provider.
  54 | 
  55 |         Args:
  56 |             db_session: Optional database session for dependency injection.
  57 |                        If not provided, will get sessions as needed.
  58 |         """
  59 |         self.timeout = 30
  60 |         self.max_retries = 3
  61 |         self.cache_days = 1  # Cache data for 1 day by default
  62 |         # Initialize NYSE calendar for US stock market
  63 |         self.market_calendar = mcal.get_calendar("NYSE")
  64 |         self._db_session = db_session
  65 |         # Initialize yfinance connection pool
  66 |         self._yf_pool = get_yfinance_pool()
  67 |         if db_session:
  68 |             # Test the provided session
  69 |             self._test_db_connection_with_session(db_session)
  70 |         else:
  71 |             # Test creating a new session
  72 |             self._test_db_connection()
  73 | 
  74 |     def _test_db_connection(self):
  75 |         """Test database connection on initialization."""
  76 |         try:
  77 |             # Use read-only context manager for automatic session management
  78 |             with get_db_session_read_only() as session:
  79 |                 # Try a simple query
  80 |                 result = session.execute(text("SELECT 1"))
  81 |                 result.fetchone()
  82 |                 logger.info("Database connection successful")
  83 |         except Exception as e:
  84 |             logger.warning(
  85 |                 f"Database connection test failed: {e}. Caching will be disabled."
  86 |             )
  87 | 
  88 |     def _test_db_connection_with_session(self, session: Session):
  89 |         """Test provided database session."""
  90 |         try:
  91 |             # Try a simple query
  92 |             result = session.execute(text("SELECT 1"))
  93 |             result.fetchone()
  94 |             logger.info("Database session test successful")
  95 |         except Exception as e:
  96 |             logger.warning(
  97 |                 f"Database session test failed: {e}. Caching may not work properly."
  98 |             )
  99 | 
 100 |     def _get_data_with_smart_cache(
 101 |         self, symbol: str, start_date: str, end_date: str, interval: str
 102 |     ) -> pd.DataFrame:
 103 |         """
 104 |         Get stock data using smart caching strategy.
 105 | 
 106 |         This method:
 107 |         1. Gets all available data from cache
 108 |         2. Identifies missing date ranges
 109 |         3. Fetches only missing data from yfinance
 110 |         4. Combines and returns the complete dataset
 111 | 
 112 |         Args:
 113 |             symbol: Stock ticker symbol
 114 |             start_date: Start date in YYYY-MM-DD format
 115 |             end_date: End date in YYYY-MM-DD format
 116 |             interval: Data interval (only '1d' is cached)
 117 | 
 118 |         Returns:
 119 |             DataFrame with complete stock data
 120 |         """
 121 |         symbol = symbol.upper()
 122 |         session, should_close = self._get_db_session()
 123 | 
 124 |         try:
 125 |             # Step 1: Get ALL available cached data for the date range
 126 |             logger.info(f"Checking cache for {symbol} from {start_date} to {end_date}")
 127 |             cached_df = self._get_cached_data_flexible(
 128 |                 session, symbol, start_date, end_date
 129 |             )
 130 | 
 131 |             # Convert dates for comparison - ensure timezone-naive for consistency
 132 |             start_dt = pd.to_datetime(start_date).tz_localize(None)
 133 |             end_dt = pd.to_datetime(end_date).tz_localize(None)
 134 | 
 135 |             # Step 2: Determine what data we need
 136 |             if cached_df is not None and not cached_df.empty:
 137 |                 logger.info(f"Found {len(cached_df)} cached records for {symbol}")
 138 | 
 139 |                 # Check if we have all the data we need - ensure timezone-naive for comparison
 140 |                 cached_start = pd.to_datetime(cached_df.index.min()).tz_localize(None)
 141 |                 cached_end = pd.to_datetime(cached_df.index.max()).tz_localize(None)
 142 | 
 143 |                 # Identify missing ranges
 144 |                 missing_ranges = []
 145 | 
 146 |                 # Missing data at the beginning?
 147 |                 if start_dt < cached_start:
 148 |                     # Get trading days in the missing range
 149 |                     missing_start_trading = self._get_trading_days(
 150 |                         start_dt, cached_start - timedelta(days=1)
 151 |                     )
 152 |                     if len(missing_start_trading) > 0:
 153 |                         # Only request data if there are trading days
 154 |                         missing_ranges.append(
 155 |                             (
 156 |                                 missing_start_trading[0].strftime("%Y-%m-%d"),
 157 |                                 missing_start_trading[-1].strftime("%Y-%m-%d"),
 158 |                             )
 159 |                         )
 160 | 
 161 |                 # Missing recent data?
 162 |                 if end_dt > cached_end:
 163 |                     # Check if there are any trading days after our cached data
 164 |                     if self._is_trading_day_between(cached_end, end_dt):
 165 |                         # Get the actual trading days we need
 166 |                         missing_end_trading = self._get_trading_days(
 167 |                             cached_end + timedelta(days=1), end_dt
 168 |                         )
 169 |                         if len(missing_end_trading) > 0:
 170 |                             missing_ranges.append(
 171 |                                 (
 172 |                                     missing_end_trading[0].strftime("%Y-%m-%d"),
 173 |                                     missing_end_trading[-1].strftime("%Y-%m-%d"),
 174 |                                 )
 175 |                             )
 176 | 
 177 |                 # If no missing data, return cached data
 178 |                 if not missing_ranges:
 179 |                     logger.info(
 180 |                         f"Cache hit! Returning {len(cached_df)} cached records for {symbol}"
 181 |                     )
 182 |                     # Filter to requested range - ensure index is timezone-naive
 183 |                     cached_df.index = pd.to_datetime(cached_df.index).tz_localize(None)
 184 |                     mask = (cached_df.index >= start_dt) & (cached_df.index <= end_dt)
 185 |                     return cached_df.loc[mask]
 186 | 
 187 |                 # Step 3: Fetch only missing data
 188 |                 logger.info(f"Cache partial hit. Missing ranges: {missing_ranges}")
 189 |                 all_dfs = [cached_df]
 190 | 
 191 |                 for miss_start, miss_end in missing_ranges:
 192 |                     logger.info(
 193 |                         f"Fetching missing data for {symbol} from {miss_start} to {miss_end}"
 194 |                     )
 195 |                     missing_df = self._fetch_stock_data_from_yfinance(
 196 |                         symbol, miss_start, miss_end, None, interval
 197 |                     )
 198 |                     if not missing_df.empty:
 199 |                         all_dfs.append(missing_df)
 200 |                         # Cache the new data
 201 |                         self._cache_price_data(session, symbol, missing_df)
 202 | 
 203 |                 # Combine all data
 204 |                 combined_df = pd.concat(all_dfs).sort_index()
 205 |                 # Remove any duplicates (keep first)
 206 |                 combined_df = combined_df[~combined_df.index.duplicated(keep="first")]
 207 | 
 208 |                 # Filter to requested range - ensure index is timezone-naive
 209 |                 combined_df.index = pd.to_datetime(combined_df.index).tz_localize(None)
 210 |                 mask = (combined_df.index >= start_dt) & (combined_df.index <= end_dt)
 211 |                 return combined_df.loc[mask]
 212 | 
 213 |             else:
 214 |                 # No cached data, fetch everything but only for trading days
 215 |                 logger.info(
 216 |                     f"No cached data found for {symbol}, fetching from yfinance"
 217 |                 )
 218 | 
 219 |                 # Adjust dates to trading days
 220 |                 trading_days = self._get_trading_days(start_date, end_date)
 221 |                 if len(trading_days) == 0:
 222 |                     logger.warning(
 223 |                         f"No trading days found between {start_date} and {end_date}"
 224 |                     )
 225 |                     return pd.DataFrame(
 226 |                         columns=[  # type: ignore[arg-type]
 227 |                             "Open",
 228 |                             "High",
 229 |                             "Low",
 230 |                             "Close",
 231 |                             "Volume",
 232 |                             "Dividends",
 233 |                             "Stock Splits",
 234 |                         ]
 235 |                     )
 236 | 
 237 |                 # Fetch data only for the trading day range
 238 |                 fetch_start = trading_days[0].strftime("%Y-%m-%d")
 239 |                 fetch_end = trading_days[-1].strftime("%Y-%m-%d")
 240 | 
 241 |                 logger.info(
 242 |                     f"Fetching data for trading days: {fetch_start} to {fetch_end}"
 243 |                 )
 244 |                 df = self._fetch_stock_data_from_yfinance(
 245 |                     symbol, fetch_start, fetch_end, None, interval
 246 |                 )
 247 |                 if not df.empty:
 248 |                     # Ensure stock exists and cache the data
 249 |                     self._get_or_create_stock(session, symbol)
 250 |                     self._cache_price_data(session, symbol, df)
 251 |                 return df
 252 | 
 253 |         finally:
 254 |             if should_close:
 255 |                 session.close()
 256 | 
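    # Worked example of the merge logic above (dates illustrative): with
    # cached rows covering 2024-02-01..2024-03-01 and a request for
    # 2024-01-15..2024-03-15, two gaps are detected -- one before and one
    # after the cached window -- each clipped to actual NYSE trading days.
    # Only those ranges hit yfinance; the frames are then concatenated,
    # de-duplicated with keep="first", and filtered to the requested span.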
 257 |     def _get_cached_data_flexible(
 258 |         self, session: Session, symbol: str, start_date: str, end_date: str
 259 |     ) -> pd.DataFrame | None:
 260 |         """
 261 |         Get cached data with flexible date range.
 262 | 
 263 |         Unlike the strict version, this returns whatever cached data exists
 264 |         within the requested range, even if incomplete.
 265 | 
 266 |         Args:
 267 |             session: Database session
 268 |             symbol: Stock ticker symbol (will be uppercased)
 269 |             start_date: Start date in YYYY-MM-DD format
 270 |             end_date: End date in YYYY-MM-DD format
 271 | 
 272 |         Returns:
 273 |             DataFrame with available cached data or None
 274 |         """
 275 |         try:
 276 |             # Get whatever data exists in the range
 277 |             df = PriceCache.get_price_data(session, symbol, start_date, end_date)
 278 | 
 279 |             if df.empty:
 280 |                 return None
 281 | 
 282 |             # Add expected columns for compatibility
 283 |             for col in ["Dividends", "Stock Splits"]:
 284 |                 if col not in df.columns:
 285 |                     df[col] = 0.0
 286 | 
 287 |             # Ensure column names match yfinance format
 288 |             column_mapping = {
 289 |                 "open": "Open",
 290 |                 "high": "High",
 291 |                 "low": "Low",
 292 |                 "close": "Close",
 293 |                 "volume": "Volume",
 294 |             }
 295 |             df.rename(columns=column_mapping, inplace=True)
 296 | 
 297 |             # Ensure proper data types to match yfinance
 298 |             # Convert Decimal to float for price columns
 299 |             for col in ["Open", "High", "Low", "Close"]:
 300 |                 if col in df.columns:
 301 |                     df[col] = pd.to_numeric(df[col], errors="coerce").astype("float64")
 302 | 
 303 |             # Convert volume to int
 304 |             if "Volume" in df.columns:
 305 |                 df["Volume"] = (
 306 |                     pd.to_numeric(df["Volume"], errors="coerce")
 307 |                     .fillna(0)
 308 |                     .astype("int64")
 309 |                 )
 310 | 
 311 |             # Ensure index is timezone-naive for consistency
 312 |             df.index = pd.to_datetime(df.index).tz_localize(None)
 313 | 
 314 |             return df
 315 | 
 316 |         except Exception as e:
 317 |             logger.error(f"Error getting flexible cached data: {e}")
 318 |             return None
 319 | 
 320 |     def _is_trading_day_between(
 321 |         self, start_date: pd.Timestamp, end_date: pd.Timestamp
 322 |     ) -> bool:
 323 |         """
 324 |         Check if there's a trading day between two dates using market calendar.
 325 | 
 326 |         Args:
 327 |             start_date: Start date
 328 |             end_date: End date
 329 | 
 330 |         Returns:
 331 |             True if there's a trading day between the dates
 332 |         """
 333 |         # Add one day to start since we're checking "between"
 334 |         check_start = start_date + timedelta(days=1)
 335 | 
 336 |         if check_start > end_date:
 337 |             return False
 338 | 
 339 |         # Get trading days in the range
 340 |         trading_days = self._get_trading_days(check_start, end_date)
 341 |         return len(trading_days) > 0
 342 | 
 343 |     def _get_trading_days(self, start_date, end_date) -> pd.DatetimeIndex:
 344 |         """
 345 |         Get all trading days between start and end dates.
 346 | 
 347 |         Args:
 348 |             start_date: Start date (can be string or datetime)
 349 |             end_date: End date (can be string or datetime)
 350 | 
 351 |         Returns:
 352 |             DatetimeIndex of trading days (timezone-naive)
 353 |         """
 354 |         # Ensure dates are datetime objects (timezone-naive)
 355 |         start_date = pd.to_datetime(start_date).tz_localize(None)
 356 |         end_date = pd.to_datetime(end_date).tz_localize(None)
 363 | 
 364 |         # Get valid trading days from market calendar
 365 |         schedule = self.market_calendar.schedule(
 366 |             start_date=start_date, end_date=end_date
 367 |         )
 368 |         # Return timezone-naive index
 369 |         return schedule.index.tz_localize(None)
 370 | 
 371 |     def _get_last_trading_day(self, date) -> pd.Timestamp:
 372 |         """
 373 |         Get the last trading day on or before the given date.
 374 | 
 375 |         Args:
 376 |             date: Date to check (can be string or datetime)
 377 | 
 378 |         Returns:
 379 |             Last trading day as pd.Timestamp
 380 |         """
 381 |         if isinstance(date, str):
 382 |             date = pd.to_datetime(date)
 383 | 
 384 |         # Check if the date itself is a trading day
 385 |         if self._is_trading_day(date):
 386 |             return date
 387 | 
 388 |         # Otherwise, find the previous trading day
 389 |         for i in range(1, 11):  # Look back up to 10 days
 390 |             check_date = date - timedelta(days=i)
 391 |             if self._is_trading_day(check_date):
 392 |                 return check_date
 393 | 
 394 |         # Fallback to the date itself if no trading day found
 395 |         return date
 396 | 
 397 |     def _is_trading_day(self, date) -> bool:
 398 |         """
 399 |         Check if a specific date is a trading day.
 400 | 
 401 |         Args:
 402 |             date: Date to check
 403 | 
 404 |         Returns:
 405 |             True if it's a trading day
 406 |         """
 407 |         if isinstance(date, str):
 408 |             date = pd.to_datetime(date)
 409 | 
 410 |         schedule = self.market_calendar.schedule(start_date=date, end_date=date)
 411 |         return len(schedule) > 0
 412 | 
 413 |     def _get_db_session(self) -> tuple[Session, bool]:
 414 |         """
 415 |         Get a database session.
 416 | 
 417 |         Returns:
 418 |             Tuple of (session, should_close) where should_close indicates
 419 |             whether the caller should close the session.
 420 |         """
 421 |         # Use injected session if available - should NOT be closed
 422 |         if self._db_session:
 423 |             return self._db_session, False
 424 | 
 425 |         # Otherwise, create a new session using session factory - should be closed
 426 |         try:
 427 |             session = SessionLocal()
 428 |             return session, True
 429 |         except Exception as e:
 430 |             logger.error(f"Failed to get database session: {e}", exc_info=True)
 431 |             raise
 432 | 
 433 |     def _get_or_create_stock(self, session: Session, symbol: str) -> Stock:
 434 |         """
 435 |         Get or create a stock in the database.
 436 | 
 437 |         Args:
 438 |             session: Database session
 439 |             symbol: Stock ticker symbol
 440 | 
 441 |         Returns:
 442 |             Stock object
 443 |         """
 444 |         stock = Stock.get_or_create(session, symbol)
 445 | 
 446 |         # Try to update stock info if it's missing
 447 |         company_name = getattr(stock, "company_name", None)
 448 |         if company_name is None or company_name == "":
 449 |             try:
 450 |                 # Use connection pool for info retrieval
 451 |                 info = self._yf_pool.get_info(symbol)
 452 | 
 453 |                 stock.company_name = info.get("longName", info.get("shortName"))
 454 |                 stock.sector = info.get("sector")
 455 |                 stock.industry = info.get("industry")
 456 |                 stock.exchange = info.get("exchange")
 457 |                 stock.currency = info.get("currency", "USD")
 458 |                 stock.country = info.get("country")
 459 | 
 460 |                 session.commit()
 461 |             except Exception as e:
 462 |                 logger.warning(f"Could not update stock info for {symbol}: {e}")
 463 |                 session.rollback()
 464 | 
 465 |         return stock
 466 | 
 467 |     def _get_cached_price_data(
 468 |         self, session: Session, symbol: str, start_date: str, end_date: str
 469 |     ) -> pd.DataFrame | None:
 470 |         """
 471 |         DEPRECATED: Use _get_data_with_smart_cache instead.
 472 | 
 473 |         This method is kept for backward compatibility but is no longer used
 474 |         in the main flow. The new smart caching approach provides better
 475 |         database prioritization.
 476 |         """
 477 |         logger.warning("Using deprecated _get_cached_price_data method")
 478 |         return self._get_cached_data_flexible(
 479 |             session, symbol.upper(), start_date, end_date
 480 |         )
 481 | 
 482 |     def _cache_price_data(
 483 |         self, session: Session, symbol: str, df: pd.DataFrame
 484 |     ) -> None:
 485 |         """
 486 |         Cache price data in the database.
 487 | 
 488 |         Args:
 489 |             session: Database session
 490 |             symbol: Stock ticker symbol
 491 |             df: DataFrame with price data
 492 |         """
 493 |         try:
 494 |             if df.empty:
 495 |                 return
 496 | 
 497 |             # Ensure symbol is uppercase to match database
 498 |             symbol = symbol.upper()
 499 | 
 500 |             # Ensure proper column names
 501 |             column_mapping = {
 502 |                 "Open": "open",
 503 |                 "High": "high",
 504 |                 "Low": "low",
 505 |                 "Close": "close",
 506 |                 "Volume": "volume",
 507 |             }
 508 |             # Rename returns a new DataFrame, avoiding the need for an explicit copy first
 509 |             cache_df = df.rename(columns=column_mapping)
 510 | 
 511 |             # Log DataFrame info for debugging
 512 |             logger.debug(
 513 |                 f"DataFrame columns before caching: {cache_df.columns.tolist()}"
 514 |             )
 515 |             logger.debug(f"DataFrame shape: {cache_df.shape}")
 516 |             logger.debug(f"DataFrame index type: {type(cache_df.index)}")
 517 |             if not cache_df.empty:
 518 |                 logger.debug(f"Sample row: {cache_df.iloc[0].to_dict()}")
 519 | 
 520 |             # Insert data
 521 |             count = bulk_insert_price_data(session, symbol, cache_df)
 522 |             if count == 0:
 523 |                 logger.info(
 524 |                     f"No new records cached for {symbol} (data may already exist)"
 525 |                 )
 526 |             else:
 527 |                 logger.info(f"Cached {count} new price records for {symbol}")
 528 | 
 529 |         except Exception as e:
 530 |             logger.error(f"Error caching price data for {symbol}: {e}", exc_info=True)
 531 |             session.rollback()
 532 | 
 533 |     def get_stock_data(
 534 |         self,
 535 |         symbol: str,
 536 |         start_date: str | None = None,
 537 |         end_date: str | None = None,
 538 |         period: str | None = None,
 539 |         interval: str = "1d",
 540 |         use_cache: bool = True,
 541 |     ) -> pd.DataFrame:
 542 |         """
 543 |         Fetch stock data with database caching support.
 544 | 
 545 |         This method prioritizes cached data from the database and only fetches
 546 |         missing data from yfinance when necessary.
 547 | 
 548 |         Args:
 549 |             symbol: Stock ticker symbol
 550 |             start_date: Start date in YYYY-MM-DD format
 551 |             end_date: End date in YYYY-MM-DD format
 552 |             period: Alternative to start/end dates (e.g., '1d', '5d', '1mo', '3mo', '1y', etc.)
 553 |             interval: Data interval ('1d', '1wk', '1mo', '1m', '5m', etc.)
 554 |             use_cache: Whether to use cached data if available
 555 | 
 556 |         Returns:
 557 |             DataFrame with stock data
 558 |         """
 559 |         # For non-daily intervals or periods, always fetch fresh data
 560 |         if interval != "1d" or period:
 561 |             return self._fetch_stock_data_from_yfinance(
 562 |                 symbol, start_date, end_date, period, interval
 563 |             )
 564 | 
 565 |         # Set default dates if not provided
 566 |         if start_date is None:
 567 |             start_date = (datetime.now(UTC) - timedelta(days=365)).strftime("%Y-%m-%d")
 568 |         if end_date is None:
 569 |             end_date = datetime.now(UTC).strftime("%Y-%m-%d")
 570 | 
 571 |         # For daily data, adjust end date to last trading day if it's not a trading day
 572 |         # This prevents unnecessary cache misses on weekends/holidays
 573 |         if interval == "1d" and use_cache:
 574 |             end_dt = pd.to_datetime(end_date)
 575 |             if not self._is_trading_day(end_dt):
 576 |                 last_trading = self._get_last_trading_day(end_dt)
 577 |                 logger.debug(
 578 |                     f"Adjusting end date from {end_date} to last trading day {last_trading.strftime('%Y-%m-%d')}"
 579 |                 )
 580 |                 end_date = last_trading.strftime("%Y-%m-%d")
 581 | 
 582 |         # If cache is disabled, fetch directly from yfinance
 583 |         if not use_cache:
 584 |             logger.info(f"Cache disabled, fetching from yfinance for {symbol}")
 585 |             return self._fetch_stock_data_from_yfinance(
 586 |                 symbol, start_date, end_date, period, interval
 587 |             )
 588 | 
 589 |         # Try a smarter caching approach
 590 |         try:
 591 |             return self._get_data_with_smart_cache(
 592 |                 symbol, start_date, end_date, interval
 593 |             )
 594 |         except Exception as e:
 595 |             logger.warning(f"Smart cache failed, falling back to yfinance: {e}")
 596 |             return self._fetch_stock_data_from_yfinance(
 597 |                 symbol, start_date, end_date, period, interval
 598 |             )
 599 | 
 600 |     async def get_stock_data_async(
 601 |         self,
 602 |         symbol: str,
 603 |         start_date: str | None = None,
 604 |         end_date: str | None = None,
 605 |         period: str | None = None,
 606 |         interval: str = "1d",
 607 |         use_cache: bool = True,
 608 |     ) -> pd.DataFrame:
 609 |         """
 610 |         Async version of get_stock_data for parallel processing.
 611 | 
 612 |         This method wraps the synchronous get_stock_data method to provide
 613 |         an async interface for use in parallel backtesting operations.
 614 | 
 615 |         Args:
 616 |             symbol: Stock ticker symbol
 617 |             start_date: Start date in YYYY-MM-DD format
 618 |             end_date: End date in YYYY-MM-DD format
 619 |             period: Alternative to start/end dates (e.g., '1d', '5d', '1mo', '3mo', '1y', etc.)
 620 |             interval: Data interval ('1d', '1wk', '1mo', '1m', '5m', etc.)
 621 |             use_cache: Whether to use cached data if available
 622 | 
 623 |         Returns:
 624 |             DataFrame with stock data
 625 |         """
 626 |         import asyncio
 627 |         import functools
 628 | 
 629 |         # Run the synchronous method in a thread pool to avoid blocking
 630 |         loop = asyncio.get_running_loop()
 631 | 
 632 |         # Use functools.partial to create a callable with all arguments
 633 |         sync_method = functools.partial(
 634 |             self.get_stock_data,
 635 |             symbol=symbol,
 636 |             start_date=start_date,
 637 |             end_date=end_date,
 638 |             period=period,
 639 |             interval=interval,
 640 |             use_cache=use_cache,
 641 |         )
 642 | 
 643 |         # Execute in thread pool to avoid blocking the event loop
 644 |         return await loop.run_in_executor(None, sync_method)
 645 | 
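    # Sketch (assuming `provider` is an EnhancedStockDataProvider instance):
    # because each call runs in the default executor, several symbols can be
    # fetched concurrently from async code, e.g.
    #
    #     frames = await asyncio.gather(
    #         *(provider.get_stock_data_async(s, "2024-01-01", "2024-06-30")
    #           for s in ("AAPL", "MSFT", "NVDA"))
    #     )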
 646 |     @with_stock_data_circuit_breaker(
 647 |         use_fallback=False
 648 |     )  # Fallback handled at higher level
 649 |     def _fetch_stock_data_from_yfinance(
 650 |         self,
 651 |         symbol: str,
 652 |         start_date: str | None = None,
 653 |         end_date: str | None = None,
 654 |         period: str | None = None,
 655 |         interval: str = "1d",
 656 |     ) -> pd.DataFrame:
 657 |         """
 658 |         Fetch stock data from yfinance with circuit breaker protection.
 659 | 
 660 |         Note: Circuit breaker is applied with use_fallback=False because
 661 |         fallback strategies are handled at the get_stock_data level.
 662 |         """
 663 |         logger.info(
 664 |             f"Fetching data from yfinance for {symbol} - Start: {start_date}, End: {end_date}, Period: {period}, Interval: {interval}"
 665 |         )
 666 |         # Use connection pool for better performance
 667 |         # The pool handles session management and retries internally
 668 | 
 669 |         # Use the optimized connection pool
 670 |         df = self._yf_pool.get_history(
 671 |             symbol=symbol,
 672 |             start=start_date,
 673 |             end=end_date,
 674 |             period=period,
 675 |             interval=interval,
 676 |         )
 677 | 
 678 |         # Check if dataframe is empty or if required columns are missing
 679 |         if df.empty:
 680 |             logger.warning(f"Empty dataframe returned for {symbol}")
 681 |             return pd.DataFrame(
 682 |                 columns=["Open", "High", "Low", "Close", "Volume"]  # type: ignore[arg-type]
 683 |             )
 684 | 
 685 |         # Ensure all expected columns exist
 686 |         for col in ["Open", "High", "Low", "Close", "Volume"]:
 687 |             if col not in df.columns:
 688 |                 logger.warning(
 689 |                     f"Column {col} missing from data for {symbol}, adding empty column"
 690 |                 )
 691 |                 # Use appropriate default values
 692 |                 if col == "Volume":
 693 |                     df[col] = 0
 694 |                 else:
 695 |                     df[col] = 0.0
 696 | 
 697 |         df.index.name = "Date"
 698 |         return df
 699 | 
 700 |     def get_maverick_recommendations(
 701 |         self, limit: int = 20, min_score: int | None = None
 702 |     ) -> list[dict]:
 703 |         """
 704 |         Get top Maverick stock recommendations from the database.
 705 | 
 706 |         Args:
 707 |             limit: Maximum number of recommendations
 708 |             min_score: Minimum combined score filter
 709 | 
 710 |         Returns:
 711 |             List of stock recommendations with details
 712 |         """
 713 |         session, should_close = self._get_db_session()
 714 |         try:
 715 |             # Build query with filtering at database level
 716 |             query = session.query(MaverickStocks)
 717 | 
 718 |             # Apply min_score filter in the query if specified
 719 |             if min_score:
 720 |                 query = query.filter(MaverickStocks.combined_score >= min_score)
 721 | 
 722 |             # Order by score and limit results
 723 |             stocks = (
 724 |                 query.order_by(MaverickStocks.combined_score.desc()).limit(limit).all()
 725 |             )
 726 | 
 727 |             # Process results with list comprehension for better performance
 728 |             recommendations = [
 729 |                 {
 730 |                     **stock.to_dict(),
 731 |                     "recommendation_type": "maverick_bullish",
 732 |                     "reason": self._generate_maverick_reason(stock),
 733 |                 }
 734 |                 for stock in stocks
 735 |             ]
 736 | 
 737 |             return recommendations
 738 |         except Exception as e:
 739 |             logger.error(f"Error getting maverick recommendations: {e}")
 740 |             return []
 741 |         finally:
 742 |             if should_close:
 743 |                 session.close()
 744 | 
 745 |     def get_maverick_bear_recommendations(
 746 |         self, limit: int = 20, min_score: int | None = None
 747 |     ) -> list[dict]:
 748 |         """
 749 |         Get top Maverick bear stock recommendations from the database.
 750 | 
 751 |         Args:
 752 |             limit: Maximum number of recommendations
 753 |             min_score: Minimum score filter
 754 | 
 755 |         Returns:
 756 |             List of bear stock recommendations with details
 757 |         """
 758 |         session, should_close = self._get_db_session()
 759 |         try:
 760 |             # Build query with filtering at database level
 761 |             query = session.query(MaverickBearStocks)
 762 | 
 763 |             # Apply min_score filter in the query if specified
 764 |             if min_score:
 765 |                 query = query.filter(MaverickBearStocks.score >= min_score)
 766 | 
 767 |             # Order by score and limit results
 768 |             stocks = query.order_by(MaverickBearStocks.score.desc()).limit(limit).all()
 769 | 
 770 |             # Process results with list comprehension for better performance
 771 |             recommendations = [
 772 |                 {
 773 |                     **stock.to_dict(),
 774 |                     "recommendation_type": "maverick_bearish",
 775 |                     "reason": self._generate_bear_reason(stock),
 776 |                 }
 777 |                 for stock in stocks
 778 |             ]
 779 | 
 780 |             return recommendations
 781 |         except Exception as e:
 782 |             logger.error(f"Error getting bear recommendations: {e}")
 783 |             return []
 784 |         finally:
 785 |             if should_close:
 786 |                 session.close()
 787 | 
 788 |     def get_supply_demand_breakout_recommendations(
 789 |         self, limit: int = 20, min_momentum_score: float | None = None
 790 |     ) -> list[dict]:
 791 |         """
 792 |         Get stocks showing supply/demand breakout patterns from accumulation phases.
 793 | 
 794 |         Args:
 795 |             limit: Maximum number of recommendations
 796 |             min_momentum_score: Minimum momentum score filter
 797 | 
 798 |         Returns:
 799 |             List of supply/demand breakout recommendations with market structure analysis
 800 |         """
 801 |         session, should_close = self._get_db_session()
 802 |         try:
 803 |             # Build query with all filters at database level
 804 |             query = session.query(SupplyDemandBreakoutStocks).filter(
 805 |                 # Supply/demand breakout criteria: price above all moving averages (demand zone)
 806 |                 SupplyDemandBreakoutStocks.close_price
 807 |                 > SupplyDemandBreakoutStocks.sma_50,
 808 |                 SupplyDemandBreakoutStocks.close_price
 809 |                 > SupplyDemandBreakoutStocks.sma_150,
 810 |                 SupplyDemandBreakoutStocks.close_price
 811 |                 > SupplyDemandBreakoutStocks.sma_200,
 812 |                 # Moving average alignment indicates accumulation structure
 813 |                 SupplyDemandBreakoutStocks.sma_50 > SupplyDemandBreakoutStocks.sma_150,
 814 |                 SupplyDemandBreakoutStocks.sma_150 > SupplyDemandBreakoutStocks.sma_200,
 815 |             )
 816 | 
 817 |             # Apply min_momentum_score filter if specified
 818 |             if min_momentum_score:
 819 |                 query = query.filter(
 820 |                     SupplyDemandBreakoutStocks.momentum_score >= min_momentum_score
 821 |                 )
 822 | 
 823 |             # Order by momentum score and limit results
 824 |             stocks = (
 825 |                 query.order_by(SupplyDemandBreakoutStocks.momentum_score.desc())
 826 |                 .limit(limit)
 827 |                 .all()
 828 |             )
 829 | 
 830 |             # Process results with list comprehension for better performance
 831 |             recommendations = [
 832 |                 {
 833 |                     **stock.to_dict(),
 834 |                     "recommendation_type": "supply_demand_breakout",
 835 |                     "reason": self._generate_supply_demand_reason(stock),
 836 |                 }
 837 |                 for stock in stocks
 838 |             ]
 839 | 
 840 |             return recommendations
 841 |         except Exception as e:
 842 |             logger.error(f"Error getting supply/demand breakout recommendations: {e}")
 843 |             return []
 844 |         finally:
 845 |             if should_close:
 846 |                 session.close()
 847 | 
 848 |     def get_all_screening_recommendations(self) -> dict[str, list[dict]]:
 849 |         """
 850 |         Get all screening recommendations in one call.
 851 | 
 852 |         Returns:
 853 |             Dictionary with all screening types and their recommendations
 854 |         """
 855 |         try:
 856 |             results = get_latest_maverick_screening()
 857 | 
 858 |             # Add recommendation reasons
 859 |             for stock in results.get("maverick_stocks", []):
 860 |                 stock["recommendation_type"] = "maverick_bullish"
 861 |                 stock["reason"] = self._generate_maverick_reason_from_dict(stock)
 862 | 
 863 |             for stock in results.get("maverick_bear_stocks", []):
 864 |                 stock["recommendation_type"] = "maverick_bearish"
 865 |                 stock["reason"] = self._generate_bear_reason_from_dict(stock)
 866 | 
 867 |             for stock in results.get("supply_demand_breakouts", []):
 868 |                 stock["recommendation_type"] = "supply_demand_breakout"
 869 |                 stock["reason"] = self._generate_supply_demand_reason_from_dict(stock)
 870 | 
 871 |             return results
 872 |         except Exception as e:
 873 |             logger.error(f"Error getting all screening recommendations: {e}")
 874 |             return {
 875 |                 "maverick_stocks": [],
 876 |                 "maverick_bear_stocks": [],
 877 |                 "supply_demand_breakouts": [],
 878 |             }
 879 | 
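    # Illustrative shape of the returned mapping (keys from the code above,
    # row contents abridged):
    #
    #     {
    #         "maverick_stocks": [{..., "recommendation_type": "maverick_bullish", "reason": "..."}],
    #         "maverick_bear_stocks": [{..., "recommendation_type": "maverick_bearish", "reason": "..."}],
    #         "supply_demand_breakouts": [{..., "recommendation_type": "supply_demand_breakout", "reason": "..."}],
    #     }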
 880 |     def _generate_maverick_reason(self, stock: MaverickStocks) -> str:
 881 |         """Generate recommendation reason for Maverick stock."""
 882 |         reasons = []
 883 | 
 884 |         combined_score = getattr(stock, "combined_score", None)
 885 |         if combined_score is not None and combined_score >= 90:
 886 |             reasons.append("Exceptional combined score")
 887 |         elif combined_score is not None and combined_score >= 80:
 888 |             reasons.append("Strong combined score")
 889 | 
 890 |         momentum_score = getattr(stock, "momentum_score", None)
 891 |         if momentum_score is not None and momentum_score >= 90:
 892 |             reasons.append("outstanding relative strength")
 893 |         elif momentum_score is not None and momentum_score >= 80:
 894 |             reasons.append("strong relative strength")
 895 | 
 896 |         pat = getattr(stock, "pat", None)
 897 |         if pat is not None and pat != "":
 898 |             reasons.append(f"{pat} pattern detected")
 899 | 
 900 |         consolidation = getattr(stock, "consolidation", None)
 901 |         if consolidation is not None and consolidation == "yes":
 902 |             reasons.append("consolidation characteristics")
 903 | 
 904 |         sqz = getattr(stock, "sqz", None)
 905 |         if sqz is not None and sqz != "":
 906 |             reasons.append(f"squeeze indicator: {sqz}")
 907 | 
 908 |         return (
 909 |             "Bullish setup with " + ", ".join(reasons)
 910 |             if reasons
 911 |             else "Strong technical setup"
 912 |         )
 913 | 
 914 |     def _generate_bear_reason(self, stock: MaverickBearStocks) -> str:
 915 |         """Generate recommendation reason for bear stock."""
 916 |         reasons = []
 917 | 
 918 |         score = getattr(stock, "score", None)
 919 |         if score is not None and score >= 90:
 920 |             reasons.append("exceptional bear score")
 921 |         elif score is not None and score >= 80:
 922 |             reasons.append("strong bear score")
 923 | 
 924 |         momentum_score = getattr(stock, "momentum_score", None)
 925 |         if momentum_score is not None and momentum_score <= 30:
 926 |             reasons.append("weak relative strength")
 927 | 
 928 |         rsi_14 = getattr(stock, "rsi_14", None)
 929 |         if rsi_14 is not None and rsi_14 <= 30:
 930 |             reasons.append("oversold RSI")
 931 | 
 932 |         atr_contraction = getattr(stock, "atr_contraction", False)
 933 |         if atr_contraction is True:
 934 |             reasons.append("ATR contraction")
 935 | 
 936 |         big_down_vol = getattr(stock, "big_down_vol", False)
 937 |         if big_down_vol is True:
 938 |             reasons.append("heavy selling volume")
 939 | 
 940 |         return (
 941 |             "Bearish setup with " + ", ".join(reasons)
 942 |             if reasons
 943 |             else "Weak technical setup"
 944 |         )
 945 | 
 946 |     def _generate_supply_demand_reason(self, stock: SupplyDemandBreakoutStocks) -> str:
 947 |         """Generate recommendation reason for supply/demand breakout stock."""
 948 |         reasons = ["Supply/demand breakout from accumulation"]
 949 | 
 950 |         momentum_score = getattr(stock, "momentum_score", None)
 951 |         if momentum_score is not None and momentum_score >= 90:
 952 |             reasons.append("exceptional relative strength")
 953 |         elif momentum_score is not None and momentum_score >= 80:
 954 |             reasons.append("strong relative strength")
 955 | 
 956 |         reasons.append("price above all major moving averages")
 957 |         reasons.append("moving averages in proper alignment")
 958 | 
 959 |         pat = getattr(stock, "pat", None)
 960 |         if pat is not None and pat != "":
 961 |             reasons.append(f"{pat} pattern")
 962 | 
 963 |         return reasons[0] + " with " + ", ".join(reasons[1:])
 964 | 
 965 |     def _generate_maverick_reason_from_dict(self, stock: dict) -> str:
 966 |         """Generate recommendation reason for Maverick stock from dict."""
 967 |         reasons = []
 968 | 
 969 |         score = stock.get("combined_score", 0)
 970 |         if score >= 90:
 971 |             reasons.append("exceptional combined score")
 972 |         elif score >= 80:
 973 |             reasons.append("strong combined score")
 974 | 
 975 |         momentum = stock.get("momentum_score", 0)
 976 |         if momentum >= 90:
 977 |             reasons.append("outstanding relative strength")
 978 |         elif momentum >= 80:
 979 |             reasons.append("strong relative strength")
 980 | 
 981 |         if stock.get("pattern"):
 982 |             reasons.append(f"{stock['pattern']} pattern detected")
 983 | 
 984 |         if stock.get("consolidation") == "yes":
 985 |             reasons.append("consolidation characteristics")
 986 | 
 987 |         if stock.get("squeeze"):
 988 |             reasons.append(f"squeeze indicator: {stock['squeeze']}")
 989 | 
 990 |         return (
 991 |             "Bullish setup with " + ", ".join(reasons)
 992 |             if reasons
 993 |             else "Strong technical setup"
 994 |         )
 995 | 
 996 |     def _generate_bear_reason_from_dict(self, stock: dict) -> str:
 997 |         """Generate recommendation reason for bear stock from dict."""
 998 |         reasons = []
 999 | 
1000 |         score = stock.get("score", 0)
1001 |         if score >= 90:
1002 |             reasons.append("exceptional bear score")
1003 |         elif score >= 80:
1004 |             reasons.append("strong bear score")
1005 | 
1006 |         momentum = stock.get("momentum_score", 100)
1007 |         if momentum <= 30:
1008 |             reasons.append("weak relative strength")
1009 | 
1010 |         rsi = stock.get("rsi_14")
1011 |         if rsi is not None and rsi <= 30:
1012 |             reasons.append("oversold RSI")
1013 | 
1014 |         if stock.get("atr_contraction"):
1015 |             reasons.append("ATR contraction")
1016 | 
1017 |         if stock.get("big_down_vol"):
1018 |             reasons.append("heavy selling volume")
1019 | 
1020 |         return (
1021 |             "Bearish setup with " + ", ".join(reasons)
1022 |             if reasons
1023 |             else "Weak technical setup"
1024 |         )
1025 | 
1026 |     def _generate_supply_demand_reason_from_dict(self, stock: dict) -> str:
1027 |         """Generate recommendation reason for supply/demand breakout stock from dict."""
1028 |         reasons = ["Supply/demand breakout from accumulation"]
1029 | 
1030 |         momentum = stock.get("momentum_score", 0)
1031 |         if momentum >= 90:
1032 |             reasons.append("exceptional relative strength")
1033 |         elif momentum >= 80:
1034 |             reasons.append("strong relative strength")
1035 | 
1036 |         reasons.append("price above all major moving averages")
1037 |         reasons.append("moving averages in proper alignment")
1038 | 
1039 |         if stock.get("pattern"):
1040 |             reasons.append(f"{stock['pattern']} pattern")
1041 | 
1042 |         return reasons[0] + " with " + ", ".join(reasons[1:])
1043 | 
1044 |     # Keep all original methods for backward compatibility
1045 |     @with_stock_data_circuit_breaker(use_fallback=False)
1046 |     def get_stock_info(self, symbol: str) -> dict:
1047 |         """Get detailed stock information from yfinance with circuit breaker protection."""
1048 |         # Use connection pool for better performance
1049 |         return self._yf_pool.get_info(symbol)
1050 | 
1051 |     def get_realtime_data(self, symbol):
1052 |         """Get the latest real-time data for a symbol using yfinance."""
1053 |         try:
1054 |             # Use connection pool for real-time data
1055 |             data = self._yf_pool.get_history(symbol, period="1d")
1056 | 
1057 |             if data.empty:
1058 |                 return None
1059 | 
1060 |             latest = data.iloc[-1]
1061 | 
1062 |             # Get previous close for change calculation
1063 |             info = self._yf_pool.get_info(symbol)
1064 |             prev_close = info.get("previousClose", None)
1065 |             if prev_close is None:
1066 |                 # Try to get from 2-day history
1067 |                 data_2d = self._yf_pool.get_history(symbol, period="2d")
1068 |                 if len(data_2d) > 1:
1069 |                     prev_close = data_2d.iloc[0]["Close"]
1070 |                 else:
1071 |                     prev_close = latest["Close"]
1072 | 
1073 |             # Calculate change
1074 |             price = latest["Close"]
1075 |             change = price - prev_close
1076 |             change_percent = (change / prev_close) * 100 if prev_close != 0 else 0
1077 | 
1078 |             return {
1079 |                 "symbol": symbol,
1080 |                 "price": round(price, 2),
1081 |                 "change": round(change, 2),
1082 |                 "change_percent": round(change_percent, 2),
1083 |                 "volume": int(latest["Volume"]),
1084 |                 "timestamp": data.index[-1],
1085 |                 "timestamp_display": data.index[-1].strftime("%Y-%m-%d %H:%M:%S"),
1086 |                 "is_real_time": False,  # yfinance data has some delay
1087 |             }
1088 |         except Exception as e:
1089 |             logger.error(f"Error fetching realtime data for {symbol}: {str(e)}")
1090 |             return None
1091 | 
1092 |     def get_all_realtime_data(self, symbols):
1093 |         """
1094 |         Get the latest real-time data for multiple symbols efficiently.
1095 |         Optimized to use batch downloading to reduce network requests.
1096 |         """
1097 |         if not symbols:
1098 |             return {}
1099 | 
1100 |         results = {}
1101 |         try:
1102 |             # Batch download 5 days of data to ensure we have previous close
1103 |             # Using group_by='ticker' makes the structure predictable: Level 0 = Ticker, Level 1 = Price Type
1104 |             batch_df = self._yf_pool.batch_download(
1105 |                 symbols=symbols, period="5d", interval="1d", group_by="ticker"
1106 |             )
1107 | 
1108 |             # Check if we got any data
1109 |             if batch_df.empty:
1110 |                 logger.warning("Batch download returned empty DataFrame")
1111 |                 return {}
1112 | 
1113 |             # Handle both MultiIndex (multiple symbols) and single symbol cases
1114 |             is_multi_ticker = isinstance(batch_df.columns, pd.MultiIndex)
1115 | 
1116 |             for symbol in symbols:
1117 |                 try:
1118 |                     symbol_data = None
1119 | 
1120 |                     if is_multi_ticker:
1121 |                         if symbol in batch_df.columns:
1122 |                             symbol_data = batch_df[symbol]
1123 |                     elif len(symbols) == 1 and symbols[0] == symbol:
1124 |                         # Single symbol case, columns are just price types
1125 |                         symbol_data = batch_df
1126 | 
1127 |                     if symbol_data is None or symbol_data.empty:
1128 |                         logger.debug(f"No batch data for {symbol}, falling back to individual fetch")
1129 |                         # Fallback to individual fetch
1130 |                         data = self.get_realtime_data(symbol)
1131 |                         if data:
1132 |                             results[symbol] = data
1133 |                         continue
1134 | 
1135 |                     # Drop NaNs (e.g., if one stock has missing data for a day)
1136 |                     symbol_data = symbol_data.dropna(how="all")
1137 | 
1138 |                     if len(symbol_data) < 1:
1139 |                         continue
1140 | 
1141 |                     latest = symbol_data.iloc[-1]
1142 |                     price = float(latest["Close"])
1143 |                     volume = int(latest["Volume"])
1144 | 
1145 |                     # Calculate change
1146 |                     if len(symbol_data) > 1:
1147 |                         prev_close = float(symbol_data.iloc[-2]["Close"])
1148 |                         change = price - prev_close
1149 |                         change_percent = (
1150 |                             (change / prev_close) * 100 if prev_close != 0 else 0
1151 |                         )
1152 |                     else:
1153 |                         change = 0.0
1154 |                         change_percent = 0.0
1155 | 
1156 |                     results[symbol] = {
1157 |                         "symbol": symbol,
1158 |                         "price": round(price, 2),
1159 |                         "change": round(change, 2),
1160 |                         "change_percent": round(change_percent, 2),
1161 |                         "volume": volume,
1162 |                         "timestamp": symbol_data.index[-1],
1163 |                         "timestamp_display": symbol_data.index[-1].strftime(
1164 |                             "%Y-%m-%d %H:%M:%S"
1165 |                         ),
1166 |                         "is_real_time": False,  # yfinance data has some delay
1167 |                     }
1168 | 
1169 |                 except Exception as e:
1170 |                     logger.error(f"Error processing batch data for {symbol}: {e}")
1171 |                     # Try fallback
1172 |                     data = self.get_realtime_data(symbol)
1173 |                     if data:
1174 |                         results[symbol] = data
1175 | 
1176 |         except Exception as e:
1177 |             logger.error(f"Batch download failed: {e}")
1178 |             # Fallback to iterative approach
1179 |             for symbol in symbols:
1180 |                 data = self.get_realtime_data(symbol)
1181 |                 if data:
1182 |                     results[symbol] = data
1183 | 
1184 |         return results
1185 | 
1186 |     def is_market_open(self) -> bool:
1187 |         """Check if the US stock market is currently open (ignores market holidays)."""
1188 |         now = datetime.now(pytz.timezone("US/Eastern"))
1189 | 
1190 |         # Check if it's a weekday
1191 |         if now.weekday() >= 5:  # 5 and 6 are Saturday and Sunday
1192 |             return False
1193 | 
1194 |         # Check if it's between 9:30 AM and 4:00 PM Eastern Time
1195 |         market_open = now.replace(hour=9, minute=30, second=0, microsecond=0)
1196 |         market_close = now.replace(hour=16, minute=0, second=0, microsecond=0)
1197 | 
1198 |         return market_open <= now <= market_close
1199 | 
1200 |     def get_news(self, symbol: str, limit: int = 10) -> pd.DataFrame:
1201 |         """Get news for a stock from yfinance."""
1202 |         try:
1203 |             ticker = yf.Ticker(symbol)
1204 |             news = ticker.news
1205 | 
1206 |             if not news:
1207 |                 return pd.DataFrame(
1208 |                     columns=[  # type: ignore[arg-type]
1209 |                         "title",
1210 |                         "publisher",
1211 |                         "link",
1212 |                         "providerPublishTime",
1213 |                         "type",
1214 |                     ]
1215 |                 )
1216 | 
1217 |             df = pd.DataFrame(news[:limit])
1218 | 
1219 |             # Convert timestamp to datetime
1220 |             if "providerPublishTime" in df.columns:
1221 |                 df["providerPublishTime"] = pd.to_datetime(
1222 |                     df["providerPublishTime"], unit="s"
1223 |                 )
1224 | 
1225 |             return df
1226 |         except Exception as e:
1227 |             logger.error(f"Error fetching news for {symbol}: {str(e)}")
1228 |             return pd.DataFrame(
1229 |                 columns=["title", "publisher", "link", "providerPublishTime", "type"]  # type: ignore[arg-type]
1230 |             )
1231 | 
1232 |     def get_earnings(self, symbol: str) -> dict:
1233 |         """Get earnings information for a stock."""
1234 |         try:
1235 |             ticker = yf.Ticker(symbol)
1236 |             return {
1237 |                 "earnings": ticker.earnings.to_dict()
1238 |                 if hasattr(ticker, "earnings") and not ticker.earnings.empty
1239 |                 else {},
1240 |                 "earnings_dates": ticker.earnings_dates.to_dict()
1241 |                 if hasattr(ticker, "earnings_dates") and not ticker.earnings_dates.empty
1242 |                 else {},
1243 |                 "earnings_trend": ticker.earnings_trend
1244 |                 if hasattr(ticker, "earnings_trend")
1245 |                 else {},
1246 |             }
1247 |         except Exception as e:
1248 |             logger.error(f"Error fetching earnings for {symbol}: {str(e)}")
1249 |             return {"earnings": {}, "earnings_dates": {}, "earnings_trend": {}}
1250 | 
1251 |     def get_recommendations(self, symbol: str) -> pd.DataFrame:
1252 |         """Get analyst recommendations for a stock."""
1253 |         try:
1254 |             ticker = yf.Ticker(symbol)
1255 |             recommendations = ticker.recommendations
1256 | 
1257 |             if recommendations is None or recommendations.empty:
1258 |                 return pd.DataFrame(columns=["firm", "toGrade", "fromGrade", "action"])  # type: ignore[arg-type]
1259 | 
1260 |             return recommendations
1261 |         except Exception as e:
1262 |             logger.error(f"Error fetching recommendations for {symbol}: {str(e)}")
1263 |             return pd.DataFrame(columns=["firm", "toGrade", "fromGrade", "action"])  # type: ignore[arg-type]
1264 | 
1265 |     def is_etf(self, symbol: str) -> bool:
1266 |         """Check if a given symbol is an ETF."""
1267 |         try:
1268 |             stock = yf.Ticker(symbol)
1269 |             # Check if quoteType exists and is ETF
1270 |             if "quoteType" in stock.info:
1271 |                 return stock.info["quoteType"].upper() == "ETF"  # type: ignore[no-any-return]
1272 |             # Fallback check for common ETF identifiers
1273 |             return any(
1274 |                 [
1275 |                     symbol.endswith(("ETF", "FUND")),
1276 |                     symbol
1277 |                     in [
1278 |                         "SPY",
1279 |                         "QQQ",
1280 |                         "IWM",
1281 |                         "DIA",
1282 |                         "XLB",
1283 |                         "XLE",
1284 |                         "XLF",
1285 |                         "XLI",
1286 |                         "XLK",
1287 |                         "XLP",
1288 |                         "XLU",
1289 |                         "XLV",
1290 |                         "XLY",
1291 |                         "XLC",
1292 |                         "XLRE",
1293 |                         "XME",
1294 |                     ],
1295 |                     "ETF" in stock.info.get("longName", "").upper(),
1296 |                 ]
1297 |             )
1298 |         except Exception as e:
1299 |             logger.error(f"Error checking if {symbol} is ETF: {e}")
1300 |             return False
1301 | 
1302 | 
1303 | # Maintain backward compatibility
1304 | StockDataProvider = EnhancedStockDataProvider
1305 | 
```
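
The batch path in `get_all_realtime_data` leans on the column layout `yfinance` produces when `group_by="ticker"` is requested: with several symbols the columns form a two-level MultiIndex (ticker, then price field), while a single symbol comes back with flat columns. A minimal standalone sketch of that shape handling (an editor addition, assuming network access and the public `yf.download` call; the provider itself routes the request through its connection pool):

```python
import pandas as pd
import yfinance as yf

symbols = ["AAPL", "MSFT"]
# group_by="ticker" keeps the layout predictable: level 0 = ticker, level 1 = field
batch_df = yf.download(symbols, period="5d", interval="1d", group_by="ticker")

if isinstance(batch_df.columns, pd.MultiIndex):
    # Multi-symbol layout: slice by ticker first, then price field
    for symbol in symbols:
        closes = batch_df[symbol]["Close"].dropna()
        if not closes.empty:
            print(symbol, round(float(closes.iloc[-1]), 2))
else:
    # Single-symbol layout: columns are just the price fields
    print(symbols[0], round(float(batch_df["Close"].iloc[-1]), 2))
```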

--------------------------------------------------------------------------------
/tests/fixtures/orchestration_fixtures.py:
--------------------------------------------------------------------------------

```python
   1 | """
   2 | Comprehensive test fixtures for orchestration testing.
   3 | 
   4 | Provides realistic mock data for LLM responses, API responses, market data,
   5 | and test scenarios for the SupervisorAgent and DeepResearchAgent orchestration system.
   6 | """
   7 | 
   8 | import json
   9 | from datetime import datetime, timedelta
  10 | from typing import Any
  11 | from unittest.mock import MagicMock
  12 | 
  13 | import numpy as np
  14 | import pandas as pd
  15 | import pytest
  16 | from langchain_core.messages import AIMessage
  17 | 
  18 | # ==============================================================================
  19 | # MOCK LLM RESPONSES
  20 | # ==============================================================================
  21 | 
  22 | 
  23 | class MockLLMResponses:
  24 |     """Realistic LLM responses for various orchestration scenarios."""
  25 | 
  26 |     @staticmethod
  27 |     def query_classification_response(
  28 |         category: str = "stock_investment_decision",
  29 |         confidence: float = 0.85,
  30 |         parallel_capable: bool = True,
  31 |     ) -> str:
  32 |         """Mock query classification response from LLM."""
  33 |         routing_agents_map = {
  34 |             "market_screening": ["market"],
  35 |             "technical_analysis": ["technical"],
  36 |             "stock_investment_decision": ["market", "technical"],
  37 |             "portfolio_analysis": ["market", "technical"],
  38 |             "deep_research": ["research"],
  39 |             "company_research": ["research"],
  40 |             "sentiment_analysis": ["research"],
  41 |             "risk_assessment": ["market", "technical"],
  42 |         }
  43 | 
  44 |         return json.dumps(
  45 |             {
  46 |                 "category": category,
  47 |                 "confidence": confidence,
  48 |                 "required_agents": routing_agents_map.get(category, ["market"]),
  49 |                 "complexity": "moderate" if confidence > 0.7 else "complex",
  50 |                 "estimated_execution_time_ms": 45000
  51 |                 if category == "deep_research"
  52 |                 else 30000,
  53 |                 "parallel_capable": parallel_capable,
  54 |                 "reasoning": f"Query classified as {category} based on content analysis and intent detection.",
  55 |             }
  56 |         )
  57 | 
  58 |     @staticmethod
  59 |     def result_synthesis_response(
  60 |         persona: str = "moderate",
  61 |         agents_used: list[str] | None = None,
  62 |         confidence: float = 0.82,
  63 |     ) -> str:
  64 |         """Mock result synthesis response from LLM."""
  65 |         if agents_used is None:
  66 |             agents_used = ["market", "technical"]
  67 | 
  68 |         persona_focused_content = {
  69 |             "conservative": """
  70 |             Based on comprehensive analysis from our specialist agents, AAPL presents a balanced investment opportunity
  71 |             with strong fundamentals and reasonable risk profile. The market analysis indicates stable sector
  72 |             positioning with consistent dividend growth, while technical indicators suggest a consolidation phase
  73 |             with support at $170. For conservative investors, consider gradual position building with
  74 |             strict stop-loss at $165 to preserve capital. The risk-adjusted return profile aligns well
  75 |             with conservative portfolio objectives, offering both income stability and modest growth potential.
  76 |             """,
  77 |             "moderate": """
  78 |             Our multi-agent analysis reveals AAPL as a compelling investment opportunity with balanced risk-reward
  79 |             characteristics. Market screening identified strong fundamentals including 15% revenue growth and
  80 |             expanding services segment. Technical analysis shows bullish momentum with RSI at 58 and MACD
  81 |             trending positive. Entry points around $175-180 offer favorable risk-reward with targets at $200-210.
  82 |             Position sizing of 3-5% of portfolio aligns with moderate risk tolerance while capitalizing on
  83 |             the current uptrend momentum.
  84 |             """,
  85 |             "aggressive": """
  86 |             Multi-agent analysis identifies AAPL as a high-conviction growth play with exceptional upside potential.
  87 |             Market analysis reveals accelerating AI adoption driving hardware refresh cycles, while technical
  88 |             indicators signal strong breakout momentum above $185 resistance. The confluence of fundamental
  89 |             catalysts and technical setup supports aggressive position sizing up to 8-10% allocation.
  90 |             Target price of $220+ represents 25% upside with momentum likely to continue through earnings season.
  91 |             This represents a prime opportunity for growth-focused portfolios seeking alpha generation.
  92 |             """,
  93 |         }
  94 | 
  95 |         return persona_focused_content.get(
  96 |             persona, persona_focused_content["moderate"]
  97 |         ).strip()
  98 | 
  99 |     @staticmethod
 100 |     def content_analysis_response(
 101 |         sentiment: str = "bullish", confidence: float = 0.75, credibility: float = 0.8
 102 |     ) -> str:
 103 |         """Mock content analysis response from LLM."""
 104 |         return json.dumps(
 105 |             {
 106 |                 "KEY_INSIGHTS": [
 107 |                     "Apple's Q4 earnings exceeded expectations with 15% revenue growth",
 108 |                     "Services segment continues to expand with 12% year-over-year growth",
 109 |                     "iPhone 15 sales showing strong adoption in key markets",
 110 |                     "Cash position remains robust at $165B supporting capital allocation",
 111 |                     "AI integration across product line driving next upgrade cycle",
 112 |                 ],
 113 |                 "SENTIMENT": {"direction": sentiment, "confidence": confidence},
 114 |                 "RISK_FACTORS": [
 115 |                     "China market regulatory concerns persist",
 116 |                     "Supply chain dependencies in Taiwan and South Korea",
 117 |                     "Increasing competition in services market",
 118 |                     "Currency headwinds affecting international revenue",
 119 |                 ],
 120 |                 "OPPORTUNITIES": [
 121 |                     "AI-powered device upgrade cycle beginning",
 122 |                     "Vision Pro market penetration expanding",
 123 |                     "Services recurring revenue model strengthening",
 124 |                     "Emerging markets iPhone adoption accelerating",
 125 |                 ],
 126 |                 "CREDIBILITY": credibility,
 127 |                 "RELEVANCE": 0.9,
 128 |                 "SUMMARY": f"Comprehensive analysis suggests {sentiment} outlook for Apple with strong fundamentals and growth catalysts, though regulatory and competitive risks require monitoring.",
 129 |             }
 130 |         )
 131 | 
 132 |     @staticmethod
 133 |     def research_synthesis_response(persona: str = "moderate") -> str:
 134 |         """Mock research synthesis response for deep research agent."""
 135 |         synthesis_by_persona = {
 136 |             "conservative": """
 137 |             ## Executive Summary
 138 |             Apple represents a stable, dividend-paying technology stock suitable for conservative portfolios seeking
 139 |             balanced growth and income preservation.
 140 | 
 141 |             ## Key Findings
 142 |             • Consistent dividend growth averaging 8% annually over past 5 years
 143 |             • Strong balance sheet with $165B cash providing downside protection
 144 |             • Services revenue provides recurring income stream growing at 12% annually
 145 |             • P/E ratio of 28x reasonable for quality growth stock
 146 |             • Beta of 1.1 indicates moderate volatility relative to market
 147 |             • Debt-to-equity ratio of 0.3 shows conservative capital structure
 148 |             • Free cash flow yield of 3.2% supports dividend sustainability
 149 | 
 150 |             ## Investment Implications for Conservative Investors
 151 |             Apple's combination of dividend growth, balance sheet strength, and market leadership makes it suitable
 152 |             for conservative portfolios. The company's pivot to services provides recurring revenue stability while
 153 |             hardware sales offer moderate growth potential.
 154 | 
 155 |             ## Risk Considerations
 156 |             Primary risks include China market exposure (19% of revenue), technology obsolescence, and regulatory
 157 |             pressure on App Store policies. However, strong cash position provides significant downside protection.
 158 | 
 159 |             ## Recommended Actions
 160 |             Consider 2-3% portfolio allocation with gradual accumulation on pullbacks below $170.
 161 |             Appropriate stop-loss at $160 to limit downside risk.
 162 |             """,
 163 |             "moderate": """
 164 |             ## Executive Summary
 165 |             Apple presents a balanced investment opportunity combining growth potential with quality fundamentals,
 166 |             well-suited for diversified moderate-risk portfolios.
 167 | 
 168 |             ## Key Findings
 169 |             • Revenue growth acceleration to 15% driven by AI-enhanced products
 170 |             • Services segment margins expanding to 70%, improving overall profitability
 171 |             • Strong competitive moats in ecosystem and brand loyalty
 172 |             • Capital allocation balance between growth investment and shareholder returns
 173 |             • Technical indicators suggesting continued uptrend momentum
 174 |             • Valuation appears fair at current levels with room for multiple expansion
 175 |             • Market leadership position in premium smartphone and tablet segments
 176 | 
 177 |             ## Investment Implications for Moderate Investors
 178 |             Apple offers an attractive blend of stability and growth potential. The company's evolution toward
 179 |             services provides recurring revenue while hardware innovation drives periodic upgrade cycles.
 180 | 
 181 |             ## Risk Considerations
 182 |             Key risks include supply chain disruption, China regulatory issues, and increasing competition
 183 |             in services. Currency headwinds may impact international revenue growth.
 184 | 
 185 |             ## Recommended Actions
 186 |             Target 4-5% portfolio allocation with entry points between $175-185. Consider taking profits
 187 |             above $210 and adding on weakness below $170.
 188 |             """,
 189 |             "aggressive": """
 190 |             ## Executive Summary
 191 |             Apple stands at the forefront of the next technology revolution with AI integration across its ecosystem,
 192 |             presenting significant alpha generation potential for growth-focused investors.
 193 | 
 194 |             ## Key Findings
 195 |             • AI-driven product refresh cycle beginning with iPhone 15 Pro and Vision Pro launch
 196 |             • Services revenue trajectory accelerating with 18% growth potential
 197 |             • Market share expansion opportunities in emerging markets and enterprise
 198 |             • Vision Pro early adoption exceeding expectations, validating spatial computing thesis
 199 |             • Developer ecosystem strengthening with AI tools integration
 200 |             • Operating leverage improving with services mix shift
 201 |             • Stock momentum indicators showing bullish technical setup
 202 | 
 203 |             ## Investment Implications for Aggressive Investors
 204 |             Apple represents a high-conviction growth play with multiple expansion catalysts. The convergence
 205 |             of AI adoption, new product categories, and services growth creates exceptional upside potential.
 206 | 
 207 |             ## Risk Considerations
 208 |             Execution risk on Vision Pro adoption, competitive response from Android ecosystem, and
 209 |             regulatory pressure on App Store represent key downside risks requiring active monitoring.
 210 | 
 211 |             ## Recommended Actions
 212 |             Consider aggressive 8-10% allocation with momentum-based entry above $185 resistance.
 213 |             Target price $230+ over 12-month horizon with trailing stop at 15% to protect gains.
 214 |             """,
 215 |         }
 216 | 
 217 |         return synthesis_by_persona.get(
 218 |             persona, synthesis_by_persona["moderate"]
 219 |         ).strip()
 220 | 
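# --- Illustrative usage sketch (editor addition, not part of the source file) ---
# A test can wrap these canned strings in an AIMessage and assert on the
# parsed routing decision, e.g.:
#
#   msg = AIMessage(
#       content=MockLLMResponses.query_classification_response("deep_research")
#   )
#   payload = json.loads(msg.content)
#   assert payload["required_agents"] == ["research"]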
 221 | 
 222 | # ==============================================================================
 223 | # MOCK EXA API RESPONSES
 224 | # ==============================================================================
 225 | 
 226 | 
 227 | class MockExaResponses:
 228 |     """Realistic Exa API responses for financial research."""
 229 | 
 230 |     @staticmethod
 231 |     def search_results_aapl() -> list[dict[str, Any]]:
 232 |         """Mock Exa search results for AAPL analysis."""
 233 |         return [
 234 |             {
 235 |                 "url": "https://www.bloomberg.com/news/articles/2024-01-15/apple-earnings-beat",
 236 |                 "title": "Apple Earnings Beat Expectations as iPhone Sales Surge",
 237 |                 "content": "Apple Inc. reported quarterly revenue of $119.6 billion, surpassing analyst expectations as iPhone 15 sales showed strong momentum in key markets. The technology giant's services segment grew 12% year-over-year to $23.1 billion, demonstrating the recurring revenue model's strength. CEO Tim Cook highlighted AI integration across the product lineup as a key driver for the next upgrade cycle. Gross margins expanded to 45.9% compared to 43.3% in the prior year period, reflecting improved mix and operational efficiency. The company's cash position remains robust at $165.1 billion, providing flexibility for strategic investments and shareholder returns. China revenue declined 2% due to competitive pressures, though management expressed optimism about long-term opportunities in the region.",
 238 |                 "summary": "Apple exceeded Q4 earnings expectations with strong iPhone 15 sales and services growth, while maintaining robust cash position and expanding margins despite China headwinds.",
 239 |                 "highlights": [
 240 |                     "iPhone 15 strong sales momentum",
 241 |                     "Services grew 12% year-over-year",
 242 |                     "$165.1B cash position",
 243 |                 ],
 244 |                 "published_date": "2024-01-15T08:30:00Z",
 245 |                 "author": "Mark Gurman",
 246 |                 "score": 0.94,
 247 |                 "provider": "exa",
 248 |             },
 249 |             {
 250 |                 "url": "https://seekingalpha.com/article/4665432-apple-stock-analysis-ai-catalyst",
 251 |                 "title": "Apple Stock: AI Integration Could Drive Next Super Cycle",
 252 |                 "content": "Apple's integration of artificial intelligence across its ecosystem represents a potential catalyst for the next device super cycle. The company's on-device AI processing capabilities, enabled by the A17 Pro chip, position Apple uniquely in the mobile AI landscape. Industry analysts project AI-enhanced features could drive iPhone replacement cycles to accelerate from the current 3.5 years to approximately 2.8 years. The services ecosystem benefits significantly from AI integration, with enhanced Siri capabilities driving increased App Store engagement and subscription services adoption. Vision Pro early metrics suggest spatial computing adoption is tracking ahead of initial estimates, with developer interest surging 300% quarter-over-quarter. The convergence of AI, spatial computing, and services creates multiple revenue expansion vectors over the next 3-5 years.",
 253 |                 "summary": "AI integration across Apple's ecosystem could accelerate device replacement cycles and expand services revenue through enhanced user engagement.",
 254 |                 "highlights": [
 255 |                     "AI-driven replacement cycle acceleration",
 256 |                     "Vision Pro adoption tracking well",
 257 |                     "Services ecosystem AI benefits",
 258 |                 ],
 259 |                 "published_date": "2024-01-14T14:20:00Z",
 260 |                 "author": "Tech Analyst Team",
 261 |                 "score": 0.87,
 262 |                 "provider": "exa",
 263 |             },
 264 |             {
 265 |                 "url": "https://www.morningstar.com/stocks/aapl-valuation-analysis",
 266 |                 "title": "Apple Valuation Analysis: Fair Value Assessment",
 267 |                 "content": "Our discounted cash flow analysis suggests Apple's fair value ranges between $185-195 per share, indicating the stock trades near intrinsic value at current levels. The company's transition toward higher-margin services revenue supports multiple expansion, though hardware cycle dependency introduces valuation volatility. Key valuation drivers include services attach rates (currently 85% of active devices), gross margin trajectory (target 47-48% long-term), and capital allocation efficiency. The dividend yield of 0.5% appears sustainable with strong free cash flow generation of $95+ billion annually. Compared to technology peers, Apple trades at a 15% premium to the sector median, justified by superior return on invested capital and cash generation capabilities.",
 268 |                 "summary": "DCF analysis places Apple's fair value at $185-195, with current valuation supported by services transition and strong cash generation.",
 269 |                 "highlights": [
 270 |                     "Fair value $185-195 range",
 271 |                     "Services driving multiple expansion",
 272 |                     "Strong free cash flow $95B+",
 273 |                 ],
 274 |                 "published_date": "2024-01-13T11:45:00Z",
 275 |                 "author": "Sarah Chen",
 276 |                 "score": 0.91,
 277 |                 "provider": "exa",
 278 |             },
 279 |             {
 280 |                 "url": "https://www.reuters.com/technology/apple-china-challenges-2024-01-12",
 281 |                 "title": "Apple Faces Growing Competition in China Market",
 282 |                 "content": "Apple confronts intensifying competition in China as local brands gain market share and regulatory scrutiny increases. Huawei's Mate 60 Pro launch has resonated strongly with Chinese consumers, contributing to Apple's 2% revenue decline in Greater China for Q4. The Chinese government's restrictions on iPhone use in government agencies signal potential broader policy shifts. Despite challenges, Apple maintains premium market leadership with 47% share in smartphones priced above $600. Management highlighted ongoing investments in local partnerships and supply chain relationships to navigate the complex regulatory environment. The company's services revenue in China grew 8% despite hardware headwinds, demonstrating ecosystem stickiness among existing users.",
 283 |                 "summary": "Apple faces competitive and regulatory challenges in China, though maintains premium market leadership and growing services revenue.",
 284 |                 "highlights": [
 285 |                     "China revenue down 2%",
 286 |                     "Regulatory iPhone restrictions",
 287 |                     "Premium segment leadership maintained",
 288 |                 ],
 289 |                 "published_date": "2024-01-12T16:15:00Z",
 290 |                 "author": "Reuters Technology Team",
 291 |                 "score": 0.89,
 292 |                 "provider": "exa",
 293 |             },
 294 |         ]
 295 | 
 296 |     @staticmethod
 297 |     def search_results_market_sentiment() -> list[dict[str, Any]]:
 298 |         """Mock Exa results for market sentiment analysis."""
 299 |         return [
 300 |             {
 301 |                 "url": "https://www.cnbc.com/2024/01/16/market-outlook-tech-stocks",
 302 |                 "title": "Tech Stocks Rally on AI Optimism Despite Rate Concerns",
 303 |                 "content": "Technology stocks surged 2.3% as artificial intelligence momentum overcame Federal Reserve policy concerns. Investors rotated into AI-beneficiary names including Apple, Microsoft, and Nvidia following strong earnings guidance across the sector. The Technology Select Sector SPDR ETF (XLK) reached new 52-week highs despite 10-year Treasury yields hovering near 4.5%. Institutional flows show $12.8 billion net inflows to technology funds over the past month, the strongest since early 2023. Options activity indicates continued bullish sentiment with call volume exceeding puts by 1.8:1 across major tech names. Analyst upgrades accelerated with 67% of tech stocks carrying buy ratings versus 52% sector average.",
 304 |                 "summary": "Tech stocks rally on AI optimism with strong institutional inflows and bullish options activity despite interest rate headwinds.",
 305 |                 "highlights": [
 306 |                     "Tech sector +2.3%",
 307 |                     "$12.8B institutional inflows",
 308 |                     "Call/put ratio 1.8:1",
 309 |                 ],
 310 |                 "published_date": "2024-01-16T09:45:00Z",
 311 |                 "author": "CNBC Markets Team",
 312 |                 "score": 0.92,
 313 |                 "provider": "exa",
 314 |             },
 315 |             {
 316 |                 "url": "https://finance.yahoo.com/news/vix-fear-greed-market-sentiment",
 317 |                 "title": "VIX Falls to Multi-Month Lows as Fear Subsides",
 318 |                 "content": "The VIX volatility index dropped to 13.8, the lowest level since November 2021, signaling reduced market anxiety and increased risk appetite among investors. The CNN Fear & Greed Index shifted to 'Greed' territory at 72, up from 'Neutral' just two weeks ago. Credit spreads tightened across investment-grade and high-yield markets, with IG spreads at 85 basis points versus 110 in December. Equity put/call ratios declined to 0.45, indicating overwhelming bullish positioning. Margin debt increased 8% month-over-month as investors leverage up for continued market gains.",
 319 |                 "summary": "Market sentiment indicators show reduced fear and increased greed with VIX at multi-month lows and bullish positioning accelerating.",
 320 |                 "highlights": [
 321 |                     "VIX at 13.8 multi-month low",
 322 |                     "Fear & Greed at 72",
 323 |                     "Margin debt up 8%",
 324 |                 ],
 325 |                 "published_date": "2024-01-16T14:30:00Z",
 326 |                 "author": "Market Sentiment Team",
 327 |                 "score": 0.88,
 328 |                 "provider": "exa",
 329 |             },
 330 |         ]
 331 | 
 332 |     @staticmethod
 333 |     def search_results_empty() -> list[dict[str, Any]]:
 334 |         """Mock empty Exa search results for testing edge cases."""
 335 |         return []
 336 | 
 337 |     @staticmethod
 338 |     def search_results_low_quality() -> list[dict[str, Any]]:
 339 |         """Mock low-quality Exa search results for credibility testing."""
 340 |         return [
 341 |             {
 342 |                 "url": "https://sketchy-site.com/apple-prediction",
 343 |                 "title": "AAPL Will 100X - Trust Me Bro Analysis",
 344 |                 "content": "Apple stock is going to the moon because reasons. My uncle works at Apple and says they're releasing iPhones made of gold next year. This is not financial advice but also definitely is financial advice. Buy now or cry later. Diamond hands to the moon rockets.",
 345 |                 "summary": "Questionable analysis with unsubstantiated claims about Apple's prospects.",
 346 |                 "highlights": [
 347 |                     "Gold iPhones coming",
 348 |                     "100x returns predicted",
 349 |                     "Uncle insider info",
 350 |                 ],
 351 |                 "published_date": "2024-01-16T23:59:00Z",
 352 |                 "author": "Random Internet User",
 353 |                 "score": 0.12,
 354 |                 "provider": "exa",
 355 |             }
 356 |         ]
 357 | 
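# --- Illustrative usage sketch (editor addition, not part of the source file) ---
# The low-quality result exists to exercise credibility filtering; a test
# might mix it with the AAPL results and assert it gets screened out:
#
#   results = (
#       MockExaResponses.search_results_aapl()
#       + MockExaResponses.search_results_low_quality()
#   )
#   credible = [r for r in results if r["score"] >= 0.5]
#   assert len(credible) == 4  # the score-0.12 "trust me bro" piece is dropped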
 358 | 
 359 | # ==============================================================================
 360 | # MOCK TAVILY API RESPONSES
 361 | # ==============================================================================
 362 | 
 363 | 
 364 | class MockTavilyResponses:
 365 |     """Realistic Tavily API responses for web search."""
 366 | 
 367 |     @staticmethod
 368 |     def search_results_aapl() -> dict[str, Any]:
 369 |         """Mock Tavily search response for AAPL analysis."""
 370 |         return {
 371 |             "query": "Apple stock analysis AAPL investment outlook",
 372 |             "follow_up_questions": [
 373 |                 "What are Apple's main revenue drivers?",
 374 |                 "How does Apple compare to competitors?",
 375 |                 "What are the key risks for Apple stock?",
 376 |             ],
 377 |             "answer": "Apple (AAPL) shows strong fundamentals with growing services revenue and AI integration opportunities, though faces competition in China and regulatory pressures.",
 378 |             "results": [
 379 |                 {
 380 |                     "title": "Apple Stock Analysis: Strong Fundamentals Despite Headwinds",
 381 |                     "url": "https://www.fool.com/investing/2024/01/15/apple-stock-analysis",
 382 |                     "content": "Apple's latest quarter demonstrated the resilience of its business model, with services revenue hitting a new record and iPhone sales exceeding expectations. The company's focus on artificial intelligence integration across its product ecosystem positions it well for future growth cycles. However, investors should monitor China market dynamics and App Store regulatory challenges that could impact long-term growth trajectories.",
 383 |                     "raw_content": "Apple Inc. (AAPL) continues to demonstrate strong business fundamentals in its latest quarterly report, with services revenue reaching new records and iPhone sales beating analyst expectations across key markets. The technology giant has strategically positioned itself at the forefront of artificial intelligence integration, with on-device AI processing capabilities that differentiate its products from competitors. Looking ahead, the company's ecosystem approach and services transition provide multiple growth vectors, though challenges in China and regulatory pressures on App Store policies require careful monitoring. The stock's current valuation appears reasonable given the company's cash generation capabilities and market position.",
 384 |                     "published_date": "2024-01-15",
 385 |                     "score": 0.89,
 386 |                 },
 387 |                 {
 388 |                     "title": "Tech Sector Outlook: AI Revolution Drives Growth",
 389 |                     "url": "https://www.barrons.com/articles/tech-outlook-ai-growth",
 390 |                     "content": "The technology sector stands at the beginning of a multi-year artificial intelligence transformation that could reshape revenue models and competitive dynamics. Companies with strong AI integration capabilities, including Apple, Microsoft, and Google, are positioned to benefit from this shift. Apple's approach of on-device AI processing provides privacy advantages and reduces cloud infrastructure costs compared to competitors relying heavily on cloud-based AI services.",
 391 |                     "raw_content": "The technology sector is experiencing a fundamental transformation as artificial intelligence capabilities become central to product differentiation and user experience. Companies that can effectively integrate AI while maintaining user privacy and system performance are likely to capture disproportionate value creation over the next 3-5 years. Apple's strategy of combining custom silicon with on-device AI processing provides competitive advantages in both performance and privacy, potentially driving accelerated device replacement cycles and services engagement. This positions Apple favorably compared to competitors relying primarily on cloud-based AI infrastructure.",
 392 |                     "published_date": "2024-01-14",
 393 |                     "score": 0.85,
 394 |                 },
 395 |                 {
 396 |                     "title": "Investment Analysis: Apple's Services Transformation",
 397 |                     "url": "https://www.investopedia.com/apple-services-analysis",
 398 |                     "content": "Apple's transformation from a hardware-centric to services-enabled company continues to gain momentum, with services revenue now representing over 22% of total revenue and growing at double-digit rates. This shift toward recurring revenue streams improves business model predictability and supports higher valuation multiples. The company's services ecosystem benefits from its large installed base and strong customer loyalty metrics.",
 399 |                     "raw_content": "Apple Inc.'s strategic evolution toward a services-centric business model represents one of the most successful corporate transformations in technology sector history. The company has leveraged its installed base of over 2 billion active devices to create a thriving services ecosystem encompassing the App Store, Apple Music, iCloud, Apple Pay, and various subscription services. This services revenue now exceeds $85 billion annually and continues growing at rates exceeding 10% year-over-year, providing both revenue diversification and margin enhancement. The recurring nature of services revenue creates more predictable cash flows and justifies premium valuation multiples compared to pure hardware companies.",
 400 |                     "published_date": "2024-01-13",
 401 |                     "score": 0.91,
 402 |                 },
 403 |             ],
 404 |             "response_time": 1.2,
 405 |         }
 406 | 
 407 |     @staticmethod
 408 |     def search_results_market_sentiment() -> dict[str, Any]:
 409 |         """Mock Tavily search response for market sentiment analysis."""
 410 |         return {
 411 |             "query": "stock market sentiment investor mood analysis 2024",
 412 |             "follow_up_questions": [
 413 |                 "What are current market sentiment indicators?",
 414 |                 "How do investors feel about tech stocks?",
 415 |                 "What factors are driving market optimism?",
 416 |             ],
 417 |             "answer": "Current market sentiment shows cautious optimism with reduced volatility and increased risk appetite, driven by AI enthusiasm and strong corporate earnings despite interest rate concerns.",
 418 |             "results": [
 419 |                 {
 420 |                     "title": "Market Sentiment Indicators Signal Bullish Mood",
 421 |                     "url": "https://www.marketwatch.com/story/market-sentiment-bullish",
 422 |                     "content": "Multiple sentiment indicators suggest investors have shifted from defensive to risk-on positioning as 2024 progresses. The VIX volatility index has declined to multi-month lows while institutional money flows accelerate into equities. Credit markets show tightening spreads and increased issuance activity, reflecting improved risk appetite across asset classes.",
 423 |                     "raw_content": "A comprehensive analysis of market sentiment indicators reveals a significant shift in investor psychology over the past month. The CBOE Volatility Index (VIX) has dropped below 14, its lowest level since late 2021, indicating reduced fear and increased complacency among options traders. Simultaneously, the American Association of Individual Investors (AAII) sentiment survey shows bullish respondents outnumbering bearish by a 2:1 margin, the widest spread since early 2023. Institutional flows data from EPFR shows $45 billion in net inflows to equity funds over the past four weeks, with technology and growth sectors receiving disproportionate allocation.",
 424 |                     "published_date": "2024-01-16",
 425 |                     "score": 0.93,
 426 |                 },
 427 |                 {
 428 |                     "title": "Investor Psychology: Fear of Missing Out Returns",
 429 |                     "url": "https://www.wsj.com/markets/stocks/fomo-returns-markets",
 430 |                     "content": "The fear of missing out (FOMO) mentality has returned to equity markets as investors chase performance and increase leverage. Margin debt has increased significantly while cash positions at major brokerages have declined to multi-year lows. This shift in behavior suggests sentiment has moved from cautious to optimistic, though some analysts warn of potential overextension.",
 431 |                     "raw_content": "Behavioral indicators suggest a fundamental shift in investor psychology from the cautious stance that characterized much of 2023 to a more aggressive, opportunity-seeking mindset. NYSE margin debt has increased 15% over the past two months, reaching $750 billion as investors leverage up to participate in market gains. Cash positions at major discount brokerages have declined to just 3.2% of assets, compared to 5.8% during peak uncertainty in October 2023. Options market activity shows call volume exceeding put volume by the widest margin in 18 months, with particular strength in technology and AI-related names.",
 432 |                     "published_date": "2024-01-15",
 433 |                     "score": 0.88,
 434 |                 },
 435 |             ],
 436 |             "response_time": 1.4,
 437 |         }
 438 | 
 439 |     @staticmethod
 440 |     def search_results_error() -> dict[str, Any]:
 441 |         """Mock Tavily error response for testing error handling."""
 442 |         return {
 443 |             "error": "rate_limit_exceeded",
 444 |             "message": "API rate limit exceeded. Please try again later.",
 445 |             "retry_after": 60,
 446 |         }
 447 | 
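# --- Illustrative error-handling sketch (editor addition, not part of the source file) ---
# The error payload lets tests drive a client's rate-limit backoff path:
#
#   response = MockTavilyResponses.search_results_error()
#   if "error" in response:
#       assert response["error"] == "rate_limit_exceeded"
#       assert response["retry_after"] == 60  # seconds a client should wait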
 448 | 
 449 | # ==============================================================================
 450 | # MOCK MARKET DATA
 451 | # ==============================================================================
 452 | 
 453 | 
 454 | class MockMarketData:
 455 |     """Realistic market data for testing financial analysis."""
 456 | 
 457 |     @staticmethod
 458 |     def stock_price_history(
 459 |         symbol: str = "AAPL", days: int = 100, current_price: float = 185.0
 460 |     ) -> pd.DataFrame:
 461 |         """Generate realistic stock price history."""
 462 |         end_date = datetime.now()
 463 |         start_date = end_date - timedelta(days=days)
 464 |         dates = pd.date_range(start=start_date, end=end_date, freq="D")
 465 | 
 466 |         # Generate realistic price movement
 467 |         np.random.seed(42)  # Consistent data for testing
 468 |         returns = np.random.normal(
 469 |             0.0008, 0.02, len(dates)
 470 |         )  # ~0.08% mean daily return, 2% daily volatility
 471 | 
 472 |         # Start with a base price and apply returns
 473 |         base_price = current_price * 0.9  # Start 10% lower
 474 |         prices = [base_price]
 475 | 
 476 |         for return_val in returns[1:]:
 477 |             next_price = prices[-1] * (1 + return_val)
 478 |             prices.append(max(next_price, 50))  # Floor price at $50
 479 | 
 480 |         # Create OHLCV data
 481 |         data = []
 482 |         for i, (date, close_price) in enumerate(zip(dates, prices, strict=False)):
 483 |             # Generate realistic OHLC from close price
 484 |             volatility = abs(np.random.normal(0, 0.015))  # Intraday volatility
 485 | 
 486 |             high = close_price * (1 + volatility)
 487 |             low = close_price * (1 - volatility)
 488 | 
 489 |             # Determine open based on previous close with gap
 490 |             if i == 0:
 491 |                 open_price = close_price
 492 |             else:
 493 |                 gap = np.random.normal(0, 0.005)  # Small gap
 494 |                 open_price = prices[i - 1] * (1 + gap)
 495 | 
 496 |             # Ensure OHLC relationships are valid
 497 |             high = max(high, open_price, close_price)
 498 |             low = min(low, open_price, close_price)
 499 | 
 500 |             # Generate volume
 501 |             base_volume = 50_000_000  # Base volume
 502 |             volume_multiplier = np.random.uniform(0.5, 2.0)
 503 |             volume = int(base_volume * volume_multiplier)
 504 | 
 505 |             data.append(
 506 |                 {
 507 |                     "Date": date,
 508 |                     "Open": round(open_price, 2),
 509 |                     "High": round(high, 2),
 510 |                     "Low": round(low, 2),
 511 |                     "Close": round(close_price, 2),
 512 |                     "Volume": volume,
 513 |                 }
 514 |             )
 515 | 
 516 |         df = pd.DataFrame(data)
 517 |         df.set_index("Date", inplace=True)
 518 |         return df
 519 | 
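# --- Illustrative usage sketch (editor addition, not part of the source file) ---
# The generated frame respects basic OHLC invariants, which tests can rely on:
#
#   df = MockMarketData.stock_price_history("AAPL", days=30)
#   assert (df["High"] >= df[["Open", "Close"]].max(axis=1)).all()
#   assert (df["Low"] <= df[["Open", "Close"]].min(axis=1)).all()
#   assert (df["Close"] >= 50).all()  # closes are floored at $50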
 520 |     @staticmethod
 521 |     def technical_indicators(symbol: str = "AAPL") -> dict[str, Any]:
 522 |         """Mock technical indicators for a stock."""
 523 |         return {
 524 |             "symbol": symbol,
 525 |             "timestamp": datetime.now(),
 526 |             "rsi": {
 527 |                 "value": 58.3,
 528 |                 "signal": "neutral",
 529 |                 "interpretation": "Neither overbought nor oversold",
 530 |             },
 531 |             "macd": {
 532 |                 "value": 2.15,
 533 |                 "signal_line": 1.89,
 534 |                 "histogram": 0.26,
 535 |                 "signal": "bullish",
 536 |                 "interpretation": "MACD above signal line suggests bullish momentum",
 537 |             },
 538 |             "bollinger_bands": {
 539 |                 "upper": 192.45,
 540 |                 "middle": 185.20,
 541 |                 "lower": 177.95,
 542 |                 "position": "middle",
 543 |                 "squeeze": False,
 544 |             },
 545 |             "moving_averages": {
 546 |                 "sma_20": 183.45,
 547 |                 "sma_50": 178.90,
 548 |                 "sma_200": 172.15,
 549 |                 "ema_12": 184.80,
 550 |                 "ema_26": 181.30,
 551 |             },
 552 |             "support_resistance": {
 553 |                 "support_levels": [175.00, 170.50, 165.25],
 554 |                 "resistance_levels": [190.00, 195.75, 200.50],
 555 |                 "current_level": "between_support_resistance",
 556 |             },
 557 |             "volume_analysis": {
 558 |                 "average_volume": 52_000_000,
 559 |                 "current_volume": 68_000_000,
 560 |                 "relative_volume": 1.31,
 561 |                 "volume_trend": "increasing",
 562 |             },
 563 |         }
 564 | 
 565 |     @staticmethod
 566 |     def market_overview() -> dict[str, Any]:
 567 |         """Mock market overview data."""
 568 |         return {
 569 |             "timestamp": datetime.now(),
 570 |             "indices": {
 571 |                 "SPY": {"price": 485.30, "change": +2.15, "change_pct": +0.44},
 572 |                 "QQQ": {"price": 412.85, "change": +5.42, "change_pct": +1.33},
 573 |                 "IWM": {"price": 195.67, "change": -1.23, "change_pct": -0.62},
 574 |                 "VIX": {"price": 13.8, "change": -1.2, "change_pct": -8.0},
 575 |             },
 576 |             "sector_performance": {
 577 |                 "Technology": +1.85,
 578 |                 "Healthcare": +0.45,
 579 |                 "Financial Services": -0.32,
 580 |                 "Consumer Cyclical": +0.78,
 581 |                 "Industrials": -0.15,
 582 |                 "Energy": -1.22,
 583 |                 "Utilities": +0.33,
 584 |                 "Real Estate": +0.91,
 585 |                 "Materials": -0.67,
 586 |                 "Consumer Defensive": +0.12,
 587 |                 "Communication Services": +1.34,
 588 |             },
 589 |             "market_breadth": {
 590 |                 "advancers": 1845,
 591 |                 "decliners": 1230,
 592 |                 "unchanged": 125,
 593 |                 "new_highs": 89,
 594 |                 "new_lows": 12,
 595 |                 "up_volume": 8.2e9,
 596 |                 "down_volume": 4.1e9,
 597 |             },
 598 |             "sentiment_indicators": {
 599 |                 "fear_greed_index": 72,
 600 |                 "vix_level": "low",
 601 |                 "put_call_ratio": 0.45,
 602 |                 "margin_debt_trend": "increasing",
 603 |             },
 604 |         }
 605 | 
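
# Illustrative sketch, not part of the original fixtures: a minimal invariant
# check one might run against stock_price_history; the _example_* name is
# hypothetical.
def _example_ohlc_invariants() -> None:
    """High/Low must bound Open/Close on every generated row."""
    df = MockMarketData.stock_price_history(days=30)
    assert (df["High"] >= df[["Open", "Close"]].max(axis=1)).all()
    assert (df["Low"] <= df[["Open", "Close"]].min(axis=1)).all()
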

# ==============================================================================
# TEST QUERY EXAMPLES
# ==============================================================================


class TestQueries:
    """Realistic user queries for different classification categories."""

    # Not a test case: keep pytest from collecting this data container.
    __test__ = False

    MARKET_SCREENING = [
        "Find me momentum stocks in the technology sector with strong earnings growth",
        "Screen for dividend-paying stocks with yields above 3% and consistent payout history",
        "Show me small-cap stocks with high revenue growth and low debt levels",
        "Find stocks breaking out of consolidation patterns with increasing volume",
        "Screen for value stocks trading below book value with improving fundamentals",
    ]

    COMPANY_RESEARCH = [
        "Analyze Apple's competitive position in the smartphone market",
        "Research Tesla's battery technology advantages and manufacturing scale",
        "Provide comprehensive analysis of Microsoft's cloud computing strategy",
        "Analyze Amazon's e-commerce margins and AWS growth potential",
        "Research Nvidia's AI chip market dominance and competitive threats",
    ]

    TECHNICAL_ANALYSIS = [
        "Analyze AAPL's chart patterns and provide entry/exit recommendations",
        "What do the technical indicators say about SPY's short-term direction?",
        "Analyze TSLA's support and resistance levels for swing trading",
        "Show me the RSI and MACD signals for QQQ",
        "Identify chart patterns in the Nasdaq that suggest market direction",
    ]

    SENTIMENT_ANALYSIS = [
        "What's the current market sentiment around tech stocks?",
        "Analyze investor sentiment toward electric vehicle companies",
        "How are traders feeling about the Fed's interest rate policy?",
        "What's the mood in crypto markets right now?",
        "Analyze sentiment around bank stocks after recent earnings",
    ]

    PORTFOLIO_ANALYSIS = [
        "Optimize my portfolio allocation for moderate risk tolerance",
        "Analyze the correlation between my holdings and suggest diversification",
        "Review my portfolio for sector concentration risk",
        "Suggest rebalancing strategy for my retirement portfolio",
        "Analyze my portfolio's beta and suggest hedging strategies",
    ]

    RISK_ASSESSMENT = [
        "Calculate appropriate position size for AAPL given my $100k account",
        "What's the maximum drawdown risk for a 60/40 portfolio?",
        "Analyze the tail risk in my growth stock positions",
        "Calculate VaR for my current portfolio allocation",
        "Assess concentration risk in my tech-heavy portfolio",
    ]

    @classmethod
    def get_random_query(cls, category: str) -> str:
        """Get a random query from the specified category."""
        queries_map = {
            "market_screening": cls.MARKET_SCREENING,
            "company_research": cls.COMPANY_RESEARCH,
            "technical_analysis": cls.TECHNICAL_ANALYSIS,
            "sentiment_analysis": cls.SENTIMENT_ANALYSIS,
            "portfolio_analysis": cls.PORTFOLIO_ANALYSIS,
            "risk_assessment": cls.RISK_ASSESSMENT,
        }

        queries = queries_map.get(category, cls.MARKET_SCREENING)
        # Cast to str: np.random.choice returns a numpy scalar, not a plain str.
        return str(np.random.choice(queries))

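
# Illustrative sketch, not part of the original fixtures: pinning numpy's RNG
# makes get_random_query reproducible across runs; the seed value and the
# _example_* name are arbitrary assumptions.
def _example_deterministic_query() -> None:
    """Draw a reproducible query from a category."""
    np.random.seed(0)
    query = TestQueries.get_random_query("technical_analysis")
    assert query in TestQueries.TECHNICAL_ANALYSIS
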

# ==============================================================================
# PERSONA-SPECIFIC FIXTURES
# ==============================================================================


class PersonaFixtures:
    """Persona-specific test data and responses."""

    @staticmethod
    def conservative_investor_data() -> dict[str, Any]:
        """Data for conservative investor persona testing."""
        return {
            "persona": "conservative",
            "characteristics": [
                "capital preservation",
                "income generation",
                "low volatility",
                "dividend focus",
            ],
            "risk_tolerance": 0.3,
            "preferred_sectors": ["Utilities", "Consumer Defensive", "Healthcare"],
            "analysis_focus": [
                "dividend yield",
                "debt levels",
                "stability",
                "downside protection",
            ],
            "position_sizing": {
                "max_single_position": 0.05,  # 5% max
                "stop_loss_multiplier": 1.5,
                "target_volatility": 0.12,
            },
            "sample_recommendations": [
                "Consider gradual position building with strict risk management",
                "Focus on dividend-paying stocks with consistent payout history",
                "Maintain defensive positioning until market clarity improves",
                "Prioritize capital preservation over aggressive growth",
            ],
        }

    @staticmethod
    def moderate_investor_data() -> dict[str, Any]:
        """Data for moderate investor persona testing."""
        return {
            "persona": "moderate",
            "characteristics": [
                "balanced growth",
                "diversification",
                "moderate risk",
                "long-term focus",
            ],
            "risk_tolerance": 0.6,
            "preferred_sectors": [
                "Technology",
                "Healthcare",
                "Financial Services",
                "Industrials",
            ],
            "analysis_focus": [
                "risk-adjusted returns",
                "diversification",
                "growth potential",
                "fundamentals",
            ],
            "position_sizing": {
                "max_single_position": 0.08,  # 8% max
                "stop_loss_multiplier": 2.0,
                "target_volatility": 0.18,
            },
            "sample_recommendations": [
                "Balance growth opportunities with risk management",
                "Consider diversified allocation across sectors and market caps",
                "Target 4-6% position sizing for high-conviction ideas",
                "Monitor both technical and fundamental indicators",
            ],
        }

    @staticmethod
    def aggressive_investor_data() -> dict[str, Any]:
        """Data for aggressive investor persona testing."""
        return {
            "persona": "aggressive",
            "characteristics": [
                "high growth",
                "momentum",
                "concentrated positions",
                "active trading",
            ],
            "risk_tolerance": 0.9,
            "preferred_sectors": [
                "Technology",
                "Communication Services",
                "Consumer Cyclical",
            ],
            "analysis_focus": [
                "growth potential",
                "momentum",
                "catalysts",
                "alpha generation",
            ],
            "position_sizing": {
                "max_single_position": 0.15,  # 15% max
                "stop_loss_multiplier": 3.0,
                "target_volatility": 0.25,
            },
            "sample_recommendations": [
                "Consider concentrated positions in high-conviction names",
                "Target momentum stocks with strong catalysts",
                "Use 10-15% position sizing for best opportunities",
                "Focus on alpha generation over risk management",
            ],
        }

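
# Illustrative sketch, not part of the original fixtures: the three personas
# above are ordered by risk appetite, a property downstream sizing logic can
# rely on; the _example_* name is hypothetical.
def _example_persona_risk_ordering() -> None:
    """Risk tolerance and max position size increase with persona aggressiveness."""
    personas = [
        PersonaFixtures.conservative_investor_data(),
        PersonaFixtures.moderate_investor_data(),
        PersonaFixtures.aggressive_investor_data(),
    ]
    tolerances = [p["risk_tolerance"] for p in personas]
    max_positions = [p["position_sizing"]["max_single_position"] for p in personas]
    assert tolerances == sorted(tolerances)
    assert max_positions == sorted(max_positions)
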

# ==============================================================================
# EDGE CASE AND ERROR FIXTURES
# ==============================================================================


class EdgeCaseFixtures:
    """Fixtures for testing edge cases and error conditions."""

    @staticmethod
    def api_failure_responses() -> dict[str, Any]:
        """Mock API failure responses for error handling testing."""
        return {
            "exa_rate_limit": {
                "error": "rate_limit_exceeded",
                "message": "You have exceeded your API rate limit",
                "retry_after": 3600,
                "status_code": 429,
            },
            "tavily_unauthorized": {
                "error": "unauthorized",
                "message": "Invalid API key provided",
                "status_code": 401,
            },
            "llm_timeout": {
                "error": "timeout",
                "message": "Request timed out after 30 seconds",
                "status_code": 408,
            },
            "network_error": {
                "error": "network_error",
                "message": "Unable to connect to external service",
                "status_code": 503,
            },
        }

    @staticmethod
    def conflicting_agent_results() -> dict[str, dict[str, Any]]:
        """Mock conflicting results from different agents for synthesis testing."""
        return {
            "market": {
                "recommendation": "BUY",
                "confidence": 0.85,
                "reasoning": "Strong fundamentals and sector rotation into technology",
                "target_price": 210.0,
                "sentiment": "bullish",
            },
            "technical": {
                "recommendation": "SELL",
                "confidence": 0.78,
                "reasoning": "Bearish divergence in RSI and approaching strong resistance",
                "target_price": 165.0,
                "sentiment": "bearish",
            },
            "research": {
                "recommendation": "HOLD",
                "confidence": 0.72,
                "reasoning": "Mixed signals from fundamental analysis and market conditions",
                "target_price": 185.0,
                "sentiment": "neutral",
            },
        }

    @staticmethod
    def incomplete_data() -> dict[str, Any]:
        """Mock incomplete or missing data scenarios."""
        return {
            "missing_price_data": {
                "symbol": "AAPL",
                "error": "Price data not available for requested timeframe",
                "available_data": None,
            },
            "partial_search_results": {
                "results_found": 2,
                "results_expected": 10,
                "provider_errors": ["exa_timeout", "tavily_rate_limit"],
                "partial_data": True,
            },
            "llm_partial_response": {
                # The mid-sentence cutoff below is intentional: it simulates a
                # truncated LLM response.
                "analysis": "Partial analysis completed before",
                "truncated": True,
                "completion_percentage": 0.6,
            },
        }

    @staticmethod
    def malformed_data() -> dict[str, Any]:
        """Mock malformed or invalid data for error testing."""
        return {
            "invalid_json": '{"analysis": "incomplete json"',  # Missing closing brace
            "wrong_schema": {
                "unexpected_field": "value",
                "missing_required_field": None,
            },
            "invalid_dates": {
                "published_date": "not-a-date",
                "timestamp": "invalid-timestamp",
            },
            "invalid_numbers": {"confidence": "not-a-number", "price": "invalid-price"},
        }

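
# Illustrative sketch, not part of the original fixtures: parsers fed the
# malformed payloads above should raise rather than silently continue; only
# the stdlib json module is assumed, and the _example_* name is hypothetical.
def _example_malformed_json_is_rejected() -> None:
    """The invalid_json fixture (missing closing brace) must fail to parse."""
    import json

    payload = EdgeCaseFixtures.malformed_data()["invalid_json"]
    try:
        json.loads(payload)
    except json.JSONDecodeError:
        pass  # expected failure path
    else:
        raise AssertionError("malformed JSON unexpectedly parsed")
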

# ==============================================================================
# PYTEST FIXTURES
# ==============================================================================


@pytest.fixture
def mock_llm_responses():
    """Fixture providing mock LLM responses."""
    return MockLLMResponses()


@pytest.fixture
def mock_exa_responses():
    """Fixture providing mock Exa API responses."""
    return MockExaResponses()


@pytest.fixture
def mock_tavily_responses():
    """Fixture providing mock Tavily API responses."""
    return MockTavilyResponses()


@pytest.fixture
def mock_market_data():
    """Fixture providing mock market data."""
    return MockMarketData()


@pytest.fixture
def test_queries():
    """Fixture providing test queries."""
    return TestQueries()


@pytest.fixture
def persona_fixtures():
    """Fixture providing persona-specific data."""
    return PersonaFixtures()


@pytest.fixture
def edge_case_fixtures():
    """Fixture providing edge case test data."""
    return EdgeCaseFixtures()


@pytest.fixture(params=["conservative", "moderate", "aggressive"])
def investor_persona(request):
    """Parametrized fixture for testing across all investor personas."""
    return request.param


@pytest.fixture(
    params=[
        "market_screening",
        "company_research",
        "technical_analysis",
        "sentiment_analysis",
    ]
)
def query_category(request):
    """Parametrized fixture for testing across all query categories."""
    return request.param

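
# Illustrative usage, not part of the original fixtures: a test requesting
# both parametrized fixtures runs once per combination (3 personas x 4 query
# categories = 12 cases). A sketch, assuming the fixtures are registered for
# the test module:
#
#     def test_query_for_each_persona(investor_persona, query_category):
#         query = TestQueries.get_random_query(query_category)
#         assert isinstance(query, str) and query
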

# ==============================================================================
# HELPER FUNCTIONS
# ==============================================================================


def create_mock_llm_with_responses(responses: list[str]) -> MagicMock:
    """Create a mock LLM that returns specific responses in order."""
    mock_llm = MagicMock()

    # Create AIMessage objects for each response
    ai_messages = [AIMessage(content=response) for response in responses]
    # Back ainvoke with AsyncMock (from unittest.mock) so awaiting it yields
    # each queued message in turn; a plain MagicMock side_effect would hand
    # back an AIMessage that cannot be awaited.
    mock_llm.ainvoke = AsyncMock(side_effect=ai_messages)

    return mock_llm

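
# Illustrative usage, not part of the original helpers: because ainvoke is
# backed by AsyncMock, it can be awaited directly and yields the queued
# responses in order; the _example_* name is hypothetical.
async def _example_mock_llm_roundtrip() -> None:
    """Consume the first queued response from the mocked LLM."""
    llm = create_mock_llm_with_responses(["first answer", "second answer"])
    message = await llm.ainvoke("any prompt")
    assert message.content == "first answer"
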

def create_mock_agent_result(
    agent_type: str,
    confidence: float = 0.8,
    recommendation: str = "BUY",
    additional_data: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Create a mock agent result with realistic structure."""
    base_result = {
        "status": "success",
        "agent_type": agent_type,
        "confidence_score": confidence,
        "recommendation": recommendation,
        "timestamp": datetime.now(),
        "execution_time_ms": np.random.uniform(1000, 5000),
    }

    if additional_data:
        base_result.update(additional_data)

    return base_result

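
# Illustrative sketch, not part of the original helpers: building divergent
# agent results with create_mock_agent_result, mirroring the shape of
# EdgeCaseFixtures.conflicting_agent_results; the _example_* name is
# hypothetical.
def _example_conflicting_agent_results() -> None:
    """Two agents disagree while sharing the common result schema."""
    market = create_mock_agent_result("market", confidence=0.85, recommendation="BUY")
    technical = create_mock_agent_result(
        "technical", confidence=0.78, recommendation="SELL"
    )
    assert market["recommendation"] != technical["recommendation"]
    assert {"status", "agent_type", "confidence_score"} <= market.keys()
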

def create_realistic_stock_data(
    symbol: str = "AAPL", price: float = 185.0, volume: int = 50_000_000
) -> dict[str, Any]:
    """Create realistic stock data for testing."""
    return {
        "symbol": symbol,
        "current_price": price,
        "volume": volume,
        "market_cap": 2_850_000_000_000,  # $2.85T for AAPL
        "pe_ratio": 28.5,
        "dividend_yield": 0.005,
        "beta": 1.1,
        "52_week_high": 198.23,
        "52_week_low": 164.08,
        "average_volume": 48_000_000,
        "sector": "Technology",
        "industry": "Consumer Electronics",
    }


# Export main classes for easy importing
__all__ = [
    "MockLLMResponses",
    "MockExaResponses",
    "MockTavilyResponses",
    "MockMarketData",
    "TestQueries",
    "PersonaFixtures",
    "EdgeCaseFixtures",
    "create_mock_llm_with_responses",
    "create_mock_agent_result",
    "create_realistic_stock_data",
]

```