This is page 27 of 39. Use http://codebase.md/wshobson/maverick-mcp?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .dockerignore
├── .env.example
├── .github
│ ├── dependabot.yml
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── bug_report.md
│ │ ├── config.yml
│ │ ├── feature_request.md
│ │ ├── question.md
│ │ └── security_report.md
│ ├── pull_request_template.md
│ └── workflows
│ ├── claude-code-review.yml
│ └── claude.yml
├── .gitignore
├── .python-version
├── .vscode
│ ├── launch.json
│ └── settings.json
├── alembic
│ ├── env.py
│ ├── script.py.mako
│ └── versions
│ ├── 001_initial_schema.py
│ ├── 003_add_performance_indexes.py
│ ├── 006_rename_metadata_columns.py
│ ├── 008_performance_optimization_indexes.py
│ ├── 009_rename_to_supply_demand.py
│ ├── 010_self_contained_schema.py
│ ├── 011_remove_proprietary_terms.py
│ ├── 013_add_backtest_persistence_models.py
│ ├── 014_add_portfolio_models.py
│ ├── 08e3945a0c93_merge_heads.py
│ ├── 9374a5c9b679_merge_heads_for_testing.py
│ ├── abf9b9afb134_merge_multiple_heads.py
│ ├── adda6d3fd84b_merge_proprietary_terms_removal_with_.py
│ ├── e0c75b0bdadb_fix_financial_data_precision_only.py
│ ├── f0696e2cac15_add_essential_performance_indexes.py
│ └── fix_database_integrity_issues.py
├── alembic.ini
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── DATABASE_SETUP.md
├── docker-compose.override.yml.example
├── docker-compose.yml
├── Dockerfile
├── docs
│ ├── api
│ │ └── backtesting.md
│ ├── BACKTESTING.md
│ ├── COST_BASIS_SPECIFICATION.md
│ ├── deep_research_agent.md
│ ├── exa_research_testing_strategy.md
│ ├── PORTFOLIO_PERSONALIZATION_PLAN.md
│ ├── PORTFOLIO.md
│ ├── SETUP_SELF_CONTAINED.md
│ └── speed_testing_framework.md
├── examples
│ ├── complete_speed_validation.py
│ ├── deep_research_integration.py
│ ├── llm_optimization_example.py
│ ├── llm_speed_demo.py
│ ├── monitoring_example.py
│ ├── parallel_research_example.py
│ ├── speed_optimization_demo.py
│ └── timeout_fix_demonstration.py
├── LICENSE
├── Makefile
├── MANIFEST.in
├── maverick_mcp
│ ├── __init__.py
│ ├── agents
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── circuit_breaker.py
│ │ ├── deep_research.py
│ │ ├── market_analysis.py
│ │ ├── optimized_research.py
│ │ ├── supervisor.py
│ │ └── technical_analysis.py
│ ├── api
│ │ ├── __init__.py
│ │ ├── api_server.py
│ │ ├── connection_manager.py
│ │ ├── dependencies
│ │ │ ├── __init__.py
│ │ │ ├── stock_analysis.py
│ │ │ └── technical_analysis.py
│ │ ├── error_handling.py
│ │ ├── inspector_compatible_sse.py
│ │ ├── inspector_sse.py
│ │ ├── middleware
│ │ │ ├── error_handling.py
│ │ │ ├── mcp_logging.py
│ │ │ ├── rate_limiting_enhanced.py
│ │ │ └── security.py
│ │ ├── openapi_config.py
│ │ ├── routers
│ │ │ ├── __init__.py
│ │ │ ├── agents.py
│ │ │ ├── backtesting.py
│ │ │ ├── data_enhanced.py
│ │ │ ├── data.py
│ │ │ ├── health_enhanced.py
│ │ │ ├── health_tools.py
│ │ │ ├── health.py
│ │ │ ├── intelligent_backtesting.py
│ │ │ ├── introspection.py
│ │ │ ├── mcp_prompts.py
│ │ │ ├── monitoring.py
│ │ │ ├── news_sentiment_enhanced.py
│ │ │ ├── performance.py
│ │ │ ├── portfolio.py
│ │ │ ├── research.py
│ │ │ ├── screening_ddd.py
│ │ │ ├── screening_parallel.py
│ │ │ ├── screening.py
│ │ │ ├── technical_ddd.py
│ │ │ ├── technical_enhanced.py
│ │ │ ├── technical.py
│ │ │ └── tool_registry.py
│ │ ├── server.py
│ │ ├── services
│ │ │ ├── __init__.py
│ │ │ ├── base_service.py
│ │ │ ├── market_service.py
│ │ │ ├── portfolio_service.py
│ │ │ ├── prompt_service.py
│ │ │ └── resource_service.py
│ │ ├── simple_sse.py
│ │ └── utils
│ │ ├── __init__.py
│ │ ├── insomnia_export.py
│ │ └── postman_export.py
│ ├── application
│ │ ├── __init__.py
│ │ ├── commands
│ │ │ └── __init__.py
│ │ ├── dto
│ │ │ ├── __init__.py
│ │ │ └── technical_analysis_dto.py
│ │ ├── queries
│ │ │ ├── __init__.py
│ │ │ └── get_technical_analysis.py
│ │ └── screening
│ │ ├── __init__.py
│ │ ├── dtos.py
│ │ └── queries.py
│ ├── backtesting
│ │ ├── __init__.py
│ │ ├── ab_testing.py
│ │ ├── analysis.py
│ │ ├── batch_processing_stub.py
│ │ ├── batch_processing.py
│ │ ├── model_manager.py
│ │ ├── optimization.py
│ │ ├── persistence.py
│ │ ├── retraining_pipeline.py
│ │ ├── strategies
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── ml
│ │ │ │ ├── __init__.py
│ │ │ │ ├── adaptive.py
│ │ │ │ ├── ensemble.py
│ │ │ │ ├── feature_engineering.py
│ │ │ │ └── regime_aware.py
│ │ │ ├── ml_strategies.py
│ │ │ ├── parser.py
│ │ │ └── templates.py
│ │ ├── strategy_executor.py
│ │ ├── vectorbt_engine.py
│ │ └── visualization.py
│ ├── config
│ │ ├── __init__.py
│ │ ├── constants.py
│ │ ├── database_self_contained.py
│ │ ├── database.py
│ │ ├── llm_optimization_config.py
│ │ ├── logging_settings.py
│ │ ├── plotly_config.py
│ │ ├── security_utils.py
│ │ ├── security.py
│ │ ├── settings.py
│ │ ├── technical_constants.py
│ │ ├── tool_estimation.py
│ │ └── validation.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── technical_analysis.py
│ │ └── visualization.py
│ ├── data
│ │ ├── __init__.py
│ │ ├── cache_manager.py
│ │ ├── cache.py
│ │ ├── django_adapter.py
│ │ ├── health.py
│ │ ├── models.py
│ │ ├── performance.py
│ │ ├── session_management.py
│ │ └── validation.py
│ ├── database
│ │ ├── __init__.py
│ │ ├── base.py
│ │ └── optimization.py
│ ├── dependencies.py
│ ├── domain
│ │ ├── __init__.py
│ │ ├── entities
│ │ │ ├── __init__.py
│ │ │ └── stock_analysis.py
│ │ ├── events
│ │ │ └── __init__.py
│ │ ├── portfolio.py
│ │ ├── screening
│ │ │ ├── __init__.py
│ │ │ ├── entities.py
│ │ │ ├── services.py
│ │ │ └── value_objects.py
│ │ ├── services
│ │ │ ├── __init__.py
│ │ │ └── technical_analysis_service.py
│ │ ├── stock_analysis
│ │ │ ├── __init__.py
│ │ │ └── stock_analysis_service.py
│ │ └── value_objects
│ │ ├── __init__.py
│ │ └── technical_indicators.py
│ ├── exceptions.py
│ ├── infrastructure
│ │ ├── __init__.py
│ │ ├── cache
│ │ │ └── __init__.py
│ │ ├── caching
│ │ │ ├── __init__.py
│ │ │ └── cache_management_service.py
│ │ ├── connection_manager.py
│ │ ├── data_fetching
│ │ │ ├── __init__.py
│ │ │ └── stock_data_service.py
│ │ ├── health
│ │ │ ├── __init__.py
│ │ │ └── health_checker.py
│ │ ├── persistence
│ │ │ ├── __init__.py
│ │ │ └── stock_repository.py
│ │ ├── providers
│ │ │ └── __init__.py
│ │ ├── screening
│ │ │ ├── __init__.py
│ │ │ └── repositories.py
│ │ └── sse_optimizer.py
│ ├── langchain_tools
│ │ ├── __init__.py
│ │ ├── adapters.py
│ │ └── registry.py
│ ├── logging_config.py
│ ├── memory
│ │ ├── __init__.py
│ │ └── stores.py
│ ├── monitoring
│ │ ├── __init__.py
│ │ ├── health_check.py
│ │ ├── health_monitor.py
│ │ ├── integration_example.py
│ │ ├── metrics.py
│ │ ├── middleware.py
│ │ └── status_dashboard.py
│ ├── providers
│ │ ├── __init__.py
│ │ ├── dependencies.py
│ │ ├── factories
│ │ │ ├── __init__.py
│ │ │ ├── config_factory.py
│ │ │ └── provider_factory.py
│ │ ├── implementations
│ │ │ ├── __init__.py
│ │ │ ├── cache_adapter.py
│ │ │ ├── macro_data_adapter.py
│ │ │ ├── market_data_adapter.py
│ │ │ ├── persistence_adapter.py
│ │ │ └── stock_data_adapter.py
│ │ ├── interfaces
│ │ │ ├── __init__.py
│ │ │ ├── cache.py
│ │ │ ├── config.py
│ │ │ ├── macro_data.py
│ │ │ ├── market_data.py
│ │ │ ├── persistence.py
│ │ │ └── stock_data.py
│ │ ├── llm_factory.py
│ │ ├── macro_data.py
│ │ ├── market_data.py
│ │ ├── mocks
│ │ │ ├── __init__.py
│ │ │ ├── mock_cache.py
│ │ │ ├── mock_config.py
│ │ │ ├── mock_macro_data.py
│ │ │ ├── mock_market_data.py
│ │ │ ├── mock_persistence.py
│ │ │ └── mock_stock_data.py
│ │ ├── openrouter_provider.py
│ │ ├── optimized_screening.py
│ │ ├── optimized_stock_data.py
│ │ └── stock_data.py
│ ├── README.md
│ ├── tests
│ │ ├── __init__.py
│ │ ├── README_INMEMORY_TESTS.md
│ │ ├── test_cache_debug.py
│ │ ├── test_fixes_validation.py
│ │ ├── test_in_memory_routers.py
│ │ ├── test_in_memory_server.py
│ │ ├── test_macro_data_provider.py
│ │ ├── test_mailgun_email.py
│ │ ├── test_market_calendar_caching.py
│ │ ├── test_mcp_tool_fixes_pytest.py
│ │ ├── test_mcp_tool_fixes.py
│ │ ├── test_mcp_tools.py
│ │ ├── test_models_functional.py
│ │ ├── test_server.py
│ │ ├── test_stock_data_enhanced.py
│ │ ├── test_stock_data_provider.py
│ │ └── test_technical_analysis.py
│ ├── tools
│ │ ├── __init__.py
│ │ ├── performance_monitoring.py
│ │ ├── portfolio_manager.py
│ │ ├── risk_management.py
│ │ └── sentiment_analysis.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── agent_errors.py
│ │ ├── batch_processing.py
│ │ ├── cache_warmer.py
│ │ ├── circuit_breaker_decorators.py
│ │ ├── circuit_breaker_services.py
│ │ ├── circuit_breaker.py
│ │ ├── data_chunking.py
│ │ ├── database_monitoring.py
│ │ ├── debug_utils.py
│ │ ├── fallback_strategies.py
│ │ ├── llm_optimization.py
│ │ ├── logging_example.py
│ │ ├── logging_init.py
│ │ ├── logging.py
│ │ ├── mcp_logging.py
│ │ ├── memory_profiler.py
│ │ ├── monitoring_middleware.py
│ │ ├── monitoring.py
│ │ ├── orchestration_logging.py
│ │ ├── parallel_research.py
│ │ ├── parallel_screening.py
│ │ ├── quick_cache.py
│ │ ├── resource_manager.py
│ │ ├── shutdown.py
│ │ ├── stock_helpers.py
│ │ ├── structured_logger.py
│ │ ├── tool_monitoring.py
│ │ ├── tracing.py
│ │ └── yfinance_pool.py
│ ├── validation
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── data.py
│ │ ├── middleware.py
│ │ ├── portfolio.py
│ │ ├── responses.py
│ │ ├── screening.py
│ │ └── technical.py
│ └── workflows
│ ├── __init__.py
│ ├── agents
│ │ ├── __init__.py
│ │ ├── market_analyzer.py
│ │ ├── optimizer_agent.py
│ │ ├── strategy_selector.py
│ │ └── validator_agent.py
│ ├── backtesting_workflow.py
│ └── state.py
├── PLANS.md
├── pyproject.toml
├── pyrightconfig.json
├── README.md
├── scripts
│ ├── dev.sh
│ ├── INSTALLATION_GUIDE.md
│ ├── load_example.py
│ ├── load_market_data.py
│ ├── load_tiingo_data.py
│ ├── migrate_db.py
│ ├── README_TIINGO_LOADER.md
│ ├── requirements_tiingo.txt
│ ├── run_stock_screening.py
│ ├── run-migrations.sh
│ ├── seed_db.py
│ ├── seed_sp500.py
│ ├── setup_database.sh
│ ├── setup_self_contained.py
│ ├── setup_sp500_database.sh
│ ├── test_seeded_data.py
│ ├── test_tiingo_loader.py
│ ├── tiingo_config.py
│ └── validate_setup.py
├── SECURITY.md
├── server.json
├── setup.py
├── tests
│ ├── __init__.py
│ ├── conftest.py
│ ├── core
│ │ └── test_technical_analysis.py
│ ├── data
│ │ └── test_portfolio_models.py
│ ├── domain
│ │ ├── conftest.py
│ │ ├── test_portfolio_entities.py
│ │ └── test_technical_analysis_service.py
│ ├── fixtures
│ │ └── orchestration_fixtures.py
│ ├── integration
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── README.md
│ │ ├── run_integration_tests.sh
│ │ ├── test_api_technical.py
│ │ ├── test_chaos_engineering.py
│ │ ├── test_config_management.py
│ │ ├── test_full_backtest_workflow_advanced.py
│ │ ├── test_full_backtest_workflow.py
│ │ ├── test_high_volume.py
│ │ ├── test_mcp_tools.py
│ │ ├── test_orchestration_complete.py
│ │ ├── test_portfolio_persistence.py
│ │ ├── test_redis_cache.py
│ │ ├── test_security_integration.py.disabled
│ │ └── vcr_setup.py
│ ├── performance
│ │ ├── __init__.py
│ │ ├── test_benchmarks.py
│ │ ├── test_load.py
│ │ ├── test_profiling.py
│ │ └── test_stress.py
│ ├── providers
│ │ └── test_stock_data_simple.py
│ ├── README.md
│ ├── test_agents_router_mcp.py
│ ├── test_backtest_persistence.py
│ ├── test_cache_management_service.py
│ ├── test_cache_serialization.py
│ ├── test_circuit_breaker.py
│ ├── test_database_pool_config_simple.py
│ ├── test_database_pool_config.py
│ ├── test_deep_research_functional.py
│ ├── test_deep_research_integration.py
│ ├── test_deep_research_parallel_execution.py
│ ├── test_error_handling.py
│ ├── test_event_loop_integrity.py
│ ├── test_exa_research_integration.py
│ ├── test_exception_hierarchy.py
│ ├── test_financial_search.py
│ ├── test_graceful_shutdown.py
│ ├── test_integration_simple.py
│ ├── test_langgraph_workflow.py
│ ├── test_market_data_async.py
│ ├── test_market_data_simple.py
│ ├── test_mcp_orchestration_functional.py
│ ├── test_ml_strategies.py
│ ├── test_optimized_research_agent.py
│ ├── test_orchestration_integration.py
│ ├── test_orchestration_logging.py
│ ├── test_orchestration_tools_simple.py
│ ├── test_parallel_research_integration.py
│ ├── test_parallel_research_orchestrator.py
│ ├── test_parallel_research_performance.py
│ ├── test_performance_optimizations.py
│ ├── test_production_validation.py
│ ├── test_provider_architecture.py
│ ├── test_rate_limiting_enhanced.py
│ ├── test_runner_validation.py
│ ├── test_security_comprehensive.py.disabled
│ ├── test_security_cors.py
│ ├── test_security_enhancements.py.disabled
│ ├── test_security_headers.py
│ ├── test_security_penetration.py
│ ├── test_session_management.py
│ ├── test_speed_optimization_validation.py
│ ├── test_stock_analysis_dependencies.py
│ ├── test_stock_analysis_service.py
│ ├── test_stock_data_fetching_service.py
│ ├── test_supervisor_agent.py
│ ├── test_supervisor_functional.py
│ ├── test_tool_estimation_config.py
│ ├── test_visualization.py
│ └── utils
│ ├── test_agent_errors.py
│ ├── test_logging.py
│ ├── test_parallel_screening.py
│ └── test_quick_cache.py
├── tools
│ ├── check_orchestration_config.py
│ ├── experiments
│ │ ├── validation_examples.py
│ │ └── validation_fixed.py
│ ├── fast_dev.sh
│ ├── hot_reload.py
│ ├── quick_test.py
│ └── templates
│ ├── new_router_template.py
│ ├── new_tool_template.py
│ ├── screening_strategy_template.py
│ └── test_template.py
└── uv.lock
```
# Files
--------------------------------------------------------------------------------
/tests/integration/test_high_volume.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | High-Volume Integration Tests for Production Scenarios.
3 |
4 | This test suite covers:
5 | - Testing with 100+ symbols
6 | - Testing with years of historical data
7 | - Memory management under load
8 | - Concurrent user scenarios
9 | - Database performance under high load
10 | - Cache efficiency with large datasets
11 | - API rate limiting and throttling
12 | """
13 |
14 | import asyncio
15 | import gc
16 | import logging
17 | import os
18 | import random
19 | import time
20 | from datetime import datetime, timedelta
21 | from unittest.mock import Mock
22 |
23 | import numpy as np
24 | import pandas as pd
25 | import psutil
26 | import pytest
27 |
28 | from maverick_mcp.backtesting import VectorBTEngine
29 | from maverick_mcp.backtesting.persistence import BacktestPersistenceManager
30 | from maverick_mcp.backtesting.strategies import STRATEGY_TEMPLATES
31 |
32 | logger = logging.getLogger(__name__)
33 |
34 | # High volume test parameters
35 | LARGE_SYMBOL_SET = [
36 | # Technology
37 | "AAPL",
38 | "MSFT",
39 | "GOOGL",
40 | "AMZN",
41 | "META",
42 | "TSLA",
43 | "NVDA",
44 | "ADBE",
45 | "CRM",
46 | "ORCL",
47 | "NFLX",
48 | "INTC",
49 | "AMD",
50 | "QCOM",
51 | "AVGO",
52 | "TXN",
53 | "MU",
54 | "AMAT",
55 | "LRCX",
56 | "KLAC",
57 | # Finance
58 | "JPM",
59 | "BAC",
60 | "WFC",
61 | "C",
62 | "GS",
63 | "MS",
64 | "AXP",
65 | "BRK-B",
66 | "BLK",
67 | "SPGI",
68 | "CME",
69 | "ICE",
70 | "MCO",
71 | "COF",
72 | "USB",
73 | "TFC",
74 | "PNC",
75 | "SCHW",
76 | "CB",
77 | "AIG",
78 | # Healthcare
79 | "JNJ",
80 | "PFE",
81 | "ABT",
82 | "MRK",
83 | "TMO",
84 | "DHR",
85 | "BMY",
86 | "ABBV",
87 | "AMGN",
88 | "GILD",
89 | "BIIB",
90 | "REGN",
91 | "VRTX",
92 | "ISRG",
93 | "SYK",
94 | "BSX",
95 | "MDT",
96 | "EW",
97 | "HOLX",
98 | "RMD",
99 | # Consumer
100 | "WMT",
101 | "PG",
102 | "KO",
103 | "PEP",
104 | "COST",
105 | "HD",
106 | "MCD",
107 | "NKE",
108 | "SBUX",
109 | "TGT",
110 | "LOW",
111 | "DIS",
112 | "CMCSA",
113 | "VZ",
114 | "T",
115 | "TMUS",
116 | "CVX",
117 | "XOM",
118 | "UNH",
119 | "CVS",
120 | # Industrials
121 | "BA",
122 | "CAT",
123 | "DE",
124 | "GE",
125 | "HON",
126 | "MMM",
127 | "LMT",
128 | "RTX",
129 | "UNP",
130 | "UPS",
131 | "FDX",
132 | "WM",
133 | "EMR",
134 | "ETN",
135 | "PH",
136 | "CMI",
137 | "PCAR",
138 | "ROK",
139 | "DOV",
140 | "ITW",
141 | # Extended set for 100+ symbols
142 | "F",
143 | "GM",
144 | "FORD",
145 | "RIVN",
146 | "LCID",
147 | "PLTR",
148 | "SNOW",
149 | "ZM",
150 | "DOCU",
151 | "OKTA",
152 | ]
153 |
154 | STRATEGIES_FOR_VOLUME_TEST = ["sma_cross", "rsi", "macd", "bollinger", "momentum"]
155 |
156 |
157 | class TestHighVolumeIntegration:
158 | """High-volume integration tests for production scenarios."""
159 |
160 | @pytest.fixture
161 | async def high_volume_data_provider(self):
162 | """Create data provider with large dataset simulation."""
163 | provider = Mock()
164 |
165 | def generate_multi_year_data(symbol: str, years: int = 3) -> pd.DataFrame:
166 | """Generate multi-year realistic data for a symbol."""
167 | # Generate deterministic but varied data based on symbol hash
168 | symbol_seed = hash(symbol) % 10000
169 | np.random.seed(symbol_seed)
170 |
171 | # Create 3 years of daily data
172 | start_date = datetime.now() - timedelta(days=years * 365)
173 | dates = pd.date_range(
174 | start=start_date, periods=years * 252, freq="B"
175 | ) # Business days
176 |
177 | # Generate realistic price movements
178 | base_price = 50 + (symbol_seed % 200) # Base price $50-$250
179 | returns = np.random.normal(0.0005, 0.02, len(dates)) # Daily returns
180 |
181 | # Add some trend and volatility clustering
182 | trend = (
183 | np.sin(np.arange(len(dates)) / 252 * 2 * np.pi) * 0.001
184 | ) # Annual cycle
185 | returns += trend
186 |
187 | # Generate prices
188 | prices = base_price * np.cumprod(1 + returns)
189 |
190 | # Create OHLCV data
191 | high_mult = np.random.uniform(1.005, 1.03, len(dates))
192 | low_mult = np.random.uniform(0.97, 0.995, len(dates))
193 | open_mult = np.random.uniform(0.995, 1.005, len(dates))
194 |
195 | volumes = np.random.randint(100000, 10000000, len(dates))
196 |
197 | data = pd.DataFrame(
198 | {
199 | "Open": prices * open_mult,
200 | "High": prices * high_mult,
201 | "Low": prices * low_mult,
202 | "Close": prices,
203 | "Volume": volumes,
204 | "Adj Close": prices,
205 | },
206 | index=dates,
207 | )
208 |
209 | # Ensure OHLC constraints
210 | data["High"] = np.maximum(
211 | data["High"], np.maximum(data["Open"], data["Close"])
212 | )
213 | data["Low"] = np.minimum(
214 | data["Low"], np.minimum(data["Open"], data["Close"])
215 | )
216 |
217 | return data
218 |
219 | provider.get_stock_data.side_effect = generate_multi_year_data
220 | return provider
221 |
222 | async def test_large_symbol_set_backtesting(
223 | self, high_volume_data_provider, benchmark_timer
224 | ):
225 | """Test backtesting with 100+ symbols."""
226 | symbols = LARGE_SYMBOL_SET[:100] # Use first 100 symbols
227 | strategy = "sma_cross"
228 |
229 | engine = VectorBTEngine(data_provider=high_volume_data_provider)
230 | parameters = STRATEGY_TEMPLATES[strategy]["parameters"]
231 |
232 | results = []
233 | failed_symbols = []
234 |
235 | # Track memory usage
236 | process = psutil.Process(os.getpid())
237 | initial_memory = process.memory_info().rss / 1024 / 1024 # MB
238 |
239 | with benchmark_timer() as timer:
240 | # Process symbols in batches to manage memory
241 | batch_size = 20
242 | for i in range(0, len(symbols), batch_size):
243 | batch_symbols = symbols[i : i + batch_size]
244 |
245 | # Process batch
246 | batch_tasks = []
247 | for symbol in batch_symbols:
248 | task = engine.run_backtest(
249 | symbol=symbol,
250 | strategy_type=strategy,
251 | parameters=parameters,
252 | start_date="2022-01-01",
253 | end_date="2023-12-31",
254 | )
255 | batch_tasks.append((symbol, task))
256 |
257 | # Execute batch concurrently
258 | batch_results = await asyncio.gather(
259 | *[task for _, task in batch_tasks], return_exceptions=True
260 | )
261 |
262 | # Process results
263 | for _j, (symbol, result) in enumerate(
264 | zip(batch_symbols, batch_results, strict=False)
265 | ):
266 | if isinstance(result, Exception):
267 | failed_symbols.append(symbol)
268 | logger.error(f"✗ {symbol} failed: {result}")
269 | else:
270 | results.append(result)
271 | if len(results) % 10 == 0:
272 | logger.info(f"Processed {len(results)} symbols...")
273 |
274 | # Force garbage collection after each batch
275 | gc.collect()
276 |
277 | # Check memory usage
278 | current_memory = process.memory_info().rss / 1024 / 1024
279 | memory_growth = current_memory - initial_memory
280 |
281 | if memory_growth > 2000: # More than 2GB growth
282 | logger.warning(f"High memory usage detected: {memory_growth:.1f}MB")
283 |
284 | execution_time = timer.elapsed
285 | final_memory = process.memory_info().rss / 1024 / 1024
286 | total_memory_growth = final_memory - initial_memory
287 |
288 | # Performance assertions
289 | success_rate = len(results) / len(symbols)
290 | assert success_rate >= 0.85, f"Success rate too low: {success_rate:.1%}"
291 | assert execution_time < 1800, (
292 | f"Execution time too long: {execution_time:.1f}s"
293 | ) # 30 minutes max
294 | assert total_memory_growth < 3000, (
295 | f"Memory growth too high: {total_memory_growth:.1f}MB"
296 | ) # Max 3GB growth
297 |
298 | # Calculate performance metrics
299 | avg_execution_time = execution_time / len(symbols)
300 | throughput = len(results) / execution_time # Backtests per second
301 |
302 | logger.info(
303 | f"Large Symbol Set Test Results:\n"
304 | f" • Total Symbols: {len(symbols)}\n"
305 | f" • Successful: {len(results)}\n"
306 | f" • Failed: {len(failed_symbols)}\n"
307 | f" • Success Rate: {success_rate:.1%}\n"
308 | f" • Total Execution Time: {execution_time:.1f}s\n"
309 | f" • Avg Time per Symbol: {avg_execution_time:.2f}s\n"
310 | f" • Throughput: {throughput:.2f} backtests/second\n"
311 | f" • Memory Growth: {total_memory_growth:.1f}MB\n"
312 | f" • Failed Symbols: {failed_symbols[:10]}{'...' if len(failed_symbols) > 10 else ''}"
313 | )
314 |
315 | return {
316 | "symbols_processed": len(results),
317 | "execution_time": execution_time,
318 | "throughput": throughput,
319 | "memory_growth": total_memory_growth,
320 | "success_rate": success_rate,
321 | }
322 |
323 | async def test_multi_year_historical_data(
324 | self, high_volume_data_provider, benchmark_timer
325 | ):
326 | """Test with years of historical data (high data volume)."""
327 | symbols = ["AAPL", "GOOGL", "MSFT", "AMZN", "TSLA"]
328 | strategy = "sma_cross"
329 |
330 | engine = VectorBTEngine(data_provider=high_volume_data_provider)
331 | parameters = STRATEGY_TEMPLATES[strategy]["parameters"]
332 |
333 | # Test with different time periods
334 | time_periods = [
335 | ("1_year", "2023-01-01", "2023-12-31"),
336 | ("2_years", "2022-01-01", "2023-12-31"),
337 | ("3_years", "2021-01-01", "2023-12-31"),
338 | ("5_years", "2019-01-01", "2023-12-31"),
339 | ]
340 |
341 | period_results = {}
342 |
343 | for period_name, start_date, end_date in time_periods:
344 | with benchmark_timer() as timer:
345 | period_data = []
346 |
347 | for symbol in symbols:
348 | try:
349 | result = await engine.run_backtest(
350 | symbol=symbol,
351 | strategy_type=strategy,
352 | parameters=parameters,
353 | start_date=start_date,
354 | end_date=end_date,
355 | )
356 | period_data.append(result)
357 |
358 | except Exception as e:
359 | logger.error(f"Failed {symbol} for {period_name}: {e}")
360 |
361 | execution_time = timer.elapsed
362 |
363 | # Calculate average data points processed
364 | avg_data_points = np.mean(
365 | [len(r.get("equity_curve", [])) for r in period_data]
366 | )
367 | data_throughput = avg_data_points * len(period_data) / execution_time
368 |
369 | period_results[period_name] = {
370 | "execution_time": execution_time,
371 | "symbols_processed": len(period_data),
372 | "avg_data_points": avg_data_points,
373 | "data_throughput": data_throughput,
374 | }
375 |
376 | logger.info(
377 | f"{period_name.upper()} Period Results:\n"
378 | f" • Execution Time: {execution_time:.1f}s\n"
379 | f" • Avg Data Points: {avg_data_points:.0f}\n"
380 | f" • Data Throughput: {data_throughput:.0f} points/second"
381 | )
382 |
383 | # Validate performance scales reasonably with data size
384 | one_year_time = period_results["1_year"]["execution_time"]
385 | three_year_time = period_results["3_years"]["execution_time"]
386 |
387 | # 3 years should not take more than 5x the time of 1 year (allow for overhead)
388 | time_scaling = three_year_time / one_year_time
389 | assert time_scaling < 5.0, f"Time scaling too poor: {time_scaling:.1f}x"
390 |
391 | return period_results
392 |
393 | async def test_concurrent_user_scenarios(
394 | self, high_volume_data_provider, benchmark_timer
395 | ):
396 | """Test concurrent user scenarios with multiple simultaneous backtests."""
397 | symbols = LARGE_SYMBOL_SET[:50]
398 | strategies = STRATEGIES_FOR_VOLUME_TEST
399 |
400 | # Simulate different user scenarios
401 | user_scenarios = [
402 | {
403 | "user_id": f"user_{i}",
404 | "symbols": random.sample(symbols, 5),
405 | "strategy": random.choice(strategies),
406 | "start_date": "2022-01-01",
407 | "end_date": "2023-12-31",
408 | }
409 | for i in range(20) # Simulate 20 concurrent users
410 | ]
411 |
412 | async def simulate_user_session(scenario):
413 | """Simulate a single user session."""
414 | engine = VectorBTEngine(data_provider=high_volume_data_provider)
415 | parameters = STRATEGY_TEMPLATES[scenario["strategy"]]["parameters"]
416 |
417 | user_results = []
418 | session_start = time.time()
419 |
420 | for symbol in scenario["symbols"]:
421 | try:
422 | result = await engine.run_backtest(
423 | symbol=symbol,
424 | strategy_type=scenario["strategy"],
425 | parameters=parameters,
426 | start_date=scenario["start_date"],
427 | end_date=scenario["end_date"],
428 | )
429 | user_results.append(result)
430 |
431 | except Exception as e:
432 | logger.error(f"User {scenario['user_id']} failed on {symbol}: {e}")
433 |
434 | session_time = time.time() - session_start
435 |
436 | return {
437 | "user_id": scenario["user_id"],
438 | "results": user_results,
439 | "session_time": session_time,
440 | "symbols_processed": len(user_results),
441 | "success_rate": len(user_results) / len(scenario["symbols"]),
442 | }
443 |
444 | # Execute all user sessions concurrently
445 | with benchmark_timer() as timer:
446 | # Use semaphore to control concurrency
447 | semaphore = asyncio.Semaphore(10) # Max 10 concurrent sessions
448 |
449 | async def run_with_semaphore(scenario):
450 | async with semaphore:
451 | return await simulate_user_session(scenario)
452 |
453 | session_results = await asyncio.gather(
454 | *[run_with_semaphore(scenario) for scenario in user_scenarios],
455 | return_exceptions=True,
456 | )
457 |
458 | total_execution_time = timer.elapsed
459 |
460 | # Analyze results
461 | successful_sessions = [r for r in session_results if isinstance(r, dict)]
462 | failed_sessions = len(session_results) - len(successful_sessions)
463 |
464 | total_backtests = sum(r["symbols_processed"] for r in successful_sessions)
465 | avg_session_time = np.mean([r["session_time"] for r in successful_sessions])
466 | avg_success_rate = np.mean([r["success_rate"] for r in successful_sessions])
467 |
468 | # Performance assertions
469 | session_success_rate = len(successful_sessions) / len(session_results)
470 | assert session_success_rate >= 0.8, (
471 | f"Session success rate too low: {session_success_rate:.1%}"
472 | )
473 | assert avg_success_rate >= 0.8, (
474 | f"Average backtest success rate too low: {avg_success_rate:.1%}"
475 | )
476 | assert total_execution_time < 600, (
477 | f"Total execution time too long: {total_execution_time:.1f}s"
478 | ) # 10 minutes max
479 |
480 | concurrent_throughput = total_backtests / total_execution_time
481 |
482 | logger.info(
483 | f"Concurrent User Scenarios Results:\n"
484 | f" • Total Users: {len(user_scenarios)}\n"
485 | f" • Successful Sessions: {len(successful_sessions)}\n"
486 | f" • Failed Sessions: {failed_sessions}\n"
487 | f" • Session Success Rate: {session_success_rate:.1%}\n"
488 | f" • Total Backtests: {total_backtests}\n"
489 | f" • Avg Session Time: {avg_session_time:.1f}s\n"
490 | f" • Avg Backtest Success Rate: {avg_success_rate:.1%}\n"
491 | f" • Total Execution Time: {total_execution_time:.1f}s\n"
492 | f" • Concurrent Throughput: {concurrent_throughput:.2f} backtests/second"
493 | )
494 |
495 | return {
496 | "session_success_rate": session_success_rate,
497 | "avg_success_rate": avg_success_rate,
498 | "concurrent_throughput": concurrent_throughput,
499 | "total_execution_time": total_execution_time,
500 | }
501 |
502 | async def test_database_performance_under_load(
503 | self, high_volume_data_provider, db_session, benchmark_timer
504 | ):
505 | """Test database performance under high load."""
506 | symbols = LARGE_SYMBOL_SET[:30] # 30 symbols for DB test
507 | strategy = "sma_cross"
508 |
509 | engine = VectorBTEngine(data_provider=high_volume_data_provider)
510 | parameters = STRATEGY_TEMPLATES[strategy]["parameters"]
511 |
512 | # Run backtests and save to database
513 | backtest_results = []
514 |
515 | with benchmark_timer() as timer:
516 | # Generate backtest results
517 | for symbol in symbols:
518 | try:
519 | result = await engine.run_backtest(
520 | symbol=symbol,
521 | strategy_type=strategy,
522 | parameters=parameters,
523 | start_date="2023-01-01",
524 | end_date="2023-12-31",
525 | )
526 | backtest_results.append(result)
527 | except Exception as e:
528 | logger.error(f"Backtest failed for {symbol}: {e}")
529 |
530 | backtest_generation_time = timer.elapsed
531 |
532 | # Test database operations under load
533 | with benchmark_timer() as db_timer:
534 | with BacktestPersistenceManager(session=db_session) as persistence:
535 | saved_ids = []
536 |
537 | # Batch save results
538 | for result in backtest_results:
539 | try:
540 | backtest_id = persistence.save_backtest_result(
541 | vectorbt_results=result,
542 | execution_time=2.0,
543 | notes=f"High volume test - {result['symbol']}",
544 | )
545 | saved_ids.append(backtest_id)
546 | except Exception as e:
547 | logger.error(f"Save failed for {result['symbol']}: {e}")
548 |
549 | # Test batch retrieval
550 | retrieved_results = []
551 | for backtest_id in saved_ids:
552 | try:
553 | retrieved = persistence.get_backtest_by_id(backtest_id)
554 | if retrieved:
555 | retrieved_results.append(retrieved)
556 | except Exception as e:
557 | logger.error(f"Retrieval failed for {backtest_id}: {e}")
558 |
559 | # Test queries under load
560 | strategy_results = persistence.get_backtests_by_strategy(strategy)
561 |
562 | db_operation_time = db_timer.elapsed
563 |
564 | # Performance assertions
565 | save_success_rate = len(saved_ids) / len(backtest_results)
566 | retrieval_success_rate = (
567 | len(retrieved_results) / len(saved_ids) if saved_ids else 0
568 | )
569 |
570 | assert save_success_rate >= 0.95, (
571 | f"Database save success rate too low: {save_success_rate:.1%}"
572 | )
573 | assert retrieval_success_rate >= 0.95, (
574 | f"Database retrieval success rate too low: {retrieval_success_rate:.1%}"
575 | )
576 | assert db_operation_time < 300, (
577 | f"Database operations too slow: {db_operation_time:.1f}s"
578 | ) # 5 minutes max
579 |
580 | # Calculate database performance metrics
581 | save_throughput = len(saved_ids) / db_operation_time
582 | logger.info(
583 | f"Database Performance Under Load Results:\n"
584 | f" • Backtest Generation: {backtest_generation_time:.1f}s\n"
585 | f" • Database Operations: {db_operation_time:.1f}s\n"
586 | f" • Backtests Generated: {len(backtest_results)}\n"
587 | f" • Records Saved: {len(saved_ids)}\n"
588 | f" • Records Retrieved: {len(retrieved_results)}\n"
589 | f" • Save Success Rate: {save_success_rate:.1%}\n"
590 | f" • Retrieval Success Rate: {retrieval_success_rate:.1%}\n"
591 | f" • Save Throughput: {save_throughput:.2f} saves/second\n"
592 | f" • Query Results: {len(strategy_results)} records"
593 | )
594 |
595 | return {
596 | "save_success_rate": save_success_rate,
597 | "retrieval_success_rate": retrieval_success_rate,
598 | "save_throughput": save_throughput,
599 | "db_operation_time": db_operation_time,
600 | }
601 |
602 | async def test_memory_management_large_datasets(
603 | self, high_volume_data_provider, benchmark_timer
604 | ):
605 | """Test memory management with large datasets."""
606 | symbols = LARGE_SYMBOL_SET[:25] # 25 symbols for memory test
607 | strategies = STRATEGIES_FOR_VOLUME_TEST
608 |
609 | process = psutil.Process(os.getpid())
610 | initial_memory = process.memory_info().rss / 1024 / 1024 # MB
611 | memory_snapshots = []
612 |
613 | engine = VectorBTEngine(data_provider=high_volume_data_provider)
614 |
615 | with benchmark_timer() as timer:
616 | for i, symbol in enumerate(symbols):
617 | for strategy in strategies:
618 | try:
619 | parameters = STRATEGY_TEMPLATES[strategy]["parameters"]
620 |
621 | # Run backtest
622 | await engine.run_backtest(
623 | symbol=symbol,
624 | strategy_type=strategy,
625 | parameters=parameters,
626 | start_date="2021-01-01", # 3 years of data
627 | end_date="2023-12-31",
628 | )
629 |
630 | # Take memory snapshot
631 | current_memory = process.memory_info().rss / 1024 / 1024
632 | memory_snapshots.append(
633 | {
634 | "iteration": i * len(strategies)
635 | + strategies.index(strategy),
636 | "symbol": symbol,
637 | "strategy": strategy,
638 | "memory_mb": current_memory,
639 | "memory_growth": current_memory - initial_memory,
640 | }
641 | )
642 |
643 | # Force periodic garbage collection
644 | if (i * len(strategies) + strategies.index(strategy)) % 10 == 0:
645 | gc.collect()
646 |
647 | except Exception as e:
648 | logger.error(f"Failed {symbol} with {strategy}: {e}")
649 |
650 | execution_time = timer.elapsed
651 | final_memory = process.memory_info().rss / 1024 / 1024
652 | total_memory_growth = final_memory - initial_memory
653 | peak_memory = max(snapshot["memory_mb"] for snapshot in memory_snapshots)
654 |
655 | # Analyze memory patterns
656 | memory_growths = [s["memory_growth"] for s in memory_snapshots]
657 | avg_memory_growth = np.mean(memory_growths)
658 | max_memory_growth = max(memory_growths)
659 |
660 | # Check for memory leaks (memory should not grow linearly with iterations)
661 | if len(memory_snapshots) > 10:
662 | # Linear regression to detect memory leaks
663 | iterations = [s["iteration"] for s in memory_snapshots]
664 | memory_values = [s["memory_growth"] for s in memory_snapshots]
665 |
666 | # Simple linear regression
667 | n = len(iterations)
668 | sum_x = sum(iterations)
669 | sum_y = sum(memory_values)
670 | sum_xy = sum(x * y for x, y in zip(iterations, memory_values, strict=False))
671 | sum_xx = sum(x * x for x in iterations)
672 |
673 | slope = (n * sum_xy - sum_x * sum_y) / (n * sum_xx - sum_x * sum_x)
674 |
675 | # Memory leak detection (slope should be small)
676 | memory_leak_rate = slope # MB per iteration
677 | else:
678 | memory_leak_rate = 0
679 |
680 | # Performance assertions
681 | assert total_memory_growth < 2000, (
682 | f"Total memory growth too high: {total_memory_growth:.1f}MB"
683 | )
684 | assert peak_memory < initial_memory + 2500, (
685 | f"Peak memory too high: {peak_memory:.1f}MB"
686 | )
687 | assert abs(memory_leak_rate) < 5, (
688 | f"Potential memory leak detected: {memory_leak_rate:.2f}MB/iteration"
689 | )
690 |
691 | logger.info(
692 | f"Memory Management Large Datasets Results:\n"
693 | f" • Initial Memory: {initial_memory:.1f}MB\n"
694 | f" • Final Memory: {final_memory:.1f}MB\n"
695 | f" • Total Growth: {total_memory_growth:.1f}MB\n"
696 | f" • Peak Memory: {peak_memory:.1f}MB\n"
697 | f" • Avg Growth: {avg_memory_growth:.1f}MB\n"
698 | f" • Max Growth: {max_memory_growth:.1f}MB\n"
699 | f" • Memory Leak Rate: {memory_leak_rate:.2f}MB/iteration\n"
700 | f" • Execution Time: {execution_time:.1f}s\n"
701 | f" • Iterations: {len(memory_snapshots)}"
702 | )
703 |
704 | return {
705 | "total_memory_growth": total_memory_growth,
706 | "peak_memory": peak_memory,
707 | "memory_leak_rate": memory_leak_rate,
708 | "execution_time": execution_time,
709 | "memory_snapshots": memory_snapshots,
710 | }
711 |
712 | async def test_cache_efficiency_large_dataset(
713 | self, high_volume_data_provider, benchmark_timer
714 | ):
715 | """Test cache efficiency with large datasets."""
716 | # Test cache with repeated access patterns
717 | symbols = LARGE_SYMBOL_SET[:20]
718 | strategy = "sma_cross"
719 |
720 | engine = VectorBTEngine(data_provider=high_volume_data_provider)
721 | parameters = STRATEGY_TEMPLATES[strategy]["parameters"]
722 |
723 | # First pass - populate cache
724 | with benchmark_timer() as timer:
725 | first_pass_results = []
726 | for symbol in symbols:
727 | try:
728 | result = await engine.run_backtest(
729 | symbol=symbol,
730 | strategy_type=strategy,
731 | parameters=parameters,
732 | start_date="2023-01-01",
733 | end_date="2023-12-31",
734 | )
735 | first_pass_results.append(result)
736 | except Exception as e:
737 | logger.error(f"First pass failed for {symbol}: {e}")
738 |
739 | first_pass_time = timer.elapsed
740 |
741 | # Second pass - should benefit from cache
742 | with benchmark_timer() as timer:
743 | second_pass_results = []
744 | for symbol in symbols:
745 | try:
746 | result = await engine.run_backtest(
747 | symbol=symbol,
748 | strategy_type=strategy,
749 | parameters=parameters,
750 | start_date="2023-01-01",
751 | end_date="2023-12-31",
752 | )
753 | second_pass_results.append(result)
754 | except Exception as e:
755 | logger.error(f"Second pass failed for {symbol}: {e}")
756 |
757 | second_pass_time = timer.elapsed
758 |
759 | # Third pass - different parameters (no cache benefit)
760 | modified_parameters = {
761 | **parameters,
762 | "fast_period": parameters.get("fast_period", 10) + 5,
763 | }
764 | with benchmark_timer() as timer:
765 | third_pass_results = []
766 | for symbol in symbols:
767 | try:
768 | result = await engine.run_backtest(
769 | symbol=symbol,
770 | strategy_type=strategy,
771 | parameters=modified_parameters,
772 | start_date="2023-01-01",
773 | end_date="2023-12-31",
774 | )
775 | third_pass_results.append(result)
776 | except Exception as e:
777 | logger.error(f"Third pass failed for {symbol}: {e}")
778 |
779 | third_pass_time = timer.elapsed
780 |
781 | # Calculate cache efficiency metrics
782 | cache_speedup = (
783 | first_pass_time / second_pass_time if second_pass_time > 0 else 1.0
784 | )
785 | no_cache_comparison = (
786 | first_pass_time / third_pass_time if third_pass_time > 0 else 1.0
787 | )
788 |
789 | # Cache hit rate estimation (if second pass is significantly faster)
790 | estimated_cache_hit_rate = max(
791 | 0, min(1, (first_pass_time - second_pass_time) / first_pass_time)
792 | )
793 |
794 | logger.info(
795 | f"Cache Efficiency Large Dataset Results:\n"
796 | f" • First Pass (populate): {first_pass_time:.2f}s ({len(first_pass_results)} symbols)\n"
797 | f" • Second Pass (cached): {second_pass_time:.2f}s ({len(second_pass_results)} symbols)\n"
798 | f" • Third Pass (no cache): {third_pass_time:.2f}s ({len(third_pass_results)} symbols)\n"
799 | f" • Cache Speedup: {cache_speedup:.2f}x\n"
800 | f" • No Cache Comparison: {no_cache_comparison:.2f}x\n"
801 | f" • Estimated Cache Hit Rate: {estimated_cache_hit_rate:.1%}"
802 | )
803 |
804 | return {
805 | "first_pass_time": first_pass_time,
806 | "second_pass_time": second_pass_time,
807 | "third_pass_time": third_pass_time,
808 | "cache_speedup": cache_speedup,
809 | "estimated_cache_hit_rate": estimated_cache_hit_rate,
810 | }
811 |
812 |
813 | if __name__ == "__main__":
814 | # Run high-volume integration tests
815 | pytest.main(
816 | [
817 | __file__,
818 | "-v",
819 | "--tb=short",
820 | "--asyncio-mode=auto",
821 | "--timeout=3600", # 1 hour timeout for high-volume tests
822 | "--durations=20", # Show 20 slowest tests
823 | "-x", # Stop on first failure
824 | ]
825 | )
826 |
```
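
These tests depend on `benchmark_timer` and `db_session` fixtures supplied by the shared `tests/conftest.py` (listed in the directory tree above but not shown on this page). As a rough orientation only, a minimal `benchmark_timer` compatible with the `with benchmark_timer() as timer:` / `timer.elapsed` usage above might look like the following sketch; the actual fixture in this repository may differ.

```python
# Hypothetical sketch of the benchmark_timer fixture the tests assume;
# the real implementation lives in tests/conftest.py.
import time
from contextlib import contextmanager

import pytest


@pytest.fixture
def benchmark_timer():
    """Return a factory producing timing context managers exposing `.elapsed`."""

    class _Timer:
        elapsed: float = 0.0

    @contextmanager
    def _timed():
        timer = _Timer()
        start = time.perf_counter()
        try:
            yield timer
        finally:
            # Elapsed wall-clock time is readable after the `with` block exits.
            timer.elapsed = time.perf_counter() - start

    return _timed
```

This mirrors only the contract the tests exercise: each call to the fixture's return value opens a fresh timing context, and `timer.elapsed` is read after the block exits.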
--------------------------------------------------------------------------------
/maverick_mcp/monitoring/metrics.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Comprehensive Prometheus metrics for MaverickMCP backtesting system.
3 |
4 | This module provides specialized metrics for monitoring:
5 | - Backtesting execution performance and reliability
6 | - Strategy performance over time
7 | - API rate limiting and failure tracking
8 | - Resource usage and optimization
9 | - Anomaly detection and alerting
10 | """
11 |
12 | import threading
13 | import time
14 | from contextlib import contextmanager
15 | from dataclasses import dataclass
16 | from datetime import datetime
17 | from typing import Any
18 |
19 | from prometheus_client import (
20 | CollectorRegistry,
21 | Counter,
22 | Gauge,
23 | Histogram,
24 | Summary,
25 | generate_latest,
26 | )
27 |
28 | from maverick_mcp.utils.logging import get_logger
29 |
30 | logger = get_logger(__name__)
31 |
32 | # Custom registry for backtesting metrics to avoid conflicts
33 | BACKTESTING_REGISTRY = CollectorRegistry()
34 |
35 | # =============================================================================
36 | # BACKTESTING EXECUTION METRICS
37 | # =============================================================================
38 |
39 | # Backtest execution counters
40 | backtest_executions_total = Counter(
41 | "maverick_backtest_executions_total",
42 | "Total number of backtesting executions",
43 | ["strategy_name", "status", "symbol", "timeframe"],
44 | registry=BACKTESTING_REGISTRY,
45 | )
46 |
47 | backtest_execution_duration = Histogram(
48 | "maverick_backtest_execution_duration_seconds",
49 | "Duration of backtesting executions in seconds",
50 | ["strategy_name", "symbol", "timeframe", "data_size"],
51 | buckets=(
52 | 0.1,
53 | 0.5,
54 | 1.0,
55 | 2.5,
56 | 5.0,
57 | 10.0,
58 | 30.0,
59 | 60.0,
60 | 120.0,
61 | 300.0,
62 | 600.0,
63 | float("inf"),
64 | ),
65 | registry=BACKTESTING_REGISTRY,
66 | )
67 |
68 | backtest_data_points_processed = Counter(
69 | "maverick_backtest_data_points_processed_total",
70 | "Total number of data points processed during backtesting",
71 | ["strategy_name", "symbol", "timeframe"],
72 | registry=BACKTESTING_REGISTRY,
73 | )
74 |
75 | backtest_memory_usage = Histogram(
76 | "maverick_backtest_memory_usage_mb",
77 | "Memory usage during backtesting in MB",
78 | ["strategy_name", "symbol", "complexity"],
79 | buckets=(10, 25, 50, 100, 250, 500, 1000, 2500, 5000, float("inf")),
80 | registry=BACKTESTING_REGISTRY,
81 | )
82 |
83 | # =============================================================================
84 | # STRATEGY PERFORMANCE METRICS
85 | # =============================================================================
86 |
87 | # Strategy returns and performance
88 | strategy_returns = Histogram(
89 | "maverick_strategy_returns_percent",
90 | "Strategy returns in percentage",
91 | ["strategy_name", "symbol", "period"],
92 | buckets=(-50, -25, -10, -5, -1, 0, 1, 5, 10, 25, 50, 100, float("inf")),
93 | registry=BACKTESTING_REGISTRY,
94 | )
95 |
96 | strategy_sharpe_ratio = Histogram(
97 | "maverick_strategy_sharpe_ratio",
98 | "Strategy Sharpe ratio",
99 | ["strategy_name", "symbol", "period"],
100 | buckets=(-2, -1, -0.5, 0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, float("inf")),
101 | registry=BACKTESTING_REGISTRY,
102 | )
103 |
104 | strategy_max_drawdown = Histogram(
105 | "maverick_strategy_max_drawdown_percent",
106 | "Maximum drawdown percentage for strategy",
107 | ["strategy_name", "symbol", "period"],
108 | buckets=(0, 5, 10, 15, 20, 30, 40, 50, 75, 100, float("inf")),
109 | registry=BACKTESTING_REGISTRY,
110 | )
111 |
112 | strategy_win_rate = Histogram(
113 | "maverick_strategy_win_rate_percent",
114 | "Win rate percentage for strategy",
115 | ["strategy_name", "symbol", "period"],
116 | buckets=(0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100),
117 | registry=BACKTESTING_REGISTRY,
118 | )
119 |
120 | strategy_trades_total = Counter(
121 | "maverick_strategy_trades_total",
122 | "Total number of trades executed by strategy",
123 | ["strategy_name", "symbol", "trade_type", "outcome"],
124 | registry=BACKTESTING_REGISTRY,
125 | )
126 |
127 | # Strategy execution latency
128 | strategy_execution_latency = Summary(
129 | "maverick_strategy_execution_latency_seconds",
130 | "Strategy execution latency for signal generation",
131 | ["strategy_name", "complexity"],
132 | registry=BACKTESTING_REGISTRY,
133 | )
134 |
135 | # =============================================================================
136 | # API RATE LIMITING AND FAILURE METRICS
137 | # =============================================================================
138 |
139 | # API call tracking
140 | api_calls_total = Counter(
141 | "maverick_api_calls_total",
142 | "Total API calls made to external providers",
143 | ["provider", "endpoint", "method", "status_code"],
144 | registry=BACKTESTING_REGISTRY,
145 | )
146 |
147 | api_call_duration = Histogram(
148 | "maverick_api_call_duration_seconds",
149 | "API call duration in seconds",
150 | ["provider", "endpoint"],
151 | buckets=(0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, float("inf")),
152 | registry=BACKTESTING_REGISTRY,
153 | )
154 |
155 | # Rate limiting metrics
156 | rate_limit_hits = Counter(
157 | "maverick_rate_limit_hits_total",
158 | "Total rate limit hits by provider",
159 | ["provider", "endpoint", "limit_type"],
160 | registry=BACKTESTING_REGISTRY,
161 | )
162 |
163 | rate_limit_remaining = Gauge(
164 | "maverick_rate_limit_remaining",
165 | "Remaining API calls before hitting rate limit",
166 | ["provider", "endpoint", "window"],
167 | registry=BACKTESTING_REGISTRY,
168 | )
169 |
170 | rate_limit_reset_time = Gauge(
171 | "maverick_rate_limit_reset_timestamp",
172 | "Timestamp when rate limit resets",
173 | ["provider", "endpoint"],
174 | registry=BACKTESTING_REGISTRY,
175 | )
176 |
177 | # API failures and errors
178 | api_failures_total = Counter(
179 | "maverick_api_failures_total",
180 | "Total API failures by error type",
181 | ["provider", "endpoint", "error_type", "error_code"],
182 | registry=BACKTESTING_REGISTRY,
183 | )
184 |
185 | api_retry_attempts = Counter(
186 | "maverick_api_retry_attempts_total",
187 | "Total API retry attempts",
188 | ["provider", "endpoint", "retry_number"],
189 | registry=BACKTESTING_REGISTRY,
190 | )
191 |
192 | # Circuit breaker metrics
193 | circuit_breaker_state = Gauge(
194 | "maverick_circuit_breaker_state",
195 | "Circuit breaker state (0=closed, 1=open, 2=half-open)",
196 | ["provider", "endpoint"],
197 | registry=BACKTESTING_REGISTRY,
198 | )
199 |
200 | circuit_breaker_failures = Counter(
201 | "maverick_circuit_breaker_failures_total",
202 | "Circuit breaker failure count",
203 | ["provider", "endpoint"],
204 | registry=BACKTESTING_REGISTRY,
205 | )
206 |
207 | # =============================================================================
208 | # RESOURCE USAGE AND PERFORMANCE METRICS
209 | # =============================================================================
210 |
211 | # VectorBT specific metrics
212 | vectorbt_memory_usage = Gauge(
213 | "maverick_vectorbt_memory_usage_mb",
214 | "VectorBT memory usage in MB",
215 | ["operation_type"],
216 | registry=BACKTESTING_REGISTRY,
217 | )
218 |
219 | vectorbt_computation_time = Histogram(
220 | "maverick_vectorbt_computation_time_seconds",
221 | "VectorBT computation time in seconds",
222 | ["operation_type", "data_size"],
223 | buckets=(0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, float("inf")),
224 | registry=BACKTESTING_REGISTRY,
225 | )
226 |
227 | # Database query performance
228 | database_query_duration = Histogram(
229 | "maverick_database_query_duration_seconds",
230 | "Database query execution time",
231 | ["query_type", "table_name", "operation"],
232 | buckets=(0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, float("inf")),
233 | registry=BACKTESTING_REGISTRY,
234 | )
235 |
236 | database_connection_pool_usage = Gauge(
237 | "maverick_database_connection_pool_usage",
238 | "Database connection pool usage",
239 | ["pool_type", "status"],
240 | registry=BACKTESTING_REGISTRY,
241 | )
242 |
243 | # Cache performance metrics
244 | cache_operations_total = Counter(
245 | "maverick_cache_operations_total",
246 | "Total cache operations",
247 | ["cache_type", "operation", "status"],
248 | registry=BACKTESTING_REGISTRY,
249 | )
250 |
251 | cache_hit_ratio = Histogram(
252 | "maverick_cache_hit_ratio",
253 | "Cache hit ratio percentage",
254 | ["cache_type", "key_pattern"],
255 | buckets=(0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99, 100),
256 | registry=BACKTESTING_REGISTRY,
257 | )
258 |
259 | # =============================================================================
260 | # ANOMALY DETECTION METRICS
261 | # =============================================================================
262 |
263 | # Performance anomaly detection
264 | strategy_performance_anomalies = Counter(
265 | "maverick_strategy_performance_anomalies_total",
266 | "Detected strategy performance anomalies",
267 | ["strategy_name", "anomaly_type", "severity"],
268 | registry=BACKTESTING_REGISTRY,
269 | )
270 |
271 | data_quality_issues = Counter(
272 | "maverick_data_quality_issues_total",
273 | "Data quality issues detected",
274 | ["data_source", "issue_type", "symbol"],
275 | registry=BACKTESTING_REGISTRY,
276 | )
277 |
278 | resource_usage_alerts = Counter(
279 | "maverick_resource_usage_alerts_total",
280 | "Resource usage threshold alerts",
281 | ["resource_type", "threshold_type"],
282 | registry=BACKTESTING_REGISTRY,
283 | )
284 |
285 | # Threshold monitoring gauges
286 | performance_thresholds = Gauge(
287 | "maverick_performance_thresholds",
288 | "Performance monitoring thresholds",
289 | ["metric_name", "threshold_type"], # threshold_type: warning, critical
290 | registry=BACKTESTING_REGISTRY,
291 | )
292 |
293 | # =============================================================================
294 | # BUSINESS METRICS FOR TRADING
295 | # =============================================================================
296 |
297 | # Portfolio performance metrics
298 | portfolio_value = Gauge(
299 | "maverick_portfolio_value_usd",
300 | "Current portfolio value in USD",
301 | ["portfolio_id", "currency"],
302 | registry=BACKTESTING_REGISTRY,
303 | )
304 |
305 | portfolio_daily_pnl = Histogram(
306 | "maverick_portfolio_daily_pnl_usd",
307 | "Daily PnL in USD",
308 | ["portfolio_id", "strategy"],
309 | buckets=(
310 | -10000,
311 | -5000,
312 | -1000,
313 | -500,
314 | -100,
315 | 0,
316 | 100,
317 | 500,
318 | 1000,
319 | 5000,
320 | 10000,
321 | float("inf"),
322 | ),
323 | registry=BACKTESTING_REGISTRY,
324 | )
325 |
326 | active_positions = Gauge(
327 | "maverick_active_positions_count",
328 | "Number of active positions",
329 | ["portfolio_id", "symbol", "position_type"],
330 | registry=BACKTESTING_REGISTRY,
331 | )
332 |
333 | # =============================================================================
334 | # METRICS COLLECTOR CLASS
335 | # =============================================================================
336 |
337 |
338 | @dataclass
339 | class PerformanceThreshold:
340 | """Configuration for performance thresholds."""
341 |
342 | metric_name: str
343 | warning_value: float
344 | critical_value: float
345 | comparison_type: str = "greater_than" # greater_than, less_than, equal_to
346 |
347 |
348 | class BacktestingMetricsCollector:
349 | """
350 | Comprehensive metrics collector for backtesting operations.
351 |
352 | Provides high-level interfaces for tracking backtesting performance,
353 | strategy metrics, API usage, and anomaly detection.
354 | """
355 |
356 | def __init__(self):
357 | self.logger = get_logger(f"{__name__}.BacktestingMetricsCollector")
358 | self._anomaly_thresholds = self._initialize_default_thresholds()
359 | self._lock = threading.Lock()
360 |
361 | # Initialize performance thresholds in Prometheus
362 | self._setup_performance_thresholds()
363 |
364 | self.logger.info("Backtesting metrics collector initialized")
365 |
366 | def _initialize_default_thresholds(self) -> dict[str, PerformanceThreshold]:
367 | """Initialize default performance thresholds for anomaly detection."""
368 | return {
369 | "sharpe_ratio_low": PerformanceThreshold(
370 | "sharpe_ratio", 0.5, 0.0, "less_than"
371 | ),
372 | "max_drawdown_high": PerformanceThreshold(
373 | "max_drawdown", 20.0, 30.0, "greater_than"
374 | ),
375 | "win_rate_low": PerformanceThreshold("win_rate", 40.0, 30.0, "less_than"),
376 | "execution_time_high": PerformanceThreshold(
377 | "execution_time", 60.0, 120.0, "greater_than"
378 | ),
379 | "api_failure_rate_high": PerformanceThreshold(
380 | "api_failure_rate", 5.0, 10.0, "greater_than"
381 | ),
382 | "memory_usage_high": PerformanceThreshold(
383 | "memory_usage", 1000, 2000, "greater_than"
384 | ),
385 | }
386 |
387 | def _setup_performance_thresholds(self):
388 | """Setup performance threshold gauges."""
389 | for _threshold_name, threshold in self._anomaly_thresholds.items():
390 | performance_thresholds.labels(
391 | metric_name=threshold.metric_name, threshold_type="warning"
392 | ).set(threshold.warning_value)
393 |
394 | performance_thresholds.labels(
395 | metric_name=threshold.metric_name, threshold_type="critical"
396 | ).set(threshold.critical_value)
397 |
398 | @contextmanager
399 | def track_backtest_execution(
400 | self, strategy_name: str, symbol: str, timeframe: str, data_points: int = 0
401 | ):
402 | """
403 | Context manager for tracking backtest execution metrics.
404 |
405 | Args:
406 | strategy_name: Name of the trading strategy
407 | symbol: Trading symbol (e.g., 'AAPL')
408 | timeframe: Data timeframe (e.g., '1D', '1H')
409 | data_points: Number of data points being processed
410 | """
411 | start_time = time.time()
412 | start_memory = self._get_memory_usage()
413 |
414 | # Determine data size category
415 | data_size = self._categorize_data_size(data_points)
416 |
417 | try:
418 | yield
419 |
420 | # Success metrics
421 | duration = time.time() - start_time
422 | memory_used = self._get_memory_usage() - start_memory
423 |
424 | backtest_executions_total.labels(
425 | strategy_name=strategy_name,
426 | status="success",
427 | symbol=symbol,
428 | timeframe=timeframe,
429 | ).inc()
430 |
431 | backtest_execution_duration.labels(
432 | strategy_name=strategy_name,
433 | symbol=symbol,
434 | timeframe=timeframe,
435 | data_size=data_size,
436 | ).observe(duration)
437 |
438 | if data_points > 0:
439 | backtest_data_points_processed.labels(
440 | strategy_name=strategy_name, symbol=symbol, timeframe=timeframe
441 | ).inc(data_points)
442 |
443 | if memory_used > 0:
444 | complexity = self._categorize_complexity(data_points, duration)
445 | backtest_memory_usage.labels(
446 | strategy_name=strategy_name, symbol=symbol, complexity=complexity
447 | ).observe(memory_used)
448 |
449 | # Check for performance anomalies
450 | self._check_execution_anomalies(strategy_name, duration, memory_used)
451 |
452 | except Exception as e:
453 | # Error metrics
454 | backtest_executions_total.labels(
455 | strategy_name=strategy_name,
456 | status="failure",
457 | symbol=symbol,
458 | timeframe=timeframe,
459 | ).inc()
460 |
461 | self.logger.error(f"Backtest execution failed for {strategy_name}: {e}")
462 | raise
463 |
464 | def track_strategy_performance(
465 | self,
466 | strategy_name: str,
467 | symbol: str,
468 | period: str,
469 | returns: float,
470 | sharpe_ratio: float,
471 | max_drawdown: float,
472 | win_rate: float,
473 | total_trades: int,
474 | winning_trades: int,
475 | ):
476 | """
477 | Track comprehensive strategy performance metrics.
478 |
479 | Args:
480 | strategy_name: Name of the trading strategy
481 | symbol: Trading symbol
482 | period: Performance period (e.g., '1Y', '6M', '3M')
483 | returns: Total returns in percentage
484 | sharpe_ratio: Sharpe ratio
485 | max_drawdown: Maximum drawdown percentage
486 | win_rate: Win rate percentage
487 | total_trades: Total number of trades
488 | winning_trades: Number of winning trades
489 | """
490 | # Record performance metrics
491 | strategy_returns.labels(
492 | strategy_name=strategy_name, symbol=symbol, period=period
493 | ).observe(returns)
494 |
495 | strategy_sharpe_ratio.labels(
496 | strategy_name=strategy_name, symbol=symbol, period=period
497 | ).observe(sharpe_ratio)
498 |
499 | strategy_max_drawdown.labels(
500 | strategy_name=strategy_name, symbol=symbol, period=period
501 | ).observe(max_drawdown)
502 |
503 | strategy_win_rate.labels(
504 | strategy_name=strategy_name, symbol=symbol, period=period
505 | ).observe(win_rate)
506 |
507 | # Record trade counts
508 | strategy_trades_total.labels(
509 | strategy_name=strategy_name,
510 | symbol=symbol,
511 | trade_type="total",
512 | outcome="all",
513 | ).inc(total_trades)
514 |
515 | strategy_trades_total.labels(
516 | strategy_name=strategy_name,
517 | symbol=symbol,
518 | trade_type="winning",
519 | outcome="profit",
520 | ).inc(winning_trades)
521 |
522 | losing_trades = total_trades - winning_trades
523 | strategy_trades_total.labels(
524 | strategy_name=strategy_name,
525 | symbol=symbol,
526 | trade_type="losing",
527 | outcome="loss",
528 | ).inc(losing_trades)
529 |
530 | # Check for performance anomalies
531 | self._check_strategy_anomalies(
532 | strategy_name, sharpe_ratio, max_drawdown, win_rate
533 | )
534 |
535 | def track_api_call(
536 | self,
537 | provider: str,
538 | endpoint: str,
539 | method: str,
540 | status_code: int,
541 | duration: float,
542 | error_type: str | None = None,
543 | remaining_calls: int | None = None,
544 | reset_time: datetime | None = None,
545 | ):
546 | """
547 | Track API call metrics including rate limiting and failures.
548 |
549 | Args:
550 | provider: API provider name (e.g., 'tiingo', 'yahoo')
551 | endpoint: API endpoint
552 | method: HTTP method
553 | status_code: Response status code
554 | duration: Request duration in seconds
555 | error_type: Type of error if request failed
556 | remaining_calls: Remaining API calls before rate limit
557 | reset_time: When rate limit resets
558 | """
559 | # Basic API call tracking
560 | api_calls_total.labels(
561 | provider=provider,
562 | endpoint=endpoint,
563 | method=method,
564 | status_code=str(status_code),
565 | ).inc()
566 |
567 | api_call_duration.labels(provider=provider, endpoint=endpoint).observe(duration)
568 |
569 | # Rate limiting metrics
570 | if remaining_calls is not None:
571 | rate_limit_remaining.labels(
572 | provider=provider, endpoint=endpoint, window="current"
573 | ).set(remaining_calls)
574 |
575 | if reset_time is not None:
576 | rate_limit_reset_time.labels(provider=provider, endpoint=endpoint).set(
577 | reset_time.timestamp()
578 | )
579 |
580 | # Failure tracking
581 | if status_code >= 400:
582 | error_code = self._categorize_error_code(status_code)
583 | api_failures_total.labels(
584 | provider=provider,
585 | endpoint=endpoint,
586 | error_type=error_type or "unknown",
587 | error_code=error_code,
588 | ).inc()
589 |
590 | # Check for rate limiting
591 | if status_code == 429:
592 | rate_limit_hits.labels(
593 | provider=provider, endpoint=endpoint, limit_type="requests_per_hour"
594 | ).inc()
595 |
596 | # Check for API anomalies
597 | self._check_api_anomalies(provider, endpoint, status_code, duration)
598 |
599 | def track_circuit_breaker(
600 | self, provider: str, endpoint: str, state: str, failure_count: int
601 | ):
602 | """Track circuit breaker state and failures."""
603 | state_mapping = {"closed": 0, "open": 1, "half-open": 2}
604 | circuit_breaker_state.labels(provider=provider, endpoint=endpoint).set(
605 | state_mapping.get(state, 0)
606 | )
607 |
608 | if failure_count > 0:
609 | circuit_breaker_failures.labels(provider=provider, endpoint=endpoint).inc(
610 | failure_count
611 | )
612 |
613 | def track_resource_usage(
614 | self,
615 | operation_type: str,
616 | memory_mb: float,
617 | computation_time: float,
618 | data_size: str = "unknown",
619 | ):
620 | """Track resource usage for VectorBT operations."""
621 | vectorbt_memory_usage.labels(operation_type=operation_type).set(memory_mb)
622 |
623 | vectorbt_computation_time.labels(
624 | operation_type=operation_type, data_size=data_size
625 | ).observe(computation_time)
626 |
627 | # Check for resource usage anomalies
628 | if memory_mb > self._anomaly_thresholds["memory_usage_high"].warning_value:
629 | resource_usage_alerts.labels(
630 | resource_type="memory",
631 | threshold_type="warning"
632 | if memory_mb
633 | < self._anomaly_thresholds["memory_usage_high"].critical_value
634 | else "critical",
635 | ).inc()
636 |
637 | def track_database_operation(
638 | self, query_type: str, table_name: str, operation: str, duration: float
639 | ):
640 | """Track database operation performance."""
641 | database_query_duration.labels(
642 | query_type=query_type, table_name=table_name, operation=operation
643 | ).observe(duration)
644 |
645 | def track_cache_operation(
646 | self, cache_type: str, operation: str, hit: bool, key_pattern: str = "general"
647 | ):
648 | """Track cache operation performance."""
649 | status = "hit" if hit else "miss"
650 | cache_operations_total.labels(
651 | cache_type=cache_type, operation=operation, status=status
652 | ).inc()
653 |
654 | def detect_anomaly(self, anomaly_type: str, severity: str, context: dict[str, Any]):
655 | """Record detected anomaly."""
656 | strategy_name = context.get("strategy_name", "unknown")
657 |
658 | strategy_performance_anomalies.labels(
659 | strategy_name=strategy_name, anomaly_type=anomaly_type, severity=severity
660 | ).inc()
661 |
662 | self.logger.warning(
663 | f"Anomaly detected: {anomaly_type} (severity: {severity})",
664 | extra={"context": context},
665 | )
666 |
667 | def update_portfolio_metrics(
668 | self,
669 | portfolio_id: str,
670 | portfolio_value_usd: float,
671 | daily_pnl_usd: float,
672 | strategy: str,
673 | positions: list[dict[str, Any]],
674 | ):
675 | """Update portfolio-related metrics."""
676 | portfolio_value.labels(portfolio_id=portfolio_id, currency="USD").set(
677 | portfolio_value_usd
678 | )
679 |
680 | portfolio_daily_pnl.labels(
681 | portfolio_id=portfolio_id, strategy=strategy
682 | ).observe(daily_pnl_usd)
683 |
684 | # Update position counts
685 | for position in positions:
686 | active_positions.labels(
687 | portfolio_id=portfolio_id,
688 | symbol=position.get("symbol", "unknown"),
689 | position_type=position.get("type", "long"),
690 | ).set(position.get("quantity", 0))
691 |
692 | def _get_memory_usage(self) -> float:
693 | """Get current memory usage in MB."""
694 | try:
695 | import psutil
696 |
697 | process = psutil.Process()
698 | return process.memory_info().rss / 1024 / 1024
699 | except ImportError:
700 | return 0.0
701 |
702 | def _categorize_data_size(self, data_points: int) -> str:
703 | """Categorize data size for metrics labeling."""
704 | if data_points < 100:
705 | return "small"
706 | elif data_points < 1000:
707 | return "medium"
708 | elif data_points < 10000:
709 | return "large"
710 | else:
711 | return "xlarge"
712 |
713 | def _categorize_complexity(self, data_points: int, duration: float) -> str:
714 | """Categorize operation complexity."""
715 | if data_points < 1000 and duration < 10:
716 | return "simple"
717 | elif data_points < 10000 and duration < 60:
718 | return "moderate"
719 | else:
720 | return "complex"
721 |
722 | def _categorize_error_code(self, status_code: int) -> str:
723 | """Categorize HTTP error codes."""
724 | if 400 <= status_code < 500:
725 | return "client_error"
726 | elif 500 <= status_code < 600:
727 | return "server_error"
728 | else:
729 | return "other"
730 |
731 | def _check_execution_anomalies(
732 | self, strategy_name: str, duration: float, memory_mb: float
733 | ):
734 | """Check for execution performance anomalies."""
735 | threshold = self._anomaly_thresholds["execution_time_high"]
736 | if duration > threshold.critical_value:
737 | self.detect_anomaly(
738 | "execution_time_high",
739 | "critical",
740 | {
741 | "strategy_name": strategy_name,
742 | "duration": duration,
743 | "threshold": threshold.critical_value,
744 | },
745 | )
746 | elif duration > threshold.warning_value:
747 | self.detect_anomaly(
748 | "execution_time_high",
749 | "warning",
750 | {
751 | "strategy_name": strategy_name,
752 | "duration": duration,
753 | "threshold": threshold.warning_value,
754 | },
755 | )
756 |
757 | def _check_strategy_anomalies(
758 | self,
759 | strategy_name: str,
760 | sharpe_ratio: float,
761 | max_drawdown: float,
762 | win_rate: float,
763 | ):
764 | """Check for strategy performance anomalies."""
765 | # Check Sharpe ratio
766 | threshold = self._anomaly_thresholds["sharpe_ratio_low"]
767 | if sharpe_ratio < threshold.critical_value:
768 | self.detect_anomaly(
769 | "sharpe_ratio_low",
770 | "critical",
771 | {"strategy_name": strategy_name, "sharpe_ratio": sharpe_ratio},
772 | )
773 | elif sharpe_ratio < threshold.warning_value:
774 | self.detect_anomaly(
775 | "sharpe_ratio_low",
776 | "warning",
777 | {"strategy_name": strategy_name, "sharpe_ratio": sharpe_ratio},
778 | )
779 |
780 | # Check max drawdown
781 | threshold = self._anomaly_thresholds["max_drawdown_high"]
782 | if max_drawdown > threshold.critical_value:
783 | self.detect_anomaly(
784 | "max_drawdown_high",
785 | "critical",
786 | {"strategy_name": strategy_name, "max_drawdown": max_drawdown},
787 | )
788 | elif max_drawdown > threshold.warning_value:
789 | self.detect_anomaly(
790 | "max_drawdown_high",
791 | "warning",
792 | {"strategy_name": strategy_name, "max_drawdown": max_drawdown},
793 | )
794 |
795 | # Check win rate
796 | threshold = self._anomaly_thresholds["win_rate_low"]
797 | if win_rate < threshold.critical_value:
798 | self.detect_anomaly(
799 | "win_rate_low",
800 | "critical",
801 | {"strategy_name": strategy_name, "win_rate": win_rate},
802 | )
803 | elif win_rate < threshold.warning_value:
804 | self.detect_anomaly(
805 | "win_rate_low",
806 | "warning",
807 | {"strategy_name": strategy_name, "win_rate": win_rate},
808 | )
809 |
810 | def _check_api_anomalies(
811 | self, provider: str, endpoint: str, status_code: int, duration: float
812 | ):
813 | """Check for API call anomalies."""
814 | # Check API response time
815 | if duration > 30.0: # 30 second threshold
816 | self.detect_anomaly(
817 | "api_response_slow",
818 | "warning" if duration < 60.0 else "critical",
819 | {"provider": provider, "endpoint": endpoint, "duration": duration},
820 | )
821 |
822 | # Check for repeated failures
823 | if status_code >= 500:
824 | self.detect_anomaly(
825 | "api_server_error",
826 | "critical",
827 | {
828 | "provider": provider,
829 | "endpoint": endpoint,
830 | "status_code": status_code,
831 | },
832 | )
833 |
834 | def get_metrics_text(self) -> str:
835 | """Get all backtesting metrics in Prometheus text format."""
836 | return generate_latest(BACKTESTING_REGISTRY).decode("utf-8")
837 |
838 |
839 | # =============================================================================
840 | # GLOBAL INSTANCES AND CONVENIENCE FUNCTIONS
841 | # =============================================================================
842 |
843 | # Global metrics collector instance
844 | _metrics_collector: BacktestingMetricsCollector | None = None
845 | _collector_lock = threading.Lock()
846 |
847 |
848 | def get_backtesting_metrics() -> BacktestingMetricsCollector:
849 | """Get or create the global backtesting metrics collector."""
850 | global _metrics_collector
851 | if _metrics_collector is None:
852 | with _collector_lock:
853 | if _metrics_collector is None:
854 | _metrics_collector = BacktestingMetricsCollector()
855 | return _metrics_collector
856 |
857 |
858 | # Convenience functions for common operations
859 | def track_backtest_execution(
860 | strategy_name: str, symbol: str, timeframe: str, data_points: int = 0
861 | ):
862 | """Convenience function to track backtest execution."""
863 | return get_backtesting_metrics().track_backtest_execution(
864 | strategy_name, symbol, timeframe, data_points
865 | )
866 |
867 |
868 | def track_strategy_performance(
869 | strategy_name: str,
870 | symbol: str,
871 | period: str,
872 | returns: float,
873 | sharpe_ratio: float,
874 | max_drawdown: float,
875 | win_rate: float,
876 | total_trades: int,
877 | winning_trades: int,
878 | ):
879 | """Convenience function to track strategy performance."""
880 | get_backtesting_metrics().track_strategy_performance(
881 | strategy_name,
882 | symbol,
883 | period,
884 | returns,
885 | sharpe_ratio,
886 | max_drawdown,
887 | win_rate,
888 | total_trades,
889 | winning_trades,
890 | )
891 |
892 |
893 | def track_api_call_metrics(
894 | provider: str,
895 | endpoint: str,
896 | method: str,
897 | status_code: int,
898 | duration: float,
899 | error_type: str | None = None,
900 | remaining_calls: int | None = None,
901 | reset_time: datetime | None = None,
902 | ):
903 | """Convenience function to track API call metrics."""
904 | get_backtesting_metrics().track_api_call(
905 | provider,
906 | endpoint,
907 | method,
908 | status_code,
909 | duration,
910 | error_type,
911 | remaining_calls,
912 | reset_time,
913 | )
914 |
915 |
916 | def track_anomaly_detection(anomaly_type: str, severity: str, context: dict[str, Any]):
917 | """Convenience function to track detected anomalies."""
918 | get_backtesting_metrics().detect_anomaly(anomaly_type, severity, context)
919 |
920 |
921 | def get_metrics_for_prometheus() -> str:
922 | """Get backtesting metrics in Prometheus format."""
923 | return get_backtesting_metrics().get_metrics_text()
924 |
```
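A minimal usage sketch for the metrics collector above. The import path is an assumption (the module's own path header falls outside this excerpt), and every metric value shown is illustrative:

```python
# Hypothetical import path; adjust to wherever this module lives in the repo.
from maverick_mcp.monitoring.metrics import (
    get_metrics_for_prometheus,
    track_backtest_execution,
    track_strategy_performance,
)


def run_instrumented_backtest() -> None:
    # Used as a context manager (the yield-based implementation above
    # implies @contextmanager): records duration, memory delta, and a
    # success/failure counter labeled by strategy/symbol/timeframe.
    with track_backtest_execution("sma_cross", "AAPL", "1D", data_points=2520):
        ...  # run the actual backtest here

    # Record summary statistics once the run has produced them
    # (all numbers below are made up for the example).
    track_strategy_performance(
        strategy_name="sma_cross",
        symbol="AAPL",
        period="1Y",
        returns=12.5,
        sharpe_ratio=1.1,
        max_drawdown=8.3,
        win_rate=54.0,
        total_trades=120,
        winning_trades=65,
    )

    # Text exposition suitable for a Prometheus scrape endpoint.
    print(get_metrics_for_prometheus())
```

Because the collector is a process-wide singleton guarded by a double-checked lock, repeated calls to these convenience functions all feed one registry.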
--------------------------------------------------------------------------------
/maverick_mcp/backtesting/strategies/ml/adaptive.py:
--------------------------------------------------------------------------------
```python
1 | """Adaptive trading strategies with online learning and parameter adjustment."""
2 |
3 | import logging
4 | from typing import Any
5 |
6 | import numpy as np
7 | import pandas as pd
8 | from pandas import DataFrame, Series
9 | from sklearn.linear_model import SGDClassifier
10 | from sklearn.preprocessing import StandardScaler
11 |
12 | from maverick_mcp.backtesting.strategies.base import Strategy
13 |
14 | logger = logging.getLogger(__name__)
15 |
16 |
17 | class AdaptiveStrategy(Strategy):
18 | """Base class for adaptive strategies that adjust parameters based on performance."""
19 |
20 | def __init__(
21 | self,
22 | base_strategy: Strategy,
23 | adaptation_method: str = "gradient",
24 | learning_rate: float = 0.01,
25 | lookback_period: int = 50,
26 | adaptation_frequency: int = 10,
27 | parameters: dict[str, Any] | None = None,
28 | ):
29 | """Initialize adaptive strategy.
30 |
31 | Args:
32 | base_strategy: Base strategy to adapt
33 | adaptation_method: Method for parameter adaptation
34 | learning_rate: Learning rate for parameter updates
35 | lookback_period: Period for performance evaluation
36 | adaptation_frequency: How often to adapt parameters
37 | parameters: Additional parameters
38 | """
39 | super().__init__(parameters)
40 | self.base_strategy = base_strategy
41 | self.adaptation_method = adaptation_method
42 | self.learning_rate = learning_rate
43 | self.lookback_period = lookback_period
44 | self.adaptation_frequency = adaptation_frequency
45 |
46 | # Performance tracking
47 | self.performance_history = []
48 | self.parameter_history = []
49 | self.last_adaptation = 0
50 |
51 | # Store original parameters for reference
52 | self.original_parameters = base_strategy.parameters.copy()
53 |
54 | @property
55 | def name(self) -> str:
56 | """Get strategy name."""
57 | return f"Adaptive({self.base_strategy.name})"
58 |
59 | @property
60 | def description(self) -> str:
61 | """Get strategy description."""
62 | return f"Adaptive version of {self.base_strategy.name} using {self.adaptation_method} method"
63 |
64 | def calculate_performance_metric(self, returns: Series) -> float:
65 | """Calculate performance metric for parameter adaptation.
66 |
67 | Args:
68 | returns: Strategy returns
69 |
70 | Returns:
71 | Performance score
72 | """
73 | if len(returns) == 0:
74 | return 0.0
75 |
76 | # Use Sharpe ratio as default performance metric
77 | if returns.std() == 0:
78 | return 0.0
79 |
80 | sharpe = returns.mean() / returns.std() * np.sqrt(252)
81 |
82 | # Alternative metrics could be:
83 | # - Calmar ratio: returns.mean() / abs(max_drawdown)
84 | # - Sortino ratio: returns.mean() / downside_deviation
85 | # - Information ratio: excess_returns.mean() / tracking_error
86 |
87 | return sharpe
88 |
89 | def adapt_parameters_gradient(self, performance_gradient: float) -> None:
90 | """Adapt parameters using gradient-based method.
91 |
92 | Args:
93 |             performance_gradient: Approximate performance gradient (the recent change in the performance metric)
94 | """
95 | adaptable_params = self.get_adaptable_parameters()
96 |
97 | for param_name, param_info in adaptable_params.items():
98 | if param_name in self.base_strategy.parameters:
99 | current_value = self.base_strategy.parameters[param_name]
100 |
101 | # Calculate parameter update
102 | param_gradient = performance_gradient * self.learning_rate
103 |
104 | # Apply bounds and constraints
105 | min_val = param_info.get("min", current_value * 0.5)
106 | max_val = param_info.get("max", current_value * 2.0)
107 | step_size = param_info.get("step", abs(current_value) * 0.01)
108 |
109 | # Update parameter
110 | new_value = current_value + param_gradient * step_size
111 | new_value = max(min_val, min(max_val, new_value))
112 |
113 | self.base_strategy.parameters[param_name] = new_value
114 |
115 | logger.debug(
116 | f"Adapted {param_name}: {current_value:.4f} -> {new_value:.4f}"
117 | )
118 |
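    # Worked update (illustrative): current_value = 10, performance
    # gradient = 0.5, learning_rate = 0.01 -> param_gradient = 0.005;
    # with step_size = 0.1 the new value is 10 + 0.005 * 0.1 = 10.0005,
    # then clamped to the parameter's [min, max] bounds.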
119 | def adapt_parameters_random_search(self) -> None:
120 | """Adapt parameters using random search with performance feedback."""
121 | adaptable_params = self.get_adaptable_parameters()
122 |
123 | # Try random perturbations and keep improvements
124 | for param_name, param_info in adaptable_params.items():
125 | if param_name in self.base_strategy.parameters:
126 | current_value = self.base_strategy.parameters[param_name]
127 |
128 | # Generate random perturbation
129 | min_val = param_info.get("min", current_value * 0.5)
130 | max_val = param_info.get("max", current_value * 2.0)
131 |
132 | # Small random step
133 | perturbation = np.random.normal(0, abs(current_value) * 0.1)
134 | new_value = current_value + perturbation
135 | new_value = max(min_val, min(max_val, new_value))
136 |
137 | # Store new value for trial
138 | self.base_strategy.parameters[param_name] = new_value
139 |
140 | # Note: Performance evaluation would happen in next cycle
141 | # For now, keep the change and let performance tracking decide
142 |
143 | def get_adaptable_parameters(self) -> dict[str, dict]:
144 | """Get parameters that can be adapted.
145 |
146 | Returns:
147 | Dictionary of adaptable parameters with their constraints
148 | """
149 | # Default adaptable parameters - can be overridden by subclasses
150 | return {
151 | "lookback_period": {"min": 5, "max": 200, "step": 1},
152 | "threshold": {"min": 0.001, "max": 0.1, "step": 0.001},
153 | "window": {"min": 5, "max": 100, "step": 1},
154 | "period": {"min": 5, "max": 200, "step": 1},
155 | }
156 |
157 | def adapt_parameters(self, recent_performance: float) -> None:
158 | """Adapt strategy parameters based on recent performance.
159 |
160 | Args:
161 | recent_performance: Recent performance metric
162 | """
163 | try:
164 | if self.adaptation_method == "gradient":
165 | # Approximate gradient based on performance change
166 | if len(self.performance_history) > 1:
167 | performance_gradient = (
168 | recent_performance - self.performance_history[-2]
169 | )
170 | self.adapt_parameters_gradient(performance_gradient)
171 |
172 | elif self.adaptation_method == "random_search":
173 | # Use random search with performance feedback
174 | self.adapt_parameters_random_search()
175 |
176 | else:
177 | logger.warning(f"Unknown adaptation method: {self.adaptation_method}")
178 |
179 | # Store adapted parameters
180 | self.parameter_history.append(self.base_strategy.parameters.copy())
181 |
182 | except Exception as e:
183 | logger.error(f"Error adapting parameters: {e}")
184 |
185 | def generate_signals(self, data: DataFrame) -> tuple[Series, Series]:
186 | """Generate adaptive trading signals.
187 |
188 | Args:
189 | data: Price data with OHLCV columns
190 |
191 | Returns:
192 | Tuple of (entry_signals, exit_signals) as boolean Series
193 | """
194 | try:
195 | # Generate signals from base strategy
196 | entry_signals, exit_signals = self.base_strategy.generate_signals(data)
197 |
198 | # Calculate strategy performance for adaptation
199 | positions = entry_signals.astype(int) - exit_signals.astype(int)
200 |             returns = positions.shift(1) * data["close"].pct_change()  # rough proxy: treats signals as one-bar positions
201 |
202 | # Track performance over time for adaptation
203 | for idx in range(
204 | self.adaptation_frequency, len(data), self.adaptation_frequency
205 | ):
206 | if idx > self.last_adaptation + self.adaptation_frequency:
207 | # Calculate recent performance
208 | recent_returns = returns.iloc[
209 | max(0, idx - self.lookback_period) : idx
210 | ]
211 | if len(recent_returns) > 0:
212 | recent_performance = self.calculate_performance_metric(
213 | recent_returns
214 | )
215 | self.performance_history.append(recent_performance)
216 |
217 | # Adapt parameters based on performance
218 | self.adapt_parameters(recent_performance)
219 |
220 | # Re-generate signals with adapted parameters
221 | entry_signals, exit_signals = (
222 | self.base_strategy.generate_signals(data)
223 | )
224 |
225 | self.last_adaptation = idx
226 |
227 | return entry_signals, exit_signals
228 |
229 | except Exception as e:
230 | logger.error(f"Error generating adaptive signals: {e}")
231 | return pd.Series(False, index=data.index), pd.Series(
232 | False, index=data.index
233 | )
234 |
235 | def get_adaptation_history(self) -> dict[str, Any]:
236 | """Get history of parameter adaptations.
237 |
238 | Returns:
239 | Dictionary with adaptation history
240 | """
241 | return {
242 | "performance_history": self.performance_history,
243 | "parameter_history": self.parameter_history,
244 | "current_parameters": self.base_strategy.parameters,
245 | "original_parameters": self.original_parameters,
246 | }
247 |
248 | def reset_to_original(self) -> None:
249 | """Reset strategy parameters to original values."""
250 | self.base_strategy.parameters = self.original_parameters.copy()
251 | self.performance_history = []
252 | self.parameter_history = []
253 | self.last_adaptation = 0
254 |
255 |
256 | class OnlineLearningStrategy(Strategy):
257 | """Strategy with online learning capabilities using streaming ML algorithms."""
258 |
259 | def __init__(
260 | self,
261 | model_type: str = "sgd",
262 | update_frequency: int = 10,
263 | feature_window: int = 20,
264 | confidence_threshold: float = 0.6,
265 | min_training_samples: int = 100,
266 | initial_training_period: int = 200,
267 | parameters: dict[str, Any] | None = None,
268 | ):
269 | """Initialize online learning strategy.
270 |
271 | Args:
272 | model_type: Type of online learning model
273 | update_frequency: How often to update the model
274 | feature_window: Window for feature calculation
275 | confidence_threshold: Minimum confidence for signals
276 | min_training_samples: Minimum samples before enabling predictions
277 | initial_training_period: Period for initial batch training
278 | parameters: Additional parameters
279 | """
280 | super().__init__(parameters)
281 | self.model_type = model_type
282 | self.update_frequency = update_frequency
283 | self.feature_window = feature_window
284 | self.confidence_threshold = confidence_threshold
285 | self.min_training_samples = min_training_samples
286 | self.initial_training_period = initial_training_period
287 |
288 | # Initialize online learning components
289 | self.model = None
290 | self.scaler = None
291 | self.is_trained = False
292 | self.is_initial_trained = False
293 | self.training_buffer = []
294 | self.last_update = 0
295 | self.training_samples_count = 0
296 |
297 | # Feature consistency tracking
298 | self.expected_feature_count = None
299 | self.feature_stats_buffer = []
300 |
301 | self._initialize_model()
302 |
303 | def _initialize_model(self):
304 | """Initialize online learning model with proper configuration."""
305 | if self.model_type == "sgd":
306 | self.model = SGDClassifier(
307 | loss="log_loss",
308 | learning_rate="adaptive",
309 | eta0=0.01,
310 | random_state=42,
311 | max_iter=1000,
312 | tol=1e-4,
313 | warm_start=True, # Enable incremental learning
314 | alpha=0.01, # Regularization
315 | fit_intercept=True,
316 | )
317 | else:
318 | raise ValueError(f"Unsupported model type: {self.model_type}")
319 |
320 | # Initialize scaler as None - will be created during first fit
321 | self.scaler = None
322 |
323 | @property
324 | def name(self) -> str:
325 | """Get strategy name."""
326 | return f"OnlineLearning({self.model_type.upper()})"
327 |
328 | @property
329 | def description(self) -> str:
330 | """Get strategy description."""
331 | return (
332 | f"Online learning strategy using {self.model_type} with streaming updates"
333 | )
334 |
335 | def extract_features(self, data: DataFrame, end_idx: int) -> np.ndarray:
336 | """Extract features for online learning with enhanced stability.
337 |
338 | Args:
339 | data: Price data
340 | end_idx: End index for feature calculation
341 |
342 | Returns:
343 | Feature array with consistent dimensionality
344 | """
345 | try:
346 | start_idx = max(0, end_idx - self.feature_window)
347 | window_data = data.iloc[start_idx : end_idx + 1]
348 |
349 | # Need minimum data for meaningful features
350 | if len(window_data) < max(5, self.feature_window // 4):
351 | return np.array([])
352 |
353 | features = []
354 |
355 | # Price features with error handling
356 | returns = window_data["close"].pct_change().dropna()
357 | if len(returns) == 0:
358 | return np.array([])
359 |
360 | # Basic return statistics (robust to small samples)
361 | mean_return = returns.mean() if len(returns) > 0 else 0.0
362 | std_return = returns.std() if len(returns) > 1 else 0.01 # Small default
363 | skew_return = returns.skew() if len(returns) > 3 else 0.0
364 | kurt_return = returns.kurtosis() if len(returns) > 3 else 0.0
365 |
366 | # Replace NaN/inf values
367 | features.extend(
368 | [
369 | mean_return if np.isfinite(mean_return) else 0.0,
370 | std_return if np.isfinite(std_return) else 0.01,
371 | skew_return if np.isfinite(skew_return) else 0.0,
372 | kurt_return if np.isfinite(kurt_return) else 0.0,
373 | ]
374 | )
375 |
376 | # Technical indicators with fallbacks
377 | current_price = window_data["close"].iloc[-1]
378 |
379 | # Short-term moving average ratio
380 | if len(window_data) >= 5:
381 | sma_5 = window_data["close"].rolling(5).mean().iloc[-1]
382 | features.append(current_price / sma_5 if sma_5 > 0 else 1.0)
383 | else:
384 | features.append(1.0)
385 |
386 | # Medium-term moving average ratio
387 | if len(window_data) >= 10:
388 | sma_10 = window_data["close"].rolling(10).mean().iloc[-1]
389 | features.append(current_price / sma_10 if sma_10 > 0 else 1.0)
390 | else:
391 | features.append(1.0)
392 |
393 | # Long-term moving average ratio (if enough data)
394 | if len(window_data) >= 20:
395 | sma_20 = window_data["close"].rolling(20).mean().iloc[-1]
396 | features.append(current_price / sma_20 if sma_20 > 0 else 1.0)
397 | else:
398 | features.append(1.0)
399 |
400 | # Volatility feature
401 | if len(returns) > 10:
402 | vol_ratio = std_return / returns.rolling(10).std().mean()
403 | features.append(vol_ratio if np.isfinite(vol_ratio) else 1.0)
404 | else:
405 | features.append(1.0)
406 |
407 | # Volume features (if available)
408 | if "volume" in window_data.columns and len(window_data) >= 5:
409 | current_volume = window_data["volume"].iloc[-1]
410 | volume_ma = window_data["volume"].rolling(5).mean().iloc[-1]
411 | volume_ratio = current_volume / volume_ma if volume_ma > 0 else 1.0
412 | features.append(volume_ratio if np.isfinite(volume_ratio) else 1.0)
413 |
414 | # Volume trend
415 | if len(window_data) >= 10:
416 | volume_ma_long = window_data["volume"].rolling(10).mean().iloc[-1]
417 | volume_trend = (
418 | volume_ma / volume_ma_long if volume_ma_long > 0 else 1.0
419 | )
420 | features.append(volume_trend if np.isfinite(volume_trend) else 1.0)
421 | else:
422 | features.append(1.0)
423 | else:
424 | features.extend([1.0, 1.0])
425 |
426 | feature_array = np.array(features)
427 |
428 | # Validate feature consistency
429 | if self.expected_feature_count is None:
430 | self.expected_feature_count = len(feature_array)
431 | elif len(feature_array) != self.expected_feature_count:
432 | logger.warning(
433 | f"Feature count mismatch: expected {self.expected_feature_count}, got {len(feature_array)}"
434 | )
435 | return np.array([])
436 |
437 | # Check for any remaining NaN or inf values
438 | if not np.all(np.isfinite(feature_array)):
439 | logger.warning("Non-finite features detected, replacing with defaults")
440 | feature_array = np.nan_to_num(
441 | feature_array, nan=0.0, posinf=1.0, neginf=-1.0
442 | )
443 |
444 | return feature_array
445 |
446 | except Exception as e:
447 | logger.error(f"Error extracting features: {e}")
448 | return np.array([])
449 |
450 | def create_target(self, data: DataFrame, idx: int, forward_periods: int = 3) -> int:
451 | """Create target variable for online learning.
452 |
453 | Args:
454 | data: Price data
455 | idx: Current index
456 | forward_periods: Periods to look forward
457 |
458 | Returns:
459 | Target class (0: sell, 1: hold, 2: buy)
460 | """
461 | if idx + forward_periods >= len(data):
462 | return 1 # Hold as default
463 |
464 | current_price = data["close"].iloc[idx]
465 | future_price = data["close"].iloc[idx + forward_periods]
466 |
467 | return_threshold = 0.02 # 2% threshold
468 | forward_return = (future_price - current_price) / current_price
469 |
470 | if forward_return > return_threshold:
471 | return 2 # Buy
472 | elif forward_return < -return_threshold:
473 | return 0 # Sell
474 | else:
475 | return 1 # Hold
476 |
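    # Worked example for create_target (illustrative): with close = 100.0
    # now and 103.5 three bars ahead, forward_return = 0.035 > 0.02, so the
    # target is 2 (buy); a move to 98.5 is -1.5%, inside the +/-2% band,
    # so the target is 1 (hold).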
477 | def _initial_training(self, data: DataFrame, current_idx: int) -> bool:
478 | """Perform initial batch training on historical data.
479 |
480 | Args:
481 | data: Price data
482 | current_idx: Current index
483 |
484 | Returns:
485 | True if initial training successful
486 | """
487 | try:
488 | if current_idx < self.initial_training_period:
489 | return False
490 |
491 | # Collect initial training data
492 | training_examples = []
493 | training_targets = []
494 |
495 | # Use a substantial portion of historical data for initial training
496 | start_idx = max(
497 | self.feature_window, current_idx - self.initial_training_period
498 | )
499 |
500 | for idx in range(
501 | start_idx, current_idx - 10
502 |             ):  # leave a margin so forward-looking targets stay in range
503 | features = self.extract_features(data, idx)
504 | if len(features) > 0:
505 | target = self.create_target(data, idx)
506 | training_examples.append(features)
507 | training_targets.append(target)
508 |
509 | if len(training_examples) < self.min_training_samples:
510 | logger.debug(
511 | f"Insufficient training samples: {len(training_examples)} < {self.min_training_samples}"
512 | )
513 | return False
514 |
515 | X = np.array(training_examples)
516 | y = np.array(training_targets)
517 |
518 | # Check for class balance
519 | unique_classes, class_counts = np.unique(y, return_counts=True)
520 | if len(unique_classes) < 2:
521 | logger.warning(
522 | f"Insufficient class diversity for training: {unique_classes}"
523 | )
524 | return False
525 |
526 | # Initialize scaler with training data
527 | self.scaler = StandardScaler()
528 | X_scaled = self.scaler.fit_transform(X)
529 |
530 | # Train initial model
531 | self.model.fit(X_scaled, y)
532 | self.is_initial_trained = True
533 | self.is_trained = True
534 | self.training_samples_count = len(X)
535 |
536 | logger.info(
537 | f"Initial training completed with {len(X)} samples, classes: {dict(zip(unique_classes, class_counts, strict=False))}"
538 | )
539 | return True
540 |
541 | except Exception as e:
542 | logger.error(f"Error in initial training: {e}")
543 | return False
544 |
545 | def update_model(self, data: DataFrame, current_idx: int) -> None:
546 | """Update online learning model with new data.
547 |
548 | Args:
549 | data: Price data
550 | current_idx: Current index
551 | """
552 | # Perform initial training if not done yet
553 | if not self.is_initial_trained:
554 | if self._initial_training(data, current_idx):
555 | self.last_update = current_idx
556 | return
557 |
558 | # Check if update is needed
559 | if current_idx - self.last_update < self.update_frequency:
560 | return
561 |
562 | try:
563 | # Collect recent training examples for incremental update
564 | recent_examples = []
565 | recent_targets = []
566 |
567 | # Look back for recent training examples (smaller window for incremental updates)
568 | lookback_start = max(0, current_idx - self.update_frequency)
569 |
570 | for idx in range(lookback_start, current_idx):
571 | features = self.extract_features(data, idx)
572 | if len(features) > 0:
573 | target = self.create_target(data, idx)
574 | recent_examples.append(features)
575 | recent_targets.append(target)
576 |
577 | if len(recent_examples) < 2: # Need minimum samples for incremental update
578 | return
579 |
580 | X = np.array(recent_examples)
581 | y = np.array(recent_targets)
582 |
583 | # Scale features using existing scaler
584 | X_scaled = self.scaler.transform(X)
585 |
586 | # Incremental update using partial_fit
587 | # Ensure we have all classes that the model has seen before
588 | existing_classes = self.model.classes_
589 | self.model.partial_fit(X_scaled, y, classes=existing_classes)
590 |
591 | self.training_samples_count += len(X)
592 | self.last_update = current_idx
593 |
594 | logger.debug(
595 | f"Updated online model at index {current_idx} with {len(X)} samples (total: {self.training_samples_count})"
596 | )
597 |
598 | except Exception as e:
599 | logger.error(f"Error updating online model: {e}")
600 | # Reset training flag to attempt re-initialization
601 | if "partial_fit" in str(e).lower():
602 | logger.warning("Partial fit failed, will attempt re-initialization")
603 | self.is_trained = False
604 | self.is_initial_trained = False
605 |
606 | def generate_signals(self, data: DataFrame) -> tuple[Series, Series]:
607 | """Generate signals using online learning.
608 |
609 | Args:
610 | data: Price data with OHLCV columns
611 |
612 | Returns:
613 | Tuple of (entry_signals, exit_signals) as boolean Series
614 | """
615 | entry_signals = pd.Series(False, index=data.index)
616 | exit_signals = pd.Series(False, index=data.index)
617 |
618 | try:
619 | # Need minimum data for features
620 | start_idx = max(self.feature_window, self.initial_training_period + 10)
621 |
622 | if len(data) < start_idx:
623 | logger.warning(
624 | f"Insufficient data for online learning: {len(data)} < {start_idx}"
625 | )
626 | return entry_signals, exit_signals
627 |
628 | for idx in range(start_idx, len(data)):
629 | # Update model periodically
630 | self.update_model(data, idx)
631 |
632 | if not self.is_trained or self.scaler is None:
633 | continue
634 |
635 | # Extract features for current point
636 | features = self.extract_features(data, idx)
637 | if len(features) == 0:
638 | continue
639 |
640 | try:
641 | # Make prediction with error handling
642 | X = self.scaler.transform([features])
643 | prediction = self.model.predict(X)[0]
644 |
645 | # Get confidence score
646 | if hasattr(self.model, "predict_proba"):
647 | probabilities = self.model.predict_proba(X)[0]
648 | confidence = max(probabilities)
649 | else:
650 |                         # For models without predict_proba, use decision function
651 |                         if hasattr(self.model, "decision_function"):
652 |                             decision_values = self.model.decision_function(X)[0]
653 |                             # Multiclass margins arrive one per class; squash the strongest to a pseudo-probability
654 |                             confidence = 1.0 / (1.0 + np.exp(-np.max(np.abs(np.atleast_1d(decision_values)))))
655 | else:
656 | confidence = 0.6 # Default confidence
657 |
658 | # Generate signals based on prediction and confidence
659 | if confidence >= self.confidence_threshold:
660 | if prediction == 2: # Buy signal
661 | entry_signals.iloc[idx] = True
662 | elif prediction == 0: # Sell signal
663 | exit_signals.iloc[idx] = True
664 |
665 | except Exception as pred_error:
666 | logger.debug(f"Prediction error at index {idx}: {pred_error}")
667 | continue
668 |
669 | # Log summary statistics
670 | total_entry_signals = entry_signals.sum()
671 | total_exit_signals = exit_signals.sum()
672 | logger.info(
673 | f"Generated {total_entry_signals} entry and {total_exit_signals} exit signals using online learning"
674 | )
675 |
676 | except Exception as e:
677 | logger.error(f"Error generating online learning signals: {e}")
678 |
679 | return entry_signals, exit_signals
680 |
681 | def get_model_info(self) -> dict[str, Any]:
682 | """Get information about the online learning model.
683 |
684 | Returns:
685 | Dictionary with model information
686 | """
687 | info = {
688 | "model_type": self.model_type,
689 | "is_trained": self.is_trained,
690 | "is_initial_trained": self.is_initial_trained,
691 | "feature_window": self.feature_window,
692 | "update_frequency": self.update_frequency,
693 | "confidence_threshold": self.confidence_threshold,
694 | "min_training_samples": self.min_training_samples,
695 | "initial_training_period": self.initial_training_period,
696 | "training_samples_count": self.training_samples_count,
697 | "expected_feature_count": self.expected_feature_count,
698 | }
699 |
700 | if hasattr(self.model, "coef_") and self.model.coef_ is not None:
701 | info["model_coefficients"] = self.model.coef_.tolist()
702 |
703 | if hasattr(self.model, "classes_") and self.model.classes_ is not None:
704 | info["model_classes"] = self.model.classes_.tolist()
705 |
706 | if self.scaler is not None:
707 | info["feature_scaling"] = {
708 | "mean": self.scaler.mean_.tolist()
709 | if hasattr(self.scaler, "mean_")
710 | else None,
711 | "scale": self.scaler.scale_.tolist()
712 | if hasattr(self.scaler, "scale_")
713 | else None,
714 | }
715 |
716 | return info
717 |
718 |
719 | class HybridAdaptiveStrategy(AdaptiveStrategy):
720 | """Hybrid strategy combining parameter adaptation with online learning."""
721 |
722 | def __init__(
723 | self, base_strategy: Strategy, online_learning_weight: float = 0.3, **kwargs
724 | ):
725 | """Initialize hybrid adaptive strategy.
726 |
727 | Args:
728 | base_strategy: Base strategy to adapt
729 | online_learning_weight: Weight for online learning component
730 | **kwargs: Additional parameters for AdaptiveStrategy
731 | """
732 | super().__init__(base_strategy, **kwargs)
733 | self.online_learning_weight = online_learning_weight
734 | self.online_strategy = OnlineLearningStrategy()
735 |
736 | @property
737 | def name(self) -> str:
738 | """Get strategy name."""
739 | return f"HybridAdaptive({self.base_strategy.name})"
740 |
741 | @property
742 | def description(self) -> str:
743 | """Get strategy description."""
744 | return "Hybrid adaptive strategy combining parameter adaptation with online learning"
745 |
746 | def generate_signals(self, data: DataFrame) -> tuple[Series, Series]:
747 | """Generate hybrid adaptive signals.
748 |
749 | Args:
750 | data: Price data with OHLCV columns
751 |
752 | Returns:
753 | Tuple of (entry_signals, exit_signals) as boolean Series
754 | """
755 | # Get signals from adaptive base strategy
756 | adaptive_entry, adaptive_exit = super().generate_signals(data)
757 |
758 | # Get signals from online learning component
759 | online_entry, online_exit = self.online_strategy.generate_signals(data)
760 |
761 | # Combine signals with weighting
762 | base_weight = 1.0 - self.online_learning_weight
763 |
764 | # Weighted combination for entry signals
765 | combined_entry = (
766 | adaptive_entry.astype(float) * base_weight
767 | + online_entry.astype(float) * self.online_learning_weight
768 | ) > 0.5
769 |
770 | # Weighted combination for exit signals
771 | combined_exit = (
772 | adaptive_exit.astype(float) * base_weight
773 | + online_exit.astype(float) * self.online_learning_weight
774 | ) > 0.5
775 |
776 | return combined_entry, combined_exit
777 |
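    # Weighting arithmetic (default online_learning_weight = 0.3): an
    # adaptive-only signal scores 0.7 > 0.5 and fires, while an online-only
    # signal scores 0.3 and does not, so with a weight below 0.5 the online
    # model can confirm the base strategy but never act alone.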
778 | def get_hybrid_info(self) -> dict[str, Any]:
779 | """Get information about hybrid strategy components.
780 |
781 | Returns:
782 | Dictionary with hybrid strategy information
783 | """
784 | return {
785 | "adaptation_history": self.get_adaptation_history(),
786 | "online_learning_info": self.online_strategy.get_model_info(),
787 | "online_learning_weight": self.online_learning_weight,
788 | "base_weight": 1.0 - self.online_learning_weight,
789 | }
790 |
```
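A composition sketch for the adaptive wrapper above. `SmaCrossStrategy` is hypothetical and exists only for this example; it assumes the `Strategy` base class stores the constructor's dict on `self.parameters` (as the wrapper's reads of `base_strategy.parameters` imply) and requires no members beyond those shown. The parameter names `window` and `period` are chosen from the wrapper's default adaptable set so gradient updates actually move them:

```python
from pandas import DataFrame, Series

from maverick_mcp.backtesting.strategies.base import Strategy
from maverick_mcp.backtesting.strategies.ml.adaptive import AdaptiveStrategy


class SmaCrossStrategy(Strategy):
    """Toy SMA crossover used only to exercise the adaptive wrapper."""

    @property
    def name(self) -> str:
        return "SmaCross"

    @property
    def description(self) -> str:
        return "Toy SMA crossover (hypothetical example strategy)"

    def generate_signals(self, data: DataFrame) -> tuple[Series, Series]:
        # Cast to int because gradient adaptation can drift these to floats.
        fast = data["close"].rolling(int(self.parameters.get("window", 10))).mean()
        slow = data["close"].rolling(int(self.parameters.get("period", 30))).mean()
        entries = (fast > slow) & (fast.shift(1) <= slow.shift(1))
        exits = (fast < slow) & (fast.shift(1) >= slow.shift(1))
        return entries, exits


base = SmaCrossStrategy({"window": 10, "period": 30})
adaptive = AdaptiveStrategy(base, adaptation_method="gradient", learning_rate=0.05)
# entries, exits = adaptive.generate_signals(ohlcv_df)  # ohlcv_df needs a "close" column
# adaptive.get_adaptation_history() then shows how "window" and "period" drifted.
```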
--------------------------------------------------------------------------------
/maverick_mcp/providers/macro_data.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Macroeconomic data providers and utilities for Maverick-MCP.
3 | Provides GDP, inflation rate, unemployment rate, and other macroeconomic indicators.
4 | """
5 |
6 | import logging
7 | import os
8 | from datetime import UTC, datetime, timedelta
9 |
10 | import pandas as pd
11 | from dotenv import load_dotenv
12 | from sklearn.preprocessing import MinMaxScaler
13 |
14 | from maverick_mcp.utils.circuit_breaker_decorators import (
15 | with_economic_data_circuit_breaker,
16 | )
17 |
18 | # Load environment variables
19 | load_dotenv()
20 |
21 | # Configure logging
22 | logging.basicConfig(
23 | level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
24 | )
25 | logger = logging.getLogger("maverick_mcp.macro_data")
26 |
27 | # Configuration
28 | FRED_API_KEY = os.getenv("FRED_API_KEY", "")
29 |
30 |
31 | class MacroDataProvider:
32 | """Provider for macroeconomic data using FRED API."""
33 |
34 | MAX_WINDOW_DAYS = 365
35 |
36 | def __init__(self, window_days: int = MAX_WINDOW_DAYS):
37 | try:
38 | from fredapi import Fred
39 |
40 | self.fred = Fred(api_key=FRED_API_KEY)
41 | self.scaler = MinMaxScaler()
42 | self.window_days = window_days
43 | self.historical_data_bounds: dict[str, dict[str, str]] = {}
44 | self.update_historical_bounds()
45 |
46 | # For momentum swings, shorter lookbacks
47 | self.lookback_days = 30
48 |
49 | # Weights for macro sentiment
50 | self.weights = {
51 | # Short-term signals (60% total)
52 | "vix": 0.20,
53 | "sp500_momentum": 0.20,
54 | "nasdaq_momentum": 0.15,
55 | "usd_momentum": 0.05,
56 | # Macro signals (40% total)
57 | "inflation_rate": 0.15,
58 | "gdp_growth_rate": 0.15,
59 | "unemployment_rate": 0.10,
60 | }
61 |
62 | self.previous_sentiment_score = None
63 | except ImportError:
64 | logger.error(
65 | "fredapi not installed. Please install with 'pip install fredapi'"
66 | )
67 | raise
68 |
69 | @with_economic_data_circuit_breaker(
70 | use_fallback=False
71 | ) # Fallback handled at method level
72 | def _get_fred_series(
73 | self, series_id: str, start_date: str, end_date: str
74 | ) -> pd.Series:
75 | """
76 | Get FRED series data with circuit breaker protection.
77 |
78 | Args:
79 | series_id: FRED series identifier
80 | start_date: Start date in YYYY-MM-DD format
81 | end_date: End date in YYYY-MM-DD format
82 |
83 | Returns:
84 | Pandas Series with the data
85 | """
86 | return self.fred.get_series(series_id, start_date, end_date)
87 |
88 | def _calculate_weighted_rolling_performance(
89 | self, series_id: str, lookbacks: list[int], weights: list[float]
90 | ) -> float:
91 | """
92 | Utility method to compute a weighted performance measure over multiple rolling windows.
93 | For example, if lookbacks = [30, 90, 180] and weights = [0.5, 0.3, 0.2],
94 | it calculates performance for each window and returns the sum of the weighted values.
95 | """
96 | if len(lookbacks) != len(weights):
97 | logger.error("Lookbacks and weights must have the same length.")
98 | return 0.0
99 |
100 | end_date = datetime.now(UTC)
101 | total_performance = 0.0
102 |
103 | for days, w in zip(lookbacks, weights, strict=False):
104 | start_date = end_date - timedelta(days=days)
105 | series_data = self._get_fred_series(
106 | series_id,
107 | start_date.strftime("%Y-%m-%d"),
108 | end_date.strftime("%Y-%m-%d"),
109 | )
110 | # Ensure we have a pandas Series, then clean it
111 | if isinstance(series_data, pd.Series):
112 | df = series_data.dropna()
113 | if not df.empty:
114 | # Simple rolling mean to reduce single-day spikes
115 | df = df.rolling(window=2).mean().dropna()
116 | if not df.empty:
117 | start_price = df.iloc[0]
118 | end_price = df.iloc[-1]
119 | performance = ((end_price - start_price) / start_price) * 100
120 | total_performance += performance * w
121 | else:
122 | logger.warning(
123 | f"No FRED data for {series_id} over last {days} days."
124 | )
125 | else:
126 | logger.warning(
127 | f"Unexpected data type from FRED API for {series_id}: {type(series_data)}"
128 | )
129 | return total_performance
130 |
131 | def get_sp500_performance(self) -> float:
132 | """
133 | Calculate a multi-timeframe rolling performance for S&P 500 (similar to SPY).
134 | Example using 1-month, 3-month, and 6-month lookbacks with custom weights.
135 | """
136 | try:
137 | lookbacks = [30, 90, 180]
138 | weights = [0.5, 0.3, 0.2]
139 | return self._calculate_weighted_rolling_performance(
140 | "SP500", lookbacks, weights
141 | )
142 | except Exception as e:
143 | logger.error(f"Error fetching S&P 500 rolling performance: {e}")
144 | return 0.0
145 |
146 | def get_nasdaq_performance(self) -> float:
147 | """
148 | Calculate a multi-timeframe rolling performance for NASDAQ-100 (similar to QQQ).
149 | Example using 1-month, 3-month, and 6-month lookbacks with custom weights.
150 | """
151 | try:
152 | lookbacks = [30, 90, 180]
153 | weights = [0.5, 0.3, 0.2]
154 | return self._calculate_weighted_rolling_performance(
155 | "NASDAQ100", lookbacks, weights
156 | )
157 | except Exception as e:
158 | logger.error(f"Error fetching NASDAQ rolling performance: {e}")
159 | return 0.0
160 |
161 | def get_gdp_growth_rate(self):
162 | """
163 |         Fetch the latest quarterly GDP growth rate (A191RL1Q225SBEA) with circuit-breaker protection and safe fallbacks.
164 | """
165 | try:
166 | # Get last 2 quarters of data to ensure we have the latest
167 | end_date = datetime.now(UTC)
168 | start_date = end_date - timedelta(days=180)
169 |
170 | data = self._get_fred_series(
171 | "A191RL1Q225SBEA",
172 | start_date.strftime("%Y-%m-%d"),
173 | end_date.strftime("%Y-%m-%d"),
174 | )
175 |
176 | if data.empty:
177 | logger.warning("No GDP data available from FRED")
178 | return {"current": 0.0, "previous": 0.0}
179 |
180 | # Get last two values
181 | last_two = data.tail(2)
182 | if len(last_two) >= 2:
183 | return {
184 | "current": float(last_two.iloc[-1]),
185 | "previous": float(last_two.iloc[-2]),
186 | }
187 | return {
188 | "current": float(last_two.iloc[-1]),
189 | "previous": float(last_two.iloc[-1]),
190 | }
191 |
192 | except Exception as e:
193 | logger.error(f"Error fetching GDP growth rate: {e}")
194 | return {"current": 0.0, "previous": 0.0}
195 |
196 | def get_unemployment_rate(self):
197 | try:
198 | # Get recent unemployment data
199 | end_date = datetime.now(UTC)
200 | start_date = end_date - timedelta(days=90)
201 | series_data = self._get_fred_series(
202 | "UNRATE", start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d")
203 | )
204 | if not isinstance(series_data, pd.Series):
205 | logger.error(
206 | f"Expected pandas Series from FRED API, got {type(series_data)}"
207 | )
208 | return {"current": 0.0, "previous": 0.0}
209 |
210 | data = series_data.dropna()
211 | if len(data) >= 2:
212 | return {
213 | "current": float(data.iloc[-1]),
214 | "previous": float(data.iloc[-2]),
215 | }
216 | return {"current": float(data.iloc[-1]), "previous": float(data.iloc[-1])}
217 | except Exception as e:
218 | logger.error(f"Error fetching Unemployment rate: {e}")
219 | return {"current": None, "previous": None}
220 |
221 | def get_inflation_rate(self):
222 | """
223 | Fetch the annual core inflation rate based on CPI data from FRED.
224 | Uses CPILFESL (Core CPI: All Items Less Food and Energy).
225 | """
226 | try:
227 | end_date = datetime.now(UTC)
228 | # Get ~5 years of data to ensure we have enough
229 | start_date = end_date - timedelta(days=5 * 365)
230 |
231 | # 1) Fetch monthly CPILFESL data from FRED
232 | series_data = self.fred.get_series(
233 | "CPILFESL",
234 | observation_start=start_date.strftime("%Y-%m-%d"),
235 | observation_end=end_date.strftime("%Y-%m-%d"),
236 | )
237 |
238 | # 2) Ensure it's a pandas Series and clean it
239 | if not isinstance(series_data, pd.Series):
240 | logger.error(
241 | f"Expected pandas Series from FRED API, got {type(series_data)}"
242 | )
243 | return {"current": None, "previous": None, "bounds": (None, None)}
244 |
245 | data = series_data.dropna().sort_index()
246 |
247 | # Optional: Force a monthly freq alignment in case data is stored daily
248 | data = data.asfreq("MS").dropna()
249 |
250 | if data.empty:
251 | logger.error("No inflation data available from FRED")
252 | return {"current": None, "previous": None, "bounds": (None, None)}
253 |
254 | # 3) The latest monthly index is now guaranteed to be the first of the month
255 | latest_idx = data.index[-1]
256 | latest_value = data.iloc[-1]
257 |
258 | # 4) Get data for exactly one year prior (the matching month)
259 | # Because we forced MS freq, this is typically just `iloc[-13]` (12 steps back),
260 | # but let's keep the logic explicit:
261 | if isinstance(latest_idx, pd.Timestamp):
262 | year_ago_idx = latest_idx - pd.DateOffset(years=1)
263 | else:
264 | # Fallback for unexpected index types
265 | year_ago_idx = pd.Timestamp(latest_idx) - pd.DateOffset(years=1)
266 | # If your data is strictly monthly, you can do:
267 | # year_ago_value = data.loc[year_ago_idx] # might fail if missing data
268 | # Or fallback to "on or before" logic:
269 | year_ago_series = data[data.index <= year_ago_idx]
270 | if year_ago_series.empty:
271 | logger.warning(
272 | "Not enough data to get year-ago CPI. Using 0 as fallback."
273 | )
274 | current_inflation = 0.0
275 | else:
276 | year_ago_value = year_ago_series.iloc[-1]
277 | current_inflation = (
278 | (latest_value - year_ago_value) / year_ago_value
279 | ) * 100
280 |
281 | # 5) Compute previous month's YoY
282 | if isinstance(latest_idx, pd.Timestamp):
283 | prev_month_idx = latest_idx - pd.DateOffset(months=1)
284 | else:
285 | prev_month_idx = pd.Timestamp(latest_idx) - pd.DateOffset(months=1)
286 | prev_month_series = data[data.index <= prev_month_idx]
287 | if prev_month_series.empty:
288 | logger.warning("No data for previous month. Using 0 as fallback.")
289 | previous_inflation = 0.0
290 | else:
291 | prev_month_value = prev_month_series.iloc[-1]
292 | if isinstance(prev_month_idx, pd.Timestamp) and not pd.isna(
293 | prev_month_idx
294 | ):
295 | prev_year_ago_idx = prev_month_idx - pd.DateOffset(years=1)
296 | else:
297 | # Handle NaT or other types
298 | prev_year_ago_idx = pd.Timestamp(prev_month_idx) - pd.DateOffset(
299 | years=1
300 | )
301 | prev_year_ago_series = data[data.index <= prev_year_ago_idx]
302 | if prev_year_ago_series.empty:
303 | logger.warning(
304 | "No data for previous year's month. Using 0 as fallback."
305 | )
306 | previous_inflation = 0.0
307 | else:
308 | prev_year_ago_value = prev_year_ago_series.iloc[-1]
309 | previous_inflation = (
310 | (prev_month_value - prev_year_ago_value) / prev_year_ago_value
311 | ) * 100
312 |
313 | # 6) Optionally round
314 | current_inflation = round(current_inflation, 2)
315 | previous_inflation = round(previous_inflation, 2)
316 |
317 | # 7) Compute bounds
318 | yoy_changes = data.pct_change(periods=12) * 100
319 | yoy_changes = yoy_changes.dropna()
320 | if yoy_changes.empty:
321 | inflation_min, inflation_max = 0.0, 0.0
322 | else:
323 | inflation_min = yoy_changes.min()
324 | inflation_max = yoy_changes.max()
325 |
326 | bounds = (round(inflation_min, 2), round(inflation_max, 2))
327 |
328 | logger.info(
329 | f"Core CPI (YoY): current={current_inflation}%, previous={previous_inflation}%"
330 | )
331 | return {
332 | "current": current_inflation,
333 | "previous": previous_inflation,
334 | "bounds": bounds,
335 | }
336 |
337 | except Exception as e:
338 | logger.error(f"Error fetching core inflation rate: {e}", exc_info=True)
339 | return {"current": None, "previous": None, "bounds": (None, None)}
340 |
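    # Worked YoY example for get_inflation_rate (illustrative): a latest
    # Core CPI of 310.0 against a year-ago value of 300.0 gives
    # ((310.0 - 300.0) / 300.0) * 100 = 3.33% current inflation.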
341 | def get_vix(self) -> float | None:
342 |         """Get the latest VIX level, trying Yahoo Finance first with a FRED (VIXCLS) fallback."""
343 | try:
344 | import yfinance as yf
345 |
346 | # Try Yahoo Finance first
347 | ticker = yf.Ticker("^VIX")
348 | data = ticker.history(period="1d")
349 | if not data.empty:
350 | return float(data["Close"].iloc[-1])
351 |
352 |         # Fall back to FRED if Yahoo Finance returned no data
353 | end_date = datetime.now(UTC)
354 | start_date = end_date - timedelta(days=7)
355 | series_data = self.fred.get_series(
356 | "VIXCLS", start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d")
357 | )
358 | if isinstance(series_data, pd.Series):
359 | df = series_data.dropna()
360 | if not df.empty:
361 | return float(df.iloc[-1])
362 |
363 | return None
364 | except Exception as e:
365 | logger.error(f"Error fetching VIX: {e}")
366 | return None
367 |
368 | def get_sp500_momentum(self) -> float:
369 | """
370 | Calculate short-term momentum of the S&P 500 over multiple very short timeframes
371 | (3-day, 7-day, 14-day). We also add a tiny rolling average to reduce spikiness.
372 | """
373 | try:
374 | end_date = datetime.now(UTC)
375 | lookbacks = [3, 7, 14]
376 | momentums = []
377 |
378 | for days in lookbacks:
379 |                 start_date = end_date - timedelta(days=days + 5)  # pad for non-trading days, as in the NASDAQ/USD variants
380 | series_data = self.fred.get_series(
381 | "SP500",
382 | start_date.strftime("%Y-%m-%d"),
383 | end_date.strftime("%Y-%m-%d"),
384 | )
385 | if isinstance(series_data, pd.Series):
386 | df = series_data.dropna()
387 | df = df.rolling(window=2).mean().dropna()
388 |
389 | if len(df) >= 2:
390 | momentum = ((df.iloc[-1] - df.iloc[0]) / df.iloc[0]) * 100
391 | momentums.append(momentum)
392 |
393 | if momentums:
394 | weighted: float = (
395 | 0.5 * momentums[0] + 0.3 * momentums[1] + 0.2 * momentums[2]
396 | if len(momentums) == 3
397 | else sum(momentums) / len(momentums)
398 | )
399 | return weighted
400 | return 0.0
401 | except Exception as e:
402 | logger.error(f"Error fetching S&P 500 momentum: {e}")
403 | return 0.0
404 |
405 | def get_nasdaq_momentum(self) -> float:
406 | """
407 | Calculate short-term momentum of the NASDAQ-100 over multiple timeframes
408 | (3-day, 7-day, 14-day).
409 | """
410 | try:
411 | end_date = datetime.now(UTC)
412 | lookbacks = [3, 7, 14]
413 | momentums = []
414 |
415 | for days in lookbacks:
416 | start_date = end_date - timedelta(days=days + 5)
417 | series_data = self.fred.get_series(
418 | "NASDAQ100",
419 | start_date.strftime("%Y-%m-%d"),
420 | end_date.strftime("%Y-%m-%d"),
421 | )
422 | if isinstance(series_data, pd.Series):
423 | df = series_data.dropna()
424 | df = df.rolling(window=2).mean().dropna()
425 |
426 | if len(df) >= 2:
427 | momentum = ((df.iloc[-1] - df.iloc[0]) / df.iloc[0]) * 100
428 | momentums.append(momentum)
429 | else:
430 | logger.warning(f"Insufficient NASDAQ data for {days}-day lookback")
431 | momentums.append(0.0)
432 |
433 | if len(momentums) == 3:
434 | result: float = (
435 | 0.5 * momentums[0] + 0.3 * momentums[1] + 0.2 * momentums[2]
436 | )
437 | return result
438 |
439 | logger.warning("Insufficient data for NASDAQ momentum calculation")
440 | return sum(momentums) / len(momentums) if momentums else 0.0
441 |
442 | except Exception as e:
443 | logger.error(f"Error fetching NASDAQ momentum: {e}")
444 | return 0.0
445 |
446 | def get_usd_momentum(self) -> float:
447 | """
448 | Calculate USD momentum using DTWEXBGS (Broad USD Index) from FRED
449 | over multiple short-term lookbacks (3-day, 7-day, 14-day).
450 | """
451 | try:
452 | end_date = datetime.now(UTC)
453 | lookbacks = [3, 7, 14]
454 | momentums = []
455 |
456 | for days in lookbacks:
457 | start_date = end_date - timedelta(days=days + 5)
458 | df = self.fred.get_series(
459 | "DTWEXBGS",
460 | start_date.strftime("%Y-%m-%d"),
461 | end_date.strftime("%Y-%m-%d"),
462 | )
463 | df = df.dropna()
464 | df = df.rolling(window=2).mean().dropna()
465 |
466 | if len(df) >= 2:
467 | first_valid = df.iloc[0]
468 | last_valid = df.iloc[-1]
469 | momentum = ((last_valid - first_valid) / first_valid) * 100
470 | momentums.append(momentum)
471 | else:
472 | logger.warning(f"Insufficient USD data for {days}-day lookback")
473 | momentums.append(0.0)
474 |
475 | if len(momentums) == 3:
476 | result: float = (
477 | 0.5 * momentums[0] + 0.3 * momentums[1] + 0.2 * momentums[2]
478 | )
479 | return result
480 |
481 | logger.warning("Insufficient data for USD momentum calculation")
482 | return sum(momentums) / len(momentums) if momentums else 0.0
483 |
484 | except Exception as e:
485 | logger.error(f"Error fetching USD momentum: {e}")
486 | return 0.0
487 |
488 | def update_historical_bounds(self):
489 | """
490 | Update historical bounds based on the last `window_days` of data.
491 | These bounds are used for normalization in `normalize_indicators()`.
492 | """
493 | end_date = datetime.now(UTC)
494 | start_date = end_date - timedelta(days=self.window_days)
495 | start_date_str = start_date.strftime("%Y-%m-%d")
496 | end_date_str = end_date.strftime("%Y-%m-%d")
497 |
498 | indicators = {
499 | "gdp_growth_rate": "A191RL1Q225SBEA",
500 | "unemployment_rate": "UNRATE",
501 | "inflation_rate": "CPILFESL",
502 | "sp500_momentum": "SP500",
503 | "nasdaq_momentum": "NASDAQCOM",
504 | "vix": "VIXCLS",
505 | }
506 |
507 | for key, series_id in indicators.items():
508 | try:
509 | if key == "gdp_growth_rate":
510 | data = self.fred.get_series(series_id, start_date_str, end_date_str)
511 | elif key == "inflation_rate":
512 | # For inflation bounds, get a wider historical range
513 | wider_start = (end_date - timedelta(days=5 * 365)).strftime(
514 | "%Y-%m-%d"
515 | )
516 | cpi = self.fred.get_series(series_id, wider_start, end_date_str)
517 | cpi = cpi.dropna()
518 |
519 | if len(cpi) > 13:  # need >= 14 points so bounds span at least two YoY values
520 | # Calculate year-over-year inflation rates
521 | inflation_rates = []
522 | for i in range(12, len(cpi)):
523 | yoy_inflation = (
524 | (cpi.iloc[i] - cpi.iloc[i - 12]) / cpi.iloc[i - 12]
525 | ) * 100
526 | inflation_rates.append(yoy_inflation)
527 |
528 | if inflation_rates:
529 | data = pd.Series(inflation_rates)
530 | else:
531 | data = pd.Series([], dtype=float)
532 | else:
533 | # Not enough data for YoY calculation
534 | data = pd.Series([], dtype=float)
535 | elif key in ["sp500_momentum", "nasdaq_momentum"]:
536 | df = self.fred.get_series(series_id, start_date_str, end_date_str)
537 | df = df.dropna()
538 | df = df.rolling(window=2).mean().dropna()
539 | if not df.empty:
540 | start_price = df.iloc[0]
541 | end_price = df.iloc[-1]
542 | performance = ((end_price - start_price) / start_price) * 100
543 | data = pd.Series([performance], index=[df.index[-1]])  # single point: min == max
544 | else:
545 | data = pd.Series([], dtype=float)
546 | else:
547 | data = self.fred.get_series(series_id, start_date_str, end_date_str)
548 |
549 | if not data.empty:
550 | min_val = data.min()
551 | max_val = data.max()
552 | self.historical_data_bounds[key] = {"min": min_val, "max": max_val}
553 | else:
554 | self.historical_data_bounds[key] = self.default_bounds(key)
555 | logger.warning(f"No data fetched for {key}. Using default bounds.")
556 | except Exception as e:
557 | logger.error(f"Error updating historical bounds for {key}: {e}")
558 | self.historical_data_bounds[key] = self.default_bounds(key)
559 |
560 | def default_bounds(self, key: str) -> dict[str, float]:
561 | """
562 | Fixed default bounds tuned for short-term indicators. Keeping them static
563 | avoids recalculating them daily in a way that would swing the normalization scale.
564 | """
565 | default_bounds = {
566 | "vix": {"min": 10.0, "max": 50.0},
567 | "sp500_momentum": {"min": -15.0, "max": 15.0},
568 | "nasdaq_momentum": {"min": -20.0, "max": 20.0},
569 | "usd_momentum": {"min": -5.0, "max": 5.0},
570 | "inflation_rate": {"min": 0.0, "max": 10.0},
571 | "gdp_growth_rate": {"min": -2.0, "max": 6.0},
572 | "unemployment_rate": {"min": 2.0, "max": 10.0},
573 | }
574 | return default_bounds.get(key, {"min": 0.0, "max": 1.0})
575 |
576 | def normalize_indicators(self, indicators: dict) -> dict:
577 | """
578 | Convert raw indicators to [0,1], with risk-off indicators inverted (lower is better).
579 | """
580 | normalized = {}
581 | for key, value in indicators.items():
582 | if value is None:
583 | normalized[key] = 0.5
584 | continue
585 |
586 | bounds = self.historical_data_bounds.get(key, self.default_bounds(key))
587 | min_val = float(bounds["min"])
588 | max_val = float(bounds["max"])
589 | denom = max_val - min_val if (max_val != min_val) else 1e-9
590 |
591 | norm_val = (value - min_val) / denom
592 |
593 | if key in ["vix", "unemployment_rate", "inflation_rate"]:
594 | norm_val = 1.0 - norm_val
595 |
596 | norm_val = max(0.0, min(1.0, norm_val))
597 | normalized[key] = norm_val
598 |
599 | return normalized
600 |
601 | def get_historical_data(self) -> dict:
602 | """Get historical data for all indicators over self.lookback_days."""
603 | end_date = datetime.now(UTC)
604 | start_date = end_date - timedelta(days=self.lookback_days)
605 | start_date_str = start_date.strftime("%Y-%m-%d")
606 | end_date_str = end_date.strftime("%Y-%m-%d")
607 |
608 | try:
609 | sp500_data = self.fred.get_series("SP500", start_date_str, end_date_str)
610 | sp500_performance = []
611 | if not sp500_data.empty:
612 | first_value = sp500_data.iloc[0]
613 | sp500_performance = [
614 | (x - first_value) / first_value * 100 for x in sp500_data
615 | ]
616 |
617 | nasdaq_data = self.fred.get_series(
618 | "NASDAQ100", start_date_str, end_date_str
619 | )
620 | nasdaq_performance = []
621 | if not nasdaq_data.empty:
622 | first_value = nasdaq_data.iloc[0]
623 | nasdaq_performance = [
624 | (x - first_value) / first_value * 100 for x in nasdaq_data
625 | ]
626 |
627 | vix_data = self.fred.get_series("VIXCLS", start_date_str, end_date_str)
628 | vix_values = vix_data.tolist() if not vix_data.empty else []
629 |
630 | gdp_data = self.fred.get_series(
631 | "A191RL1Q225SBEA", start_date_str, end_date_str
632 | )
633 | gdp_values = gdp_data.tolist() if not gdp_data.empty else []
634 |
635 | unemployment_data = self.fred.get_series(
636 | "UNRATE", start_date_str, end_date_str
637 | )
638 | unemployment_values = (
639 | unemployment_data.tolist() if not unemployment_data.empty else []
640 | )
641 |
642 | cpi_data = self.fred.get_series("CPILFESL", start_date_str, end_date_str)
643 | inflation_values = []
644 | if not cpi_data.empty and len(cpi_data) > 12:
645 | inflation_values = [
646 | ((cpi_data.iloc[i] - cpi_data.iloc[i - 12]) / cpi_data.iloc[i - 12])
647 | * 100
648 | for i in range(12, len(cpi_data))
649 | ]
650 |
651 | return {
652 | "sp500_performance": sp500_performance,
653 | "nasdaq_performance": nasdaq_performance,
654 | "vix": vix_values,
655 | "gdp_growth_rate": gdp_values,
656 | "unemployment_rate": unemployment_values,
657 | "inflation_rate": inflation_values,
658 | }
659 | except Exception as e:
660 | logger.error(f"Error fetching historical data: {str(e)}")
661 | return {
662 | "sp500_performance": [],
663 | "nasdaq_performance": [],
664 | "vix": [],
665 | "gdp_growth_rate": [],
666 | "unemployment_rate": [],
667 | "inflation_rate": [],
668 | }
669 |
670 | def get_macro_statistics(self):
671 | """
672 | Aggregate macro indicators into a smoothed sentiment score, returning safe defaults on failure.
673 | """
674 | try:
675 | self.update_historical_bounds()
676 |
677 | # Fetch primary indicator readings (current and previous values)
678 | inflation_data = self.get_inflation_rate()
679 | gdp_data = self.get_gdp_growth_rate()
680 | unemployment_data = self.get_unemployment_rate()
681 |
682 | # Pull raw indicator values with safe defaults
683 | indicators = {
684 | "gdp_growth_rate": gdp_data["current"] or 0.0,
685 | "gdp_growth_rate_previous": gdp_data["previous"] or 0.0,
686 | "unemployment_rate": unemployment_data["current"] or 0.0,
687 | "unemployment_rate_previous": unemployment_data["previous"] or 0.0,
688 | "inflation_rate": inflation_data["current"] or 0.0,
689 | "inflation_rate_previous": inflation_data["previous"] or 0.0,
690 | "vix": self.get_vix() or 0.0,
691 | "sp500_momentum": self.get_sp500_momentum() or 0.0,
692 | "nasdaq_momentum": self.get_nasdaq_momentum() or 0.0,
693 | "usd_momentum": self.get_usd_momentum() or 0.0,
694 | }
695 |
696 | # Normalize
697 | normalized = self.normalize_indicators(indicators)
698 | sentiment_score = sum(normalized[k] * self.weights[k] for k in self.weights)
699 | sentiment_score = (sentiment_score / sum(self.weights.values())) * 100
700 | sentiment_score = max(1, min(100, sentiment_score))
701 |
702 | # Increase smoothing factor to reduce big overnight moves
703 | if self.previous_sentiment_score is not None:
704 | smoothing_factor = 0.8 # keep 80% old, 20% new
705 | sentiment_score = (
706 | smoothing_factor * self.previous_sentiment_score
707 | + (1 - smoothing_factor) * sentiment_score
708 | )
709 |
710 | self.previous_sentiment_score = sentiment_score
711 |
712 | historical_data = self.get_historical_data()
713 |
714 | # Return dictionary with all values guaranteed to be numeric
715 | return {
716 | "gdp_growth_rate": float(indicators["gdp_growth_rate"]),
717 | "gdp_growth_rate_previous": float(
718 | indicators["gdp_growth_rate_previous"]
719 | ),
720 | "unemployment_rate": float(indicators["unemployment_rate"]),
721 | "unemployment_rate_previous": float(
722 | indicators["unemployment_rate_previous"]
723 | ),
724 | "inflation_rate": float(indicators["inflation_rate"]),
725 | "inflation_rate_previous": float(indicators["inflation_rate_previous"]),
726 | "sp500_performance": float(self.get_sp500_performance() or 0.0),
727 | "nasdaq_performance": float(self.get_nasdaq_performance() or 0.0),
728 | "vix": float(indicators["vix"]),
729 | "sentiment_score": float(sentiment_score),
730 | "historical_data": historical_data,
731 | }
732 |
733 | except Exception as e:
734 | logger.error(f"Error in get_macro_statistics: {e}")
735 | # Return safe defaults if everything fails
736 | return {
737 | "gdp_growth_rate": 0.0,
738 | "gdp_growth_rate_previous": 0.0,
739 | "unemployment_rate": 0.0,
740 | "unemployment_rate_previous": 0.0,
741 | "inflation_rate": 0.0,
742 | "inflation_rate_previous": 0.0,
743 | "sp500_performance": 0.0,
744 | "nasdaq_performance": 0.0,
745 | "vix": 0.0,
746 | "sentiment_score": 50.0,
747 | "historical_data": {},
748 | }
749 |
```
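
A minimal, self-contained sketch of the scoring math in `normalize_indicators()` and `get_macro_statistics()` above: raw readings are normalized to [0, 1] against fixed bounds, risk-off indicators are inverted, and the weighted blend is scaled to 1-100. The weights and sample readings below are hypothetical (the real weights live on the provider class earlier in this file), and the 80/20 exponential smoothing step is omitted.

```python
# Hypothetical weights for illustration; the real ones are defined on the provider class.
WEIGHTS = {
    "vix": 0.25,
    "sp500_momentum": 0.30,
    "nasdaq_momentum": 0.20,
    "usd_momentum": 0.10,
    "inflation_rate": 0.10,
    "unemployment_rate": 0.05,
}
# Mirrors default_bounds() above.
BOUNDS = {
    "vix": (10.0, 50.0),
    "sp500_momentum": (-15.0, 15.0),
    "nasdaq_momentum": (-20.0, 20.0),
    "usd_momentum": (-5.0, 5.0),
    "inflation_rate": (0.0, 10.0),
    "unemployment_rate": (2.0, 10.0),
}
RISK_OFF = {"vix", "unemployment_rate", "inflation_rate"}  # inverted: lower is better


def sentiment_score(raw: dict[str, float]) -> float:
    """Normalize raw readings to [0, 1] and blend into a 1-100 score."""
    normalized = {}
    for key, value in raw.items():
        lo, hi = BOUNDS[key]
        norm = (value - lo) / (hi - lo)
        if key in RISK_OFF:
            norm = 1.0 - norm
        normalized[key] = max(0.0, min(1.0, norm))
    score = sum(normalized[k] * WEIGHTS[k] for k in WEIGHTS)
    score = score / sum(WEIGHTS.values()) * 100
    return max(1.0, min(100.0, score))


print(round(sentiment_score({
    "vix": 18.0,
    "sp500_momentum": 2.5,
    "nasdaq_momentum": 3.1,
    "usd_momentum": -0.4,
    "inflation_rate": 3.2,
    "unemployment_rate": 4.1,
}), 1))  # ~64.1: mildly risk-on
```
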
--------------------------------------------------------------------------------
/maverick_mcp/utils/structured_logger.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Enhanced structured logging infrastructure for backtesting system.
3 |
4 | This module provides comprehensive structured logging capabilities with:
5 | - Correlation ID generation and tracking across async boundaries
6 | - Request context propagation
7 | - JSON formatting for log aggregation
8 | - Performance metrics logging
9 | - Resource usage tracking
10 | - Debug mode with verbose logging
11 | - Async logging to avoid blocking operations
12 | - Log rotation and compression
13 | - Multiple output handlers (console, file; remote is stubbed for future use)
14 | """
15 |
16 | import asyncio
17 | import gc
18 | import json
19 | import logging
20 | import logging.handlers
21 | import os
22 | import sys
23 | import threading
24 | import time
25 | import traceback
26 | import uuid
27 | from collections.abc import Callable
28 | from concurrent.futures import ThreadPoolExecutor
29 | from contextvars import ContextVar
30 | from datetime import UTC, datetime
31 | from functools import wraps
32 | from pathlib import Path
33 | from typing import Any
34 |
35 | import psutil
36 |
37 | # Context variables for request tracking across async boundaries
38 | correlation_id_var: ContextVar[str | None] = ContextVar("correlation_id", default=None)
39 | request_start_var: ContextVar[float | None] = ContextVar("request_start", default=None)
40 | user_id_var: ContextVar[str | None] = ContextVar("user_id", default=None)
41 | tool_name_var: ContextVar[str | None] = ContextVar("tool_name", default=None)
42 | operation_context_var: ContextVar[dict[str, Any] | None] = ContextVar(
43 | "operation_context", default=None
44 | )
45 |
46 | # Global logger registry for performance metrics aggregation
47 | _performance_logger_registry: dict[str, "PerformanceMetricsLogger"] = {}
48 | _log_level_counts: dict[str, int] = {
49 | "DEBUG": 0,
50 | "INFO": 0,
51 | "WARNING": 0,
52 | "ERROR": 0,
53 | "CRITICAL": 0,
54 | }
55 |
56 | # Thread pool for async logging operations
57 | _async_log_executor: ThreadPoolExecutor | None = None
58 | _async_log_lock = threading.Lock()
59 |
60 |
61 | class CorrelationIDGenerator:
62 | """Enhanced correlation ID generation with backtesting context."""
63 |
64 | @staticmethod
65 | def generate_correlation_id(prefix: str = "bt") -> str:
66 | """Generate a unique correlation ID with backtesting prefix."""
67 | timestamp = int(time.time() * 1000) % 1000000 # Last 6 digits of timestamp
68 | random_part = uuid.uuid4().hex[:8]
69 | return f"{prefix}-{timestamp}-{random_part}"
70 |
71 | @staticmethod
72 | def set_correlation_id(
73 | correlation_id: str | None = None, prefix: str = "bt"
74 | ) -> str:
75 | """Set correlation ID in context with automatic generation."""
76 | if not correlation_id:
77 | correlation_id = CorrelationIDGenerator.generate_correlation_id(prefix)
78 | correlation_id_var.set(correlation_id)
79 | return correlation_id
80 |
81 | @staticmethod
82 | def get_correlation_id() -> str | None:
83 | """Get current correlation ID from context."""
84 | return correlation_id_var.get()
85 |
86 | @staticmethod
87 | def propagate_context(target_context: dict[str, Any]) -> dict[str, Any]:
88 | """Propagate correlation context to target dict."""
89 | target_context.update(
90 | {
91 | "correlation_id": correlation_id_var.get(),
92 | "user_id": user_id_var.get(),
93 | "tool_name": tool_name_var.get(),
94 | "operation_context": operation_context_var.get(),
95 | }
96 | )
97 | return target_context
98 |
99 |
100 | class EnhancedStructuredFormatter(logging.Formatter):
101 | """Enhanced JSON formatter with performance metrics and resource tracking."""
102 |
103 | def __init__(
104 | self, include_performance: bool = True, include_resources: bool = True
105 | ):
106 | super().__init__()
107 | self.include_performance = include_performance
108 | self.include_resources = include_resources
109 | self._process = psutil.Process()
110 |
111 | def format(self, record: logging.LogRecord) -> str:
112 | """Format log record with comprehensive structured data."""
113 | # Base structured log data
114 | log_data = {
115 | "timestamp": datetime.now(UTC).isoformat(),
116 | "level": record.levelname,
117 | "logger": record.name,
118 | "message": record.getMessage(),
119 | "module": record.module,
120 | "function": record.funcName,
121 | "line": record.lineno,
122 | "thread": record.thread,
123 | "process_id": record.process,
124 | }
125 |
126 | # Add correlation context
127 | CorrelationIDGenerator.propagate_context(log_data)
128 |
129 | # Add performance metrics if enabled
130 | if self.include_performance:
131 | request_start = request_start_var.get()
132 | if request_start:
133 | log_data["duration_ms"] = int((time.time() - request_start) * 1000)
134 |
135 | # Add resource usage if enabled
136 | if self.include_resources:
137 | try:
138 | memory_info = self._process.memory_info()
139 | log_data["memory_rss_mb"] = round(memory_info.rss / 1024 / 1024, 2)
140 | log_data["memory_vms_mb"] = round(memory_info.vms / 1024 / 1024, 2)
141 | log_data["cpu_percent"] = self._process.cpu_percent(interval=None)
142 | except (psutil.NoSuchProcess, psutil.AccessDenied):
143 | # Process might have ended or access denied
144 | pass
145 |
146 | # Add exception information
147 | if record.exc_info:
148 | log_data["exception"] = {
149 | "type": record.exc_info[0].__name__
150 | if record.exc_info[0]
151 | else "Unknown",
152 | "message": str(record.exc_info[1]),
153 | "traceback": traceback.format_exception(*record.exc_info),
154 | }
155 |
156 | # Add extra fields from the record
157 | extra_fields = {}
158 | for key, value in record.__dict__.items():
159 | if key not in {
160 | "name",
161 | "msg",
162 | "args",
163 | "created",
164 | "filename",
165 | "funcName",
166 | "levelname",
167 | "levelno",
168 | "lineno",
169 | "module",
170 | "msecs",
171 | "pathname",
172 | "process",
173 | "processName",
174 | "relativeCreated",
175 | "thread",
176 | "threadName",
177 | "exc_info",
178 | "exc_text",
179 | "stack_info",
180 | "getMessage",
181 | "message",
182 | }:
183 | # Sanitize sensitive data
184 | if self._is_sensitive_field(key):
185 | extra_fields[key] = "***MASKED***"
186 | else:
187 | extra_fields[key] = self._serialize_value(value)
188 |
189 | if extra_fields:
190 | log_data["extra"] = extra_fields
191 |
192 | return json.dumps(log_data, default=str, ensure_ascii=False)
193 |
194 | def _is_sensitive_field(self, field_name: str) -> bool:
195 | """Check if field contains sensitive information."""
196 | sensitive_keywords = {
197 | "password",
198 | "token",
199 | "key",
200 | "secret",
201 | "auth",
202 | "credential",
203 | "bearer",
204 | "session",
205 | "cookie",
206 | "api_key",
207 | "access_token",
208 | "refresh_token",
209 | "private",
210 | "confidential",
211 | }
212 | return any(keyword in field_name.lower() for keyword in sensitive_keywords)
213 |
214 | def _serialize_value(self, value: Any) -> Any:
215 | """Safely serialize complex values for JSON output."""
216 | if isinstance(value, str | int | float | bool) or value is None:
217 | return value
218 | elif isinstance(value, dict):
219 | return {k: self._serialize_value(v) for k, v in value.items()}
220 | elif isinstance(value, list | tuple):
221 | return [self._serialize_value(item) for item in value]
222 | else:
223 | return str(value)
224 |
225 |
226 | class AsyncLogHandler(logging.Handler):
227 | """Non-blocking async log handler to prevent performance impact."""
228 |
229 | def __init__(self, target_handler: logging.Handler, max_queue_size: int = 10000):
230 | super().__init__()
231 | self.target_handler = target_handler
232 | self.max_queue_size = max_queue_size
233 | self._queue: list[logging.LogRecord] = []
234 | self._queue_lock = threading.Lock()
235 | self._shutdown = False
236 |
237 | # Start background thread for processing logs
238 | self._worker_thread = threading.Thread(target=self._process_logs, daemon=True)
239 | self._worker_thread.start()
240 |
241 | def emit(self, record: logging.LogRecord):
242 | """Queue log record for async processing."""
243 | if self._shutdown:
244 | return
245 |
246 | with self._queue_lock:
247 | if len(self._queue) < self.max_queue_size:
248 | self._queue.append(record)
249 | # If queue is full, drop oldest records
250 | elif self._queue:
251 | self._queue.pop(0)
252 | self._queue.append(record)
253 |
254 | def _process_logs(self):
255 | """Background thread to process queued log records."""
256 | while not self._shutdown:
257 | records_to_process = []
258 |
259 | with self._queue_lock:
260 | if self._queue:
261 | records_to_process = self._queue[:]
262 | self._queue.clear()
263 |
264 | for record in records_to_process:
265 | try:
266 | self.target_handler.emit(record)
267 | except Exception:
268 | # Silently ignore errors to prevent infinite recursion
269 | pass
270 |
271 | # Brief sleep to prevent busy waiting
272 | time.sleep(0.01)
273 |
274 | def close(self):
275 | """Close the handler and wait for queue to flush."""
276 | self._shutdown = True
277 | self._worker_thread.join(timeout=5.0)
278 | self.target_handler.close()
279 | super().close()
280 |
281 |
282 | class PerformanceMetricsLogger:
283 | """Comprehensive performance metrics logging for backtesting operations."""
284 |
285 | def __init__(self, logger_name: str = "maverick_mcp.performance"):
286 | self.logger = logging.getLogger(logger_name)
287 | self.metrics: dict[str, list[float]] = {
288 | "execution_times": [],
289 | "memory_usage": [],
290 | "cpu_usage": [],
291 | "operation_counts": [],
292 | }
293 | self._start_times: dict[str, float] = {}
294 | self._lock = threading.Lock()
295 |
296 | # Register for global aggregation
297 | _performance_logger_registry[logger_name] = self
298 |
299 | def start_operation(self, operation_id: str, operation_type: str, **context):
300 | """Start tracking a performance-critical operation."""
301 | start_time = time.time()
302 |
303 | with self._lock:
304 | self._start_times[operation_id] = start_time
305 |
306 | # Set request context
307 | request_start_var.set(start_time)
308 | if "tool_name" in context:
309 | tool_name_var.set(context["tool_name"])
310 |
311 | self.logger.info(
312 | f"Started {operation_type} operation",
313 | extra={
314 | "operation_id": operation_id,
315 | "operation_type": operation_type,
316 | "start_time": start_time,
317 | **context,
318 | },
319 | )
320 |
321 | def end_operation(self, operation_id: str, success: bool = True, **metrics):
322 | """End tracking of a performance-critical operation."""
323 | end_time = time.time()
324 |
325 | with self._lock:
326 | start_time = self._start_times.pop(operation_id, end_time)
327 |
328 | duration_ms = (end_time - start_time) * 1000
329 |
330 | # Collect system metrics
331 | try:
332 | process = psutil.Process()
333 | memory_mb = process.memory_info().rss / 1024 / 1024
334 | cpu_percent = process.cpu_percent(interval=None)
335 | except (psutil.NoSuchProcess, psutil.AccessDenied):
336 | memory_mb = 0
337 | cpu_percent = 0
338 |
339 | # Update internal metrics
340 | with self._lock:
341 | self.metrics["execution_times"].append(duration_ms)
342 | self.metrics["memory_usage"].append(memory_mb)
343 | self.metrics["cpu_usage"].append(cpu_percent)
344 | self.metrics["operation_counts"].append(1)
345 |
346 | log_level = logging.INFO if success else logging.ERROR
347 | self.logger.log(
348 | log_level,
349 | f"{'Completed' if success else 'Failed'} operation in {duration_ms:.2f}ms",
350 | extra={
351 | "operation_id": operation_id,
352 | "duration_ms": duration_ms,
353 | "memory_mb": memory_mb,
354 | "cpu_percent": cpu_percent,
355 | "success": success,
356 | **metrics,
357 | },
358 | )
359 |
360 | def log_business_metric(self, metric_name: str, value: int | float, **context):
361 | """Log business-specific metrics like strategies processed, success rates."""
362 | self.logger.info(
363 | f"Business metric: {metric_name} = {value}",
364 | extra={
365 | "metric_name": metric_name,
366 | "metric_value": value,
367 | "metric_type": "business",
368 | **context,
369 | },
370 | )
371 |
372 | def get_performance_summary(self) -> dict[str, Any]:
373 | """Get aggregated performance metrics summary."""
374 | with self._lock:
375 | if not self.metrics["execution_times"]:
376 | return {"message": "No performance data available"}
377 |
378 | execution_times = self.metrics["execution_times"]
379 | memory_usage = self.metrics["memory_usage"]
380 | cpu_usage = self.metrics["cpu_usage"]
381 |
382 | return {
383 | "operations_count": len(execution_times),
384 | "execution_time_stats": {
385 | "avg_ms": sum(execution_times) / len(execution_times),
386 | "min_ms": min(execution_times),
387 | "max_ms": max(execution_times),
388 | "total_ms": sum(execution_times),
389 | },
390 | "memory_stats": {
391 | "avg_mb": sum(memory_usage) / len(memory_usage)
392 | if memory_usage
393 | else 0,
394 | "peak_mb": max(memory_usage) if memory_usage else 0,
395 | },
396 | "cpu_stats": {
397 | "avg_percent": sum(cpu_usage) / len(cpu_usage) if cpu_usage else 0,
398 | "peak_percent": max(cpu_usage) if cpu_usage else 0,
399 | },
400 | "timestamp": datetime.now(UTC).isoformat(),
401 | }
402 |
403 |
404 | class DebugModeManager:
405 | """Manages debug mode configuration and verbose logging."""
406 |
407 | def __init__(self):
408 | self._debug_enabled = os.getenv("MAVERICK_DEBUG", "false").lower() in (
409 | "true",
410 | "1",
411 | "on",
412 | )
413 | self._verbose_modules: set = set()
414 | self._debug_filters: dict[str, Any] = {}
415 |
416 | def is_debug_enabled(self, module_name: str = "") -> bool:
417 | """Check if debug mode is enabled globally or for specific module."""
418 | if not self._debug_enabled:
419 | return False
420 |
421 | if not module_name:
422 | return True
423 |
424 | # Module-specific debug; an empty verbose set enables all modules
425 | return module_name in self._verbose_modules or not self._verbose_modules
426 |
427 | def enable_verbose_logging(self, module_pattern: str):
428 | """Enable verbose logging for specific module pattern."""
429 | self._verbose_modules.add(module_pattern)
430 |
431 | def add_debug_filter(self, filter_name: str, filter_config: dict[str, Any]):
432 | """Add custom debug filter configuration."""
433 | self._debug_filters[filter_name] = filter_config
434 |
435 | def should_log_request_response(self, operation_name: str) -> bool:
436 | """Check if request/response should be logged for operation."""
437 | if not self._debug_enabled:
438 | return False
439 |
440 | # Check specific filters
441 | for _filter_name, config in self._debug_filters.items():
442 | if config.get("log_request_response") and operation_name in config.get(
443 | "operations", []
444 | ):
445 | return True
446 |
447 | return True  # In debug mode, log request/response even without a matching filter
448 |
449 |
450 | class StructuredLoggerManager:
451 | """Central manager for structured logging configuration."""
452 |
453 | def __init__(self):
454 | self.debug_manager = DebugModeManager()
455 | self.performance_loggers: dict[str, PerformanceMetricsLogger] = {}
456 | self._configured = False
457 |
458 | def setup_structured_logging(
459 | self,
460 | log_level: str = "INFO",
461 | log_format: str = "json",
462 | log_file: str | None = None,
463 | enable_async: bool = True,
464 | enable_rotation: bool = True,
465 | max_log_size: int = 10 * 1024 * 1024, # 10MB
466 | backup_count: int = 5,
467 | console_output: str = "stdout", # stdout, stderr
468 | remote_handler_config: dict[str, Any] | None = None,
469 | ):
470 | """Setup comprehensive structured logging infrastructure."""
471 |
472 | if self._configured:
473 | return
474 |
475 | # Configure root logger
476 | root_logger = logging.getLogger()
477 | root_logger.setLevel(getattr(logging, log_level.upper()))
478 |
479 | # Clear existing handlers
480 | for handler in root_logger.handlers[:]:
481 | root_logger.removeHandler(handler)
482 |
483 | handlers = []
484 |
485 | # Console handler
486 | console_stream = sys.stdout if console_output == "stdout" else sys.stderr
487 | console_handler = logging.StreamHandler(console_stream)
488 |
489 | if log_format == "json":
490 | console_formatter = EnhancedStructuredFormatter(
491 | include_performance=True, include_resources=True
492 | )
493 | else:
494 | console_formatter = logging.Formatter(
495 | "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
496 | )
497 |
498 | console_handler.setFormatter(console_formatter)
499 | handlers.append(console_handler)
500 |
501 | # File handler with rotation if specified
502 | if log_file:
503 | log_path = Path(log_file)
504 | log_path.parent.mkdir(parents=True, exist_ok=True)
505 |
506 | if enable_rotation:
507 | file_handler = logging.handlers.RotatingFileHandler(
508 | log_file, maxBytes=max_log_size, backupCount=backup_count
509 | )
510 | else:
511 | file_handler = logging.FileHandler(log_file)
512 |
513 | file_handler.setFormatter(EnhancedStructuredFormatter())
514 | handlers.append(file_handler)
515 |
516 | # Remote handler if configured (for log aggregation)
517 | if remote_handler_config:
518 | remote_handler = self._create_remote_handler(remote_handler_config)
519 | if remote_handler:
520 | handlers.append(remote_handler)
521 |
522 | # Wrap handlers with async processing if enabled
523 | if enable_async:
524 | handlers = [AsyncLogHandler(handler) for handler in handlers]
525 |
526 | # Add all handlers to root logger
527 | for handler in handlers:
528 | root_logger.addHandler(handler)
529 |
530 | # Set specific logger levels to reduce noise
531 | logging.getLogger("urllib3").setLevel(logging.WARNING)
532 | logging.getLogger("requests").setLevel(logging.WARNING)
533 | logging.getLogger("httpx").setLevel(logging.WARNING)
534 | logging.getLogger("asyncio").setLevel(logging.WARNING)
535 |
536 | # Enable debug mode loggers if configured
537 | if self.debug_manager.is_debug_enabled():
538 | self._setup_debug_loggers()
539 |
540 | self._configured = True
541 |
542 | def _create_remote_handler(self, config: dict[str, Any]) -> logging.Handler | None:
543 | """Create remote handler for log aggregation (placeholder for future implementation)."""
544 | # This would implement remote logging to services like ELK, Splunk, etc.
545 | # For now, return None as it's not implemented
546 | return None
547 |
548 | def _setup_debug_loggers(self):
549 | """Setup additional loggers for debug mode."""
550 | debug_logger = logging.getLogger("maverick_mcp.debug")
551 | debug_logger.setLevel(logging.DEBUG)
552 |
553 | request_logger = logging.getLogger("maverick_mcp.requests")
554 | request_logger.setLevel(logging.DEBUG)
555 |
556 | def get_performance_logger(self, logger_name: str) -> PerformanceMetricsLogger:
557 | """Get or create performance logger for specific component."""
558 | if logger_name not in self.performance_loggers:
559 | self.performance_loggers[logger_name] = PerformanceMetricsLogger(
560 | logger_name
561 | )
562 | return self.performance_loggers[logger_name]
563 |
564 | def get_logger(self, name: str) -> logging.Logger:
565 | """Get structured logger with correlation support."""
566 | return logging.getLogger(name)
567 |
568 | def create_dashboard_metrics(self) -> dict[str, Any]:
569 | """Create comprehensive metrics for performance dashboard."""
570 | global _log_level_counts
571 |
572 | dashboard_data = {
573 | "system_metrics": {
574 | "timestamp": datetime.now(UTC).isoformat(),
575 | "log_level_counts": _log_level_counts.copy(),
576 | "active_correlation_ids": len(
577 | [cid for cid in [correlation_id_var.get()] if cid]
578 | ),
579 | },
580 | "performance_metrics": {},
581 | "memory_stats": {},
582 | }
583 |
584 | # Aggregate performance metrics from all loggers
585 | for logger_name, perf_logger in _performance_logger_registry.items():
586 | dashboard_data["performance_metrics"][logger_name] = (
587 | perf_logger.get_performance_summary()
588 | )
589 |
590 | # System memory stats
591 | try:
592 | process = psutil.Process()
593 | memory_info = process.memory_info()
594 | dashboard_data["memory_stats"] = {
595 | "rss_mb": round(memory_info.rss / 1024 / 1024, 2),
596 | "vms_mb": round(memory_info.vms / 1024 / 1024, 2),
597 | "cpu_percent": process.cpu_percent(interval=None),
598 | "gc_stats": {
599 | "generation_0": gc.get_count()[0],
600 | "generation_1": gc.get_count()[1],
601 | "generation_2": gc.get_count()[2],
602 | },
603 | }
604 | except (psutil.NoSuchProcess, psutil.AccessDenied):
605 | dashboard_data["memory_stats"] = {"error": "Unable to collect memory stats"}
606 |
607 | return dashboard_data
608 |
609 |
610 | # Global instance
611 | _logger_manager: StructuredLoggerManager | None = None
612 |
613 |
614 | def get_logger_manager() -> StructuredLoggerManager:
615 | """Get global logger manager instance."""
616 | global _logger_manager
617 | if _logger_manager is None:
618 | _logger_manager = StructuredLoggerManager()
619 | return _logger_manager
620 |
621 |
622 | def with_structured_logging(
623 | operation_name: str,
624 | include_performance: bool = True,
625 | log_params: bool = True,
626 | log_result: bool = False,
627 | ):
628 | """Decorator for automatic structured logging of operations."""
629 |
630 | def decorator(func: Callable) -> Callable:
631 | @wraps(func)
632 | async def async_wrapper(*args, **kwargs):
633 | # Generate correlation ID if not present
634 | correlation_id = CorrelationIDGenerator.get_correlation_id()
635 | if not correlation_id:
636 | correlation_id = CorrelationIDGenerator.set_correlation_id()
637 |
638 | # Setup operation context
639 | operation_id = f"{operation_name}_{int(time.time() * 1000)}"
640 | tool_name_var.set(operation_name)
641 |
642 | logger = get_logger_manager().get_logger(f"maverick_mcp.{operation_name}")
643 | perf_logger = None
644 |
645 | if include_performance:
646 | perf_logger = get_logger_manager().get_performance_logger(
647 | f"performance.{operation_name}"
648 | )
649 | perf_logger.start_operation(
650 | operation_id=operation_id,
651 | operation_type=operation_name,
652 | tool_name=operation_name,
653 | )
654 |
655 | # Log operation start
656 | extra_data = {
657 | "operation_id": operation_id,
658 | "correlation_id": correlation_id,
659 | }
660 | if log_params:
661 | # Sanitize parameters
662 | safe_kwargs = {
663 | k: "***MASKED***"
664 | if "password" in k.lower() or "token" in k.lower()
665 | else v
666 | for k, v in kwargs.items()
667 | }
668 | extra_data["parameters"] = safe_kwargs
669 |
670 | logger.info(f"Starting {operation_name}", extra=extra_data)
671 |
672 | try:
673 | # Execute the function
674 | result = await func(*args, **kwargs)
675 |
676 | # Log success
677 | success_data = {"operation_id": operation_id, "success": True}
678 | if log_result and result is not None:
679 | # Limit result size for logging
680 | result_str = str(result)
681 | success_data["result"] = (
682 | result_str[:1000] + "..."
683 | if len(result_str) > 1000
684 | else result_str
685 | )
686 |
687 | logger.info(f"Completed {operation_name}", extra=success_data)
688 |
689 | if perf_logger:
690 | perf_logger.end_operation(operation_id, success=True)
691 |
692 | return result
693 |
694 | except Exception as e:
695 | # Log error
696 | logger.error(
697 | f"Failed {operation_name}: {str(e)}",
698 | exc_info=True,
699 | extra={
700 | "operation_id": operation_id,
701 | "error_type": type(e).__name__,
702 | "success": False,
703 | },
704 | )
705 |
706 | if perf_logger:
707 | perf_logger.end_operation(operation_id, success=False, error=str(e))
708 |
709 | raise
710 |
711 | @wraps(func)
712 | def sync_wrapper(*args, **kwargs):
713 | # Similar logic for sync functions
714 | correlation_id = CorrelationIDGenerator.get_correlation_id()
715 | if not correlation_id:
716 | correlation_id = CorrelationIDGenerator.set_correlation_id()
717 |
718 | operation_id = f"{operation_name}_{int(time.time() * 1000)}"
719 | tool_name_var.set(operation_name)
720 |
721 | logger = get_logger_manager().get_logger(f"maverick_mcp.{operation_name}")
722 | perf_logger = None
723 |
724 | if include_performance:
725 | perf_logger = get_logger_manager().get_performance_logger(
726 | f"performance.{operation_name}"
727 | )
728 | perf_logger.start_operation(
729 | operation_id=operation_id,
730 | operation_type=operation_name,
731 | tool_name=operation_name,
732 | )
733 |
734 | extra_data = {
735 | "operation_id": operation_id,
736 | "correlation_id": correlation_id,
737 | }
738 | if log_params:
739 | safe_kwargs = {
740 | k: "***MASKED***"
741 | if any(
742 | sensitive in k.lower()
743 | for sensitive in ["password", "token", "key", "secret"]
744 | )
745 | else v
746 | for k, v in kwargs.items()
747 | }
748 | extra_data["parameters"] = safe_kwargs
749 |
750 | logger.info(f"Starting {operation_name}", extra=extra_data)
751 |
752 | try:
753 | result = func(*args, **kwargs)
754 |
755 | success_data = {"operation_id": operation_id, "success": True}
756 | if log_result and result is not None:
757 | result_str = str(result)
758 | success_data["result"] = (
759 | result_str[:1000] + "..."
760 | if len(result_str) > 1000
761 | else result_str
762 | )
763 |
764 | logger.info(f"Completed {operation_name}", extra=success_data)
765 |
766 | if perf_logger:
767 | perf_logger.end_operation(operation_id, success=True)
768 |
769 | return result
770 |
771 | except Exception as e:
772 | logger.error(
773 | f"Failed {operation_name}: {str(e)}",
774 | exc_info=True,
775 | extra={
776 | "operation_id": operation_id,
777 | "error_type": type(e).__name__,
778 | "success": False,
779 | },
780 | )
781 |
782 | if perf_logger:
783 | perf_logger.end_operation(operation_id, success=False, error=str(e))
784 |
785 | raise
786 |
787 | return async_wrapper if asyncio.iscoroutinefunction(func) else sync_wrapper
788 |
789 | return decorator
790 |
791 |
792 | # Convenience functions
793 | def get_structured_logger(name: str) -> logging.Logger:
794 | """Get structured logger instance."""
795 | return get_logger_manager().get_logger(name)
796 |
797 |
798 | def get_performance_logger(component: str) -> PerformanceMetricsLogger:
799 | """Get performance logger for specific component."""
800 | return get_logger_manager().get_performance_logger(component)
801 |
802 |
803 | def setup_backtesting_logging(
804 | log_level: str = "INFO", enable_debug: bool = False, log_file: str | None = None
805 | ):
806 | """Setup logging specifically configured for backtesting operations."""
807 |
808 | # Set debug environment if requested
809 | if enable_debug:
810 | os.environ["MAVERICK_DEBUG"] = "true"
811 |
812 | # Setup structured logging
813 | manager = get_logger_manager()
814 | manager.setup_structured_logging(
815 | log_level=log_level,
816 | log_format="json",
817 | log_file=log_file or "logs/backtesting.log",
818 | enable_async=True,
819 | enable_rotation=True,
820 | console_output="stderr", # Use stderr for MCP compatibility
821 | )
822 |
823 | # Configure debug filters for backtesting
824 | if enable_debug:
825 | manager.debug_manager.add_debug_filter(
826 | "backtesting",
827 | {
828 | "log_request_response": True,
829 | "operations": [
830 | "run_backtest",
831 | "optimize_parameters",
832 | "get_historical_data",
833 | ],
834 | },
835 | )
836 |
837 |
838 | # Update log level counts (for dashboard metrics)
839 | class LogLevelCounterFilter(logging.Filter):
840 | """Filter to count log levels for dashboard metrics."""
841 |
842 | def filter(self, record: logging.LogRecord) -> bool:
843 | global _log_level_counts
844 | _log_level_counts[record.levelname] = (
845 | _log_level_counts.get(record.levelname, 0) + 1
846 | )
847 | return True
848 |
849 |
850 | # Count log levels on the root logger (records from child loggers bypass logger-level filters)
851 | logging.getLogger().addFilter(LogLevelCounterFilter())
852 |
```
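
A short usage sketch for the module above, assuming it is importable from its repository path; `run_backtest` and its arguments are invented for illustration. The decorator generates a correlation ID if none is set, masks sensitive-looking keyword arguments, and records timings under the `performance.run_backtest` metrics logger.

```python
import asyncio

from maverick_mcp.utils.structured_logger import (
    CorrelationIDGenerator,
    get_performance_logger,
    get_structured_logger,
    setup_backtesting_logging,
    with_structured_logging,
)

# One-time setup: JSON logs to stderr plus a rotating file.
setup_backtesting_logging(log_level="INFO", log_file="logs/backtesting.log")

logger = get_structured_logger("maverick_mcp.example")


@with_structured_logging("run_backtest", log_params=True)
async def run_backtest(symbol: str, api_key: str) -> dict:
    # "api_key" contains "key", so the decorator logs it as ***MASKED***.
    logger.info("Running strategy", extra={"symbol": symbol})
    return {"symbol": symbol, "sharpe": 1.2}


async def main() -> None:
    # Tag all logs for this request with one correlation ID (prefix "bt").
    CorrelationIDGenerator.set_correlation_id()
    await run_backtest(symbol="AAPL", api_key="not-a-real-key")
    # Aggregated timings recorded by the decorator's performance logger.
    perf = get_performance_logger("performance.run_backtest")
    print(perf.get_performance_summary())


asyncio.run(main())
```
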