genomoncology/biomcp # codebase.md

This is page 6 of 19. Use http://codebase.md/genomoncology/biomcp?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .github
│   ├── actions
│   │   └── setup-python-env
│   │       └── action.yml
│   ├── dependabot.yml
│   └── workflows
│       ├── ci.yml
│       ├── deploy-docs.yml
│       ├── main.yml.disabled
│       ├── on-release-main.yml
│       └── validate-codecov-config.yml
├── .gitignore
├── .pre-commit-config.yaml
├── BIOMCP_DATA_FLOW.md
├── CHANGELOG.md
├── CNAME
├── codecov.yaml
├── docker-compose.yml
├── Dockerfile
├── docs
│   ├── apis
│   │   ├── error-codes.md
│   │   ├── overview.md
│   │   └── python-sdk.md
│   ├── assets
│   │   ├── biomcp-cursor-locations.png
│   │   ├── favicon.ico
│   │   ├── icon.png
│   │   ├── logo.png
│   │   ├── mcp_architecture.txt
│   │   └── remote-connection
│   │       ├── 00_connectors.png
│   │       ├── 01_add_custom_connector.png
│   │       ├── 02_connector_enabled.png
│   │       ├── 03_connect_to_biomcp.png
│   │       ├── 04_select_google_oauth.png
│   │       └── 05_success_connect.png
│   ├── backend-services-reference
│   │   ├── 01-overview.md
│   │   ├── 02-biothings-suite.md
│   │   ├── 03-cbioportal.md
│   │   ├── 04-clinicaltrials-gov.md
│   │   ├── 05-nci-cts-api.md
│   │   ├── 06-pubtator3.md
│   │   └── 07-alphagenome.md
│   ├── blog
│   │   ├── ai-assisted-clinical-trial-search-analysis.md
│   │   ├── images
│   │   │   ├── deep-researcher-video.png
│   │   │   ├── researcher-announce.png
│   │   │   ├── researcher-drop-down.png
│   │   │   ├── researcher-prompt.png
│   │   │   ├── trial-search-assistant.png
│   │   │   └── what_is_biomcp_thumbnail.png
│   │   └── researcher-persona-resource.md
│   ├── changelog.md
│   ├── CNAME
│   ├── concepts
│   │   ├── 01-what-is-biomcp.md
│   │   ├── 02-the-deep-researcher-persona.md
│   │   └── 03-sequential-thinking-with-the-think-tool.md
│   ├── developer-guides
│   │   ├── 01-server-deployment.md
│   │   ├── 02-contributing-and-testing.md
│   │   ├── 03-third-party-endpoints.md
│   │   ├── 04-transport-protocol.md
│   │   ├── 05-error-handling.md
│   │   ├── 06-http-client-and-caching.md
│   │   ├── 07-performance-optimizations.md
│   │   └── generate_endpoints.py
│   ├── faq-condensed.md
│   ├── FDA_SECURITY.md
│   ├── genomoncology.md
│   ├── getting-started
│   │   ├── 01-quickstart-cli.md
│   │   ├── 02-claude-desktop-integration.md
│   │   └── 03-authentication-and-api-keys.md
│   ├── how-to-guides
│   │   ├── 01-find-articles-and-cbioportal-data.md
│   │   ├── 02-find-trials-with-nci-and-biothings.md
│   │   ├── 03-get-comprehensive-variant-annotations.md
│   │   ├── 04-predict-variant-effects-with-alphagenome.md
│   │   ├── 05-logging-and-monitoring-with-bigquery.md
│   │   └── 06-search-nci-organizations-and-interventions.md
│   ├── index.md
│   ├── policies.md
│   ├── reference
│   │   ├── architecture-diagrams.md
│   │   ├── quick-architecture.md
│   │   ├── quick-reference.md
│   │   └── visual-architecture.md
│   ├── robots.txt
│   ├── stylesheets
│   │   ├── announcement.css
│   │   └── extra.css
│   ├── troubleshooting.md
│   ├── tutorials
│   │   ├── biothings-prompts.md
│   │   ├── claude-code-biomcp-alphagenome.md
│   │   ├── nci-prompts.md
│   │   ├── openfda-integration.md
│   │   ├── openfda-prompts.md
│   │   ├── pydantic-ai-integration.md
│   │   └── remote-connection.md
│   ├── user-guides
│   │   ├── 01-command-line-interface.md
│   │   ├── 02-mcp-tools-reference.md
│   │   └── 03-integrating-with-ides-and-clients.md
│   └── workflows
│       └── all-workflows.md
├── example_scripts
│   ├── mcp_integration.py
│   └── python_sdk.py
├── glama.json
├── LICENSE
├── lzyank.toml
├── Makefile
├── mkdocs.yml
├── package-lock.json
├── package.json
├── pyproject.toml
├── README.md
├── scripts
│   ├── check_docs_in_mkdocs.py
│   ├── check_http_imports.py
│   └── generate_endpoints_doc.py
├── smithery.yaml
├── src
│   └── biomcp
│       ├── __init__.py
│       ├── __main__.py
│       ├── articles
│       │   ├── __init__.py
│       │   ├── autocomplete.py
│       │   ├── fetch.py
│       │   ├── preprints.py
│       │   ├── search_optimized.py
│       │   ├── search.py
│       │   └── unified.py
│       ├── biomarkers
│       │   ├── __init__.py
│       │   └── search.py
│       ├── cbioportal_helper.py
│       ├── circuit_breaker.py
│       ├── cli
│       │   ├── __init__.py
│       │   ├── articles.py
│       │   ├── biomarkers.py
│       │   ├── diseases.py
│       │   ├── health.py
│       │   ├── interventions.py
│       │   ├── main.py
│       │   ├── openfda.py
│       │   ├── organizations.py
│       │   ├── server.py
│       │   ├── trials.py
│       │   └── variants.py
│       ├── connection_pool.py
│       ├── constants.py
│       ├── core.py
│       ├── diseases
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── domain_handlers.py
│       ├── drugs
│       │   ├── __init__.py
│       │   └── getter.py
│       ├── exceptions.py
│       ├── genes
│       │   ├── __init__.py
│       │   └── getter.py
│       ├── http_client_simple.py
│       ├── http_client.py
│       ├── individual_tools.py
│       ├── integrations
│       │   ├── __init__.py
│       │   ├── biothings_client.py
│       │   └── cts_api.py
│       ├── interventions
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── logging_filter.py
│       ├── metrics_handler.py
│       ├── metrics.py
│       ├── openfda
│       │   ├── __init__.py
│       │   ├── adverse_events_helpers.py
│       │   ├── adverse_events.py
│       │   ├── cache.py
│       │   ├── constants.py
│       │   ├── device_events_helpers.py
│       │   ├── device_events.py
│       │   ├── drug_approvals.py
│       │   ├── drug_labels_helpers.py
│       │   ├── drug_labels.py
│       │   ├── drug_recalls_helpers.py
│       │   ├── drug_recalls.py
│       │   ├── drug_shortages_detail_helpers.py
│       │   ├── drug_shortages_helpers.py
│       │   ├── drug_shortages.py
│       │   ├── exceptions.py
│       │   ├── input_validation.py
│       │   ├── rate_limiter.py
│       │   ├── utils.py
│       │   └── validation.py
│       ├── organizations
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── parameter_parser.py
│       ├── prefetch.py
│       ├── query_parser.py
│       ├── query_router.py
│       ├── rate_limiter.py
│       ├── render.py
│       ├── request_batcher.py
│       ├── resources
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   ├── instructions.md
│       │   └── researcher.md
│       ├── retry.py
│       ├── router_handlers.py
│       ├── router.py
│       ├── shared_context.py
│       ├── thinking
│       │   ├── __init__.py
│       │   ├── sequential.py
│       │   └── session.py
│       ├── thinking_tool.py
│       ├── thinking_tracker.py
│       ├── trials
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   ├── nci_getter.py
│       │   ├── nci_search.py
│       │   └── search.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── cancer_types_api.py
│       │   ├── cbio_http_adapter.py
│       │   ├── endpoint_registry.py
│       │   ├── gene_validator.py
│       │   ├── metrics.py
│       │   ├── mutation_filter.py
│       │   ├── query_utils.py
│       │   ├── rate_limiter.py
│       │   └── request_cache.py
│       ├── variants
│       │   ├── __init__.py
│       │   ├── alphagenome.py
│       │   ├── cancer_types.py
│       │   ├── cbio_external_client.py
│       │   ├── cbioportal_mutations.py
│       │   ├── cbioportal_search_helpers.py
│       │   ├── cbioportal_search.py
│       │   ├── constants.py
│       │   ├── external.py
│       │   ├── filters.py
│       │   ├── getter.py
│       │   ├── links.py
│       │   └── search.py
│       └── workers
│           ├── __init__.py
│           ├── worker_entry_stytch.js
│           ├── worker_entry.js
│           └── worker.py
├── tests
│   ├── bdd
│   │   ├── cli_help
│   │   │   ├── help.feature
│   │   │   └── test_help.py
│   │   ├── conftest.py
│   │   ├── features
│   │   │   └── alphagenome_integration.feature
│   │   ├── fetch_articles
│   │   │   ├── fetch.feature
│   │   │   └── test_fetch.py
│   │   ├── get_trials
│   │   │   ├── get.feature
│   │   │   └── test_get.py
│   │   ├── get_variants
│   │   │   ├── get.feature
│   │   │   └── test_get.py
│   │   ├── search_articles
│   │   │   ├── autocomplete.feature
│   │   │   ├── search.feature
│   │   │   ├── test_autocomplete.py
│   │   │   └── test_search.py
│   │   ├── search_trials
│   │   │   ├── search.feature
│   │   │   └── test_search.py
│   │   ├── search_variants
│   │   │   ├── search.feature
│   │   │   └── test_search.py
│   │   └── steps
│   │       └── test_alphagenome_steps.py
│   ├── config
│   │   └── test_smithery_config.py
│   ├── conftest.py
│   ├── data
│   │   ├── ct_gov
│   │   │   ├── clinical_trials_api_v2.yaml
│   │   │   ├── trials_NCT04280705.json
│   │   │   └── trials_NCT04280705.txt
│   │   ├── myvariant
│   │   │   ├── myvariant_api.yaml
│   │   │   ├── myvariant_field_descriptions.csv
│   │   │   ├── variants_full_braf_v600e.json
│   │   │   ├── variants_full_braf_v600e.txt
│   │   │   └── variants_part_braf_v600_multiple.json
│   │   ├── openfda
│   │   │   ├── drugsfda_detail.json
│   │   │   ├── drugsfda_search.json
│   │   │   ├── enforcement_detail.json
│   │   │   └── enforcement_search.json
│   │   └── pubtator
│   │       ├── pubtator_autocomplete.json
│   │       └── pubtator3_paper.txt
│   ├── integration
│   │   ├── test_openfda_integration.py
│   │   ├── test_preprints_integration.py
│   │   ├── test_simple.py
│   │   └── test_variants_integration.py
│   ├── tdd
│   │   ├── articles
│   │   │   ├── test_autocomplete.py
│   │   │   ├── test_cbioportal_integration.py
│   │   │   ├── test_fetch.py
│   │   │   ├── test_preprints.py
│   │   │   ├── test_search.py
│   │   │   └── test_unified.py
│   │   ├── conftest.py
│   │   ├── drugs
│   │   │   ├── __init__.py
│   │   │   └── test_drug_getter.py
│   │   ├── openfda
│   │   │   ├── __init__.py
│   │   │   ├── test_adverse_events.py
│   │   │   ├── test_device_events.py
│   │   │   ├── test_drug_approvals.py
│   │   │   ├── test_drug_labels.py
│   │   │   ├── test_drug_recalls.py
│   │   │   ├── test_drug_shortages.py
│   │   │   └── test_security.py
│   │   ├── test_biothings_integration_real.py
│   │   ├── test_biothings_integration.py
│   │   ├── test_circuit_breaker.py
│   │   ├── test_concurrent_requests.py
│   │   ├── test_connection_pool.py
│   │   ├── test_domain_handlers.py
│   │   ├── test_drug_approvals.py
│   │   ├── test_drug_recalls.py
│   │   ├── test_drug_shortages.py
│   │   ├── test_endpoint_documentation.py
│   │   ├── test_error_scenarios.py
│   │   ├── test_europe_pmc_fetch.py
│   │   ├── test_mcp_integration.py
│   │   ├── test_mcp_tools.py
│   │   ├── test_metrics.py
│   │   ├── test_nci_integration.py
│   │   ├── test_nci_mcp_tools.py
│   │   ├── test_network_policies.py
│   │   ├── test_offline_mode.py
│   │   ├── test_openfda_unified.py
│   │   ├── test_pten_r173_search.py
│   │   ├── test_render.py
│   │   ├── test_request_batcher.py.disabled
│   │   ├── test_retry.py
│   │   ├── test_router.py
│   │   ├── test_shared_context.py.disabled
│   │   ├── test_unified_biothings.py
│   │   ├── thinking
│   │   │   ├── __init__.py
│   │   │   └── test_sequential.py
│   │   ├── trials
│   │   │   ├── test_backward_compatibility.py
│   │   │   ├── test_getter.py
│   │   │   └── test_search.py
│   │   ├── utils
│   │   │   ├── test_gene_validator.py
│   │   │   ├── test_mutation_filter.py
│   │   │   ├── test_rate_limiter.py
│   │   │   └── test_request_cache.py
│   │   ├── variants
│   │   │   ├── constants.py
│   │   │   ├── test_alphagenome_api_key.py
│   │   │   ├── test_alphagenome_comprehensive.py
│   │   │   ├── test_alphagenome.py
│   │   │   ├── test_cbioportal_mutations.py
│   │   │   ├── test_cbioportal_search.py
│   │   │   ├── test_external_integration.py
│   │   │   ├── test_external.py
│   │   │   ├── test_extract_gene_aa_change.py
│   │   │   ├── test_filters.py
│   │   │   ├── test_getter.py
│   │   │   ├── test_links.py
│   │   │   └── test_search.py
│   │   └── workers
│   │       └── test_worker_sanitization.js
│   └── test_pydantic_ai_integration.py
├── THIRD_PARTY_ENDPOINTS.md
├── tox.ini
├── uv.lock
└── wrangler.toml
```

# Files

--------------------------------------------------------------------------------
/src/biomcp/retry.py:
--------------------------------------------------------------------------------

```python
  1 | """Retry logic with exponential backoff for handling transient failures."""
  2 | 
  3 | import asyncio
  4 | import functools
  5 | import logging
  6 | import secrets
  7 | from collections.abc import Callable, Coroutine
  8 | from typing import Any, TypeVar
  9 | 
 10 | from .constants import (
 11 |     DEFAULT_EXPONENTIAL_BASE,
 12 |     DEFAULT_INITIAL_RETRY_DELAY,
 13 |     DEFAULT_MAX_RETRY_ATTEMPTS,
 14 |     DEFAULT_MAX_RETRY_DELAY,
 15 |     METRIC_JITTER_RANGE,
 16 | )
 17 | 
 18 | logger = logging.getLogger(__name__)
 19 | 
 20 | T = TypeVar("T")
 21 | 
 22 | 
 23 | class RetryConfig:
 24 |     """Configuration for retry behavior."""
 25 | 
 26 |     def __init__(
 27 |         self,
 28 |         max_attempts: int = DEFAULT_MAX_RETRY_ATTEMPTS,
 29 |         initial_delay: float = DEFAULT_INITIAL_RETRY_DELAY,
 30 |         max_delay: float = DEFAULT_MAX_RETRY_DELAY,
 31 |         exponential_base: float = DEFAULT_EXPONENTIAL_BASE,
 32 |         jitter: bool = True,
 33 |         retryable_exceptions: tuple[type[Exception], ...] = (
 34 |             ConnectionError,
 35 |             TimeoutError,
 36 |             OSError,
 37 |         ),
 38 |         retryable_status_codes: tuple[int, ...] = (429, 502, 503, 504),
 39 |     ):
 40 |         """Initialize retry configuration.
 41 | 
 42 |         Args:
 43 |             max_attempts: Maximum number of retry attempts
 44 |             initial_delay: Initial delay between retries in seconds
 45 |             max_delay: Maximum delay between retries in seconds
 46 |             exponential_base: Base for exponential backoff calculation
 47 |             jitter: Whether to add random jitter to delays
 48 |             retryable_exceptions: Exception types that should trigger retry
 49 |             retryable_status_codes: HTTP status codes that should trigger retry
 50 |         """
 51 |         self.max_attempts = max_attempts
 52 |         self.initial_delay = initial_delay
 53 |         self.max_delay = max_delay
 54 |         self.exponential_base = exponential_base
 55 |         self.jitter = jitter
 56 |         self.retryable_exceptions = retryable_exceptions
 57 |         self.retryable_status_codes = retryable_status_codes
 58 | 
 59 | 
 60 | def calculate_delay(attempt: int, config: RetryConfig) -> float:
 61 |     """Calculate delay for the next retry attempt.
 62 | 
 63 |     Args:
 64 |         attempt: Current attempt number (0-based)
 65 |         config: Retry configuration
 66 | 
 67 |     Returns:
 68 |         Delay in seconds before the next retry
 69 |     """
 70 |     # Exponential backoff: delay = initial_delay * (base ^ attempt)
 71 |     delay = config.initial_delay * (config.exponential_base**attempt)
 72 | 
 73 |     # Cap at maximum delay
 74 |     delay = min(delay, config.max_delay)
 75 | 
 76 |     # Add jitter to prevent thundering herd
 77 |     if config.jitter:
 78 |         jitter_range = delay * METRIC_JITTER_RANGE  # 10% jitter
 79 |         # Use secrets for cryptographically secure randomness
 80 |         # Generate random float between -1 and 1, then scale
 81 |         random_factor = (secrets.randbits(32) / (2**32 - 1)) * 2 - 1
 82 |         jitter = random_factor * jitter_range
 83 |         delay += jitter
 84 | 
 85 |     return max(0, delay)  # Ensure non-negative
 86 | 
 87 | 
 88 | def is_retryable_exception(exc: Exception, config: RetryConfig) -> bool:
 89 |     """Check if an exception should trigger a retry.
 90 | 
 91 |     Args:
 92 |         exc: The exception that occurred
 93 |         config: Retry configuration
 94 | 
 95 |     Returns:
 96 |         True if the exception is retryable
 97 |     """
 98 |     return isinstance(exc, config.retryable_exceptions)
 99 | 
100 | 
def is_retryable_status(status_code: int, config: RetryConfig) -> bool:
    """Report whether an HTTP status code is configured as retryable.

    Args:
        status_code: HTTP status code to look up.
        config: Retry configuration supplying ``retryable_status_codes``.

    Returns:
        True when ``status_code`` is contained in
        ``config.retryable_status_codes``, else False.
    """
    retryable_codes = config.retryable_status_codes
    return status_code in retryable_codes
112 | 
113 | 
def with_retry(
    config: RetryConfig | None = None,
) -> Callable[
    [Callable[..., Coroutine[Any, Any, T]]],
    Callable[..., Coroutine[Any, Any, T]],
]:
    """Decorator to add retry logic to async functions.

    The wrapped coroutine function is awaited up to ``config.max_attempts``
    times (at least once, even if that value is zero or negative). An
    exception is re-raised immediately when it is not retryable or when the
    attempt budget is exhausted; otherwise the wrapper sleeps for the
    backoff delay and tries again.

    Args:
        config: Retry configuration (uses defaults if not provided)

    Returns:
        Decorated function with retry logic
    """
    if config is None:
        config = RetryConfig()

    def decorator(
        func: Callable[..., Coroutine[Any, Any, T]],
    ) -> Callable[..., Coroutine[Any, Any, T]]:
        # functools.partial objects and callable instances have no __name__;
        # reading it inside the except handler would raise AttributeError
        # and hide the real failure, so resolve a printable name up front.
        func_name = getattr(func, "__name__", repr(func))

        @functools.wraps(func)
        async def wrapper(*args: Any, **kwargs: Any) -> T:
            # A budget below one would skip the call entirely; always try
            # the function at least once.
            total_attempts = max(1, config.max_attempts)

            for attempt in range(total_attempts):
                try:
                    return await func(*args, **kwargs)
                except Exception as exc:
                    # Check if this is the last attempt
                    if attempt == total_attempts - 1:
                        logger.error(
                            f"Max retry attempts ({total_attempts}) "
                            f"reached for {func_name}: {exc}"
                        )
                        raise

                    # Check if the exception is retryable
                    if not is_retryable_exception(exc, config):
                        logger.debug(
                            f"Non-retryable exception in {func_name}: {exc}"
                        )
                        raise

                    # Calculate delay for next attempt
                    delay = calculate_delay(attempt, config)
                    logger.warning(
                        f"Retry attempt {attempt + 1}/{total_attempts} "
                        f"for {func_name} after {delay:.2f}s delay. "
                        f"Error: {exc}"
                    )

                    # Wait before retrying
                    await asyncio.sleep(delay)

            # Every iteration either returns or raises on the final attempt,
            # so this is unreachable; it is kept as a guard for type checkers.
            raise RuntimeError("Unexpected retry loop exit")

        return wrapper

    return decorator
178 | 
179 | 
class RetryableHTTPError(Exception):
    """HTTP failure carrying its status code and message.

    Intended, per its name, for HTTP errors that should be retried. The
    exception text is ``"HTTP <status_code>: <message>"``.
    """

    def __init__(self, status_code: int, message: str):
        # Build the base-class message first, then expose the raw parts so
        # callers can inspect them individually.
        super().__init__(f"HTTP {status_code}: {message}")
        self.message = message
        self.status_code = status_code
187 | 
188 | 
async def retry_with_backoff(
    func: Callable[..., Coroutine[Any, Any, T]],
    *args: Any,
    config: RetryConfig | None = None,
    **kwargs: Any,
) -> T:
    """Execute a function with retry logic and exponential backoff.

    This is an alternative to the decorator for cases where you need
    more control over retry behavior. ``func`` is awaited up to
    ``config.max_attempts`` times (at least once, even if that value is
    zero or negative).

    Args:
        func: Async function to execute
        *args: Positional arguments for the function
        config: Retry configuration (uses defaults if not provided)
        **kwargs: Keyword arguments for the function

    Returns:
        Result of the function call

    Raises:
        Exception: The exception raised by ``func`` when it is not
            retryable, or the last one raised once all attempts are used.
    """
    if config is None:
        config = RetryConfig()

    # functools.partial objects and callable instances have no __name__;
    # reading it inside the except handler would raise AttributeError and
    # hide the real failure, so resolve a printable name up front.
    func_name = getattr(func, "__name__", repr(func))

    # A budget below one would skip the call entirely; always try the
    # function at least once.
    total_attempts = max(1, config.max_attempts)

    for attempt in range(total_attempts):
        try:
            return await func(*args, **kwargs)
        except Exception as exc:
            # Check if this is the last attempt
            if attempt == total_attempts - 1:
                logger.error(
                    f"Max retry attempts ({total_attempts}) "
                    f"reached for {func_name}: {exc}"
                )
                raise

            # Check if the exception is retryable
            if not is_retryable_exception(exc, config):
                logger.debug(
                    f"Non-retryable exception in {func_name}: {exc}"
                )
                raise

            # Calculate delay for next attempt
            delay = calculate_delay(attempt, config)
            logger.warning(
                f"Retry attempt {attempt + 1}/{total_attempts} "
                f"for {func_name} after {delay:.2f}s delay. "
                f"Error: {exc}"
            )

            # Wait before retrying
            await asyncio.sleep(delay)

    # Every iteration either returns or raises on the final attempt, so this
    # is unreachable; it is kept as a guard for type checkers.
    raise RuntimeError("Unexpected retry loop exit")
253 | 
```

--------------------------------------------------------------------------------
/tests/integration/test_openfda_integration.py:
--------------------------------------------------------------------------------

```python
  1 | """Integration tests for OpenFDA API.
  2 | 
  3 | These tests make real API calls to verify FDA integration works correctly.
  4 | They are marked with pytest.mark.integration and can be skipped with --ignore-integration.
  5 | """
  6 | 
  7 | import os
  8 | 
  9 | import pytest
 10 | 
 11 | from biomcp.openfda.adverse_events import search_adverse_events
 12 | from biomcp.openfda.device_events import search_device_events
 13 | from biomcp.openfda.drug_approvals import search_drug_approvals
 14 | from biomcp.openfda.drug_labels import search_drug_labels
 15 | from biomcp.openfda.drug_recalls import search_drug_recalls
 16 | 
 17 | 
 18 | @pytest.mark.integration
 19 | class TestOpenFDAIntegration:
 20 |     """Integration tests for OpenFDA API endpoints."""
 21 | 
 22 |     @pytest.mark.asyncio
 23 |     async def test_adverse_events_real_api(self):
 24 |         """Test real adverse event API call."""
 25 |         result = await search_adverse_events(drug="aspirin", limit=5)
 26 | 
 27 |         # Should return formatted results
 28 |         assert isinstance(result, str)
 29 |         assert len(result) > 100  # Non-trivial response
 30 | 
 31 |         # Should contain disclaimer
 32 |         assert "FDA Data Notice" in result
 33 | 
 34 |         # Should have structure
 35 |         if "No adverse events found" not in result:
 36 |             assert (
 37 |                 "Total Reports Found:" in result or "adverse" in result.lower()
 38 |             )
 39 | 
 40 |     @pytest.mark.asyncio
 41 |     async def test_drug_labels_real_api(self):
 42 |         """Test real drug label API call."""
 43 |         result = await search_drug_labels(name="ibuprofen", limit=5)
 44 | 
 45 |         # Should return formatted results
 46 |         assert isinstance(result, str)
 47 |         assert len(result) > 100
 48 | 
 49 |         # Should contain disclaimer
 50 |         assert "FDA Data Notice" in result
 51 | 
 52 |         # Should have label information
 53 |         if "No drug labels found" not in result:
 54 |             assert "Total Labels Found:" in result or "label" in result.lower()
 55 | 
 56 |     @pytest.mark.asyncio
 57 |     async def test_device_events_real_api(self):
 58 |         """Test real device event API call."""
 59 |         result = await search_device_events(device="insulin pump", limit=5)
 60 | 
 61 |         # Should return formatted results
 62 |         assert isinstance(result, str)
 63 |         assert len(result) > 100
 64 | 
 65 |         # Should contain disclaimer
 66 |         assert "FDA Data Notice" in result
 67 | 
 68 |         # Should have device information
 69 |         if "No device events found" not in result:
 70 |             assert (
 71 |                 "Total Events Found:" in result or "device" in result.lower()
 72 |             )
 73 | 
 74 |     @pytest.mark.asyncio
 75 |     async def test_drug_approvals_real_api(self):
 76 |         """Test real drug approval API call."""
 77 |         result = await search_drug_approvals(drug="pembrolizumab", limit=5)
 78 | 
 79 |         # Should return formatted results
 80 |         assert isinstance(result, str)
 81 |         assert len(result) > 100
 82 | 
 83 |         # Should contain disclaimer
 84 |         assert "FDA Data Notice" in result
 85 | 
 86 |         # Pembrolizumab (Keytruda) should have results
 87 |         if "No drug approvals found" not in result:
 88 |             assert "KEYTRUDA" in result or "pembrolizumab" in result.lower()
 89 | 
 90 |     @pytest.mark.asyncio
 91 |     async def test_drug_recalls_real_api(self):
 92 |         """Test real drug recall API call."""
 93 |         # Use drug parameter which is more likely to return results
 94 |         result = await search_drug_recalls(drug="acetaminophen", limit=5)
 95 | 
 96 |         # Should return formatted results
 97 |         assert isinstance(result, str)
 98 |         assert len(result) > 100
 99 | 
100 |         # Should contain disclaimer OR error message (API might return no results)
101 |         assert "FDA Data Notice" in result or "Error" in result
102 | 
103 |         # Should have recall information if not an error
104 |         if "Error" not in result and "No drug recalls found" not in result:
105 |             assert "recall" in result.lower()
106 | 
107 |     @pytest.mark.asyncio
108 |     async def test_rate_limiting_without_key(self):
109 |         """Test that rate limiting is handled gracefully without API key."""
110 |         # Temporarily remove API key if present
111 |         original_key = os.environ.get("OPENFDA_API_KEY")
112 |         if original_key:
113 |             del os.environ["OPENFDA_API_KEY"]
114 | 
115 |         try:
116 |             # Make multiple rapid requests
117 |             results = []
118 |             for i in range(5):
119 |                 result = await search_adverse_events(drug=f"drug{i}", limit=1)
120 |                 results.append(result)
121 | 
122 |             # All should return strings (not crash)
123 |             assert all(isinstance(r, str) for r in results)
124 | 
125 |         finally:
126 |             # Restore API key
127 |             if original_key:
128 |                 os.environ["OPENFDA_API_KEY"] = original_key
129 | 
130 |     @pytest.mark.asyncio
131 |     async def test_api_key_usage(self):
132 |         """Test that API key is used when provided."""
133 |         # This test only runs if API key is available
134 |         if not os.environ.get("OPENFDA_API_KEY"):
135 |             pytest.skip("OPENFDA_API_KEY not set")
136 | 
137 |         result = await search_adverse_events(drug="acetaminophen", limit=10)
138 | 
139 |         # With API key, should be able to get results
140 |         assert isinstance(result, str)
141 |         assert len(result) > 100
142 | 
143 |     @pytest.mark.asyncio
144 |     async def test_error_handling_invalid_params(self):
145 |         """Test graceful handling of invalid parameters."""
146 |         # Search with invalid/nonsense parameters
147 |         result = await search_adverse_events(
148 |             drug="xyzabc123notarealdrugname999", limit=5
149 |         )
150 | 
151 |         # Should handle gracefully
152 |         assert isinstance(result, str)
153 | 
154 |         # Should either show no results or error message
155 |         assert (
156 |             "No adverse events found" in result
157 |             or "Error" in result
158 |             or "no results" in result.lower()
159 |         )
160 | 
161 |     @pytest.mark.asyncio
162 |     async def test_cross_domain_consistency(self):
163 |         """Test that different FDA domains return consistent formats."""
164 |         # Search for a common drug across domains
165 |         drug_name = "aspirin"
166 | 
167 |         adverse_result = await search_adverse_events(drug=drug_name, limit=2)
168 |         label_result = await search_drug_labels(name=drug_name, limit=2)
169 | 
170 |         # Both should have disclaimers
171 |         assert "FDA Data Notice" in adverse_result
172 |         assert "FDA Data Notice" in label_result
173 | 
174 |         # Both should be properly formatted strings
175 |         assert isinstance(adverse_result, str)
176 |         assert isinstance(label_result, str)
177 | 
178 |         # Both should mention the drug or indicate no results
179 |         assert (
180 |             drug_name in adverse_result.lower()
181 |             or "no " in adverse_result.lower()
182 |         )
183 |         assert (
184 |             drug_name in label_result.lower() or "no " in label_result.lower()
185 |         )
186 | 
187 |     @pytest.mark.asyncio
188 |     async def test_special_characters_handling(self):
189 |         """Test handling of special characters in queries."""
190 |         # Test with special characters
191 |         result = await search_drug_labels(name="aspirin/dipyridamole", limit=5)
192 | 
193 |         # Should handle forward slash gracefully
194 |         assert isinstance(result, str)
195 |         # API might return error or no results for complex drug names
196 |         assert isinstance(result, str)  # Just verify we get a response
197 | 
198 |     @pytest.mark.asyncio
199 |     async def test_large_result_handling(self):
200 |         """Test handling of large result sets."""
201 |         # Request maximum allowed results
202 |         result = await search_adverse_events(
203 |             drug="ibuprofen",  # Common drug with many reports
204 |             limit=100,  # Maximum limit
205 |         )
206 | 
207 |         # Should handle large results
208 |         assert isinstance(result, str)
209 |         assert len(result) > 500  # Should be substantial
210 | 
211 |         # Should still include disclaimer
212 |         assert "FDA Data Notice" in result
213 | 
214 |     @pytest.mark.asyncio
215 |     async def test_empty_query_handling(self):
216 |         """Test handling of empty/missing query parameters."""
217 |         # Search without specifying a drug
218 |         result = await search_drug_recalls(
219 |             limit=5  # Only limit, no other filters
220 |         )
221 | 
222 |         # Should return recent recalls
223 |         assert isinstance(result, str)
224 |         assert len(result) > 100
225 | 
226 |         # Should have results (there are always some recalls)
227 |         if "Error" not in result:
228 |             assert "recall" in result.lower()
229 | 
```

--------------------------------------------------------------------------------
/tests/tdd/test_metrics.py:
--------------------------------------------------------------------------------

```python
  1 | """Tests for performance metrics collection."""
  2 | 
  3 | import asyncio
  4 | import time
  5 | from datetime import datetime
  6 | from unittest.mock import patch
  7 | 
  8 | import pytest
  9 | 
 10 | from biomcp.metrics import (
 11 |     MetricSample,
 12 |     MetricsCollector,
 13 |     MetricSummary,
 14 |     Timer,
 15 |     get_all_metrics,
 16 |     get_metric_summary,
 17 |     record_metric,
 18 |     track_performance,
 19 | )
 20 | 
 21 | 
 22 | @pytest.fixture(autouse=True)
 23 | def enable_metrics(monkeypatch):
 24 |     """Enable metrics via env var and reload biomcp.metrics for each test."""
 25 |     monkeypatch.setenv("BIOMCP_METRICS_ENABLED", "true")
 26 |     # Re-execute the module so it picks up the env var set above
 27 |     import importlib
 28 | 
 29 |     import biomcp.metrics
 30 | 
 31 |     importlib.reload(biomcp.metrics)
 32 | 
 33 | 
 34 | def test_metric_sample():
 35 |     """Test that MetricSample keeps the field values it was built with."""
 36 |     sample = MetricSample(
 37 |         timestamp=datetime.now(),
 38 |         duration=1.5,
 39 |         success=True,
 40 |         error=None,
 41 |         tags={"domain": "article"},
 42 |     )
 43 | 
 44 |     assert sample.duration == 1.5
 45 |     assert sample.success is True
 46 |     assert sample.error is None
 47 |     assert sample.tags["domain"] == "article"
 48 | 
 49 | 
 50 | def test_metric_summary_from_samples():
 51 |     """Test MetricSummary calculation from samples."""
 52 |     now = datetime.now()
 53 |     samples = [
 54 |         MetricSample(timestamp=now, duration=0.1, success=True),
 55 |         MetricSample(timestamp=now, duration=0.2, success=True),
 56 |         MetricSample(
 57 |             timestamp=now, duration=0.3, success=False, error="timeout"
 58 |         ),
 59 |         MetricSample(timestamp=now, duration=0.4, success=True),
 60 |         MetricSample(timestamp=now, duration=0.5, success=True),
 61 |     ]
 62 | 
 63 |     summary = MetricSummary.from_samples("test_metric", samples)
 64 | 
 65 |     assert summary.name == "test_metric"
 66 |     assert summary.count == 5
 67 |     assert summary.success_count == 4
 68 |     assert summary.error_count == 1
 69 |     assert summary.total_duration == 1.5
 70 |     assert summary.min_duration == 0.1
 71 |     assert summary.max_duration == 0.5
 72 |     assert summary.avg_duration == 0.3
 73 |     assert summary.error_rate == 0.2  # 1/5
 74 | 
 75 |     # Check percentiles
 76 |     assert summary.p50_duration == 0.3  # median
 77 |     assert 0.4 <= summary.p95_duration <= 0.5
 78 |     assert 0.4 <= summary.p99_duration <= 0.5
 79 | 
 80 | 
 81 | def test_metric_summary_empty():
 82 |     """Test that a summary built from no samples has zeroed counters."""
 83 |     summary = MetricSummary.from_samples("empty", [])
 84 | 
 85 |     assert summary.count == 0
 86 |     assert summary.success_count == 0
 87 |     assert summary.error_count == 0
 88 |     assert summary.total_duration == 0.0
 89 |     assert summary.error_rate == 0.0
 90 | 
 91 | 
 92 | @pytest.mark.asyncio
 93 | async def test_metrics_collector():
 94 |     """Test recording, the per-metric sample cap, and clearing a metric."""
 95 |     collector = MetricsCollector(max_samples_per_metric=3)
 96 | 
 97 |     # Record three samples, which exactly fills the cap of 3
 98 |     await collector.record("api_call", 0.1, success=True)
 99 |     await collector.record("api_call", 0.2, success=True)
100 |     await collector.record("api_call", 0.3, success=False, error="timeout")
101 | 
102 |     # The summary reflects all three samples
103 |     summary = await collector.get_summary("api_call")
104 |     assert summary is not None
105 |     assert summary.count == 3
106 |     assert summary.success_count == 2
107 |     assert summary.error_count == 1
108 | 
109 |     # Two more samples push the collector past the cap
110 |     await collector.record("api_call", 0.4, success=True)
111 |     await collector.record("api_call", 0.5, success=True)
112 | 
113 |     summary = await collector.get_summary("api_call")
114 |     assert summary.count == 3  # Still 3 due to limit
115 |     assert summary.min_duration == 0.3  # Oldest samples dropped
116 | 
117 |     # After clearing, the metric has no summary at all
118 |     await collector.clear("api_call")
119 |     summary = await collector.get_summary("api_call")
120 |     assert summary is None
121 | 
122 | 
123 | @pytest.mark.asyncio
124 | async def test_global_metrics_functions():
125 |     """Test record_metric, get_metric_summary and get_all_metrics together."""
126 |     # Start from an empty module-level collector
127 |     from biomcp.metrics import _metrics_collector
128 | 
129 |     await _metrics_collector.clear()
130 | 
131 |     # Record one success and one failure under the same name
132 |     await record_metric("test_op", 0.5, success=True)
133 |     await record_metric("test_op", 0.7, success=False, error="failed")
134 | 
135 |     # Both samples are counted; only one is a success
136 |     summary = await get_metric_summary("test_op")
137 |     assert summary is not None
138 |     assert summary.count == 2
139 |     assert summary.success_count == 1
140 | 
141 |     # The metric name shows up in the full listing
142 |     all_metrics = await get_all_metrics()
143 |     assert "test_op" in all_metrics
144 | 
145 | 
146 | @pytest.mark.asyncio
147 | async def test_track_performance_decorator_async():
148 |     """Test that track_performance records one success for an async call."""
149 |     from biomcp.metrics import _metrics_collector
150 | 
151 |     await _metrics_collector.clear()
152 | 
153 |     @track_performance("test_async_func")
154 |     async def slow_operation():
155 |         await asyncio.sleep(0.1)
156 |         return "done"
157 | 
158 |     result = await slow_operation()
159 |     assert result == "done"
160 | 
161 |     # One successful sample, at least as long as the 0.1s sleep
162 |     summary = await get_metric_summary("test_async_func")
163 |     assert summary is not None
164 |     assert summary.count == 1
165 |     assert summary.success_count == 1
166 |     assert summary.min_duration >= 0.1
167 | 
168 | 
169 | @pytest.mark.asyncio
170 | async def test_track_performance_decorator_async_error():
171 |     """Test that track_performance records an error and lets it propagate."""
172 |     from biomcp.metrics import _metrics_collector
173 | 
174 |     await _metrics_collector.clear()
175 | 
176 |     @track_performance("test_async_error")
177 |     async def failing_operation():
178 |         await asyncio.sleep(0.05)
179 |         raise ValueError("Test error")
180 | 
181 |     with pytest.raises(ValueError, match="Test error"):
182 |         await failing_operation()
183 | 
184 |     # One sample, counted as an error rather than a success
185 |     summary = await get_metric_summary("test_async_error")
186 |     assert summary is not None
187 |     assert summary.count == 1
188 |     assert summary.success_count == 0
189 |     assert summary.error_count == 1
190 | 
191 | 
192 | def test_track_performance_decorator_sync():
193 |     """Test that track_performance records a metric for a sync function."""
194 | 
195 |     @track_performance("test_sync_func")
196 |     def fast_operation():
197 |         time.sleep(0.05)
198 |         return "done"
199 | 
200 |     # The decorated sync function is called from inside a running event loop
201 |     async def run_test():
202 |         from biomcp.metrics import _metrics_collector
203 | 
204 |         await _metrics_collector.clear()
205 | 
206 |         result = fast_operation()
207 |         assert result == "done"
208 | 
209 |         # Yield to the loop so the metric has time to be recorded
210 |         await asyncio.sleep(0.1)
211 | 
212 |         summary = await get_metric_summary("test_sync_func")
213 |         assert summary is not None
214 |         assert summary.count == 1
215 |         assert summary.success_count == 1
216 | 
217 |     asyncio.run(run_test())
218 | 
219 | 
220 | @pytest.mark.asyncio
221 | async def test_timer_context_manager():
222 |     """Test Timer as both an async and a sync context manager."""
223 |     from biomcp.metrics import _metrics_collector
224 | 
225 |     await _metrics_collector.clear()
226 | 
227 |     # Async form, with tags attached
228 |     async with Timer("test_timer", tags={"operation": "test"}):
229 |         await asyncio.sleep(0.1)
230 | 
231 |     summary = await get_metric_summary("test_timer")
232 |     assert summary is not None
233 |     assert summary.count == 1
234 |     assert summary.success_count == 1
235 |     assert summary.min_duration >= 0.1
236 | 
237 |     # Sync form, used while an event loop is running
238 |     with Timer("test_sync_timer"):
239 |         time.sleep(0.05)
240 | 
241 |     # Yield to the loop so the metric has time to be recorded
242 |     await asyncio.sleep(0.1)
243 | 
244 |     summary = await get_metric_summary("test_sync_timer")
245 |     assert summary is not None
246 |     assert summary.count == 1
247 | 
248 | 
249 | @pytest.mark.asyncio
250 | async def test_timer_with_exception():
251 |     """Test that Timer records an error when its body raises."""
252 |     from biomcp.metrics import _metrics_collector
253 | 
254 |     await _metrics_collector.clear()
255 | 
256 |     # The exception must propagate out of the async timer
257 |     with pytest.raises(ValueError):
258 |         async with Timer("test_timer_error"):
259 |             await asyncio.sleep(0.05)
260 |             raise ValueError("Test error")
261 | 
262 |     summary = await get_metric_summary("test_timer_error")
263 |     assert summary is not None
264 |     assert summary.count == 1
265 |     assert summary.success_count == 0
266 |     assert summary.error_count == 1
267 | 
268 | 
269 | def test_timer_without_event_loop():
270 |     """Test that a sync Timer with no running loop logs at debug level."""
271 |     # Plain sync test function: Timer is used outside any async context
272 |     with patch("biomcp.metrics.logger") as mock_logger:
273 |         with Timer("test_no_loop"):
274 |             time.sleep(0.01)
275 | 
276 |         # Exactly one debug message, naming the metric and its duration
277 |         mock_logger.debug.assert_called_once()
278 |         call_args = mock_logger.debug.call_args[0][0]
279 |         assert "test_no_loop" in call_args
280 |         assert "duration=" in call_args
281 | 
```

--------------------------------------------------------------------------------
/src/biomcp/openfda/rate_limiter.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Rate limiting and circuit breaker for OpenFDA API requests.
  3 | 
  4 | This module provides client-side rate limiting to prevent API quota exhaustion
  5 | and circuit breaker pattern to handle API failures gracefully.
  6 | """
  7 | 
  8 | import asyncio
  9 | import logging
 10 | import os
 11 | import time
 12 | from collections.abc import Callable
 13 | from datetime import datetime
 14 | from enum import Enum
 15 | from typing import Any
 16 | 
 17 | logger = logging.getLogger(__name__)
 18 | 
 19 | 
 20 | class CircuitState(Enum):
 21 |     """States a CircuitBreaker moves between."""
 22 | 
 23 |     CLOSED = "closed"  # Normal operation: calls pass through
 24 |     OPEN = "open"  # Blocking requests until the recovery timeout elapses
 25 |     HALF_OPEN = "half_open"  # Testing recovery with a limited number of calls
 26 | 
 27 | 
 28 | class RateLimiter:
 29 |     """
 30 |     Token bucket rate limiter for FDA API requests.
 31 |     """
 32 | 
 33 |     def __init__(self, rate: int = 10, per: float = 1.0):
 34 |         """
 35 |         Initialize rate limiter.
 36 | 
 37 |         Args:
 38 |             rate: Number of requests allowed
 39 |             per: Time period in seconds
 40 |         """
 41 |         self.rate = rate
 42 |         self.per = per
 43 |         self.allowance = float(rate)
 44 |         self.last_check = time.monotonic()
 45 |         self._lock = asyncio.Lock()
 46 | 
 47 |     async def acquire(self) -> None:
 48 |         """
 49 |         Acquire permission to make a request.
 50 |         Blocks if rate limit would be exceeded.
 51 |         """
 52 |         async with self._lock:
 53 |             current = time.monotonic()
 54 |             time_passed = current - self.last_check
 55 |             self.last_check = current
 56 | 
 57 |             # Add tokens based on time passed
 58 |             self.allowance += time_passed * (self.rate / self.per)
 59 | 
 60 |             # Cap at maximum rate
 61 |             if self.allowance > self.rate:
 62 |                 self.allowance = float(self.rate)
 63 | 
 64 |             # Check if we can proceed
 65 |             if self.allowance < 1.0:
 66 |                 # Calculate wait time
 67 |                 deficit = 1.0 - self.allowance
 68 |                 wait_time = deficit * (self.per / self.rate)
 69 | 
 70 |                 logger.debug(f"Rate limit: waiting {wait_time:.2f}s")
 71 |                 await asyncio.sleep(wait_time)
 72 | 
 73 |                 # Update allowance after waiting
 74 |                 self.allowance = 0.0
 75 |             else:
 76 |                 # Consume one token
 77 |                 self.allowance -= 1.0
 78 | 
 79 | 
 80 | class CircuitBreaker:
 81 |     """
 82 |     Circuit breaker to prevent cascading failures.
 83 |     """
 84 | 
 85 |     def __init__(
 86 |         self,
 87 |         failure_threshold: int = 5,
 88 |         recovery_timeout: int = 60,
 89 |         half_open_max_calls: int = 3,
 90 |     ):
 91 |         """
 92 |         Initialize circuit breaker.
 93 | 
 94 |         Args:
 95 |             failure_threshold: Number of failures before opening circuit
 96 |             recovery_timeout: Seconds to wait before attempting recovery
 97 |             half_open_max_calls: Max calls allowed in half-open state
 98 |         """
 99 |         self.failure_threshold = failure_threshold
100 |         self.recovery_timeout = recovery_timeout
101 |         self.half_open_max_calls = half_open_max_calls
102 | 
103 |         self.failure_count = 0
104 |         self.last_failure_time: float | None = None
105 |         self.state = CircuitState.CLOSED
106 |         self.half_open_calls = 0
107 |         self._lock = asyncio.Lock()
108 | 
109 |     async def call(self, func: Callable, *args, **kwargs) -> Any:
110 |         """
111 |         Execute function through circuit breaker.
112 | 
113 |         Args:
114 |             func: Async function to call
115 |             *args: Function arguments
116 |             **kwargs: Function keyword arguments
117 | 
118 |         Returns:
119 |             Function result
120 | 
121 |         Raises:
122 |             Exception: If circuit is open or function fails
123 |         """
124 |         async with self._lock:
125 |             # Check circuit state
126 |             if self.state == CircuitState.OPEN:
127 |                 if self._should_attempt_reset():
128 |                     self.state = CircuitState.HALF_OPEN
129 |                     self.half_open_calls = 0
130 |                     logger.info(
131 |                         "Circuit breaker: attempting recovery (half-open)"
132 |                     )
133 |                 else:
134 |                     if self.last_failure_time is not None:
135 |                         time_left = self.recovery_timeout - (
136 |                             time.time() - self.last_failure_time
137 |                         )
138 |                         raise Exception(
139 |                             f"Circuit breaker is OPEN. Retry in {time_left:.0f} seconds"
140 |                         )
141 |                     else:
142 |                         raise Exception("Circuit breaker is OPEN")
143 | 
144 |             elif self.state == CircuitState.HALF_OPEN:
145 |                 if self.half_open_calls >= self.half_open_max_calls:
146 |                     # Don't allow more calls in half-open state
147 |                     raise Exception(
148 |                         "Circuit breaker is HALF_OPEN. Max test calls reached"
149 |                     )
150 |                 self.half_open_calls += 1
151 | 
152 |         # Execute the function
153 |         try:
154 |             result = await func(*args, **kwargs)
155 |             await self._on_success()
156 |             return result
157 |         except Exception as e:
158 |             await self._on_failure()
159 |             raise e
160 | 
161 |     async def _on_success(self) -> None:
162 |         """Handle successful call."""
163 |         async with self._lock:
164 |             if self.state == CircuitState.HALF_OPEN:
165 |                 # Recovery succeeded
166 |                 self.state = CircuitState.CLOSED
167 |                 self.failure_count = 0
168 |                 logger.info("Circuit breaker: recovered (closed)")
169 |             else:
170 |                 # Reset failure count on success
171 |                 self.failure_count = 0
172 | 
173 |     async def _on_failure(self) -> None:
174 |         """Handle failed call."""
175 |         async with self._lock:
176 |             self.failure_count += 1
177 |             self.last_failure_time = time.time()
178 | 
179 |             if self.state == CircuitState.HALF_OPEN:
180 |                 # Recovery failed, reopen circuit
181 |                 self.state = CircuitState.OPEN
182 |                 logger.warning("Circuit breaker: recovery failed (open)")
183 |             elif self.failure_count >= self.failure_threshold:
184 |                 # Too many failures, open circuit
185 |                 self.state = CircuitState.OPEN
186 |                 logger.warning(
187 |                     f"Circuit breaker: opened after {self.failure_count} failures"
188 |                 )
189 | 
190 |     def _should_attempt_reset(self) -> bool:
191 |         """Check if enough time has passed to attempt reset."""
192 |         return (
193 |             self.last_failure_time is not None
194 |             and time.time() - self.last_failure_time >= self.recovery_timeout
195 |         )
196 | 
197 |     @property
198 |     def is_closed(self) -> bool:
199 |         """Check if circuit is closed (normal operation)."""
200 |         return self.state == CircuitState.CLOSED
201 | 
202 |     @property
203 |     def is_open(self) -> bool:
204 |         """Check if circuit is open (blocking requests)."""
205 |         return self.state == CircuitState.OPEN
206 | 
207 |     def get_state(self) -> dict[str, Any]:
208 |         """Get current circuit breaker state."""
209 |         return {
210 |             "state": self.state.value,
211 |             "failure_count": self.failure_count,
212 |             "last_failure": (
213 |                 datetime.fromtimestamp(self.last_failure_time).isoformat()
214 |                 if self.last_failure_time
215 |                 else None
216 |             ),
217 |         }
218 | 
219 | 
220 | # Global instances
221 | # Request budget depends on whether an OpenFDA API key is configured
222 | _has_api_key = bool(os.environ.get("OPENFDA_API_KEY"))
223 | _rate_limit = 240 if _has_api_key else 40  # per minute
224 | 
225 | # Create rate limiter (the budget is expressed per 60-second period)
226 | FDA_RATE_LIMITER = RateLimiter(rate=_rate_limit, per=60.0)
227 | 
228 | # Create circuit breaker
229 | FDA_CIRCUIT_BREAKER = CircuitBreaker(
230 |     failure_threshold=5, recovery_timeout=60, half_open_max_calls=3
231 | )
232 | 
233 | # Semaphore for concurrent request limiting
234 | FDA_SEMAPHORE = asyncio.Semaphore(10)  # Max 10 concurrent requests
235 | 
236 | 
237 | async def rate_limited_request(func: Callable, *args, **kwargs) -> Any:
238 |     """
239 |     Execute FDA API request with rate limiting and circuit breaker.
240 | 
241 |     Args:
242 |         func: Async function to call
243 |         *args: Positional arguments passed through to ``func``
244 |         **kwargs: Keyword arguments passed through to ``func``
245 | 
246 |     Returns:
247 |         Whatever ``FDA_CIRCUIT_BREAKER.call`` returns for ``func``
248 |     """
249 |     # Hold a concurrency slot for the whole request, including any wait below
250 |     async with FDA_SEMAPHORE:
251 |         # Wait for a rate-limit token before touching the circuit breaker
252 |         await FDA_RATE_LIMITER.acquire()
253 | 
254 |         # The breaker may reject the call or run ``func`` and track its outcome
255 |         return await FDA_CIRCUIT_BREAKER.call(func, *args, **kwargs)
256 | 
```

--------------------------------------------------------------------------------
/tests/tdd/test_nci_integration.py:
--------------------------------------------------------------------------------

```python
  1 | """Unit tests for NCI CTS API integration."""
  2 | 
  3 | from unittest.mock import patch
  4 | 
  5 | import pytest
  6 | 
  7 | from biomcp.biomarkers import search_biomarkers
  8 | from biomcp.diseases.search import search_diseases
  9 | from biomcp.integrations.cts_api import CTSAPIError, make_cts_request
 10 | from biomcp.interventions import search_interventions
 11 | from biomcp.organizations import get_organization, search_organizations
 12 | from biomcp.trials.nci_getter import get_trial_nci
 13 | from biomcp.trials.nci_search import convert_query_to_nci, search_trials_nci
 14 | from biomcp.trials.search import TrialQuery
 15 | 
 16 | 
 17 | class TestCTSAPIIntegration:
 18 |     """Test the make_cts_request helper."""
 19 | 
 20 |     @pytest.mark.asyncio
 21 |     async def test_make_cts_request_no_api_key(self):
 22 |         """Test that an empty environment and no key raise CTSAPIError."""
 23 |         with (
 24 |             patch.dict("os.environ", {}, clear=True),
 25 |             pytest.raises(CTSAPIError, match="NCI API key required"),
 26 |         ):
 27 |             await make_cts_request("https://example.com/api")
 28 | 
 29 |     @pytest.mark.asyncio
 30 |     async def test_make_cts_request_with_api_key(self):
 31 |         """Test that an explicit API key yields the mocked response data."""
 32 |         with patch("biomcp.integrations.cts_api.request_api") as mock_request:
 33 |             mock_request.return_value = ({"data": "test"}, None)
 34 | 
 35 |             result = await make_cts_request(
 36 |                 "https://example.com/api", api_key="test-key"
 37 |             )
 38 | 
 39 |             assert result == {"data": "test"}
 40 |             mock_request.assert_called_once()
 41 | 
 42 |             # The request payload must carry a "_headers" entry
 43 |             call_args = mock_request.call_args
 44 |             request_data = call_args.kwargs["request"]
 45 |             assert "_headers" in request_data
 46 | 
 47 | 
 48 | class TestOrganizationsModule:
 49 |     """Test organization search and lookup with make_cts_request mocked."""
 50 | 
 51 |     @pytest.mark.asyncio
 52 |     async def test_search_organizations(self):
 53 |         """Test that search hits come back under the ``organizations`` key."""
 54 |         with patch(
 55 |             "biomcp.organizations.search.make_cts_request"
 56 |         ) as mock_request:
 57 |             mock_request.return_value = {
 58 |                 "data": [{"id": "ORG001", "name": "Test Cancer Center"}],
 59 |                 "total": 1,
 60 |             }
 61 | 
 62 |             result = await search_organizations(
 63 |                 name="Cancer Center", api_key="test-key"
 64 |             )
 65 | 
 66 |             assert result["total"] == 1
 67 |             assert len(result["organizations"]) == 1
 68 |             assert result["organizations"][0]["name"] == "Test Cancer Center"
 69 | 
 70 |     @pytest.mark.asyncio
 71 |     async def test_get_organization(self):
 72 |         """Test that a lookup by ID returns the fields of the mocked record."""
 73 |         with patch(
 74 |             "biomcp.organizations.getter.make_cts_request"
 75 |         ) as mock_request:
 76 |             mock_request.return_value = {
 77 |                 "data": {
 78 |                     "id": "ORG001",
 79 |                     "name": "Test Cancer Center",
 80 |                     "type": "Academic",
 81 |                 }
 82 |             }
 83 | 
 84 |             result = await get_organization("ORG001", api_key="test-key")
 85 | 
 86 |             assert result["id"] == "ORG001"
 87 |             assert result["name"] == "Test Cancer Center"
 88 |             assert result["type"] == "Academic"
 89 | 
 90 | 
 91 | class TestInterventionsModule:
 92 |     """Test intervention search with make_cts_request mocked."""
 93 | 
 94 |     @pytest.mark.asyncio
 95 |     async def test_search_interventions(self):
 96 |         """Test that search hits come back under the ``interventions`` key."""
 97 |         with patch(
 98 |             "biomcp.interventions.search.make_cts_request"
 99 |         ) as mock_request:
100 |             mock_request.return_value = {
101 |                 "data": [
102 |                     {"id": "INT001", "name": "Pembrolizumab", "type": "Drug"}
103 |                 ],
104 |                 "total": 1,
105 |             }
106 | 
107 |             result = await search_interventions(
108 |                 name="Pembrolizumab", api_key="test-key"
109 |             )
110 | 
111 |             assert result["total"] == 1
112 |             assert len(result["interventions"]) == 1
113 |             assert result["interventions"][0]["name"] == "Pembrolizumab"
114 | 
115 | 
116 | class TestBiomarkersModule:
117 |     """Test biomarker search with make_cts_request mocked."""
118 | 
119 |     @pytest.mark.asyncio
120 |     async def test_search_biomarkers(self):
121 |         """Test that search hits come back under the ``biomarkers`` key."""
122 |         with patch(
123 |             "biomcp.biomarkers.search.make_cts_request"
124 |         ) as mock_request:
125 |             mock_request.return_value = {
126 |                 "data": [{"id": "BIO001", "name": "PD-L1", "gene": "CD274"}],
127 |                 "total": 1,
128 |             }
129 | 
130 |             result = await search_biomarkers(name="PD-L1", api_key="test-key")
131 | 
132 |             assert result["total"] == 1
133 |             assert len(result["biomarkers"]) == 1
134 |             assert result["biomarkers"][0]["name"] == "PD-L1"
135 | 
136 | 
137 | class TestDiseasesModule:
138 |     """Test disease search with make_cts_request mocked."""
139 | 
140 |     @pytest.mark.asyncio
141 |     async def test_search_diseases_nci(self):
142 |         """Test that search hits come back under the ``diseases`` key."""
143 |         with patch("biomcp.diseases.search.make_cts_request") as mock_request:
144 |             mock_request.return_value = {
145 |                 "data": [
146 |                     {
147 |                         "id": "DIS001",
148 |                         "name": "Melanoma",
149 |                         "synonyms": ["Malignant Melanoma"],
150 |                     }
151 |                 ],
152 |                 "total": 1,
153 |             }
154 | 
155 |             result = await search_diseases(name="Melanoma", api_key="test-key")
156 | 
157 |             assert result["total"] == 1
158 |             assert len(result["diseases"]) == 1
159 |             assert result["diseases"][0]["name"] == "Melanoma"
160 | 
161 | 
162 | class TestNCITrialIntegration:
163 |     """Test NCI query conversion, trial search and trial lookup."""
164 | 
165 |     @pytest.mark.asyncio
166 |     async def test_convert_query_to_nci(self):
167 |         """Test the NCI parameters produced from a populated TrialQuery."""
168 |         query = TrialQuery(
169 |             conditions=["melanoma"],
170 |             phase="PHASE2",
171 |             recruiting_status="OPEN",
172 |             allow_brain_mets=True,
173 |         )
174 | 
175 |         # Disease/intervention lookups are mocked to return no matches
176 |         with (
177 |             patch("biomcp.trials.nci_search.search_diseases") as mock_diseases,
178 |             patch(
179 |                 "biomcp.trials.nci_search.search_interventions"
180 |             ) as mock_interventions,
181 |         ):
182 |             mock_diseases.return_value = {"diseases": []}
183 |             mock_interventions.return_value = {"interventions": []}
184 | 
185 |             params = await convert_query_to_nci(query)
186 | 
187 |             assert params["diseases"] == ["melanoma"]
188 |             assert params["phase"] == "II"
189 |             assert params["recruitment_status"] == [
190 |                 "recruiting",
191 |                 "enrolling_by_invitation",
192 |             ]
193 |             assert params["accepts_brain_mets"] is True
194 | 
195 |     @pytest.mark.asyncio
196 |     async def test_search_trials_nci(self):
197 |         """Test that search results are tagged with source ``nci``."""
198 |         query = TrialQuery(conditions=["melanoma"])
199 | 
200 |         with (
201 |             patch(
202 |                 "biomcp.trials.nci_search.convert_query_to_nci"
203 |             ) as mock_convert,
204 |             patch("biomcp.trials.nci_search.make_cts_request") as mock_request,
205 |         ):
206 |             mock_convert.return_value = {"diseases": ["melanoma"]}
207 |             mock_request.return_value = {
208 |                 "data": [
209 |                     {
210 |                         "nct_id": "NCT12345",
211 |                         "title": "Test Trial",
212 |                         "phase": "II",
213 |                     }
214 |                 ],
215 |                 "total": 1,
216 |             }
217 | 
218 |             result = await search_trials_nci(query, api_key="test-key")
219 | 
220 |             assert result["total"] == 1
221 |             assert result["source"] == "nci"
222 |             assert len(result["trials"]) == 1
223 |             assert result["trials"][0]["nct_id"] == "NCT12345"
224 | 
225 |     @pytest.mark.asyncio
226 |     async def test_get_trial_nci(self):
227 |         """Test that a lookup by NCT ID returns the mocked trial fields."""
228 |         with patch(
229 |             "biomcp.trials.nci_getter.make_cts_request"
230 |         ) as mock_request:
231 |             mock_request.return_value = {
232 |                 "data": {
233 |                     "nct_id": "NCT12345",
234 |                     "title": "Test Trial",
235 |                     "phase": "II",
236 |                     "overall_status": "Recruiting",
237 |                 }
238 |             }
239 | 
240 |             result = await get_trial_nci("NCT12345", api_key="test-key")
241 | 
242 |             assert result["nct_id"] == "NCT12345"
243 |             assert result["title"] == "Test Trial"
244 |             assert result["phase"] == "II"
245 | 
```

--------------------------------------------------------------------------------
/src/biomcp/cli/variants.py:
--------------------------------------------------------------------------------

```python
  1 | """BioMCP Command Line Interface for genetic variants."""
  2 | 
  3 | import asyncio
  4 | from typing import Annotated
  5 | 
  6 | import typer
  7 | 
  8 | from ..constants import DEFAULT_ASSEMBLY, SYSTEM_PAGE_SIZE
  9 | from ..variants import getter, search
 10 | 
 11 | variant_app = typer.Typer(help="Search and get variants from MyVariant.info.")
 12 | 
 13 | 
 14 | @variant_app.command("get")
 15 | def get_variant(
 16 |     variant_id: Annotated[
 17 |         str,
 18 |         typer.Argument(
 19 |             help="rsID (rs456) or MyVariant ID (chr1:g.1234A>G)",
 20 |         ),
 21 |     ],
 22 |     output_json: Annotated[
 23 |         bool,
 24 |         typer.Option(
 25 |             "--json",
 26 |             "-j",
 27 |             help="Render in JSON format",
 28 |             case_sensitive=False,
 29 |         ),
 30 |     ] = False,
 31 |     include_external: Annotated[
 32 |         bool,
 33 |         typer.Option(
 34 |             "--include-external/--no-external",
 35 |             help="Include annotations from external sources (TCGA, 1000 Genomes, cBioPortal)",
 36 |         ),
 37 |     ] = True,
 38 |     assembly: Annotated[
 39 |         str,
 40 |         typer.Option(
 41 |             "--assembly",
 42 |             help="Genome assembly (hg19 or hg38)",
 43 |             case_sensitive=False,
 44 |         ),
 45 |     ] = DEFAULT_ASSEMBLY,
 46 | ):
 47 |     """
 48 |     Get detailed information about a specific genetic variant.
 49 | 
 50 |     Supports HGVS identifiers (e.g., 'chr7:g.140453136A>T') or dbSNP rsIDs.
 51 | 
 52 |     Examples:
 53 |         Get by HGVS: biomcp variant get "chr7:g.140453136A>T"
 54 |         Get by rsID: biomcp variant get rs113488022
 55 |         Get as JSON: biomcp variant get rs113488022 --json
 56 |         Get without external annotations: biomcp variant get rs113488022 --no-external
 57 |         Get with hg38 assembly: biomcp variant get rs113488022 --assembly hg38
 58 |     """
 59 |     if not variant_id:
 60 |         typer.echo("Error: A variant identifier must be provided.", err=True)
 61 |         raise typer.Exit(code=1)
 62 | 
 63 |     # Validate assembly value (exact, case-sensitive match; NOTE(review): "HG38" is rejected here - confirm intended)
 64 |     if assembly not in ["hg19", "hg38"]:
 65 |         typer.echo(
 66 |             f"Error: Invalid assembly '{assembly}'. Must be 'hg19' or 'hg38'.",
 67 |             err=True,
 68 |         )
 69 |         raise typer.Exit(code=1)
 70 | 
 71 |     result = asyncio.run(
 72 |         getter.get_variant(
 73 |             variant_id,
 74 |             output_json=output_json,
 75 |             include_external=include_external,
 76 |             assembly=assembly,
 77 |         )
 78 |     )
 79 |     typer.echo(result)
 80 | 
 81 | 
 82 | @variant_app.command("search")
 83 | def search_variant_cmd(
 84 |     gene: Annotated[
 85 |         str | None,
 86 |         typer.Option(
 87 |             "--gene",
 88 |             help="Gene symbol (e.g., BRCA1)",
 89 |         ),
 90 |     ] = None,
 91 |     hgvsp: Annotated[
 92 |         str | None,
 93 |         typer.Option(
 94 |             "--hgvsp",
 95 |             help="Protein notation (e.g., p.Val600Glu).",
 96 |         ),
 97 |     ] = None,
 98 |     hgvsc: Annotated[
 99 |         str | None,
100 |         typer.Option(
101 |             "--hgvsc",
102 |             help="cDNA notation (e.g., c.1799T>A).",
103 |         ),
104 |     ] = None,
105 |     rsid: Annotated[
106 |         str | None,
107 |         typer.Option(
108 |             "--rsid",
109 |             help="dbSNP rsID (e.g., rs113488022)",
110 |         ),
111 |     ] = None,
112 |     region: Annotated[
113 |         str | None,
114 |         typer.Option(
115 |             "--region",
116 |             help="Genomic region (e.g., chr1:69000-70000)",
117 |         ),
118 |     ] = None,
119 |     significance: Annotated[
120 |         search.ClinicalSignificance | None,
121 |         typer.Option(
122 |             "--significance",
123 |             help="Clinical significance (e.g., pathogenic, likely benign)",
124 |             case_sensitive=False,
125 |         ),
126 |     ] = None,
127 |     min_frequency: Annotated[
128 |         float | None,
129 |         typer.Option(
130 |             "--min-frequency",
131 |             help="Minimum gnomAD exome allele frequency (0.0 to 1.0)",
132 |             min=0.0,
133 |             max=1.0,
134 |         ),
135 |     ] = None,
136 |     max_frequency: Annotated[
137 |         float | None,
138 |         typer.Option(
139 |             "--max-frequency",
140 |             help="Maximum gnomAD exome allele frequency (0.0 to 1.0)",
141 |             min=0.0,
142 |             max=1.0,
143 |         ),
144 |     ] = None,
145 |     cadd: Annotated[
146 |         float | None,
147 |         typer.Option(
148 |             "--cadd",
149 |             help="Minimum CADD phred score",
150 |             min=0.0,
151 |         ),
152 |     ] = None,
153 |     polyphen: Annotated[
154 |         search.PolyPhenPrediction | None,
155 |         typer.Option(
156 |             "--polyphen",
157 |             help="PolyPhen-2 prediction: Probably damaging = D, "
158 |             "Possibly damaging = P, Benign = B",
159 |             case_sensitive=False,
160 |         ),
161 |     ] = None,
162 |     sift: Annotated[
163 |         search.SiftPrediction | None,
164 |         typer.Option(
165 |             "--sift",
166 |             help="SIFT prediction: D = Deleterious, T = Tolerated",
167 |             case_sensitive=False,
168 |         ),
169 |     ] = None,
170 |     size: Annotated[
171 |         int,
172 |         typer.Option(
173 |             "--size",
174 |             help="Maximum number of results to return",
175 |             min=1,
176 |             max=100,
177 |         ),
178 |     ] = SYSTEM_PAGE_SIZE,
179 |     sources: Annotated[
180 |         str | None,
181 |         typer.Option(
182 |             "--sources",
183 |             help="Specific sources to include in results (comma-separated)",
184 |         ),
185 |     ] = None,
186 |     output_json: Annotated[
187 |         bool,
188 |         typer.Option(
189 |             "--json",
190 |             "-j",
191 |             help="Render in JSON format",
192 |             case_sensitive=False,
193 |         ),
194 |     ] = False,
195 | ):
196 |     query = search.VariantQuery(
197 |         gene=gene,
198 |         hgvsp=hgvsp,
199 |         hgvsc=hgvsc,
200 |         rsid=rsid,
201 |         region=region,
202 |         significance=significance,
203 |         min_frequency=min_frequency,
204 |         max_frequency=max_frequency,
205 |         cadd=cadd,
206 |         polyphen=polyphen,
207 |         sift=sift,
208 |         size=size,
209 |         sources=sources.split(",") if sources else [],
210 |     )
211 | 
212 |     result = asyncio.run(search.search_variants(query, output_json))
213 |     typer.echo(result)
214 | 
215 | 
216 | @variant_app.command("predict")
217 | def predict_variant_effects(
218 |     chromosome: Annotated[
219 |         str,
220 |         typer.Argument(help="Chromosome (e.g., chr7, chrX)"),
221 |     ],
222 |     position: Annotated[
223 |         int,
224 |         typer.Argument(help="1-based genomic position"),
225 |     ],
226 |     reference: Annotated[
227 |         str,
228 |         typer.Argument(help="Reference allele(s) (e.g., A, ATG)"),
229 |     ],
230 |     alternate: Annotated[
231 |         str,
232 |         typer.Argument(help="Alternate allele(s) (e.g., T, A)"),
233 |     ],
234 |     interval_size: Annotated[
235 |         int,
236 |         typer.Option(
237 |             "--interval",
238 |             "-i",
239 |             help="Analysis interval size in bp (max 1000000)",
240 |             min=2000,
241 |             max=1000000,
242 |         ),
243 |     ] = 131072,
244 |     tissue: Annotated[
245 |         list[str] | None,
246 |         typer.Option(
247 |             "--tissue",
248 |             "-t",
249 |             help="UBERON ontology terms for tissue-specific predictions",
250 |         ),
251 |     ] = None,
252 |     threshold: Annotated[
253 |         float,
254 |         typer.Option(
255 |             "--threshold",
256 |             help="Significance threshold for log2 fold changes",
257 |             min=0.0,
258 |             max=5.0,
259 |         ),
260 |     ] = 0.5,
261 |     api_key: Annotated[
262 |         str | None,
263 |         typer.Option(
264 |             "--api-key",
265 |             help="AlphaGenome API key (overrides ALPHAGENOME_API_KEY env var)",
266 |             envvar="ALPHAGENOME_API_KEY",
267 |         ),
268 |     ] = None,
269 | ):
270 |     """
271 |     Predict variant effects using Google DeepMind's AlphaGenome:\n
272 |     - Gene expression changes\n
273 |     - Chromatin accessibility\n
274 |     - Splicing alterations\n
275 |     - Promoter activity\n
276 |     \n
277 |     Requires AlphaGenome API key via --api-key or ALPHAGENOME_API_KEY env var.\n
278 |     \n
279 |     Examples:
280 |     \n\t# Predict BRAF V600E mutation
281 |     \n\tbiomcp variant predict chr7 140753336 A T
282 |     \n
283 |     \n\t# With API key specified
284 |     \n\tbiomcp variant predict chr7 140753336 A T --api-key YOUR_KEY
285 |     \n
286 |     \n\t# With tissue-specific predictions
287 |     \n\tbiomcp variant predict chr7 140753336 A T --tissue UBERON:0002367
288 |     \n
289 |     \n\t# With larger analysis interval
290 |     \n\tbiomcp variant predict chr7 140753336 A T --interval 500000
291 |     """
292 |     from ..variants.alphagenome import predict_variant_effects
293 | 
294 |     result = asyncio.run(
295 |         predict_variant_effects(
296 |             chromosome=chromosome,
297 |             position=position,
298 |             reference=reference,
299 |             alternate=alternate,
300 |             interval_size=interval_size,
301 |             tissue_types=tissue,
302 |             significance_threshold=threshold,
303 |             api_key=api_key,
304 |         )
305 |     )
306 |     typer.echo(result)
307 | 
```

--------------------------------------------------------------------------------
/tests/integration/test_variants_integration.py:
--------------------------------------------------------------------------------

```python
  1 | """Integration tests for external variant data sources."""
  2 | 
  3 | import asyncio
  4 | 
  5 | import pytest
  6 | 
  7 | from biomcp.variants.external import (
  8 |     ExternalVariantAggregator,
  9 |     TCGAClient,
 10 |     ThousandGenomesClient,
 11 | )
 12 | from biomcp.variants.getter import get_variant
 13 | 
 14 | 
 15 | class TestTCGAIntegration:
 16 |     """Integration tests for TCGA/GDC API."""
 17 | 
 18 |     @pytest.mark.asyncio
 19 |     async def test_tcga_real_variant(self):
 20 |         """Test real TCGA API with known variant."""
 21 |         client = TCGAClient()
 22 | 
 23 |         # Try with BRAF V600E - a well-known cancer mutation
 24 |         # TCGA can search by gene AA change format
 25 |         result = await client.get_variant_data("BRAF V600E")
 26 | 
 27 |         print(f"TCGA result: {result}")
 28 | 
 29 |         if result:
 30 |             print(f"COSMIC ID: {result.cosmic_id}")
 31 |             print(f"Tumor types: {result.tumor_types}")
 32 |             print(f"Affected cases: {result.affected_cases}")
 33 |             print(f"Consequence: {result.consequence_type}")
 34 |         else:
 35 |             print("No TCGA data found for this variant")
 36 | 
 37 | 
 38 | class TestThousandGenomesIntegration:
 39 |     """Integration tests for 1000 Genomes via Ensembl."""
 40 | 
 41 |     @pytest.mark.asyncio
 42 |     async def test_1000g_real_variant(self):
 43 |         """Test real 1000 Genomes API with known variant."""
 44 |         client = ThousandGenomesClient()
 45 | 
 46 |         # Try with a known rsID
 47 |         result = await client.get_variant_data("rs7412")  # APOE variant
 48 | 
 49 |         print(f"1000 Genomes result: {result}")
 50 | 
 51 |         if result:
 52 |             print(f"Global MAF: {result.global_maf}")
 53 |             print(f"EUR MAF: {result.eur_maf}")
 54 |             print(f"AFR MAF: {result.afr_maf}")
 55 |             print(f"Consequence: {result.most_severe_consequence}")
 56 |             print(f"Ancestral allele: {result.ancestral_allele}")
 57 | 
 58 |             # This variant should have frequency data
 59 |             assert result.global_maf is not None
 60 |         else:
 61 |             print("No 1000 Genomes data found")
 62 | 
 63 | 
 64 | class TestExternalAggregatorIntegration:
 65 |     """Integration tests for the aggregator."""
 66 | 
 67 |     @pytest.mark.asyncio
 68 |     async def test_aggregator_basic(self):
 69 |         """Test aggregator with basic functionality."""
 70 |         aggregator = ExternalVariantAggregator()
 71 | 
 72 |         # Test with a known variant
 73 |         result = await aggregator.get_enhanced_annotations(
 74 |             "rs7412",  # APOE variant
 75 |             include_tcga=True,
 76 |             include_1000g=True,
 77 |         )
 78 | 
 79 |         print(f"Variant ID: {result.variant_id}")
 80 |         print(f"TCGA data: {'Present' if result.tcga else 'Not found'}")
 81 |         print(
 82 |             f"1000G data: {'Present' if result.thousand_genomes else 'Not found'}"
 83 |         )
 84 |         print(f"Errors: {result.error_sources}")
 85 | 
 86 |         # The result should carry the requested variant ID
 87 |         assert result.variant_id == "rs7412"
 88 | 
 89 |     @pytest.mark.asyncio
 90 |     async def test_aggregator_partial_failures(self):
 91 |         """Test aggregator handles partial failures gracefully."""
 92 |         aggregator = ExternalVariantAggregator()
 93 | 
 94 |         # Use a variant that might not be in all databases
 95 |         result = await aggregator.get_enhanced_annotations(
 96 |             "chr1:g.12345678A>G",  # Arbitrary variant
 97 |             include_tcga=True,
 98 |             include_1000g=True,
 99 |         )
100 | 
101 |         print("Results for arbitrary variant:")
102 |         print(f"- TCGA: {'Found' if result.tcga else 'Not found'}")
103 |         print(
104 |             f"- 1000G: {'Found' if result.thousand_genomes else 'Not found'}"
105 |         )
106 |         print(f"- Errors: {result.error_sources}")
107 | 
108 |         # Should complete without crashing
109 |         assert result.variant_id == "chr1:g.12345678A>G"
110 | 
111 | 
112 | class TestAssemblyParameter:
113 |     """Integration tests for assembly parameter."""
114 | 
115 |     @pytest.mark.integration
116 |     @pytest.mark.asyncio
117 |     async def test_get_variant_hg19_assembly(self):
118 |         """Test get_variant with hg19 assembly on real API."""
119 |         # Use a well-known variant: BRAF V600E
120 |         variant_id = "rs113488022"
121 | 
122 |         result = await get_variant(
123 |             variant_id,
124 |             output_json=True,
125 |             include_external=False,
126 |             assembly="hg19",
127 |         )
128 | 
129 |         # Should return valid JSON
130 |         assert result is not None
131 |         assert len(result) > 0
132 | 
133 |         # Parse and check for hg19 data
134 |         import json
135 | 
136 |         data = json.loads(result)
137 |         if data and len(data) > 0:
138 |             variant_data = data[0]
139 |             # BRAF V600E should have hg19 coordinates
140 |             if "hg19" in variant_data:
141 |                 print(f"hg19 coordinates: {variant_data['hg19']}")
142 |                 assert "start" in variant_data["hg19"]
143 |                 assert "end" in variant_data["hg19"]
144 |             else:
145 |                 pytest.skip("hg19 data not available in API response")
146 |         else:
147 |             pytest.skip("No data returned from API")
148 | 
149 |     @pytest.mark.integration
150 |     @pytest.mark.asyncio
151 |     async def test_get_variant_hg38_assembly(self):
152 |         """Test get_variant with hg38 assembly on real API."""
153 |         # Use the same variant but request hg38
154 |         variant_id = "rs113488022"
155 | 
156 |         result = await get_variant(
157 |             variant_id,
158 |             output_json=True,
159 |             include_external=False,
160 |             assembly="hg38",
161 |         )
162 | 
163 |         # Should return valid JSON
164 |         assert result is not None
165 |         assert len(result) > 0
166 | 
167 |         # Parse and check for hg38 data
168 |         import json
169 | 
170 |         data = json.loads(result)
171 |         if data and len(data) > 0:
172 |             variant_data = data[0]
173 |             # Should have hg38 coordinates
174 |             if "hg38" in variant_data:
175 |                 print(f"hg38 coordinates: {variant_data['hg38']}")
176 |                 assert "start" in variant_data["hg38"]
177 |                 assert "end" in variant_data["hg38"]
178 |             else:
179 |                 pytest.skip("hg38 data not available in API response")
180 |         else:
181 |             pytest.skip("No data returned from API")
182 | 
183 |     @pytest.mark.integration
184 |     @pytest.mark.asyncio
185 |     async def test_assembly_coordinate_differences(self):
186 |         """Test that hg19 and hg38 return different coordinates for same variant."""
187 |         variant_id = "rs113488022"  # BRAF V600E
188 | 
189 |         # Get both assemblies
190 |         result_hg19 = await get_variant(
191 |             variant_id,
192 |             output_json=True,
193 |             include_external=False,
194 |             assembly="hg19",
195 |         )
196 | 
197 |         result_hg38 = await get_variant(
198 |             variant_id,
199 |             output_json=True,
200 |             include_external=False,
201 |             assembly="hg38",
202 |         )
203 | 
204 |         import json
205 | 
206 |         data_hg19 = json.loads(result_hg19)
207 |         data_hg38 = json.loads(result_hg38)
208 | 
209 |         # Both should return data
210 |         if not data_hg19 or not data_hg38:
211 |             pytest.skip("API did not return data for both assemblies")
212 | 
213 |         # Compare coordinates if available
214 |         if len(data_hg19) > 0 and len(data_hg38) > 0:
215 |             v19 = data_hg19[0]
216 |             v38 = data_hg38[0]
217 | 
218 |             # BRAF V600E has different coordinates in hg19 vs hg38
219 |             # hg19: chr7:140453136
220 |             # hg38: chr7:140753336
221 |             if "hg19" in v19 and "hg38" in v38:
222 |                 print(f"hg19 start: {v19['hg19']['start']}")
223 |                 print(f"hg38 start: {v38['hg38']['start']}")
224 | 
225 |                 # Coordinates should differ (the BRAF position is not the same in hg19 and hg38)
226 |                 assert v19["hg19"]["start"] != v38["hg38"]["start"]
227 |             else:
228 |                 pytest.skip("Assembly-specific coordinates not in response")
229 | 
230 | 
231 | if __name__ == "__main__":
232 |     print("Testing TCGA/GDC...")
233 |     asyncio.run(TestTCGAIntegration().test_tcga_real_variant())
234 | 
235 |     print("\n" + "=" * 50 + "\n")
236 |     print("Testing 1000 Genomes...")
237 |     asyncio.run(TestThousandGenomesIntegration().test_1000g_real_variant())
238 | 
239 |     print("\n" + "=" * 50 + "\n")
240 |     print("Testing aggregator...")
241 |     asyncio.run(TestExternalAggregatorIntegration().test_aggregator_basic())
242 | 
243 |     print("\n" + "=" * 50 + "\n")
244 |     print("Testing aggregator with partial failures...")
245 |     asyncio.run(
246 |         TestExternalAggregatorIntegration().test_aggregator_partial_failures()
247 |     )
248 | 
249 |     print("\n" + "=" * 50 + "\n")
250 |     print("Testing assembly parameter...")
251 |     asyncio.run(TestAssemblyParameter().test_get_variant_hg19_assembly())
252 |     asyncio.run(TestAssemblyParameter().test_get_variant_hg38_assembly())
253 |     asyncio.run(TestAssemblyParameter().test_assembly_coordinate_differences())
254 | 
```

--------------------------------------------------------------------------------
/tests/tdd/trials/test_backward_compatibility.py:
--------------------------------------------------------------------------------

```python
  1 | """Test backward compatibility for trial search and getter functions."""
  2 | 
  3 | from unittest.mock import patch
  4 | 
  5 | import pytest
  6 | 
  7 | from biomcp.trials.getter import Module, get_trial, get_trial_unified
  8 | from biomcp.trials.search import (
  9 |     TrialQuery,
 10 |     search_trials,
 11 |     search_trials_unified,
 12 | )
 13 | 
 14 | 
 15 | class TestTrialSearchBackwardCompatibility:
 16 |     """Test that existing trial search functionality remains unchanged."""
 17 | 
 18 |     @pytest.mark.asyncio
 19 |     async def test_search_trials_defaults_to_clinicaltrials(self):
 20 |         """Test that search_trials still defaults to ClinicalTrials.gov."""
 21 |         query = TrialQuery(conditions=["diabetes"])
 22 | 
 23 |         with patch("biomcp.http_client.request_api") as mock_request:
 24 |             mock_request.return_value = (
 25 |                 {
 26 |                     "studies": [
 27 |                         {
 28 |                             "protocolSection": {
 29 |                                 "identificationModule": {"nctId": "NCT12345"}
 30 |                             }
 31 |                         }
 32 |                     ]
 33 |                 },
 34 |                 None,
 35 |             )
 36 | 
 37 |             await search_trials(query, output_json=True)
 38 | 
 39 |             # Verify it called the ClinicalTrials.gov API
 40 |             assert mock_request.called
 41 |             call_args = mock_request.call_args
 42 |             # Check the URL argument
 43 |             url_arg = call_args.kwargs.get("url")
 44 |             assert url_arg is not None
 45 |             assert "clinicaltrials.gov" in url_arg
 46 | 
 47 |     @pytest.mark.asyncio
 48 |     async def test_search_trials_no_source_parameter(self):
 49 |         """Test that search_trials function signature hasn't changed."""
 50 |         # This test ensures the function can still be called without source
 51 |         query = TrialQuery(conditions=["cancer"])
 52 | 
 53 |         with patch("biomcp.http_client.request_api") as mock_request:
 54 |             mock_request.return_value = ({"studies": []}, None)
 55 | 
 56 |             # Should not raise TypeError when called without a source argument
 57 |             await search_trials(query)
 58 |             assert mock_request.called
 59 | 
 60 |     @pytest.mark.asyncio
 61 |     async def test_search_trials_unified_with_source(self):
 62 |         """Test unified function supports source parameter."""
 63 |         query = TrialQuery(conditions=["melanoma"])
 64 | 
 65 |         # Test with ClinicalTrials.gov
 66 |         with patch("biomcp.trials.search.search_trials") as mock_ct:
 67 |             mock_ct.return_value = "CT results"
 68 | 
 69 |             result = await search_trials_unified(
 70 |                 query, source="clinicaltrials"
 71 |             )
 72 |             assert result == "CT results"
 73 |             mock_ct.assert_called_once_with(query, False)
 74 | 
 75 |         # Test with NCI
 76 |         with (
 77 |             patch("biomcp.trials.nci_search.search_trials_nci") as mock_nci,
 78 |             patch(
 79 |                 "biomcp.trials.nci_search.format_nci_trial_results"
 80 |             ) as mock_format,
 81 |         ):
 82 |             mock_nci.return_value = {"source": "nci", "trials": []}
 83 |             mock_format.return_value = "NCI formatted results"
 84 | 
 85 |             result = await search_trials_unified(
 86 |                 query, source="nci", api_key="test-key"
 87 |             )
 88 |             assert result == "NCI formatted results"
 89 |             mock_nci.assert_called_once_with(query, "test-key")
 90 | 
 91 | 
 92 | class TestTrialGetterBackwardCompatibility:
 93 |     """Test that existing trial getter functionality remains unchanged."""
 94 | 
 95 |     @pytest.mark.asyncio
 96 |     async def test_get_trial_defaults_to_clinicaltrials(self):
 97 |         """Test that get_trial still defaults to ClinicalTrials.gov."""
 98 |         with patch("biomcp.http_client.request_api") as mock_request:
 99 |             mock_request.return_value = (
100 |                 {
101 |                     "protocolSection": {
102 |                         "identificationModule": {"nctId": "NCT12345"}
103 |                     }
104 |                 },
105 |                 None,
106 |             )
107 | 
108 |             await get_trial("NCT12345", Module.PROTOCOL)
109 | 
110 |             # Verify it called the ClinicalTrials.gov API
111 |             assert mock_request.called
112 |             call_args = mock_request.call_args
113 |             # Check the URL argument
114 |             url_arg = call_args.kwargs.get("url")
115 |             assert url_arg is not None
116 |             assert "clinicaltrials.gov" in url_arg
117 |             # NCT ID would be in the request params, not the URL
118 | 
119 |     @pytest.mark.asyncio
120 |     async def test_get_trial_no_source_parameter(self):
121 |         """Test that get_trial function signature hasn't changed."""
122 |         with patch("biomcp.http_client.request_api") as mock_request:
123 |             mock_request.return_value = (
124 |                 {
125 |                     "protocolSection": {
126 |                         "identificationModule": {"nctId": "NCT99999"}
127 |                     }
128 |                 },
129 |                 None,
130 |             )
131 | 
132 |             # Should not raise TypeError about unexpected keyword argument
133 |             await get_trial("NCT99999", Module.PROTOCOL, output_json=True)
134 |             assert mock_request.called
135 | 
136 |     @pytest.mark.asyncio
137 |     async def test_get_trial_unified_with_source(self):
138 |         """Test unified function supports source parameter."""
139 |         # Test with ClinicalTrials.gov - uses private functions
140 |         with patch("biomcp.trials.getter._trial_protocol") as mock_protocol:
141 |             mock_protocol.return_value = "CT trial details"
142 | 
143 |             result = await get_trial_unified(
144 |                 "NCT12345", source="clinicaltrials", sections=["protocol"]
145 |             )
146 |             assert result == "CT trial details"
147 |             mock_protocol.assert_called_once_with(
148 |                 nct_id="NCT12345",
149 |                 call_benefit="Getting protocol information for trial NCT12345",
150 |             )
151 | 
152 |         # Test with NCI
153 |         with (
154 |             patch("biomcp.trials.nci_getter.get_trial_nci") as mock_nci,
155 |             patch(
156 |                 "biomcp.trials.nci_getter.format_nci_trial_details"
157 |             ) as mock_format,
158 |         ):
159 |             mock_nci.return_value = {"nct_id": "NCT12345", "source": "nci"}
160 |             mock_format.return_value = "NCI formatted trial"
161 | 
162 |             result = await get_trial_unified(
163 |                 "NCT12345", source="nci", api_key="test-key"
164 |             )
165 |             assert result == "NCI formatted trial"
166 |             mock_nci.assert_called_once_with("NCT12345", "test-key")
167 | 
168 |     @pytest.mark.asyncio
169 |     async def test_get_trial_all_modules_still_work(self):
170 |         """Test that all existing Module options still work."""
171 |         modules_to_test = [
172 |             Module.PROTOCOL,
173 |             Module.LOCATIONS,
174 |             Module.REFERENCES,
175 |             Module.OUTCOMES,
176 |         ]
177 | 
178 |         for module in modules_to_test:
179 |             with patch("biomcp.http_client.request_api") as mock_request:
180 |                 mock_request.return_value = (
181 |                     {
182 |                         "protocolSection": {
183 |                             "identificationModule": {"nctId": "NCT12345"}
184 |                         }
185 |                     },
186 |                     None,
187 |                 )
188 | 
189 |                 await get_trial("NCT12345", module)
190 |             assert mock_request.called
191 |             # Reset for next iteration
192 |             mock_request.reset_mock()
193 | 
194 | 
195 | class TestCLIBackwardCompatibility:
196 |     """Test that CLI commands maintain backward compatibility."""
197 | 
198 |     def test_cli_imports_exist(self):
199 |         """Test that CLI still imports the expected functions."""
200 |         # These imports should not raise ImportError
201 |         from biomcp.cli.trials import get_trial_cli, search_trials_cli
202 | 
203 |         assert search_trials_cli is not None
204 |         assert get_trial_cli is not None
205 | 
206 |     def test_search_defaults_without_source(self):
207 |         """Test CLI search works without source parameter."""
208 |         from typer.testing import CliRunner
209 | 
210 |         from biomcp.cli.main import app
211 | 
212 |         runner = CliRunner()
213 | 
214 |         with patch("biomcp.cli.trials.asyncio.run") as mock_run:
215 |             mock_run.return_value = None
216 | 
217 |             # Run CLI command without --source
218 |             result = runner.invoke(
219 |                 app, ["trial", "search", "--condition", "diabetes"]
220 |             )
221 | 
222 |             # Should succeed
223 |             assert result.exit_code == 0
224 | 
225 |             # Verify asyncio.run was called with the right function
226 |             mock_run.assert_called()
227 |             args = mock_run.call_args[0][0]
228 |             # Sanity check only: the object passed to asyncio.run exposes __name__ or func
229 |             assert hasattr(args, "__name__") or hasattr(args, "func")
230 | 
```

--------------------------------------------------------------------------------
/docs/reference/architecture-diagrams.md:
--------------------------------------------------------------------------------

```markdown
  1 | # BioMCP Architecture Diagrams
  2 | 
  3 | This page describes BioMCP's architecture, data flows, and workflows.
  4 | 
  5 | ## System Architecture Overview
  6 | 
  7 | BioMCP consists of three main layers:
  8 | 
  9 | ### Client Layer
 10 | 
 11 | - **CLI Interface**: Command-line tool for direct interaction
 12 | - **Claude Desktop**: AI assistant integration via the Model Context Protocol (MCP)
 13 | - **Python SDK**: Programmatic access for custom applications
 14 | - **Custom MCP Clients**: Any MCP-compatible client
 15 | 
 16 | ### BioMCP Core
 17 | 
 18 | - **MCP Server**: Handles protocol communication
 19 | - **Request Router**: Directs queries to appropriate handlers
 20 | - **Cache Layer**: Intelligent caching for API responses
 21 | - **Domain Handlers**: Specialized processors for each data type
 22 |   - Articles Handler (PubMed/PubTator3)
 23 |   - Trials Handler (ClinicalTrials.gov, NCI)
 24 |   - Variants Handler (MyVariant.info)
 25 |   - Genes Handler (MyGene.info)
 26 | 
 27 | ### External APIs
 28 | 
 29 | - **PubMed/PubTator3**: Biomedical literature
 30 | - **ClinicalTrials.gov**: US clinical trials registry
 31 | - **NCI CTS API**: National Cancer Institute trials
 32 | - **MyVariant.info**: Genetic variant annotations
 33 | - **MyGene.info**: Gene information
 34 | - **cBioPortal**: Cancer genomics data
 35 | - **AlphaGenome**: Variant effect predictions
 36 | 
 37 | ## Data Flow Architecture
 38 | 
 39 | 1. **User Request**: Query submitted via CLI, Claude, or SDK
 40 | 2. **Cache Check**: System checks for cached results
 41 | 3. **API Request**: If cache miss, fetch from external API
 42 | 4. **Result Processing**: Normalize and enrich data
 43 | 5. **Cache Storage**: Store results for future use
 44 | 6. **Response Delivery**: Return formatted results to user
 45 | 
 46 | ## Key Workflows
 47 | 
 48 | ### Search Workflow
 49 | 
 50 | 1. **Think Tool**: Plan search strategy
 51 | 2. **Execute Search**: Query relevant data sources
 52 | 3. **Enrich Results**: Add contextual information
 53 | 4. **Combine Data**: Merge results from multiple sources
 54 | 5. **Format Output**: Present in user-friendly format
 55 | 
 56 | ### Article Search Pipeline
 57 | 
 58 | 1. **Query Processing**: Parse user input
 59 | 2. **Entity Recognition**: Normalize gene/disease names
 60 | 3. **PubTator3 Search**: Query literature database
 61 | 4. **Preprint Integration**: Include bioRxiv/medRxiv if enabled
 62 | 5. **cBioPortal Enrichment**: Add cancer genomics data for genes
 63 | 6. **Result Merging**: Combine all data sources
 64 | 
 65 | ### Clinical Trial Matching
 66 | 
 67 | 1. **Patient Profile**: Parse eligibility criteria
 68 | 2. **Location Filter**: Geographic constraints
 69 | 3. **Molecular Profile**: Mutation requirements
 70 | 4. **Prior Treatments**: Treatment history matching
 71 | 5. **Scoring Algorithm**: Rank trials by relevance
 72 | 6. **Contact Extraction**: Retrieve site information
 73 | 
 74 | ### Variant Interpretation
 75 | 
 76 | 1. **Input Parsing**: Process VCF/MAF files
 77 | 2. **Batch Processing**: Group variants efficiently
 78 | 3. **Annotation Gathering**:
 79 |    - Clinical significance from MyVariant.info
 80 |    - Population frequency data
 81 |    - In silico predictions
 82 |    - Literature evidence
 83 |    - Clinical trial associations
 84 | 4. **AlphaGenome Integration**: Regulatory predictions (optional)
 85 | 5. **Tier Classification**: Categorize by clinical relevance
 86 | 6. **Report Generation**: Create interpretation summary
 87 | 
 88 | ## Architecture Patterns
 89 | 
 90 | ### Caching Strategy
 91 | 
 92 | - **Multi-tier Cache**: Memory → Disk → External
 93 | - **Smart TTL**: Domain-specific expiration times
 94 | - **Cache Key Generation**: Include all query parameters
 95 | - **Invalidation Logic**: Clear on errors or updates
 96 | 
 97 | ### Error Handling
 98 | 
 99 | - **Retry Logic**: Exponential backoff for transient errors
100 | - **Rate Limiting**: Respect API limits with queuing
101 | - **Graceful Degradation**: Return partial results when possible
102 | - **Clear Error Messages**: Help users troubleshoot issues
103 | 
104 | ### Authentication Flow
105 | 
106 | 1. Check for user-provided API key
107 | 2. Fall back to environment variable
108 | 3. Use public access if no key available
109 | 4. Handle authentication errors gracefully
110 | 
111 | ### Performance Optimization
112 | 
113 | - **Request Batching**: Combine multiple queries
114 | - **Parallel Execution**: Concurrent API calls
115 | - **Connection Pooling**: Reuse HTTP connections
116 | - **Result Streaming**: Return data as available
117 | 
118 | ## Deployment Options
119 | 
120 | ### Local Development
121 | 
122 | - Single process with in-memory cache
123 | - Direct file system access
124 | - Simple configuration
125 | 
126 | ### Docker Deployment
127 | 
128 | - Containerized application
129 | - Volume-mounted cache
130 | - Environment-based configuration
131 | 
132 | ### Cloud Deployment
133 | 
134 | - Load-balanced instances
135 | - Shared Redis cache
136 | - Auto-scaling capabilities
137 | - Monitoring integration
138 | 
139 | ## Creating Documentation Diagrams
140 | 
141 | For visual diagrams, we recommend:
142 | 
143 | 1. **ASCII Art**: Universal compatibility
144 | 
145 |    - Use tools like asciiflow.com
146 |    - Store in `docs/assets/` directory
147 | 
148 | 2. **Screenshots**: For complex UIs
149 | 
150 |    - Annotate with arrows/labels
151 |    - Save as PNG in `docs/assets/`
152 | 
153 | 3. **External Tools**:
154 |    - draw.io for flowcharts
155 |    - Lucidchart for professional diagrams
156 |    - Export as static images
157 | 
158 | ## ASCII System Architecture
159 | 
160 | ```
161 | ┌─────────────────────────────────────────────────────────────────────────┐
162 | │                              USER INTERFACES                             │
163 | ├────────────────┬───────────────────┬───────────────┬───────────────────┤
164 | │                │                   │               │                   │
165 | │   CLI Tool     │  Claude Desktop   │  Python SDK   │   Custom Client   │
166 | │  (biomcp)      │   (MCP Client)    │   (async)     │    (your app)     │
167 | │                │                   │               │                   │
168 | └────────┬───────┴─────────┬─────────┴───────┬───────┴───────────┬───────┘
169 |          │                 │                 │                   │
170 |          └─────────────────┴─────────────────┴───────────────────┘
171 |                                     │
172 |                                     ▼
173 | ┌─────────────────────────────────────────────────────────────────────────┐
174 | │                            BioMCP CORE SERVER                            │
175 | ├─────────────────────────────────────────────────────────────────────────┤
176 | │                                                                         │
177 | │  ┌─────────────┐  ┌──────────────┐  ┌──────────────┐  ┌────────────┐  │
178 | │  │   Router    │  │ Rate Limiter │  │ Cache Manager│  │   Logger   │  │
179 | │  │             │  │              │  │              │  │            │  │
180 | │  └──────┬──────┘  └──────────────┘  └──────────────┘  └────────────┘  │
181 | │         │                                                               │
182 | │         ▼                                                               │
183 | │  ┌─────────────────────────────────────────────────────────────────┐   │
184 | │  │                      Domain Handlers                             │   │
185 | │  ├─────────────┬─────────────┬─────────────┬──────────────────────┤   │
186 | │  │  Articles   │   Trials    │  Variants   │  Genes/Drugs/Disease │   │
187 | │  │  Handler    │   Handler   │  Handler    │      Handler         │   │
188 | │  └──────┬──────┴──────┬──────┴──────┬──────┴──────────┬───────────┘   │
189 | │         │             │             │                 │                 │
190 | └─────────┼─────────────┼─────────────┼─────────────────┼─────────────────┘
191 |           │             │             │                 │
192 |           ▼             ▼             ▼                 ▼
193 | ┌─────────────────────────────────────────────────────────────────────────┐
194 | │                          EXTERNAL DATA SOURCES                           │
195 | ├─────────────┬─────────────┬─────────────┬──────────────────────────────┤
196 | │             │             │             │                              │
197 | │  PubMed/    │ Clinical    │ MyVariant   │        BioThings Suite       │
198 | │  PubTator3  │ Trials.gov  │   .info     │  (MyGene/MyDisease/MyChem)  │
199 | │             │    + NCI    │             │                              │
200 | │             │             │             │                              │
201 | ├─────────────┴─────────────┴─────────────┴──────────────────────────────┤
202 | │                                                                         │
203 | │  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐                 │
204 | │  │  cBioPortal  │  │  AlphaGenome │  │  Europe PMC  │                 │
205 | │  │   (Cancer)   │  │ (Predictions)│  │  (Preprints) │                 │
206 | │  └──────────────┘  └──────────────┘  └──────────────┘                 │
207 | │                                                                         │
208 | └─────────────────────────────────────────────────────────────────────────┘
209 | ```
210 | 
211 | See also: [Quick Architecture Reference](quick-architecture.md)
212 | 
213 | ## Next Steps
214 | 
215 | - View the [Quick Architecture Guide](quick-architecture.md) for a concise overview
216 | - Check [Developer Guides](../developer-guides/01-server-deployment.md) for implementation details
217 | - See [API Reference](../apis/overview.md) for detailed specifications
218 | 
```

--------------------------------------------------------------------------------
/tests/tdd/test_circuit_breaker.py:
--------------------------------------------------------------------------------

```python
  1 | """Tests for circuit breaker pattern."""
  2 | 
  3 | import asyncio
  4 | 
  5 | import pytest
  6 | 
  7 | from biomcp.circuit_breaker import (
  8 |     CircuitBreaker,
  9 |     CircuitBreakerConfig,
 10 |     CircuitBreakerError,
 11 |     CircuitState,
 12 |     circuit_breaker,
 13 |     get_circuit_breaker,
 14 | )
 15 | 
 16 | 
 17 | class CircuitBreakerTestException(Exception):
 18 |     """Failure raised on purpose by the test callables to simulate a failing call."""
 19 | 
 20 |     pass
 21 | 
 22 | 
 23 | class IgnoredException(Exception):
 24 |     """Exception passed via ``exclude_exceptions``; it must not count as a failure."""
 25 | 
 26 |     pass
 27 | 
 28 | 
 29 | @pytest.mark.asyncio
 30 | async def test_circuit_breaker_closed_state():
 31 |     """Test circuit breaker in closed state allows calls."""
 32 |     breaker = CircuitBreaker("test_closed")
 33 |     call_count = 0
 34 | 
 35 |     async def test_func():
 36 |         nonlocal call_count
 37 |         call_count += 1
 38 |         return "success"
 39 | 
 40 |     # Should allow calls in closed state
 41 |     assert breaker.is_closed
 42 |     result = await breaker.call(test_func)
 43 |     assert result == "success"
 44 |     assert call_count == 1
 45 | 
 46 | 
 47 | @pytest.mark.asyncio
 48 | async def test_circuit_breaker_opens_on_threshold():
 49 |     """Test circuit breaker opens after failure threshold."""
 50 |     config = CircuitBreakerConfig(
 51 |         failure_threshold=3,
 52 |         expected_exception=CircuitBreakerTestException,
 53 |     )
 54 |     breaker = CircuitBreaker("test_threshold", config)
 55 | 
 56 |     async def failing_func():
 57 |         raise CircuitBreakerTestException("Test failure")
 58 | 
 59 |     # First 2 failures should pass through
 60 |     for _i in range(2):
 61 |         with pytest.raises(CircuitBreakerTestException):
 62 |             await breaker.call(failing_func)
 63 |         assert breaker.is_closed
 64 | 
 65 |     # Third failure should open the circuit
 66 |     with pytest.raises(CircuitBreakerTestException):
 67 |         await breaker.call(failing_func)
 68 |     assert breaker.is_open
 69 | 
 70 |     # Subsequent calls should fail fast
 71 |     with pytest.raises(CircuitBreakerError):
 72 |         await breaker.call(failing_func)
 73 | 
 74 | 
 75 | @pytest.mark.asyncio
 76 | async def test_circuit_breaker_half_open_recovery():
 77 |     """Test circuit breaker recovery through half-open state."""
 78 |     config = CircuitBreakerConfig(
 79 |         failure_threshold=2,
 80 |         recovery_timeout=0.1,  # 100ms for testing
 81 |         success_threshold=2,
 82 |     )
 83 |     breaker = CircuitBreaker("test_recovery", config)
 84 | 
 85 |     call_count = 0
 86 |     should_fail = True
 87 | 
 88 |     async def test_func():
 89 |         nonlocal call_count
 90 |         call_count += 1
 91 |         if should_fail:
 92 |             raise CircuitBreakerTestException("Failure")
 93 |         return "success"
 94 | 
 95 |     # Open the circuit
 96 |     for _ in range(2):
 97 |         with pytest.raises(CircuitBreakerTestException):
 98 |             await breaker.call(test_func)
 99 |     assert breaker.is_open
100 | 
101 |     # Wait for recovery timeout
102 |     await asyncio.sleep(0.15)
103 | 
104 |     # Next call should attempt (half-open state)
105 |     should_fail = False
106 |     result = await breaker.call(test_func)
107 |     assert result == "success"
108 |     assert breaker.state == CircuitState.HALF_OPEN
109 | 
110 |     # Need one more success to close
111 |     result = await breaker.call(test_func)
112 |     assert result == "success"
113 |     assert breaker.is_closed
114 | 
115 | 
116 | @pytest.mark.asyncio
117 | async def test_circuit_breaker_half_open_failure():
118 |     """Test circuit breaker reopens on failure in half-open state."""
119 |     config = CircuitBreakerConfig(
120 |         failure_threshold=2,
121 |         recovery_timeout=0.1,
122 |     )
123 |     breaker = CircuitBreaker("test_half_open_fail", config)
124 | 
125 |     async def failing_func():
126 |         raise CircuitBreakerTestException("Failure")
127 | 
128 |     # Open the circuit
129 |     for _ in range(2):
130 |         with pytest.raises(CircuitBreakerTestException):
131 |             await breaker.call(failing_func)
132 |     assert breaker.is_open
133 | 
134 |     # Wait for recovery timeout
135 |     await asyncio.sleep(0.15)
136 | 
137 |     # Failure in half-open should reopen immediately
138 |     with pytest.raises(CircuitBreakerTestException):
139 |         await breaker.call(failing_func)
140 |     assert breaker.is_open
141 | 
142 | 
143 | @pytest.mark.asyncio
144 | async def test_circuit_breaker_ignored_exceptions():
145 |     """Test that certain exceptions don't trigger circuit breaker."""
146 |     config = CircuitBreakerConfig(
147 |         failure_threshold=2,
148 |         expected_exception=Exception,
149 |         exclude_exceptions=(IgnoredException,),
150 |     )
151 |     breaker = CircuitBreaker("test_ignored", config)
152 | 
153 |     async def func_with_ignored_exception():
154 |         raise IgnoredException("Should be ignored")
155 | 
156 |     # These exceptions shouldn't count
157 |     for _ in range(5):
158 |         with pytest.raises(IgnoredException):
159 |             await breaker.call(func_with_ignored_exception)
160 |         assert breaker.is_closed
161 | 
162 | 
163 | @pytest.mark.asyncio
164 | async def test_circuit_breaker_reset():
165 |     """Test manual reset of circuit breaker."""
166 |     config = CircuitBreakerConfig(failure_threshold=1)
167 |     breaker = CircuitBreaker("test_reset", config)
168 | 
169 |     async def failing_func():
170 |         raise CircuitBreakerTestException("Failure")
171 | 
172 |     # Open the circuit
173 |     with pytest.raises(CircuitBreakerTestException):
174 |         await breaker.call(failing_func)
175 |     assert breaker.is_open
176 | 
177 |     # Manual reset
178 |     await breaker.reset()
179 |     assert breaker.is_closed
180 | 
181 |     # Should allow calls again
182 |     async def success_func():
183 |         return "success"
184 | 
185 |     result = await breaker.call(success_func)
186 |     assert result == "success"
187 | 
188 | 
189 | @pytest.mark.asyncio
190 | async def test_circuit_breaker_decorator():
191 |     """Test circuit breaker decorator."""
192 |     call_count = 0
193 | 
194 |     @circuit_breaker(
195 |         "test_decorator", CircuitBreakerConfig(failure_threshold=2)
196 |     )
197 |     async def decorated_func(should_fail=False):
198 |         nonlocal call_count
199 |         call_count += 1
200 |         if should_fail:
201 |             raise CircuitBreakerTestException("Failure")
202 |         return "success"
203 | 
204 |     # Success calls
205 |     result = await decorated_func()
206 |     assert result == "success"
207 | 
208 |     # Open circuit with failures
209 |     for _ in range(2):
210 |         with pytest.raises(CircuitBreakerTestException):
211 |             await decorated_func(should_fail=True)
212 | 
213 |     # Circuit should be open
214 |     with pytest.raises(CircuitBreakerError):
215 |         await decorated_func()
216 | 
217 | 
218 | def test_get_circuit_breaker():
219 |     """Test getting circuit breaker from registry."""
220 |     # First call creates breaker
221 |     breaker1 = get_circuit_breaker("test_registry")
222 |     assert breaker1.name == "test_registry"
223 | 
224 |     # Second call returns same instance
225 |     breaker2 = get_circuit_breaker("test_registry")
226 |     assert breaker1 is breaker2
227 | 
228 |     # Different name creates different breaker
229 |     breaker3 = get_circuit_breaker("test_registry_2")
230 |     assert breaker3 is not breaker1
231 | 
232 | 
233 | @pytest.mark.asyncio
234 | async def test_circuit_breaker_concurrent_calls():
235 |     """Test circuit breaker handles concurrent calls correctly."""
236 |     config = CircuitBreakerConfig(
237 |         failure_threshold=5,
238 |         expected_exception=CircuitBreakerTestException,
239 |     )
240 |     breaker = CircuitBreaker("test_concurrent", config)
241 | 
242 |     failure_count = 0
243 | 
244 |     async def failing_func():
245 |         nonlocal failure_count
246 |         failure_count += 1
247 |         if failure_count <= 5:
248 |             raise CircuitBreakerTestException("Failure")
249 |         return "success"
250 | 
251 |     # Run concurrent failing calls
252 |     tasks = []
253 |     for _ in range(10):
254 |         tasks.append(breaker.call(failing_func))
255 | 
256 |     results = await asyncio.gather(*tasks, return_exceptions=True)
257 | 
258 |     # Should have some CircuitBreakerTestExceptions and some CircuitBreakerErrors
259 |     test_exceptions = sum(
260 |         1 for r in results if isinstance(r, CircuitBreakerTestException)
261 |     )
262 |     breaker_errors = sum(
263 |         1 for r in results if isinstance(r, CircuitBreakerError)
264 |     )
265 | 
266 |     # At least failure_threshold CircuitBreakerTestExceptions
267 |     assert test_exceptions >= config.failure_threshold
268 |     # Some calls should have been blocked
269 |     assert breaker_errors > 0
270 |     # Circuit should be open
271 |     assert breaker.is_open
272 | 
273 | 
274 | @pytest.mark.asyncio
275 | async def test_circuit_breaker_success_resets_failures():
276 |     """Test that successes reset failure count in closed state."""
277 |     config = CircuitBreakerConfig(failure_threshold=3)
278 |     breaker = CircuitBreaker("test_success_reset", config)
279 | 
280 |     async def sometimes_failing_func(fail=False):
281 |         if fail:
282 |             raise CircuitBreakerTestException("Failure")
283 |         return "success"
284 | 
285 |     # Two failures
286 |     for _ in range(2):
287 |         with pytest.raises(CircuitBreakerTestException):
288 |             await breaker.call(sometimes_failing_func, fail=True)
289 | 
290 |     # Success should reset failure count
291 |     result = await breaker.call(sometimes_failing_func, fail=False)
292 |     assert result == "success"
293 |     assert breaker.is_closed
294 | 
295 |     # Can now fail 2 more times without opening
296 |     for _ in range(2):
297 |         with pytest.raises(CircuitBreakerTestException):
298 |             await breaker.call(sometimes_failing_func, fail=True)
299 |     assert breaker.is_closed
300 | 
```

--------------------------------------------------------------------------------
/tests/tdd/test_drug_recalls.py:
--------------------------------------------------------------------------------

```python
  1 | """Tests for FDA drug recalls module."""
  2 | 
  3 | import json
  4 | from pathlib import Path
  5 | from unittest.mock import AsyncMock, patch
  6 | 
  7 | import pytest
  8 | 
  9 | from biomcp.openfda.drug_recalls import (
 10 |     get_drug_recall,
 11 |     search_drug_recalls,
 12 | )
 13 | 
 14 | # Load mock data
 15 | MOCK_DIR = Path(__file__).parent.parent / "data" / "openfda"
 16 | MOCK_RECALLS_SEARCH = json.loads(
 17 |     (MOCK_DIR / "enforcement_search.json").read_text()
 18 | )
 19 | MOCK_RECALL_DETAIL = json.loads(
 20 |     (MOCK_DIR / "enforcement_detail.json").read_text()
 21 | )
 22 | 
 23 | 
 24 | class TestDrugRecalls:
 25 |     """Test drug recalls functionality."""
 26 | 
 27 |     @pytest.mark.asyncio
 28 |     async def test_search_drug_recalls_success(self):
 29 |         """Test successful drug recall search."""
 30 |         with patch(
 31 |             "biomcp.openfda.drug_recalls.make_openfda_request",
 32 |             new_callable=AsyncMock,
 33 |         ) as mock_request:
 34 |             mock_request.return_value = (MOCK_RECALLS_SEARCH, None)
 35 | 
 36 |             result = await search_drug_recalls(
 37 |                 drug="valsartan",
 38 |                 limit=10,
 39 |             )
 40 | 
 41 |             assert "Drug Recall" in result or "FDA Drug Recall" in result
 42 |             assert "valsartan" in result.lower()
 43 |             # Check for presence of key recall info
 44 |             assert "Recall" in result or "recall" in result.lower()
 45 |             mock_request.assert_called_once()
 46 | 
 47 |     @pytest.mark.asyncio
 48 |     async def test_search_drug_recalls_with_filters(self):
 49 |         """Test drug recall search with multiple filters."""
 50 |         with patch(
 51 |             "biomcp.openfda.drug_recalls.make_openfda_request",
 52 |             new_callable=AsyncMock,
 53 |         ) as mock_request:
 54 |             mock_request.return_value = (MOCK_RECALLS_SEARCH, None)
 55 | 
 56 |             result = await search_drug_recalls(
 57 |                 drug="metformin",
 58 |                 recall_class="2",
 59 |                 status="ongoing",
 60 |                 reason="contamination",
 61 |                 since_date="20230101",
 62 |                 limit=5,
 63 |                 api_key="test-key",
 64 |             )
 65 | 
 66 |             assert "Drug Recall" in result or "FDA Drug Recall" in result
 67 |             # Verify API key was passed as the 4th positional argument
 68 |             call_args = mock_request.call_args
 69 |             assert (
 70 |                 call_args[0][3] == "test-key"
 71 |             )  # api_key is 4th positional arg
 72 | 
 73 |     @pytest.mark.asyncio
 74 |     async def test_search_drug_recalls_no_results(self):
 75 |         """Test drug recall search with no results."""
 76 |         with patch(
 77 |             "biomcp.openfda.drug_recalls.make_openfda_request",
 78 |             new_callable=AsyncMock,
 79 |         ) as mock_request:
 80 |             mock_request.return_value = ({"results": []}, None)
 81 | 
 82 |             result = await search_drug_recalls(drug="nonexistent-drug")
 83 | 
 84 |             assert "No drug recall records found" in result
 85 | 
 86 |     @pytest.mark.asyncio
 87 |     async def test_search_drug_recalls_api_error(self):
 88 |         """Test drug recall search with API error."""
 89 |         with patch(
 90 |             "biomcp.openfda.drug_recalls.make_openfda_request",
 91 |             new_callable=AsyncMock,
 92 |         ) as mock_request:
 93 |             mock_request.return_value = (None, "API rate limit exceeded")
 94 | 
 95 |             result = await search_drug_recalls(drug="test")
 96 | 
 97 |             assert "Error searching drug recalls" in result
 98 |             assert "API rate limit exceeded" in result
 99 | 
100 |     @pytest.mark.asyncio
101 |     async def test_get_drug_recall_success(self):
102 |         """Test getting specific drug recall details."""
103 |         with patch(
104 |             "biomcp.openfda.drug_recalls.make_openfda_request",
105 |             new_callable=AsyncMock,
106 |         ) as mock_request:
107 |             mock_request.return_value = (MOCK_RECALL_DETAIL, None)
108 | 
109 |             result = await get_drug_recall("D-0001-2023")
110 | 
111 |             assert "Drug Recall" in result or "D-0001-2023" in result
112 |             assert "D-0001-2023" in result
113 |             # Check for key details in the output (formats may vary)
114 |             assert "product" in result.lower() or "valsartan" in result.lower()
115 | 
116 |     @pytest.mark.asyncio
117 |     async def test_get_drug_recall_not_found(self):
118 |         """Test getting drug recall that doesn't exist."""
119 |         with patch(
120 |             "biomcp.openfda.drug_recalls.make_openfda_request",
121 |             new_callable=AsyncMock,
122 |         ) as mock_request:
123 |             mock_request.return_value = ({"results": []}, None)
124 | 
125 |             result = await get_drug_recall("INVALID-RECALL")
126 | 
127 |             assert "No recall record found" in result
128 |             assert "INVALID-RECALL" in result
129 | 
130 |     @pytest.mark.asyncio
131 |     async def test_get_drug_recall_with_api_key(self):
132 |         """Test getting drug recall with API key."""
133 |         with patch(
134 |             "biomcp.openfda.drug_recalls.make_openfda_request",
135 |             new_callable=AsyncMock,
136 |         ) as mock_request:
137 |             mock_request.return_value = (MOCK_RECALL_DETAIL, None)
138 | 
139 |             result = await get_drug_recall(
140 |                 "D-0001-2023",
141 |                 api_key="test-api-key",
142 |             )
143 | 
144 |             assert "Drug Recall" in result or "D-0001-2023" in result
145 |             # Verify API key was passed as the 4th positional argument
146 |             call_args = mock_request.call_args
147 |             assert (
148 |                 call_args[0][3] == "test-api-key"
149 |             )  # api_key is 4th positional arg
150 | 
151 |     @pytest.mark.asyncio
152 |     async def test_recall_class_validation(self):
153 |         """Test that recall class is validated."""
154 |         with patch(
155 |             "biomcp.openfda.drug_recalls.make_openfda_request",
156 |             new_callable=AsyncMock,
157 |         ) as mock_request:
158 |             mock_request.return_value = (MOCK_RECALLS_SEARCH, None)
159 | 
160 |             # Valid recall classes
161 |             for recall_class in ["1", "2", "3"]:
162 |                 result = await search_drug_recalls(recall_class=recall_class)
163 |                 assert "Drug Recall" in result or "FDA Drug Recall" in result
164 | 
165 |             # Test with Class I, II, III format
166 |             result = await search_drug_recalls(recall_class="Class I")
167 |             call_args = mock_request.call_args
168 |             params = call_args[0][1]  # params is 2nd positional arg
169 |             assert 'classification:"Class I"' in params["search"]
170 | 
171 |     @pytest.mark.asyncio
172 |     async def test_recall_status_mapping(self):
173 |         """Test that recall status is properly mapped."""
174 |         with patch(
175 |             "biomcp.openfda.drug_recalls.make_openfda_request",
176 |             new_callable=AsyncMock,
177 |         ) as mock_request:
178 |             mock_request.return_value = (MOCK_RECALLS_SEARCH, None)
179 | 
180 |             # Test ongoing status
181 |             await search_drug_recalls(status="ongoing")
182 |             call_args = mock_request.call_args
183 |             params = call_args[0][1]  # params is 2nd positional arg
184 |             assert "Ongoing" in params["search"]
185 | 
186 |             # Test completed status
187 |             await search_drug_recalls(status="completed")
188 |             call_args = mock_request.call_args
189 |             params = call_args[0][1]  # params is 2nd positional arg
190 |             assert "Completed" in params["search"]
191 | 
192 |     @pytest.mark.asyncio
193 |     async def test_search_drug_recalls_pagination(self):
194 |         """Test drug recall search pagination."""
195 |         with patch(
196 |             "biomcp.openfda.drug_recalls.make_openfda_request",
197 |             new_callable=AsyncMock,
198 |         ) as mock_request:
199 |             mock_response = {
200 |                 "meta": {"results": {"total": 150}},
201 |                 "results": MOCK_RECALLS_SEARCH["results"],
202 |             }
203 |             mock_request.return_value = (mock_response, None)
204 | 
205 |             result = await search_drug_recalls(
206 |                 drug="aspirin",
207 |                 limit=10,
208 |                 skip=30,
209 |             )
210 | 
211 |             # Check for total count instead of specific pagination format
212 |             assert "150" in result
213 |             # Verify skip parameter was passed
214 |             call_args = mock_request.call_args
215 |             assert (
216 |                 call_args[0][1]["skip"] == "30"
217 |             )  # params is 2nd positional arg, value is string
218 | 
219 |     @pytest.mark.asyncio
220 |     async def test_date_filtering(self):
221 |         """Test that date filtering works correctly."""
222 |         with patch(
223 |             "biomcp.openfda.drug_recalls.make_openfda_request",
224 |             new_callable=AsyncMock,
225 |         ) as mock_request:
226 |             mock_request.return_value = (MOCK_RECALLS_SEARCH, None)
227 | 
228 |             await search_drug_recalls(
229 |                 since_date="20230615",
230 |             )
231 | 
232 |             # Check that date was properly formatted in query
233 |             call_args = mock_request.call_args
234 |             params = call_args[0][1]  # params is 2nd positional arg
235 |             assert "recall_initiation_date" in params["search"]
236 |             assert "[2023-06-15 TO *]" in params["search"]
237 | 
```

--------------------------------------------------------------------------------
/src/biomcp/openfda/validation.py:
--------------------------------------------------------------------------------

```python
  1 | """Validation functions for OpenFDA API responses."""
  2 | 
  3 | import logging
  4 | from typing import Any
  5 | 
  6 | from .exceptions import OpenFDAValidationError
  7 | 
  8 | logger = logging.getLogger(__name__)  # module logger; validators warn through it
  9 | 
 10 | 
 11 | def validate_fda_response(
 12 |     response: dict[str, Any],
 13 |     required_fields: list[str] | None = None,
 14 |     response_type: str = "generic",
 15 | ) -> bool:
 16 |     """
 17 |     Validate FDA API response structure.
 18 | 
 19 |     Args:
 20 |         response: The FDA API response dictionary
 21 |         required_fields: List of required top-level fields
 22 |         response_type: Type of response for specific validation
 23 | 
 24 |     Returns:
 25 |         True if valid
 26 | 
 27 |     Raises:
 28 |         OpenFDAValidationError: If validation fails
 29 |     """
 30 |     if not isinstance(response, dict):
 31 |         raise OpenFDAValidationError(
 32 |             f"Expected dict response, got {type(response).__name__}"
 33 |         )
 34 | 
 35 |     # Default required fields for most FDA responses
 36 |     if required_fields is None:
 37 |         required_fields = ["results"] if "results" in response else []
 38 | 
 39 |     # Check required fields
 40 |     missing_fields = [
 41 |         field for field in required_fields if field not in response
 42 |     ]
 43 |     if missing_fields:
 44 |         raise OpenFDAValidationError(
 45 |             f"Missing required fields in FDA response: {', '.join(missing_fields)}"
 46 |         )
 47 | 
 48 |     # Type-specific validation
 49 |     if response_type == "search":
 50 |         validate_search_response(response)
 51 |     elif response_type == "detail":
 52 |         validate_detail_response(response)
 53 | 
 54 |     return True
 55 | 
 56 | 
 57 | def validate_search_response(response: dict[str, Any]) -> bool:
 58 |     """
 59 |     Validate FDA search response structure.
 60 | 
 61 |     Args:
 62 |         response: FDA search response
 63 | 
 64 |     Returns:
 65 |         True if valid
 66 | 
 67 |     Raises:
 68 |         OpenFDAValidationError: If validation fails
 69 |     """
 70 |     # Search responses should have results array
 71 |     if "results" not in response:
 72 |         raise OpenFDAValidationError("Search response missing 'results' field")
 73 | 
 74 |     if not isinstance(response["results"], list):
 75 |         raise OpenFDAValidationError(
 76 |             f"Expected 'results' to be a list, got {type(response['results']).__name__}"
 77 |         )
 78 | 
 79 |     # If meta is present, validate it
 80 |     if "meta" in response:
 81 |         validate_meta_field(response["meta"])
 82 | 
 83 |     return True
 84 | 
 85 | 
 86 | def validate_detail_response(response: dict[str, Any]) -> bool:
 87 |     """
 88 |     Validate FDA detail response structure.
 89 | 
 90 |     Args:
 91 |         response: FDA detail response
 92 | 
 93 |     Returns:
 94 |         True if valid
 95 | 
 96 |     Raises:
 97 |         OpenFDAValidationError: If validation fails
 98 |     """
 99 |     # Detail responses usually have a single result
100 |     if "results" in response:
101 |         if not isinstance(response["results"], list):
102 |             raise OpenFDAValidationError(
103 |                 f"Expected 'results' to be a list, got {type(response['results']).__name__}"
104 |             )
105 | 
106 |         if len(response["results"]) == 0:
107 |             # Empty results is valid (not found)
108 |             return True
109 | 
110 |         if len(response["results"]) > 1:
111 |             logger.warning(
112 |                 f"Detail response contains {len(response['results'])} results, expected 1"
113 |             )
114 | 
115 |     return True
116 | 
117 | 
def validate_meta_field(meta: dict[str, Any]) -> bool:
    """
    Validate FDA response meta field.

    Only the structure is checked: 'meta' must be a dict, an optional
    'meta.results' entry must be a dict, and any of the pagination fields
    'skip', 'limit' and 'total' that are present must be real numbers.

    Args:
        meta: Meta field from FDA response

    Returns:
        True if valid

    Raises:
        OpenFDAValidationError: If validation fails
    """
    if not isinstance(meta, dict):
        raise OpenFDAValidationError(
            f"Expected 'meta' to be a dict, got {type(meta).__name__}"
        )

    # Results metadata is optional; nothing more to check without it
    if "results" not in meta:
        return True

    results_meta = meta["results"]
    if not isinstance(results_meta, dict):
        raise OpenFDAValidationError(
            f"Expected 'meta.results' to be a dict, got {type(results_meta).__name__}"
        )

    # Validate pagination fields if present
    for field in ("skip", "limit", "total"):
        if field not in results_meta:
            continue

        value = results_meta[field]
        # bool is a subclass of int in Python, so True/False would pass a
        # plain int/float check; exclude it explicitly.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            raise OpenFDAValidationError(
                f"Expected 'meta.results.{field}' to be numeric, "
                f"got {type(value).__name__}"
            )

    return True
155 | 
156 | 
def validate_adverse_event(event: dict[str, Any]) -> bool:
    """
    Check the basic shape of an adverse event record.

    Only the record type is enforced. The fields 'patient' and
    'safetyreportid' are expected, but their absence is merely logged.

    Args:
        event: Adverse event record

    Returns:
        True if valid

    Raises:
        OpenFDAValidationError: If the record is not a dict
    """
    if isinstance(event, dict):
        # Expected keys (their values may still be null)
        for name in ("patient", "safetyreportid"):
            if name in event:
                continue
            logger.warning(f"Adverse event missing expected field: {name}")
        return True

    raise OpenFDAValidationError(
        f"Expected adverse event to be a dict, got {type(event).__name__}"
    )
183 | 
184 | 
def validate_drug_label(label: dict[str, Any]) -> bool:
    """
    Check the basic shape of a drug label record.

    Only the record type is enforced. A missing 'openfda' section, or the
    absence of every standard label section, is logged as a warning and
    does not fail validation.

    Args:
        label: Drug label record

    Returns:
        True if valid

    Raises:
        OpenFDAValidationError: If the record is not a dict
    """
    if not isinstance(label, dict):
        raise OpenFDAValidationError(
            f"Expected drug label to be a dict, got {type(label).__name__}"
        )

    if "openfda" not in label:
        logger.warning("Drug label missing 'openfda' section")

    # At least one of these sections is expected on a label
    standard_sections = (
        "indications_and_usage",
        "contraindications",
        "warnings_and_precautions",
        "adverse_reactions",
        "dosage_and_administration",
    )
    if all(name not in label for name in standard_sections):
        logger.warning("Drug label has no standard sections")

    return True
221 | 
222 | 
def validate_device_event(event: dict[str, Any]) -> bool:
    """
    Check the basic shape of a device event record.

    Only the record type is enforced. A missing 'mdr_report_key', or a
    record carrying neither 'device' nor 'devices', is logged as a warning.

    Args:
        event: Device event record

    Returns:
        True if valid

    Raises:
        OpenFDAValidationError: If the record is not a dict
    """
    if not isinstance(event, dict):
        raise OpenFDAValidationError(
            f"Expected device event to be a dict, got {type(event).__name__}"
        )

    has_report_key = "mdr_report_key" in event
    if not has_report_key:
        logger.warning("Device event missing 'mdr_report_key'")

    # Device details may appear under either key
    has_device_info = "device" in event or "devices" in event
    if not has_device_info:
        logger.warning("Device event missing device information")

    return True
250 | 
251 | 
def validate_recall(recall: dict[str, Any]) -> bool:
    """
    Check the basic shape of a recall record.

    Only the record type is enforced. Missing expected fields and an
    unrecognized 'classification' value are logged as warnings and do not
    fail validation.

    Args:
        recall: Recall record

    Returns:
        True if valid

    Raises:
        OpenFDAValidationError: If the record is not a dict
    """
    if not isinstance(recall, dict):
        raise OpenFDAValidationError(
            f"Expected recall to be a dict, got {type(recall).__name__}"
        )

    # Fields every recall is expected to carry
    for name in ("recall_number", "classification", "product_description"):
        if name not in recall:
            logger.warning(f"Recall missing required field: {name}")

    # The classification, when given, should be one of the known spellings
    if "classification" in recall:
        recall_class = recall["classification"]
        known_classes = ("Class I", "Class II", "Class III", "1", "2", "3")
        if recall_class not in known_classes:
            logger.warning(
                f"Invalid recall classification: {recall_class}"
            )

    return True
286 | 
287 | 
def sanitize_response(response: dict[str, Any]) -> dict[str, Any]:
    """
    Normalize an FDA response so that selected fields are always lists.

    Some fields arrive either as a single value or as a list. For every
    dict entry in 'results', each such field that is present is rewritten
    in place: a truthy scalar becomes a one-element list, a falsy scalar
    becomes an empty list, and an existing list is left untouched.

    Args:
        response: Raw FDA response

    Returns:
        The same response object after in-place normalization, or an empty
        dict when the input is falsy
    """
    if not response:
        return {}

    if "results" not in response:
        return response

    records = response["results"]
    if not isinstance(records, list):
        return response

    # Fields that can be string or list
    list_valued_fields = (
        "source_type",
        "remedial_action",
        "medical_specialty_description",
        "manufacturer_name",
        "brand_name",
        "generic_name",
    )

    for record in records:
        if not isinstance(record, dict):
            continue

        for name in list_valued_fields:
            if name not in record:
                continue

            current = record[name]
            if isinstance(current, list):
                continue

            # Ensure consistent list format
            record[name] = [current] if current else []

    return response
323 | 
```

--------------------------------------------------------------------------------
/src/biomcp/openfda/input_validation.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Input validation and sanitization for OpenFDA API requests.
  3 | 
  4 | This module provides security-focused input validation to prevent injection attacks
  5 | and ensure data integrity for all FDA API requests.
  6 | """
  7 | 
  8 | import logging
  9 | import re
 10 | from typing import Any
 11 | 
 12 | logger = logging.getLogger(__name__)
 13 | 
 14 | # Maximum lengths for different input types
 15 | MAX_DRUG_NAME_LENGTH = 100
 16 | MAX_REACTION_LENGTH = 200
 17 | MAX_GENERAL_QUERY_LENGTH = 500
 18 | MAX_DATE_LENGTH = 10
 19 | 
 20 | # Patterns for validation
 21 | SAFE_CHARS_PATTERN = re.compile(r"^[a-zA-Z0-9\s\-\.\,\(\)\/\*]+$")
 22 | DATE_PATTERN = re.compile(r"^\d{4}-\d{2}-\d{2}$")
 23 | # Include SQL comment pattern -- and other injection patterns
 24 | INJECTION_CHARS = re.compile(r"[<>\"\';&|\\`${}]|--")
 25 | 
 26 | 
 27 | def sanitize_input(
 28 |     value: str | None, max_length: int = MAX_GENERAL_QUERY_LENGTH
 29 | ) -> str | None:
 30 |     """
 31 |     Sanitize user input to prevent injection attacks.
 32 | 
 33 |     Args:
 34 |         value: Input string to sanitize
 35 |         max_length: Maximum allowed length
 36 | 
 37 |     Returns:
 38 |         Sanitized string or None if input is invalid
 39 |     """
 40 |     if not value:
 41 |         return None
 42 | 
 43 |     # Convert to string and strip whitespace
 44 |     value = str(value).strip()
 45 | 
 46 |     # Check length
 47 |     if len(value) > max_length:
 48 |         logger.warning(
 49 |             f"Input truncated from {len(value)} to {max_length} characters"
 50 |         )
 51 |         value = value[:max_length]
 52 | 
 53 |     # Remove potential injection characters
 54 |     cleaned = INJECTION_CHARS.sub("", value)
 55 | 
 56 |     # Warn if characters were removed
 57 |     if cleaned != value:
 58 |         logger.warning("Removed potentially dangerous characters from input")
 59 | 
 60 |     # Normalize whitespace
 61 |     cleaned = " ".join(cleaned.split())
 62 | 
 63 |     return cleaned if cleaned else None
 64 | 
 65 | 
 66 | def validate_drug_name(drug: str | None) -> str | None:
 67 |     """
 68 |     Validate and sanitize drug name input.
 69 | 
 70 |     Args:
 71 |         drug: Drug name to validate
 72 | 
 73 |     Returns:
 74 |         Validated drug name or None
 75 |     """
 76 |     if not drug:
 77 |         return None
 78 | 
 79 |     sanitized = sanitize_input(drug, MAX_DRUG_NAME_LENGTH)
 80 | 
 81 |     if not sanitized:
 82 |         return None
 83 | 
 84 |     # Drug names should only contain alphanumeric, spaces, hyphens, and slashes
 85 |     if not re.match(r"^[a-zA-Z0-9\s\-\/\(\)]+$", sanitized):
 86 |         logger.warning(f"Invalid drug name format: {sanitized[:20]}...")
 87 |         return None
 88 | 
 89 |     return sanitized
 90 | 
 91 | 
 92 | def validate_date(date_str: str | None) -> str | None:
 93 |     """
 94 |     Validate date string format.
 95 | 
 96 |     Args:
 97 |         date_str: Date string in YYYY-MM-DD format
 98 | 
 99 |     Returns:
100 |         Validated date string or None
101 |     """
102 |     if not date_str:
103 |         return None
104 | 
105 |     sanitized = sanitize_input(date_str, MAX_DATE_LENGTH)
106 | 
107 |     if not sanitized:
108 |         return None
109 | 
110 |     # Check date format
111 |     if not DATE_PATTERN.match(sanitized):
112 |         logger.warning(f"Invalid date format: {sanitized}")
113 |         return None
114 | 
115 |     # Basic date validation
116 |     try:
117 |         year, month, day = map(int, sanitized.split("-"))
118 |         if not (1900 <= year <= 2100 and 1 <= month <= 12 and 1 <= day <= 31):
119 |             logger.warning(f"Date out of valid range: {sanitized}")
120 |             return None
121 |     except (ValueError, IndexError):
122 |         logger.warning(f"Cannot parse date: {sanitized}")
123 |         return None
124 | 
125 |     return sanitized
126 | 
127 | 
128 | def validate_limit(limit: int | None, max_limit: int = 100) -> int:
129 |     """
130 |     Validate and constrain limit parameter.
131 | 
132 |     Args:
133 |         limit: Requested limit
134 |         max_limit: Maximum allowed limit
135 | 
136 |     Returns:
137 |         Valid limit value
138 |     """
139 |     if limit is None:
140 |         return 25  # Default
141 | 
142 |     try:
143 |         limit = int(limit)
144 |     except (ValueError, TypeError):
145 |         logger.warning(f"Invalid limit value: {limit}")
146 |         return 25
147 | 
148 |     if limit < 1:
149 |         return 1
150 |     elif limit > max_limit:
151 |         logger.warning(f"Limit {limit} exceeds maximum {max_limit}")
152 |         return max_limit
153 | 
154 |     return limit
155 | 
156 | 
157 | def validate_skip(skip: int | None, max_skip: int = 10000) -> int:
158 |     """
159 |     Validate and constrain skip/offset parameter.
160 | 
161 |     Args:
162 |         skip: Requested skip/offset
163 |         max_skip: Maximum allowed skip
164 | 
165 |     Returns:
166 |         Valid skip value
167 |     """
168 |     if skip is None:
169 |         return 0
170 | 
171 |     try:
172 |         skip = int(skip)
173 |     except (ValueError, TypeError):
174 |         logger.warning(f"Invalid skip value: {skip}")
175 |         return 0
176 | 
177 |     if skip < 0:
178 |         return 0
179 |     elif skip > max_skip:
180 |         logger.warning(f"Skip {skip} exceeds maximum {max_skip}")
181 |         return max_skip
182 | 
183 |     return skip
184 | 
185 | 
186 | def validate_classification(classification: str | None) -> str | None:
187 |     """
188 |     Validate recall classification.
189 | 
190 |     Args:
191 |         classification: Classification string (Class I, II, or III)
192 | 
193 |     Returns:
194 |         Validated classification or None
195 |     """
196 |     if not classification:
197 |         return None
198 | 
199 |     sanitized = sanitize_input(classification, 20)
200 | 
201 |     if not sanitized:
202 |         return None
203 | 
204 |     # Normalize classification format
205 |     sanitized = sanitized.upper()
206 | 
207 |     # Check valid classifications
208 |     valid_classes = [
209 |         "CLASS I",
210 |         "CLASS II",
211 |         "CLASS III",
212 |         "I",
213 |         "II",
214 |         "III",
215 |         "1",
216 |         "2",
217 |         "3",
218 |     ]
219 | 
220 |     if sanitized not in valid_classes:
221 |         logger.warning(f"Invalid classification: {sanitized}")
222 |         return None
223 | 
224 |     # Normalize to standard format
225 |     if sanitized in ["I", "1"]:
226 |         return "Class I"
227 |     elif sanitized in ["II", "2"]:
228 |         return "Class II"
229 |     elif sanitized in ["III", "3"]:
230 |         return "Class III"
231 | 
232 |     return sanitized.title()  # "CLASS I" -> "Class I"
233 | 
234 | 
235 | def validate_status(status: str | None) -> str | None:
236 |     """
237 |     Validate status parameter.
238 | 
239 |     Args:
240 |         status: Status string
241 | 
242 |     Returns:
243 |         Validated status or None
244 |     """
245 |     if not status:
246 |         return None
247 | 
248 |     sanitized = sanitize_input(status, 50)
249 | 
250 |     if not sanitized:
251 |         return None
252 | 
253 |     # Normalize status
254 |     sanitized = sanitized.lower()
255 | 
256 |     # Check valid statuses
257 |     valid_statuses = [
258 |         "ongoing",
259 |         "terminated",
260 |         "completed",
261 |         "current",
262 |         "resolved",
263 |     ]
264 | 
265 |     if sanitized not in valid_statuses:
266 |         logger.warning(f"Invalid status: {sanitized}")
267 |         return None
268 | 
269 |     return sanitized.title()  # "ongoing" -> "Ongoing"
270 | 
271 | 
272 | def validate_boolean(value: Any) -> bool | None:
273 |     """
274 |     Validate boolean parameter.
275 | 
276 |     Args:
277 |         value: Boolean-like value
278 | 
279 |     Returns:
280 |         Boolean value or None
281 |     """
282 |     if value is None:
283 |         return None
284 | 
285 |     if isinstance(value, bool):
286 |         return value
287 | 
288 |     if isinstance(value, str):
289 |         value = value.lower().strip()
290 |         if value in ["true", "1", "yes", "y"]:
291 |             return True
292 |         elif value in ["false", "0", "no", "n"]:
293 |             return False
294 | 
295 |     return None
296 | 
297 | 
298 | def validate_api_key(api_key: str | None) -> str | None:
299 |     """
300 |     Validate API key format.
301 | 
302 |     Args:
303 |         api_key: API key string
304 | 
305 |     Returns:
306 |         Validated API key or None
307 |     """
308 |     if not api_key:
309 |         return None
310 | 
311 |     # API keys should be alphanumeric with possible hyphens
312 |     if not re.match(r"^[a-zA-Z0-9\-_]+$", api_key):
313 |         logger.warning("Invalid API key format")
314 |         return None
315 | 
316 |     # Check reasonable length
317 |     if len(api_key) < 10 or len(api_key) > 100:
318 |         logger.warning("API key length out of expected range")
319 |         return None
320 | 
321 |     return api_key
322 | 
323 | 
def _validate_parameter(key: str, value: Any) -> Any:
    """
    Route a single parameter to the validator that matches its key.

    Exact key names are checked first. Any other key that contains "date"
    (case-insensitive) is validated as a date, and everything else goes
    through generic input sanitization.

    NOTE(review): the "date" rule is a substring test, so keys such as
    "update" would also be treated as dates; confirm this is intended.
    """
    exact_validators = (
        (("drug", "brand", "generic"), validate_drug_name),
        (("limit",), validate_limit),
        (("skip", "offset"), validate_skip),
        (("classification",), validate_classification),
        (("status",), validate_status),
        (("serious", "death", "ongoing"), validate_boolean),
        (("api_key",), validate_api_key),
    )

    for names, validator in exact_validators:
        if key in names:
            return validator(value)

    if "date" in key.lower():
        return validate_date(value)

    return sanitize_input(value)
344 | 
345 | 
def build_safe_query(params: dict[str, Any]) -> dict[str, Any]:
    """
    Build a safe query dictionary with validated parameters.

    Entries whose value is None, whose key is not a plain identifier
    (letters, digits and underscores, not starting with a digit), or whose
    value fails validation are left out of the result.

    Args:
        params: Raw parameters dictionary

    Returns:
        Dictionary with validated parameters
    """
    safe_params: dict[str, Any] = {}

    for key, value in params.items():
        if value is None:
            continue

        # Validate key name. Non-string keys are skipped rather than left
        # to raise TypeError inside the regex call, and fullmatch is used
        # because re.match with a trailing "$" would accept "key\n".
        if not isinstance(key, str) or not re.fullmatch(
            r"[a-zA-Z_][a-zA-Z0-9_]*", key
        ):
            # repr() keeps control characters in a bad key out of the log
            logger.warning(f"Skipping invalid parameter key: {key!r}")
            continue

        # Validate parameter value
        validated = _validate_parameter(key, value)

        if validated is not None:
            safe_params[key] = validated

    return safe_params
374 | 
```

--------------------------------------------------------------------------------
/tests/tdd/openfda/test_device_events.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Unit tests for OpenFDA device events integration.
  3 | """
  4 | 
  5 | from unittest.mock import patch
  6 | 
  7 | import pytest
  8 | 
  9 | from biomcp.openfda.device_events import get_device_event, search_device_events
 10 | 
 11 | 
 12 | @pytest.mark.asyncio
 13 | async def test_search_device_events_by_device():
 14 |     """Test searching device events by device name."""
 15 |     mock_response = {
 16 |         "meta": {"results": {"total": 3}},
 17 |         "results": [
 18 |             {
 19 |                 "event_type": "M",
 20 |                 "date_received": "2024-01-15",
 21 |                 "device": [
 22 |                     {
 23 |                         "brand_name": "FoundationOne CDx",
 24 |                         "manufacturer_d_name": "Foundation Medicine",
 25 |                         "model_number": "F1CDX",
 26 |                         "device_problem_text": ["False negative result"],
 27 |                         "openfda": {
 28 |                             "device_class": "2",
 29 |                             "medical_specialty_description": ["Pathology"],
 30 |                             "product_code": "PQP",
 31 |                         },
 32 |                     }
 33 |                 ],
 34 |                 "event_description": "Device failed to detect known mutation",
 35 |                 "mdr_report_key": "MDR123456",
 36 |             }
 37 |         ],
 38 |     }
 39 | 
 40 |     with patch(
 41 |         "biomcp.openfda.device_events.make_openfda_request"
 42 |     ) as mock_request:
 43 |         mock_request.return_value = (mock_response, None)
 44 | 
 45 |         result = await search_device_events(device="FoundationOne", limit=10)
 46 | 
 47 |         # Verify request
 48 |         mock_request.assert_called_once()
 49 |         call_args = mock_request.call_args
 50 |         assert "FoundationOne" in call_args[0][1]["search"]
 51 |         # When searching for a specific device, genomic filter is not needed
 52 |         # The device search itself is sufficient
 53 | 
 54 |         # Check output
 55 |         assert "FDA Device Adverse Event Reports" in result
 56 |         assert "FoundationOne CDx" in result
 57 |         assert "Foundation Medicine" in result
 58 |         assert "False negative result" in result
 59 |         assert "Malfunction" in result
 60 |         assert "MDR123456" in result
 61 | 
 62 | 
 63 | @pytest.mark.asyncio
 64 | async def test_search_device_events_genomics_filter():
 65 |     """Test that genomics filter is applied by default."""
 66 |     mock_response = {"meta": {"results": {"total": 5}}, "results": []}
 67 | 
 68 |     with patch(
 69 |         "biomcp.openfda.device_events.make_openfda_request"
 70 |     ) as mock_request:
 71 |         mock_request.return_value = (mock_response, None)
 72 | 
 73 |         await search_device_events(manufacturer="Illumina", genomics_only=True)
 74 | 
 75 |         # Verify genomic device codes are in search
 76 |         call_args = mock_request.call_args
 77 |         search_query = call_args[0][1]["search"]
 78 |         # Should contain at least one genomic product code
 79 |         assert any(
 80 |             code in search_query for code in ["OOI", "PQP", "OYD", "NYE"]
 81 |         )
 82 | 
 83 | 
 84 | @pytest.mark.asyncio
 85 | async def test_search_device_events_no_genomics_filter():
 86 |     """Test searching without genomics filter."""
 87 |     mock_response = {"meta": {"results": {"total": 10}}, "results": []}
 88 | 
 89 |     with patch(
 90 |         "biomcp.openfda.device_events.make_openfda_request"
 91 |     ) as mock_request:
 92 |         mock_request.return_value = (mock_response, None)
 93 | 
 94 |         await search_device_events(device="pacemaker", genomics_only=False)
 95 | 
 96 |         # Verify no genomic product codes in search
 97 |         call_args = mock_request.call_args
 98 |         search_query = call_args[0][1]["search"]
 99 |         # Should not contain genomic product codes
100 |         assert not any(code in search_query for code in ["OOI", "PQP", "OYD"])
101 | 
102 | 
@pytest.mark.asyncio
async def test_search_device_events_by_problem():
    """A problem-text search forwards the text and renders each problem."""
    report = {
        "event_type": "IN",
        "device": [
            {
                "brand_name": "Test Device",
                "device_problem_text": [
                    "Software malfunction",
                    "Data loss",
                ],
            }
        ],
        "mdr_report_key": "MDR789",
    }
    api_payload = {"meta": {"results": {"total": 8}}, "results": [report]}

    with patch(
        "biomcp.openfda.device_events.make_openfda_request"
    ) as fake_request:
        fake_request.return_value = (api_payload, None)

        rendered = await search_device_events(problem="software malfunction")

        # The problem text reaches the search term (case-insensitively)
        search_term = fake_request.call_args[0][1]["search"]
        assert "software malfunction" in search_term.lower()

        # Both problems and the decoded event type show up in the output
        assert "Software malfunction" in rendered
        assert "Data loss" in rendered
        assert "Injury" in rendered  # IN = Injury
140 | 
141 | 
@pytest.mark.asyncio
async def test_search_device_events_no_params():
    """Calling the search with no criteria returns a guidance message."""
    message = await search_device_events()

    expected_fragments = (
        "Please specify",
        "device name, manufacturer, or problem",
        "Examples:",
    )
    for fragment in expected_fragments:
        assert fragment in message
150 | 
151 | 
@pytest.mark.asyncio
async def test_get_device_event_detail():
    """Fetching one report by MDR key renders its full detail view."""
    device_entry = {
        "brand_name": "Genomic Sequencer X",
        "manufacturer_d_name": "GenTech Corp",
        "model_number": "GSX-2000",
        "catalog_number": "CAT123",
        "lot_number": "LOT456",
        "expiration_date_of_device": "2025-12-31",
        "device_problem_text": [
            "Critical failure",
            "Sample contamination",
        ],
        "device_evaluated_by_manufacturer": "Y",
        "openfda": {
            "device_class": "3",
            "medical_specialty_description": ["Clinical Chemistry"],
            "product_code": "OOI",
        },
    }
    patient_entry = {
        "patient_age": "65",
        "patient_sex": "F",
        "date_of_death": "2024-01-25",
        "life_threatening": "Y",
    }
    report = {
        "mdr_report_key": "MDR999888",
        "event_type": "D",
        "date_received": "2024-02-01",
        "date_of_event": "2024-01-20",
        "source_type": "M",
        "device": [device_entry],
        "event_description": "Device failure led to incorrect cancer diagnosis",
        "manufacturer_narrative": "Investigation revealed component failure",
        "patient": [patient_entry],
        "remedial_action": "Device recall initiated",
    }
    api_payload = {"results": [report]}

    with patch(
        "biomcp.openfda.device_events.make_openfda_request"
    ) as fake_request:
        fake_request.return_value = (api_payload, None)

        rendered = await get_device_event("MDR999888")

        # Exactly one request, looking the report up by its key
        fake_request.assert_called_once()
        request_params = fake_request.call_args[0][1]
        assert "MDR999888" in request_params["search"]

        # Every detail of the report appears in the rendered output
        expected_fragments = (
            "MDR999888",
            "Death",
            "Genomic Sequencer X",
            "GenTech Corp",
            "GSX-2000",
            "Critical failure",
            "Sample contamination",
            "Class III",
            "65 years",
            "Female",
            "2024-01-25",
            "Life-threatening",
            "Device recall initiated",
            "Investigation revealed component failure",
        )
        for fragment in expected_fragments:
            assert fragment in rendered
227 | 
228 | 
@pytest.mark.asyncio
async def test_get_device_event_not_found():
    """An empty result list yields a 'not found' message naming the key."""
    missing_key = "NOTFOUND789"

    with patch(
        "biomcp.openfda.device_events.make_openfda_request"
    ) as fake_request:
        fake_request.return_value = ({"results": []}, None)

        message = await get_device_event(missing_key)

        assert missing_key in message
        assert "not found" in message
241 | 
242 | 
@pytest.mark.asyncio
async def test_search_device_events_error():
    """A request-level error is surfaced in the search output."""
    failure = "Network timeout"

    with patch(
        "biomcp.openfda.device_events.make_openfda_request"
    ) as fake_request:
        fake_request.return_value = (None, failure)

        message = await search_device_events(device="test")

        assert "Error searching device events" in message
        assert failure in message
255 | 
```

--------------------------------------------------------------------------------
/docs/reference/quick-reference.md:
--------------------------------------------------------------------------------

```markdown
  1 | # BioMCP Quick Reference
  2 | 
  3 | ## Command Cheat Sheet
  4 | 
  5 | ### Installation
  6 | 
  7 | ```bash
  8 | # Install BioMCP
  9 | uv tool install biomcp
 10 | 
 11 | # Update to latest version
 12 | uv tool install biomcp --force
 13 | 
 14 | # Check version
 15 | biomcp --version
 16 | ```
 17 | 
 18 | ### Article Search Commands
 19 | 
 20 | ```bash
 21 | # Basic gene search
 22 | biomcp article search --gene BRAF
 23 | 
 24 | # Multiple filters
 25 | biomcp article search \
 26 |   --gene EGFR --disease "lung cancer" \
 27 |   --chemical erlotinib
 28 | 
 29 | # Exclude preprints
 30 | biomcp article search --gene TP53 --no-preprints
 31 | 
 32 | # OR logic in keywords
 33 | biomcp article search --gene PTEN \
 34 |   --keyword "R173|Arg173|p.R173"
 35 | 
 36 | # Get specific article
 37 | biomcp article get 38768446  # PMID
 38 | biomcp article get "10.1101/2024.01.20.23288905"  # DOI
 39 | ```
 40 | 
 41 | ### Trial Search Commands
 42 | 
 43 | ```bash
 44 | # Basic disease search
 45 | biomcp trial search \
 46 |   --condition melanoma --status RECRUITING
 47 | 
 48 | # Location-based search (requires coordinates)
 49 | biomcp trial search --condition cancer \
 50 |   --latitude 40.7128 --longitude -74.0060 --distance 50
 51 | 
 52 | # Phase-specific search
 53 | biomcp trial search \
 54 |   --condition "breast cancer" --phase PHASE3
 55 | 
 56 | # Using NCI source (requires API key)
 57 | biomcp trial search --condition melanoma --source nci \
 58 |   --required-mutations "BRAF V600E" --api-key $NCI_API_KEY
 59 | ```
 60 | 
 61 | ### Variant Commands
 62 | 
 63 | ```bash
 64 | # Search by gene
 65 | biomcp variant search \
 66 |   --gene BRCA1 --significance pathogenic
 67 | 
 68 | # Search by HGVS
 69 | biomcp variant search --hgvs "NM_007294.4:c.5266dupC"
 70 | 
 71 | # Search by frequency
 72 | biomcp variant search --gene TP53 \
 73 |   --max-frequency 0.01 --min-cadd 20
 74 | 
 75 | # Get variant details
 76 | biomcp variant get rs121913529
 77 | biomcp variant get "NM_007294.4:c.5266dupC"
 78 | 
 79 | # Predict effects (requires AlphaGenome key)
 80 | biomcp variant predict chr7 140753336 A T --tissue UBERON:0002367
 81 | ```
 82 | 
 83 | ### Gene/Drug/Disease Commands
 84 | 
 85 | ```bash
 86 | # Get gene information
 87 | biomcp gene get TP53
 88 | biomcp gene get BRAF
 89 | 
 90 | # Get drug information
 91 | biomcp drug get imatinib
 92 | biomcp drug get pembrolizumab
 93 | 
 94 | # Get disease information
 95 | biomcp disease get melanoma
 96 | biomcp disease get "non-small cell lung cancer"
 97 | ```
 98 | 
 99 | ### NCI Commands (Require API Key)
100 | 
101 | ```bash
102 | # Search organizations
103 | biomcp organization search --name "MD Anderson" \
104 |   --city Houston --state TX --api-key $NCI_API_KEY
105 | 
106 | # Search interventions
107 | biomcp intervention search --name pembrolizumab \
108 |   --intervention-type Drug --api-key $NCI_API_KEY
109 | 
110 | # Search biomarkers
111 | biomcp biomarker search --gene EGFR \
112 |   --biomarker-type mutation --api-key $NCI_API_KEY
113 | ```
114 | 
115 | ### Health Check
116 | 
117 | ```bash
118 | # Full health check
119 | biomcp health check
120 | 
121 | # Check APIs only
122 | biomcp health check --apis-only
123 | 
124 | # Verbose output
125 | biomcp health check --verbose
126 | ```
127 | 
128 | ## Common Parameter Reference
129 | 
130 | ### Search Parameters
131 | 
132 | | Parameter  | Description   | Example         |
133 | | ---------- | ------------- | --------------- |
134 | | `--limit`  | Max results   | `--limit 20`    |
135 | | `--page`   | Page number   | `--page 2`      |
136 | | `--format` | Output format | `--format json` |
137 | 
138 | ### Trial Status Values
139 | 
140 | | Status                  | Description            |
141 | | ----------------------- | ---------------------- |
142 | | `RECRUITING`            | Currently enrolling    |
143 | | `ACTIVE_NOT_RECRUITING` | Ongoing, not enrolling |
144 | | `NOT_YET_RECRUITING`    | Will start recruiting  |
145 | | `COMPLETED`             | Trial has ended        |
146 | | `SUSPENDED`             | Temporarily halted     |
147 | | `TERMINATED`            | Stopped early          |
148 | 
149 | ### Trial Phase Values
150 | 
151 | | Phase          | Description   |
152 | | -------------- | ------------- |
153 | | `EARLY_PHASE1` | Early Phase 1 |
154 | | `PHASE1`       | Phase 1       |
155 | | `PHASE2`       | Phase 2       |
156 | | `PHASE3`       | Phase 3       |
157 | | `PHASE4`       | Phase 4       |
158 | 
159 | ### Clinical Significance
160 | 
161 | | Value                    | Description             |
162 | | ------------------------ | ----------------------- |
163 | | `pathogenic`             | Causes disease          |
164 | | `likely_pathogenic`      | Probably causes disease |
165 | | `uncertain_significance` | Unknown impact          |
166 | | `likely_benign`          | Probably harmless       |
167 | | `benign`                 | Does not cause disease  |
168 | 
169 | ## Gene Symbol Quick Lookup
170 | 
171 | ### Common Gene Aliases
172 | 
173 | | Common Name | Official Symbol |
174 | | ----------- | --------------- |
175 | | HER2        | ERBB2           |
176 | | HER3        | ERBB3           |
177 | | EGFR        | EGFR            |
178 | | ALK         | ALK             |
179 | | c-MET       | MET             |
180 | | PD-1        | PDCD1           |
181 | | PD-L1       | CD274           |
182 | | CTLA-4      | CTLA4           |
183 | 
184 | ## Location Coordinates
185 | 
186 | ### Major US Cities
187 | 
188 | | City          | Latitude | Longitude |
189 | | ------------- | -------- | --------- |
190 | | New York      | 40.7128  | -74.0060  |
191 | | Los Angeles   | 34.0522  | -118.2437 |
192 | | Chicago       | 41.8781  | -87.6298  |
193 | | Houston       | 29.7604  | -95.3698  |
194 | | Philadelphia  | 39.9526  | -75.1652  |
195 | | Boston        | 42.3601  | -71.0589  |
196 | | Atlanta       | 33.7490  | -84.3880  |
197 | | Miami         | 25.7617  | -80.1918  |
198 | | Seattle       | 47.6062  | -122.3321 |
199 | | San Francisco | 37.7749  | -122.4194 |
200 | 
201 | ## Environment Variables
202 | 
203 | ```bash
204 | # API Keys
205 | export NCI_API_KEY="your-nci-key"
206 | export ALPHAGENOME_API_KEY="your-alphagenome-key"
207 | export CBIO_TOKEN="your-cbioportal-token"
208 | 
209 | # Configuration
210 | export BIOMCP_LOG_LEVEL="DEBUG"
211 | export BIOMCP_CACHE_DIR="/path/to/cache"
212 | export BIOMCP_TIMEOUT=300
213 | export BIOMCP_MAX_CONCURRENT=5
214 | ```
215 | 
216 | ## Output Format Examples
217 | 
218 | ### JSON Output
219 | 
220 | ```bash
221 | biomcp article search --gene BRAF --format json | jq '.articles[0]'
222 | ```
223 | 
224 | ### Extract Specific Fields
225 | 
226 | ```bash
227 | # Get PMIDs only
228 | biomcp article search --gene TP53 --format json | \
229 |   jq -r '.articles[].pmid'
230 | 
231 | # Get trial NCT IDs
232 | biomcp trial search --condition melanoma --format json | \
233 |   jq -r '.trials[].nct_id'
234 | ```
235 | 
236 | ### Save to File
237 | 
238 | ```bash
239 | biomcp article search --gene BRCA1 --format json > results.json
240 | ```
241 | 
242 | ## MCP Tool Names
243 | 
244 | ### Core Tools
245 | 
246 | - `search` - Unified search
247 | - `fetch` - Get details
248 | - `think` - Sequential thinking
249 | 
250 | ### Article Tools
251 | 
252 | - `article_searcher`
253 | - `article_getter`
254 | 
255 | ### Trial Tools
256 | 
257 | - `trial_searcher`
258 | - `trial_getter`
259 | - `trial_protocol_getter`
260 | - `trial_references_getter`
261 | - `trial_outcomes_getter`
262 | - `trial_locations_getter`
263 | 
264 | ### Variant Tools
265 | 
266 | - `variant_searcher`
267 | - `variant_getter`
268 | - `alphagenome_predictor`
269 | 
270 | ### BioThings Tools
271 | 
272 | - `gene_getter`
273 | - `disease_getter`
274 | - `drug_getter`
275 | 
276 | ### NCI Tools
277 | 
278 | - `nci_organization_searcher`
279 | - `nci_organization_getter`
280 | - `nci_intervention_searcher`
281 | - `nci_intervention_getter`
282 | - `nci_biomarker_searcher`
283 | - `nci_disease_searcher`
284 | 
285 | ## Query Language Syntax
286 | 
287 | ### Unified Search Examples
288 | 
289 | ```
290 | gene:BRAF AND disease:melanoma
291 | gene:EGFR AND (mutation OR variant)
292 | drugs.tradename:gleevec
293 | diseases.name:"lung cancer"
294 | chemicals.mesh:D000069439
295 | ```
296 | 
297 | ### Field Prefixes
298 | 
299 | - `gene:` - Gene symbol
300 | - `disease:` - Disease/condition
301 | - `chemical:` - Drug/chemical
302 | - `variant:` - Genetic variant
303 | - `pmid:` - PubMed ID
304 | - `doi:` - Digital Object Identifier (DOI)
305 | 
306 | ## Common Workflows
307 | 
308 | ### Find Articles About a Mutation
309 | 
310 | ```bash
311 | # Step 1: Search articles
312 | biomcp article search --gene BRAF --keyword "V600E|p.V600E"
313 | 
314 | # Step 2: Get full article
315 | biomcp article get [PMID]
316 | ```
317 | 
318 | ### Check Trial Eligibility
319 | 
320 | ```bash
321 | # Step 1: Search trials
322 | biomcp trial search --condition melanoma --status RECRUITING
323 | 
324 | # Step 2: Get trial details
325 | biomcp trial get NCT03006926
326 | ```
327 | 
328 | ### Variant Analysis
329 | 
330 | ```bash
331 | # Step 1: Search variant
332 | biomcp variant search --gene BRCA1 --significance pathogenic
333 | 
334 | # Step 2: Get variant details
335 | biomcp variant get rs80357906
336 | 
337 | # Step 3: Search related articles
338 | biomcp article search --gene BRCA1 --variant rs80357906
339 | ```
340 | 
341 | ## Error Code Quick Reference
342 | 
343 | ### Common HTTP Codes
344 | 
345 | - `400` - Bad request (check parameters)
346 | - `401` - Unauthorized (check API key)
347 | - `404` - Not found (verify ID)
348 | - `429` - Rate limited (wait and retry)
349 | - `500` - Server error (retry later)
350 | 
351 | ### BioMCP Error Patterns
352 | 
353 | - `1xxx` - Article errors
354 | - `2xxx` - Trial errors
355 | - `3xxx` - Variant errors
356 | - `4xxx` - Gene/drug/disease errors
357 | - `5xxx` - Authentication errors
358 | - `6xxx` - Rate limit errors
359 | - `7xxx` - Validation errors
360 | 
361 | ## Tips and Tricks
362 | 
363 | ### 1. Use Official Gene Symbols
364 | 
365 | ```bash
366 | # Wrong
367 | biomcp article search --gene HER2  # ❌
368 | 
369 | # Right
370 | biomcp article search --gene ERBB2  # ✅
371 | ```
372 | 
373 | ### 2. Combine Multiple Searches
374 | 
375 | ```bash
376 | # Search multiple databases in parallel
377 | (
378 |   biomcp article search --gene BRAF --format json > articles.json &
379 |   biomcp trial search --condition melanoma --format json > trials.json &
380 |   biomcp variant search --gene BRAF --format json > variants.json &
381 |   wait
382 | )
383 | ```
384 | 
385 | ### 3. Process Large Results
386 | 
387 | ```bash
388 | # Paginate through results
389 | for page in {1..10}; do
390 |   biomcp article search --gene TP53 --page $page --limit 100
391 | done
392 | ```
393 | 
394 | ### 4. Debug API Issues
395 | 
396 | ```bash
397 | # Enable debug logging
398 | export BIOMCP_LOG_LEVEL=DEBUG
399 | biomcp article search --gene BRAF --verbose
400 | ```
401 | 
402 | ## Getting Help
403 | 
404 | ```bash
405 | # General help
406 | biomcp --help
407 | 
408 | # Command help
409 | biomcp article search --help
410 | 
411 | # Check documentation
412 | open https://biomcp.org/
413 | 
414 | # Report issues
415 | open https://github.com/genomoncology/biomcp/issues
416 | ```
417 | 
```

--------------------------------------------------------------------------------
/tests/tdd/test_retry.py:
--------------------------------------------------------------------------------

```python
  1 | """Tests for retry logic with exponential backoff."""
  2 | 
  3 | import asyncio
  4 | from unittest.mock import AsyncMock, MagicMock, patch
  5 | 
  6 | import httpx
  7 | import pytest
  8 | 
  9 | from biomcp.retry import (
 10 |     RetryableHTTPError,
 11 |     RetryConfig,
 12 |     calculate_delay,
 13 |     is_retryable_exception,
 14 |     is_retryable_status,
 15 |     retry_with_backoff,
 16 |     with_retry,
 17 | )
 18 | 
 19 | 
 20 | def test_calculate_delay_exponential_backoff():
 21 |     """Test that delay increases exponentially."""
 22 |     config = RetryConfig(initial_delay=1.0, exponential_base=2.0, jitter=False)
 23 | 
 24 |     # Test exponential increase
 25 |     assert calculate_delay(0, config) == 1.0  # 1 * 2^0
 26 |     assert calculate_delay(1, config) == 2.0  # 1 * 2^1
 27 |     assert calculate_delay(2, config) == 4.0  # 1 * 2^2
 28 |     assert calculate_delay(3, config) == 8.0  # 1 * 2^3
 29 | 
 30 | 
 31 | def test_calculate_delay_max_cap():
 32 |     """Test that delay is capped at max_delay."""
 33 |     config = RetryConfig(
 34 |         initial_delay=1.0, exponential_base=2.0, max_delay=5.0, jitter=False
 35 |     )
 36 | 
 37 |     # Test that delay is capped
 38 |     assert calculate_delay(0, config) == 1.0
 39 |     assert calculate_delay(1, config) == 2.0
 40 |     assert calculate_delay(2, config) == 4.0
 41 |     assert calculate_delay(3, config) == 5.0  # Capped at max_delay
 42 |     assert calculate_delay(10, config) == 5.0  # Still capped
 43 | 
 44 | 
 45 | def test_calculate_delay_with_jitter():
 46 |     """Test that jitter adds randomness to delay."""
 47 |     config = RetryConfig(initial_delay=10.0, jitter=True)
 48 | 
 49 |     # Generate multiple delays and check they're different
 50 |     delays = [calculate_delay(1, config) for _ in range(10)]
 51 | 
 52 |     # All should be around 20.0 (10 * 2^1) with jitter
 53 |     for delay in delays:
 54 |         assert 18.0 <= delay <= 22.0  # Within 10% jitter range
 55 | 
 56 |     # Should have some variation
 57 |     assert len(set(delays)) > 1
 58 | 
 59 | 
 60 | def test_is_retryable_exception():
 61 |     """Test exception retryability check."""
 62 |     config = RetryConfig(retryable_exceptions=(ConnectionError, TimeoutError))
 63 | 
 64 |     # Retryable exceptions
 65 |     assert is_retryable_exception(ConnectionError("test"), config)
 66 |     assert is_retryable_exception(TimeoutError("test"), config)
 67 | 
 68 |     # Non-retryable exceptions
 69 |     assert not is_retryable_exception(ValueError("test"), config)
 70 |     assert not is_retryable_exception(KeyError("test"), config)
 71 | 
 72 | 
 73 | def test_is_retryable_status():
 74 |     """Test HTTP status code retryability check."""
 75 |     config = RetryConfig(retryable_status_codes=(429, 502, 503, 504))
 76 | 
 77 |     # Retryable status codes
 78 |     assert is_retryable_status(429, config)
 79 |     assert is_retryable_status(502, config)
 80 |     assert is_retryable_status(503, config)
 81 |     assert is_retryable_status(504, config)
 82 | 
 83 |     # Non-retryable status codes
 84 |     assert not is_retryable_status(200, config)
 85 |     assert not is_retryable_status(404, config)
 86 |     assert not is_retryable_status(500, config)
 87 | 
 88 | 
 89 | @pytest.mark.asyncio
 90 | async def test_with_retry_decorator_success():
 91 |     """Test retry decorator with successful call."""
 92 |     call_count = 0
 93 | 
 94 |     @with_retry(RetryConfig(max_attempts=3))
 95 |     async def test_func():
 96 |         nonlocal call_count
 97 |         call_count += 1
 98 |         return "success"
 99 | 
100 |     result = await test_func()
101 |     assert result == "success"
102 |     assert call_count == 1  # Should succeed on first try
103 | 
104 | 
105 | @pytest.mark.asyncio
106 | async def test_with_retry_decorator_eventual_success():
107 |     """Test retry decorator with eventual success."""
108 |     call_count = 0
109 | 
110 |     @with_retry(
111 |         RetryConfig(
112 |             max_attempts=3,
113 |             initial_delay=0.01,  # Fast for testing
114 |             retryable_exceptions=(ValueError,),
115 |         )
116 |     )
117 |     async def test_func():
118 |         nonlocal call_count
119 |         call_count += 1
120 |         if call_count < 3:
121 |             raise ValueError("Transient error")
122 |         return "success"
123 | 
124 |     result = await test_func()
125 |     assert result == "success"
126 |     assert call_count == 3
127 | 
128 | 
129 | @pytest.mark.asyncio
130 | async def test_with_retry_decorator_max_attempts_exceeded():
131 |     """Test retry decorator when max attempts exceeded."""
132 |     call_count = 0
133 | 
134 |     @with_retry(
135 |         RetryConfig(
136 |             max_attempts=3,
137 |             initial_delay=0.01,
138 |             retryable_exceptions=(ConnectionError,),
139 |         )
140 |     )
141 |     async def test_func():
142 |         nonlocal call_count
143 |         call_count += 1
144 |         raise ConnectionError("Persistent error")
145 | 
146 |     with pytest.raises(ConnectionError, match="Persistent error"):
147 |         await test_func()
148 | 
149 |     assert call_count == 3
150 | 
151 | 
152 | @pytest.mark.asyncio
153 | async def test_with_retry_non_retryable_exception():
154 |     """Test retry decorator with non-retryable exception."""
155 |     call_count = 0
156 | 
157 |     @with_retry(
158 |         RetryConfig(max_attempts=3, retryable_exceptions=(ConnectionError,))
159 |     )
160 |     async def test_func():
161 |         nonlocal call_count
162 |         call_count += 1
163 |         raise ValueError("Non-retryable error")
164 | 
165 |     with pytest.raises(ValueError, match="Non-retryable error"):
166 |         await test_func()
167 | 
168 |     assert call_count == 1  # Should not retry
169 | 
170 | 
171 | @pytest.mark.asyncio
172 | async def test_retry_with_backoff_function():
173 |     """Test retry_with_backoff function."""
174 |     call_count = 0
175 | 
176 |     async def test_func(value):
177 |         nonlocal call_count
178 |         call_count += 1
179 |         if call_count < 2:
180 |             raise TimeoutError("Timeout")
181 |         return f"result: {value}"
182 | 
183 |     config = RetryConfig(
184 |         max_attempts=3,
185 |         initial_delay=0.01,
186 |         retryable_exceptions=(TimeoutError,),
187 |     )
188 | 
189 |     result = await retry_with_backoff(test_func, "test", config=config)
190 |     assert result == "result: test"
191 |     assert call_count == 2
192 | 
193 | 
194 | def test_retryable_http_error():
195 |     """Test RetryableHTTPError."""
196 |     error = RetryableHTTPError(503, "Service Unavailable")
197 |     assert error.status_code == 503
198 |     assert error.message == "Service Unavailable"
199 |     assert str(error) == "HTTP 503: Service Unavailable"
200 | 
201 | 
202 | @pytest.mark.asyncio
203 | async def test_retry_with_delay_progression():
204 |     """Test that retries happen with correct delay progression."""
205 |     call_times = []
206 | 
207 |     @with_retry(
208 |         RetryConfig(
209 |             max_attempts=3,
210 |             initial_delay=0.1,
211 |             exponential_base=2.0,
212 |             jitter=False,
213 |             retryable_exceptions=(ValueError,),
214 |         )
215 |     )
216 |     async def test_func():
217 |         call_times.append(asyncio.get_event_loop().time())
218 |         if len(call_times) < 3:
219 |             raise ValueError("Retry me")
220 |         return "success"
221 | 
222 |     asyncio.get_event_loop().time()
223 |     result = await test_func()
224 | 
225 |     assert result == "success"
226 |     assert len(call_times) == 3
227 | 
228 |     # Check delays between attempts (allowing some tolerance)
229 |     first_delay = call_times[1] - call_times[0]
230 |     second_delay = call_times[2] - call_times[1]
231 | 
232 |     assert 0.08 <= first_delay <= 0.12  # ~0.1s
233 |     assert 0.18 <= second_delay <= 0.22  # ~0.2s
234 | 
235 | 
236 | @pytest.mark.asyncio
237 | async def test_integration_with_http_client(monkeypatch):
238 |     """call_http retries connection errors and re-raises persistent timeouts."""
239 |     from biomcp.http_client import call_http
240 | 
241 |     # Pooling off; presumably this routes call_http through http_client_simple
242 |     monkeypatch.setenv("BIOMCP_USE_CONNECTION_POOL", "false")
243 | 
244 |     # Test 1: two connection errors, then success on the third attempt
245 |     with patch(
246 |         "biomcp.http_client_simple.httpx.AsyncClient"
247 |     ) as mock_client_class:
248 |         mock_client = AsyncMock()
249 |         mock_client_class.return_value = mock_client
250 |         mock_client.aclose = AsyncMock()  # Explicit awaitable aclose()
251 | 
252 |         # Fail twice with ConnectError, then return a 200 response
253 |         call_count = 0
254 | 
255 |         async def mock_get(*args, **kwargs):
256 |             nonlocal call_count
257 |             call_count += 1
258 |             if call_count < 3:
259 |                 raise httpx.ConnectError("Connection failed")
260 |             # Third call: hand back a minimal successful response
261 |             mock_response = MagicMock()
262 |             mock_response.status_code = 200
263 |             mock_response.text = '{"result": "success"}'
264 |             return mock_response
265 | 
266 |         mock_client.get = mock_get
267 | 
268 |         config = RetryConfig(
269 |             max_attempts=3,
270 |             initial_delay=0.01,
271 |         )
272 | 
273 |         status, content = await call_http(
274 |             "GET", "https://api.example.com/test", {}, retry_config=config
275 |         )
276 | 
277 |         assert status == 200
278 |         assert content == '{"result": "success"}'
279 |         assert call_count == 3
280 | 
281 |     # Test 2: every attempt times out, so the retry budget is exhausted
282 |     with patch(
283 |         "biomcp.http_client_simple.httpx.AsyncClient"
284 |     ) as mock_client_class:
285 |         mock_client = AsyncMock()
286 |         mock_client_class.return_value = mock_client
287 |         mock_client.aclose = AsyncMock()  # Explicit awaitable aclose()
288 | 
289 |         # Every GET raises httpx.TimeoutException
290 |         mock_client.get.side_effect = httpx.TimeoutException(
291 |             "Request timed out"
292 |         )
293 | 
294 |         config = RetryConfig(
295 |             max_attempts=2,
296 |             initial_delay=0.01,
297 |         )
298 | 
299 |         # After both attempts fail, the built-in TimeoutError must surface
300 |         with pytest.raises(TimeoutError):
301 |             await call_http(
302 |                 "GET", "https://api.example.com/test", {}, retry_config=config
303 |             )
304 | 
305 |         assert mock_client.get.call_count == 2
306 | 
```

--------------------------------------------------------------------------------
/src/biomcp/circuit_breaker.py:
--------------------------------------------------------------------------------

```python
  1 | """Circuit breaker pattern implementation for fault tolerance."""
  2 | 
  3 | import asyncio
  4 | import enum
  5 | import logging
  6 | from collections.abc import Callable
  7 | from dataclasses import dataclass, field
  8 | from datetime import datetime
  9 | from typing import Any
 10 | 
 11 | logger = logging.getLogger(__name__)  # reports breaker state transitions
 12 | 
 13 | 
 14 | class CircuitState(enum.Enum):
 15 |     """States a circuit breaker can be in; values are lowercase state names."""
 16 | 
 17 |     CLOSED = "closed"  # Normal operation, requests pass through
 18 |     OPEN = "open"  # Circuit tripped, requests fail fast
 19 |     HALF_OPEN = "half_open"  # Probing whether the service has recovered
 20 | 
 21 | 
 22 | @dataclass
 23 | class CircuitBreakerConfig:
 24 |     """Tunable thresholds and exception filters for a ``CircuitBreaker``."""
 25 | 
 26 |     failure_threshold: int = 5
 27 |     """Number of failures before opening circuit"""
 28 | 
 29 |     recovery_timeout: float = 60.0
 30 |     """Seconds to wait before attempting recovery"""
 31 | 
 32 |     success_threshold: int = 2
 33 |     """Successes needed in half-open state to close circuit"""
 34 | 
 35 |     expected_exception: type[Exception] | tuple[type[Exception], ...] = (
 36 |         Exception
 37 |     )
 38 |     """Exception types that count as failures"""
 39 | 
 40 |     exclude_exceptions: tuple[type[Exception], ...] = ()
 41 |     """Exception types that don't count as failures"""
 42 | 
 43 | 
 44 | @dataclass
 45 | class CircuitBreakerState:
 46 |     """Mutable runtime state (counters, timestamps, lock) of one breaker."""
 47 | 
 48 |     state: CircuitState = CircuitState.CLOSED
 49 |     failure_count: int = 0  # counted failures since it was last zeroed
 50 |     success_count: int = 0  # successes observed while half-open
 51 |     last_failure_time: datetime | None = None  # None: no failure recorded
 52 |     last_state_change: datetime = field(default_factory=datetime.now)
 53 |     _lock: asyncio.Lock = field(default_factory=asyncio.Lock)  # held on writes
 54 | 
 55 | 
 56 | class CircuitBreakerError(Exception):
 57 |     """Raised when circuit breaker is open."""
 58 | 
 59 |     def __init__(
 60 |         self, message: str, last_failure_time: datetime | None = None
 61 |     ):
 62 |         super().__init__(message)
 63 |         self.last_failure_time = last_failure_time
 64 | 
 65 | 
 66 | class CircuitBreaker:
 67 |     """Circuit breaker implementation."""
 68 | 
 69 |     def __init__(
 70 |         self,
 71 |         name: str,
 72 |         config: CircuitBreakerConfig | None = None,
 73 |     ):
 74 |         """Initialize circuit breaker.
 75 | 
 76 |         Args:
 77 |             name: Circuit breaker name for logging
 78 |             config: Configuration (uses defaults if not provided)
 79 |         """
 80 |         self.name = name
 81 |         self.config = config or CircuitBreakerConfig()
 82 |         self._state = CircuitBreakerState()
 83 | 
 84 |     async def call(
 85 |         self,
 86 |         func: Callable[..., Any],
 87 |         *args: Any,
 88 |         **kwargs: Any,
 89 |     ) -> Any:
 90 |         """Execute function through circuit breaker.
 91 | 
 92 |         Args:
 93 |             func: Async function to execute
 94 |             *args: Positional arguments for func
 95 |             **kwargs: Keyword arguments for func
 96 | 
 97 |         Returns:
 98 |             Result of function call
 99 | 
100 |         Raises:
101 |             CircuitBreakerError: If circuit is open
102 |             Exception: If function raises exception
103 |         """
104 |         async with self._state._lock:
105 |             # Check if we should transition from open to half-open
106 |             if self._state.state == CircuitState.OPEN:
107 |                 if self._should_attempt_reset():
108 |                     self._state.state = CircuitState.HALF_OPEN
109 |                     self._state.success_count = 0
110 |                     self._state.last_state_change = datetime.now()
111 |                     logger.info(
112 |                         f"Circuit breaker '{self.name}' entering half-open state"
113 |                     )
114 |                 else:
115 |                     raise CircuitBreakerError(
116 |                         f"Circuit breaker '{self.name}' is open",
117 |                         self._state.last_failure_time,
118 |                     )
119 | 
120 |         # Execute the function
121 |         try:
122 |             result = await func(*args, **kwargs)
123 |             await self._on_success()
124 |             return result
125 |         except Exception as exc:
126 |             if await self._on_failure(exc):
127 |                 raise
128 |             # If exception doesn't count as failure, re-raise it
129 |             raise
130 | 
131 |     async def _on_success(self) -> None:
132 |         """Handle successful call."""
133 |         async with self._state._lock:
134 |             if self._state.state == CircuitState.HALF_OPEN:
135 |                 self._state.success_count += 1
136 |                 if self._state.success_count >= self.config.success_threshold:
137 |                     self._state.state = CircuitState.CLOSED
138 |                     self._state.failure_count = 0
139 |                     self._state.success_count = 0
140 |                     self._state.last_state_change = datetime.now()
141 |                     logger.info(
142 |                         f"Circuit breaker '{self.name}' closed after recovery"
143 |                     )
144 |             elif self._state.state == CircuitState.CLOSED:
145 |                 # Reset failure count on success
146 |                 self._state.failure_count = 0
147 | 
148 |     async def _on_failure(self, exc: Exception) -> bool:
149 |         """Handle failed call.
150 | 
151 |         Args:
152 |             exc: The exception that was raised
153 | 
154 |         Returns:
155 |             True if exception counts as failure
156 |         """
157 |         # Check if exception should be counted
158 |         if not self._is_counted_exception(exc):
159 |             return False
160 | 
161 |         async with self._state._lock:
162 |             self._state.failure_count += 1
163 |             self._state.last_failure_time = datetime.now()
164 | 
165 |             if self._state.state == CircuitState.HALF_OPEN:
166 |                 # Single failure in half-open state reopens circuit
167 |                 self._state.state = CircuitState.OPEN
168 |                 self._state.last_state_change = datetime.now()
169 |                 logger.warning(
170 |                     f"Circuit breaker '{self.name}' reopened due to failure in half-open state"
171 |                 )
172 |             elif (
173 |                 self._state.state == CircuitState.CLOSED
174 |                 and self._state.failure_count >= self.config.failure_threshold
175 |             ):
176 |                 # Threshold exceeded, open circuit
177 |                 self._state.state = CircuitState.OPEN
178 |                 self._state.last_state_change = datetime.now()
179 |                 logger.error(
180 |                     f"Circuit breaker '{self.name}' opened after {self._state.failure_count} failures"
181 |                 )
182 | 
183 |         return True
184 | 
185 |     def _should_attempt_reset(self) -> bool:
186 |         """Check if enough time has passed to attempt reset."""
187 |         if self._state.last_failure_time is None:
188 |             return True
189 | 
190 |         time_since_failure = datetime.now() - self._state.last_failure_time
191 |         return (
192 |             time_since_failure.total_seconds() >= self.config.recovery_timeout
193 |         )
194 | 
195 |     def _is_counted_exception(self, exc: Exception) -> bool:
196 |         """Check if exception should count as failure."""
197 |         # Check excluded exceptions first
198 |         if isinstance(exc, self.config.exclude_exceptions):
199 |             return False
200 | 
201 |         # Check expected exceptions
202 |         return isinstance(exc, self.config.expected_exception)
203 | 
204 |     @property
205 |     def state(self) -> CircuitState:
206 |         """Get current circuit state."""
207 |         return self._state.state
208 | 
209 |     @property
210 |     def is_open(self) -> bool:
211 |         """Check if circuit is open."""
212 |         return self._state.state == CircuitState.OPEN
213 | 
214 |     @property
215 |     def is_closed(self) -> bool:
216 |         """Check if circuit is closed."""
217 |         return self._state.state == CircuitState.CLOSED
218 | 
219 |     async def reset(self) -> None:
220 |         """Manually reset circuit to closed state."""
221 |         async with self._state._lock:
222 |             self._state.state = CircuitState.CLOSED
223 |             self._state.failure_count = 0
224 |             self._state.success_count = 0
225 |             self._state.last_failure_time = None
226 |             self._state.last_state_change = datetime.now()
227 |             logger.info(f"Circuit breaker '{self.name}' manually reset")
228 | 
229 | 
230 | # Global registry of circuit breakers
231 | _circuit_breakers: dict[str, CircuitBreaker] = {}
232 | 
233 | 
234 | def get_circuit_breaker(
235 |     name: str,
236 |     config: CircuitBreakerConfig | None = None,
237 | ) -> CircuitBreaker:
238 |     """Get or create a circuit breaker.
239 | 
240 |     Args:
241 |         name: Circuit breaker name
242 |         config: Configuration (used only on creation)
243 | 
244 |     Returns:
245 |         Circuit breaker instance
246 |     """
247 |     if name not in _circuit_breakers:
248 |         _circuit_breakers[name] = CircuitBreaker(name, config)
249 |     return _circuit_breakers[name]
250 | 
251 | 
252 | def circuit_breaker(
253 |     name: str | None = None,
254 |     config: CircuitBreakerConfig | None = None,
255 | ):
256 |     """Decorator to apply circuit breaker to function.
257 | 
258 |     Args:
259 |         name: Circuit breaker name (defaults to function name)
260 |         config: Circuit breaker configuration
261 | 
262 |     Returns:
263 |         Decorated function
264 |     """
265 | 
266 |     def decorator(func):
267 |         breaker_name = name or f"{func.__module__}.{func.__name__}"
268 |         breaker = get_circuit_breaker(breaker_name, config)
269 | 
270 |         async def wrapper(*args, **kwargs):
271 |             return await breaker.call(func, *args, **kwargs)
272 | 
273 |         # Preserve function metadata
274 |         wrapper.__name__ = func.__name__
275 |         wrapper.__doc__ = func.__doc__
276 |         wrapper._circuit_breaker = breaker  # Expose breaker for testing
277 | 
278 |         return wrapper
279 | 
280 |     return decorator
281 | 
```