This is page 6 of 19. Use http://codebase.md/genomoncology/biomcp?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .github │ ├── actions │ │ └── setup-python-env │ │ └── action.yml │ ├── dependabot.yml │ └── workflows │ ├── ci.yml │ ├── deploy-docs.yml │ ├── main.yml.disabled │ ├── on-release-main.yml │ └── validate-codecov-config.yml ├── .gitignore ├── .pre-commit-config.yaml ├── BIOMCP_DATA_FLOW.md ├── CHANGELOG.md ├── CNAME ├── codecov.yaml ├── docker-compose.yml ├── Dockerfile ├── docs │ ├── apis │ │ ├── error-codes.md │ │ ├── overview.md │ │ └── python-sdk.md │ ├── assets │ │ ├── biomcp-cursor-locations.png │ │ ├── favicon.ico │ │ ├── icon.png │ │ ├── logo.png │ │ ├── mcp_architecture.txt │ │ └── remote-connection │ │ ├── 00_connectors.png │ │ ├── 01_add_custom_connector.png │ │ ├── 02_connector_enabled.png │ │ ├── 03_connect_to_biomcp.png │ │ ├── 04_select_google_oauth.png │ │ └── 05_success_connect.png │ ├── backend-services-reference │ │ ├── 01-overview.md │ │ ├── 02-biothings-suite.md │ │ ├── 03-cbioportal.md │ │ ├── 04-clinicaltrials-gov.md │ │ ├── 05-nci-cts-api.md │ │ ├── 06-pubtator3.md │ │ └── 07-alphagenome.md │ ├── blog │ │ ├── ai-assisted-clinical-trial-search-analysis.md │ │ ├── images │ │ │ ├── deep-researcher-video.png │ │ │ ├── researcher-announce.png │ │ │ ├── researcher-drop-down.png │ │ │ ├── researcher-prompt.png │ │ │ ├── trial-search-assistant.png │ │ │ └── what_is_biomcp_thumbnail.png │ │ └── researcher-persona-resource.md │ ├── changelog.md │ ├── CNAME │ ├── concepts │ │ ├── 01-what-is-biomcp.md │ │ ├── 02-the-deep-researcher-persona.md │ │ └── 03-sequential-thinking-with-the-think-tool.md │ ├── developer-guides │ │ ├── 01-server-deployment.md │ │ ├── 02-contributing-and-testing.md │ │ ├── 03-third-party-endpoints.md │ │ ├── 04-transport-protocol.md │ │ ├── 05-error-handling.md │ │ ├── 06-http-client-and-caching.md │ │ ├── 07-performance-optimizations.md │ │ └── generate_endpoints.py │ ├── faq-condensed.md │ 
├── FDA_SECURITY.md │ ├── genomoncology.md │ ├── getting-started │ │ ├── 01-quickstart-cli.md │ │ ├── 02-claude-desktop-integration.md │ │ └── 03-authentication-and-api-keys.md │ ├── how-to-guides │ │ ├── 01-find-articles-and-cbioportal-data.md │ │ ├── 02-find-trials-with-nci-and-biothings.md │ │ ├── 03-get-comprehensive-variant-annotations.md │ │ ├── 04-predict-variant-effects-with-alphagenome.md │ │ ├── 05-logging-and-monitoring-with-bigquery.md │ │ └── 06-search-nci-organizations-and-interventions.md │ ├── index.md │ ├── policies.md │ ├── reference │ │ ├── architecture-diagrams.md │ │ ├── quick-architecture.md │ │ ├── quick-reference.md │ │ └── visual-architecture.md │ ├── robots.txt │ ├── stylesheets │ │ ├── announcement.css │ │ └── extra.css │ ├── troubleshooting.md │ ├── tutorials │ │ ├── biothings-prompts.md │ │ ├── claude-code-biomcp-alphagenome.md │ │ ├── nci-prompts.md │ │ ├── openfda-integration.md │ │ ├── openfda-prompts.md │ │ ├── pydantic-ai-integration.md │ │ └── remote-connection.md │ ├── user-guides │ │ ├── 01-command-line-interface.md │ │ ├── 02-mcp-tools-reference.md │ │ └── 03-integrating-with-ides-and-clients.md │ └── workflows │ └── all-workflows.md ├── example_scripts │ ├── mcp_integration.py │ └── python_sdk.py ├── glama.json ├── LICENSE ├── lzyank.toml ├── Makefile ├── mkdocs.yml ├── package-lock.json ├── package.json ├── pyproject.toml ├── README.md ├── scripts │ ├── check_docs_in_mkdocs.py │ ├── check_http_imports.py │ └── generate_endpoints_doc.py ├── smithery.yaml ├── src │ └── biomcp │ ├── __init__.py │ ├── __main__.py │ ├── articles │ │ ├── __init__.py │ │ ├── autocomplete.py │ │ ├── fetch.py │ │ ├── preprints.py │ │ ├── search_optimized.py │ │ ├── search.py │ │ └── unified.py │ ├── biomarkers │ │ ├── __init__.py │ │ └── search.py │ ├── cbioportal_helper.py │ ├── circuit_breaker.py │ ├── cli │ │ ├── __init__.py │ │ ├── articles.py │ │ ├── biomarkers.py │ │ ├── diseases.py │ │ ├── health.py │ │ ├── interventions.py │ │ ├── main.py │ │ 
├── openfda.py │ │ ├── organizations.py │ │ ├── server.py │ │ ├── trials.py │ │ └── variants.py │ ├── connection_pool.py │ ├── constants.py │ ├── core.py │ ├── diseases │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── domain_handlers.py │ ├── drugs │ │ ├── __init__.py │ │ └── getter.py │ ├── exceptions.py │ ├── genes │ │ ├── __init__.py │ │ └── getter.py │ ├── http_client_simple.py │ ├── http_client.py │ ├── individual_tools.py │ ├── integrations │ │ ├── __init__.py │ │ ├── biothings_client.py │ │ └── cts_api.py │ ├── interventions │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── logging_filter.py │ ├── metrics_handler.py │ ├── metrics.py │ ├── openfda │ │ ├── __init__.py │ │ ├── adverse_events_helpers.py │ │ ├── adverse_events.py │ │ ├── cache.py │ │ ├── constants.py │ │ ├── device_events_helpers.py │ │ ├── device_events.py │ │ ├── drug_approvals.py │ │ ├── drug_labels_helpers.py │ │ ├── drug_labels.py │ │ ├── drug_recalls_helpers.py │ │ ├── drug_recalls.py │ │ ├── drug_shortages_detail_helpers.py │ │ ├── drug_shortages_helpers.py │ │ ├── drug_shortages.py │ │ ├── exceptions.py │ │ ├── input_validation.py │ │ ├── rate_limiter.py │ │ ├── utils.py │ │ └── validation.py │ ├── organizations │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── parameter_parser.py │ ├── prefetch.py │ ├── query_parser.py │ ├── query_router.py │ ├── rate_limiter.py │ ├── render.py │ ├── request_batcher.py │ ├── resources │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── instructions.md │ │ └── researcher.md │ ├── retry.py │ ├── router_handlers.py │ ├── router.py │ ├── shared_context.py │ ├── thinking │ │ ├── __init__.py │ │ ├── sequential.py │ │ └── session.py │ ├── thinking_tool.py │ ├── thinking_tracker.py │ ├── trials │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── nci_getter.py │ │ ├── nci_search.py │ │ └── search.py │ ├── utils │ │ ├── __init__.py │ │ ├── cancer_types_api.py │ │ ├── cbio_http_adapter.py │ │ ├── endpoint_registry.py │ │ ├── 
gene_validator.py │ │ ├── metrics.py │ │ ├── mutation_filter.py │ │ ├── query_utils.py │ │ ├── rate_limiter.py │ │ └── request_cache.py │ ├── variants │ │ ├── __init__.py │ │ ├── alphagenome.py │ │ ├── cancer_types.py │ │ ├── cbio_external_client.py │ │ ├── cbioportal_mutations.py │ │ ├── cbioportal_search_helpers.py │ │ ├── cbioportal_search.py │ │ ├── constants.py │ │ ├── external.py │ │ ├── filters.py │ │ ├── getter.py │ │ ├── links.py │ │ └── search.py │ └── workers │ ├── __init__.py │ ├── worker_entry_stytch.js │ ├── worker_entry.js │ └── worker.py ├── tests │ ├── bdd │ │ ├── cli_help │ │ │ ├── help.feature │ │ │ └── test_help.py │ │ ├── conftest.py │ │ ├── features │ │ │ └── alphagenome_integration.feature │ │ ├── fetch_articles │ │ │ ├── fetch.feature │ │ │ └── test_fetch.py │ │ ├── get_trials │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── get_variants │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── search_articles │ │ │ ├── autocomplete.feature │ │ │ ├── search.feature │ │ │ ├── test_autocomplete.py │ │ │ └── test_search.py │ │ ├── search_trials │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ ├── search_variants │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ └── steps │ │ └── test_alphagenome_steps.py │ ├── config │ │ └── test_smithery_config.py │ ├── conftest.py │ ├── data │ │ ├── ct_gov │ │ │ ├── clinical_trials_api_v2.yaml │ │ │ ├── trials_NCT04280705.json │ │ │ └── trials_NCT04280705.txt │ │ ├── myvariant │ │ │ ├── myvariant_api.yaml │ │ │ ├── myvariant_field_descriptions.csv │ │ │ ├── variants_full_braf_v600e.json │ │ │ ├── variants_full_braf_v600e.txt │ │ │ └── variants_part_braf_v600_multiple.json │ │ ├── openfda │ │ │ ├── drugsfda_detail.json │ │ │ ├── drugsfda_search.json │ │ │ ├── enforcement_detail.json │ │ │ └── enforcement_search.json │ │ └── pubtator │ │ ├── pubtator_autocomplete.json │ │ └── pubtator3_paper.txt │ ├── integration │ │ ├── test_openfda_integration.py │ │ ├── test_preprints_integration.py │ │ ├── 
test_simple.py │ │ └── test_variants_integration.py │ ├── tdd │ │ ├── articles │ │ │ ├── test_autocomplete.py │ │ │ ├── test_cbioportal_integration.py │ │ │ ├── test_fetch.py │ │ │ ├── test_preprints.py │ │ │ ├── test_search.py │ │ │ └── test_unified.py │ │ ├── conftest.py │ │ ├── drugs │ │ │ ├── __init__.py │ │ │ └── test_drug_getter.py │ │ ├── openfda │ │ │ ├── __init__.py │ │ │ ├── test_adverse_events.py │ │ │ ├── test_device_events.py │ │ │ ├── test_drug_approvals.py │ │ │ ├── test_drug_labels.py │ │ │ ├── test_drug_recalls.py │ │ │ ├── test_drug_shortages.py │ │ │ └── test_security.py │ │ ├── test_biothings_integration_real.py │ │ ├── test_biothings_integration.py │ │ ├── test_circuit_breaker.py │ │ ├── test_concurrent_requests.py │ │ ├── test_connection_pool.py │ │ ├── test_domain_handlers.py │ │ ├── test_drug_approvals.py │ │ ├── test_drug_recalls.py │ │ ├── test_drug_shortages.py │ │ ├── test_endpoint_documentation.py │ │ ├── test_error_scenarios.py │ │ ├── test_europe_pmc_fetch.py │ │ ├── test_mcp_integration.py │ │ ├── test_mcp_tools.py │ │ ├── test_metrics.py │ │ ├── test_nci_integration.py │ │ ├── test_nci_mcp_tools.py │ │ ├── test_network_policies.py │ │ ├── test_offline_mode.py │ │ ├── test_openfda_unified.py │ │ ├── test_pten_r173_search.py │ │ ├── test_render.py │ │ ├── test_request_batcher.py.disabled │ │ ├── test_retry.py │ │ ├── test_router.py │ │ ├── test_shared_context.py.disabled │ │ ├── test_unified_biothings.py │ │ ├── thinking │ │ │ ├── __init__.py │ │ │ └── test_sequential.py │ │ ├── trials │ │ │ ├── test_backward_compatibility.py │ │ │ ├── test_getter.py │ │ │ └── test_search.py │ │ ├── utils │ │ │ ├── test_gene_validator.py │ │ │ ├── test_mutation_filter.py │ │ │ ├── test_rate_limiter.py │ │ │ └── test_request_cache.py │ │ ├── variants │ │ │ ├── constants.py │ │ │ ├── test_alphagenome_api_key.py │ │ │ ├── test_alphagenome_comprehensive.py │ │ │ ├── test_alphagenome.py │ │ │ ├── test_cbioportal_mutations.py │ │ │ ├── 
test_cbioportal_search.py │ │ │ ├── test_external_integration.py │ │ │ ├── test_external.py │ │ │ ├── test_extract_gene_aa_change.py │ │ │ ├── test_filters.py │ │ │ ├── test_getter.py │ │ │ ├── test_links.py │ │ │ └── test_search.py │ │ └── workers │ │ └── test_worker_sanitization.js │ └── test_pydantic_ai_integration.py ├── THIRD_PARTY_ENDPOINTS.md ├── tox.ini ├── uv.lock └── wrangler.toml ``` # Files -------------------------------------------------------------------------------- /src/biomcp/retry.py: -------------------------------------------------------------------------------- ```python 1 | """Retry logic with exponential backoff for handling transient failures.""" 2 | 3 | import asyncio 4 | import functools 5 | import logging 6 | import secrets 7 | from collections.abc import Callable, Coroutine 8 | from typing import Any, TypeVar 9 | 10 | from .constants import ( 11 | DEFAULT_EXPONENTIAL_BASE, 12 | DEFAULT_INITIAL_RETRY_DELAY, 13 | DEFAULT_MAX_RETRY_ATTEMPTS, 14 | DEFAULT_MAX_RETRY_DELAY, 15 | METRIC_JITTER_RANGE, 16 | ) 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | T = TypeVar("T") 21 | 22 | 23 | class RetryConfig: 24 | """Configuration for retry behavior.""" 25 | 26 | def __init__( 27 | self, 28 | max_attempts: int = DEFAULT_MAX_RETRY_ATTEMPTS, 29 | initial_delay: float = DEFAULT_INITIAL_RETRY_DELAY, 30 | max_delay: float = DEFAULT_MAX_RETRY_DELAY, 31 | exponential_base: float = DEFAULT_EXPONENTIAL_BASE, 32 | jitter: bool = True, 33 | retryable_exceptions: tuple[type[Exception], ...] = ( 34 | ConnectionError, 35 | TimeoutError, 36 | OSError, 37 | ), 38 | retryable_status_codes: tuple[int, ...] = (429, 502, 503, 504), 39 | ): 40 | """Initialize retry configuration. 
def calculate_delay(attempt: int, config: "RetryConfig") -> float:
    """Calculate delay for the next retry attempt.

    The delay grows as ``initial_delay * exponential_base ** attempt`` and is
    capped at ``max_delay``. When ``config.jitter`` is set, a random offset of
    up to ``delay * METRIC_JITTER_RANGE`` in either direction is added after
    the cap, so the jittered value can land slightly above ``max_delay``.

    Args:
        attempt: Current attempt number (0-based)
        config: Retry configuration

    Returns:
        Delay in seconds before the next retry (never negative)
    """
    try:
        # Exponential backoff: delay = initial_delay * (base ^ attempt)
        delay = config.initial_delay * (config.exponential_base**attempt)
    except OverflowError:
        # Float exponentiation overflows for large attempt numbers
        # (e.g. 2.0 ** 1024). The value would be capped anyway, so fall
        # back to the cap instead of crashing the retry loop.
        delay = config.max_delay

    # Cap at maximum delay
    delay = min(delay, config.max_delay)

    # Add jitter to prevent thundering herd
    if config.jitter:
        # METRIC_JITTER_RANGE is the jitter fraction from the constants
        # module (the original source notes it as 10%).
        jitter_range = delay * METRIC_JITTER_RANGE
        # Random float between -1 and 1 drawn via the secrets module,
        # then scaled to the jitter range.
        random_factor = (secrets.randbits(32) / (2**32 - 1)) * 2 - 1
        delay += random_factor * jitter_range

    # Ensure a non-negative float, matching the annotated return type
    return max(0.0, float(delay))
def is_retryable_status(status_code: int, config: RetryConfig) -> bool:
    """Check if an HTTP status code should trigger a retry.

    Args:
        status_code: HTTP status code
        config: Retry configuration

    Returns:
        True if the status code is listed in ``config.retryable_status_codes``
    """
    return status_code in config.retryable_status_codes


async def _run_with_retry(
    func: Callable[..., Coroutine[Any, Any, T]],
    args: tuple[Any, ...],
    kwargs: dict[str, Any],
    config: RetryConfig,
) -> T:
    """Await ``func(*args, **kwargs)``, retrying retryable failures.

    Shared implementation behind ``with_retry`` and ``retry_with_backoff``.

    Args:
        func: Async callable to execute
        args: Positional arguments forwarded to ``func``
        kwargs: Keyword arguments forwarded to ``func``
        config: Retry configuration

    Returns:
        Result of the first successful call

    Raises:
        The exception raised by ``func`` when it is not retryable or when
        the final attempt fails
    """
    # functools.partial and callable instances have no __name__; looking it
    # up unguarded inside the handler would replace the real error with an
    # AttributeError.
    func_name = getattr(func, "__name__", None) or repr(func)
    # Always make at least one call, even if max_attempts is configured < 1.
    max_attempts = max(1, config.max_attempts)
    attempt = 0

    while True:
        try:
            return await func(*args, **kwargs)
        except Exception as exc:
            # Check retryability first so a non-retryable failure on the
            # final attempt is not reported as retry exhaustion.
            if not is_retryable_exception(exc, config):
                logger.debug(
                    "Non-retryable exception in %s: %s", func_name, exc
                )
                raise

            if attempt >= max_attempts - 1:
                logger.error(
                    "Max retry attempts (%s) reached for %s: %s",
                    max_attempts,
                    func_name,
                    exc,
                )
                raise

            # Calculate delay for next attempt
            delay = calculate_delay(attempt, config)
            logger.warning(
                "Retry attempt %s/%s for %s after %.2fs delay. Error: %s",
                attempt + 1,
                max_attempts,
                func_name,
                delay,
                exc,
            )

        # Wait before retrying
        await asyncio.sleep(delay)
        attempt += 1


def with_retry(
    config: RetryConfig | None = None,
) -> Callable[
    [Callable[..., Coroutine[Any, Any, T]]],
    Callable[..., Coroutine[Any, Any, T]],
]:
    """Decorator to add retry logic to async functions.

    Args:
        config: Retry configuration (uses defaults if not provided)

    Returns:
        Decorated function with retry logic
    """
    # Resolved once, when the decorator is created.
    retry_config = config if config is not None else RetryConfig()

    def decorator(
        func: Callable[..., Coroutine[Any, Any, T]],
    ) -> Callable[..., Coroutine[Any, Any, T]]:
        @functools.wraps(func)
        async def wrapper(*args: Any, **kwargs: Any) -> T:
            return await _run_with_retry(func, args, kwargs, retry_config)

        return wrapper

    return decorator


class RetryableHTTPError(Exception):
    """Exception wrapper for HTTP errors that should be retried.

    Attributes:
        status_code: HTTP status code of the failed response
        message: Description of the failure
    """

    def __init__(self, status_code: int, message: str):
        self.status_code = status_code
        self.message = message
        super().__init__(f"HTTP {status_code}: {message}")


async def retry_with_backoff(
    func: Callable[..., Coroutine[Any, Any, T]],
    *args: Any,
    config: RetryConfig | None = None,
    **kwargs: Any,
) -> T:
    """Execute a function with retry logic and exponential backoff.

    This is an alternative to the decorator for cases where you need
    more control over retry behavior.

    Args:
        func: Async function to execute
        *args: Positional arguments for the function
        config: Retry configuration (uses defaults if not provided)
        **kwargs: Keyword arguments for the function

    Returns:
        Result of the function call

    Raises:
        The last exception if all retries fail
    """
    if config is None:
        config = RetryConfig()

    return await _run_with_retry(func, args, kwargs, config)
@pytest.mark.integration
class TestOpenFDAIntegration:
    """Integration tests for OpenFDA API endpoints.

    Every test calls the real OpenFDA-backed search helpers, so assertions
    are deliberately loose: they check the response is a formatted string
    and accept "no results"/error wording where the live API may vary.
    """

    @pytest.mark.asyncio
    async def test_adverse_events_real_api(self):
        """Test real adverse event API call."""
        result = await search_adverse_events(drug="aspirin", limit=5)

        # Should return formatted results
        assert isinstance(result, str)
        assert len(result) > 100  # Non-trivial response

        # Should contain disclaimer
        assert "FDA Data Notice" in result

        # Should have structure
        if "No adverse events found" not in result:
            assert (
                "Total Reports Found:" in result or "adverse" in result.lower()
            )

    @pytest.mark.asyncio
    async def test_drug_labels_real_api(self):
        """Test real drug label API call."""
        result = await search_drug_labels(name="ibuprofen", limit=5)

        # Should return formatted results
        assert isinstance(result, str)
        assert len(result) > 100

        # Should contain disclaimer
        assert "FDA Data Notice" in result

        # Should have label information
        if "No drug labels found" not in result:
            assert "Total Labels Found:" in result or "label" in result.lower()

    @pytest.mark.asyncio
    async def test_device_events_real_api(self):
        """Test real device event API call."""
        result = await search_device_events(device="insulin pump", limit=5)

        # Should return formatted results
        assert isinstance(result, str)
        assert len(result) > 100

        # Should contain disclaimer
        assert "FDA Data Notice" in result

        # Should have device information
        if "No device events found" not in result:
            assert (
                "Total Events Found:" in result or "device" in result.lower()
            )

    @pytest.mark.asyncio
    async def test_drug_approvals_real_api(self):
        """Test real drug approval API call."""
        result = await search_drug_approvals(drug="pembrolizumab", limit=5)

        # Should return formatted results
        assert isinstance(result, str)
        assert len(result) > 100

        # Should contain disclaimer
        assert "FDA Data Notice" in result

        # Pembrolizumab (Keytruda) should have results
        if "No drug approvals found" not in result:
            assert "KEYTRUDA" in result or "pembrolizumab" in result.lower()

    @pytest.mark.asyncio
    async def test_drug_recalls_real_api(self):
        """Test real drug recall API call."""
        # Use drug parameter which is more likely to return results
        result = await search_drug_recalls(drug="acetaminophen", limit=5)

        # Should return formatted results
        assert isinstance(result, str)
        assert len(result) > 100

        # Should contain disclaimer OR error message (API might return no results)
        assert "FDA Data Notice" in result or "Error" in result

        # Should have recall information if not an error
        if "Error" not in result and "No drug recalls found" not in result:
            assert "recall" in result.lower()

    @pytest.mark.asyncio
    async def test_rate_limiting_without_key(self, monkeypatch):
        """Test that rate limiting is handled gracefully without API key."""
        # monkeypatch removes the key for this test only and restores the
        # previous environment on teardown, even if an assertion fails.
        monkeypatch.delenv("OPENFDA_API_KEY", raising=False)

        # Make multiple rapid requests, one after another
        results = [
            await search_adverse_events(drug=f"drug{i}", limit=1)
            for i in range(5)
        ]

        # All should return strings (not crash)
        assert all(isinstance(r, str) for r in results)

    @pytest.mark.asyncio
    async def test_api_key_usage(self):
        """Test that API key is used when provided."""
        # This test only runs if API key is available
        if not os.environ.get("OPENFDA_API_KEY"):
            pytest.skip("OPENFDA_API_KEY not set")

        result = await search_adverse_events(drug="acetaminophen", limit=10)

        # With API key, should be able to get results
        assert isinstance(result, str)
        assert len(result) > 100

    @pytest.mark.asyncio
    async def test_error_handling_invalid_params(self):
        """Test graceful handling of invalid parameters."""
        # Search with invalid/nonsense parameters
        result = await search_adverse_events(
            drug="xyzabc123notarealdrugname999", limit=5
        )

        # Should handle gracefully
        assert isinstance(result, str)

        # Should either show no results or error message
        assert (
            "No adverse events found" in result
            or "Error" in result
            or "no results" in result.lower()
        )

    @pytest.mark.asyncio
    async def test_cross_domain_consistency(self):
        """Test that different FDA domains return consistent formats."""
        # Search for a common drug across domains
        drug_name = "aspirin"

        adverse_result = await search_adverse_events(drug=drug_name, limit=2)
        label_result = await search_drug_labels(name=drug_name, limit=2)

        # Check the types first: the substring checks below assume strings
        assert isinstance(adverse_result, str)
        assert isinstance(label_result, str)

        # Both should have disclaimers
        assert "FDA Data Notice" in adverse_result
        assert "FDA Data Notice" in label_result

        # Both should mention the drug or indicate no results
        assert (
            drug_name in adverse_result.lower()
            or "no " in adverse_result.lower()
        )
        assert (
            drug_name in label_result.lower() or "no " in label_result.lower()
        )

    @pytest.mark.asyncio
    async def test_special_characters_handling(self):
        """Test handling of special characters in queries."""
        # Test with special characters
        result = await search_drug_labels(name="aspirin/dipyridamole", limit=5)

        # API might return error or no results for complex drug names, so
        # only verify that the forward slash still yields a string response
        assert isinstance(result, str)

    @pytest.mark.asyncio
    async def test_large_result_handling(self):
        """Test handling of large result sets."""
        # Request maximum allowed results
        result = await search_adverse_events(
            drug="ibuprofen",  # Common drug with many reports
            limit=100,  # Maximum limit
        )

        # Should handle large results
        assert isinstance(result, str)
        assert len(result) > 500  # Should be substantial

        # Should still include disclaimer
        assert "FDA Data Notice" in result

    @pytest.mark.asyncio
    async def test_empty_query_handling(self):
        """Test handling of empty/missing query parameters."""
        # Search without specifying a drug
        result = await search_drug_recalls(
            limit=5  # Only limit, no other filters
        )

        # Should return recent recalls
        assert isinstance(result, str)
        assert len(result) > 100

        # Should have results (there are always some recalls)
        if "Error" not in result:
            assert "recall" in result.lower()
def test_metric_summary_from_samples():
    """Test MetricSummary calculation from samples."""
    now = datetime.now()
    samples = [
        MetricSample(timestamp=now, duration=0.1, success=True),
        MetricSample(timestamp=now, duration=0.2, success=True),
        MetricSample(
            timestamp=now, duration=0.3, success=False, error="timeout"
        ),
        MetricSample(timestamp=now, duration=0.4, success=True),
        MetricSample(timestamp=now, duration=0.5, success=True),
    ]

    summary = MetricSummary.from_samples("test_metric", samples)

    assert summary.name == "test_metric"
    assert summary.count == 5
    assert summary.success_count == 4
    assert summary.error_count == 1

    # min/max are values taken straight from the samples
    assert summary.min_duration == 0.1
    assert summary.max_duration == 0.5

    # Sums, means and ratios are computed from floats, so compare with a
    # tolerance instead of relying on an exact binary representation.
    assert summary.total_duration == pytest.approx(1.5)
    assert summary.avg_duration == pytest.approx(0.3)
    assert summary.error_rate == pytest.approx(0.2)  # 1/5

    # Check percentiles
    assert summary.p50_duration == pytest.approx(0.3)  # median
    assert 0.4 <= summary.p95_duration <= 0.5
    assert 0.4 <= summary.p99_duration <= 0.5
@pytest.mark.asyncio
async def test_track_performance_decorator_async():
    """Test track_performance decorator on async functions."""
    from biomcp.metrics import _metrics_collector

    await _metrics_collector.clear()

    @track_performance("test_async_func")
    async def slow_operation():
        await asyncio.sleep(0.1)
        return "done"

    result = await slow_operation()
    assert result == "done"

    # Check metric was recorded
    summary = await get_metric_summary("test_async_func")
    assert summary is not None
    assert summary.count == 1
    assert summary.success_count == 1
    # Allow a small tolerance: a 0.1s sleep may be measured as marginally
    # shorter because of timer granularity, which made the strict
    # ">= 0.1" comparison prone to flaking.
    assert summary.min_duration >= 0.09
track_performance decorator on sync functions.""" 194 | 195 | @track_performance("test_sync_func") 196 | def fast_operation(): 197 | time.sleep(0.05) 198 | return "done" 199 | 200 | # Need to run in an event loop context 201 | async def run_test(): 202 | from biomcp.metrics import _metrics_collector 203 | 204 | await _metrics_collector.clear() 205 | 206 | result = fast_operation() 207 | assert result == "done" 208 | 209 | # Give time for the metric to be recorded 210 | await asyncio.sleep(0.1) 211 | 212 | summary = await get_metric_summary("test_sync_func") 213 | assert summary is not None 214 | assert summary.count == 1 215 | assert summary.success_count == 1 216 | 217 | asyncio.run(run_test()) 218 | 219 | 220 | @pytest.mark.asyncio 221 | async def test_timer_context_manager(): 222 | """Test Timer context manager.""" 223 | from biomcp.metrics import _metrics_collector 224 | 225 | await _metrics_collector.clear() 226 | 227 | # Test async timer 228 | async with Timer("test_timer", tags={"operation": "test"}): 229 | await asyncio.sleep(0.1) 230 | 231 | summary = await get_metric_summary("test_timer") 232 | assert summary is not None 233 | assert summary.count == 1 234 | assert summary.success_count == 1 235 | assert summary.min_duration >= 0.1 236 | 237 | # Test sync timer (in async context) 238 | with Timer("test_sync_timer"): 239 | time.sleep(0.05) 240 | 241 | # Give time for metric to be recorded 242 | await asyncio.sleep(0.1) 243 | 244 | summary = await get_metric_summary("test_sync_timer") 245 | assert summary is not None 246 | assert summary.count == 1 247 | 248 | 249 | @pytest.mark.asyncio 250 | async def test_timer_with_exception(): 251 | """Test Timer context manager with exceptions.""" 252 | from biomcp.metrics import _metrics_collector 253 | 254 | await _metrics_collector.clear() 255 | 256 | # Test async timer with exception 257 | with pytest.raises(ValueError): 258 | async with Timer("test_timer_error"): 259 | await asyncio.sleep(0.05) 260 | raise 
"""
Rate limiting and circuit breaker for OpenFDA API requests.

This module provides client-side rate limiting to prevent API quota exhaustion
and circuit breaker pattern to handle API failures gracefully.
"""

import asyncio
import logging
import os
import time
from collections.abc import Awaitable, Callable
from datetime import datetime
from enum import Enum
from typing import Any

logger = logging.getLogger(__name__)


class CircuitState(Enum):
    """Circuit breaker states."""

    CLOSED = "closed"  # Normal operation
    OPEN = "open"  # Blocking requests
    HALF_OPEN = "half_open"  # Testing recovery


class CircuitOpenError(Exception):
    """Raised when the circuit breaker rejects a call without executing it.

    Subclasses ``Exception`` so callers that catch the generic exception
    raised by earlier versions keep working unchanged.
    """


class RateLimiter:
    """
    Token bucket rate limiter for FDA API requests.

    The bucket holds at most ``rate`` tokens and refills continuously at
    ``rate / per`` tokens per second. Each ``acquire()`` consumes one token,
    sleeping first when less than one token is available.
    """

    def __init__(self, rate: int = 10, per: float = 1.0):
        """
        Initialize rate limiter.

        Args:
            rate: Number of requests allowed
            per: Time period in seconds

        Raises:
            ValueError: If ``rate`` or ``per`` is not positive.
        """
        # Zero/negative values would otherwise surface later as a
        # ZeroDivisionError (or a negative sleep) inside acquire().
        if rate <= 0 or per <= 0:
            raise ValueError("rate and per must be positive")

        self.rate = rate
        self.per = per
        self.allowance = float(rate)  # start with a full bucket
        self.last_check = time.monotonic()
        self._lock = asyncio.Lock()

    async def acquire(self) -> None:
        """
        Acquire permission to make a request.
        Blocks if rate limit would be exceeded.
        """
        # The lock is held while sleeping, so waiting callers are released
        # one at a time in the order they obtain the lock.
        async with self._lock:
            now = time.monotonic()
            elapsed = now - self.last_check
            self.last_check = now

            # Refill for the elapsed time, capped at the bucket size.
            refilled = self.allowance + elapsed * (self.rate / self.per)
            self.allowance = min(float(self.rate), refilled)

            if self.allowance >= 1.0:
                # Consume one token
                self.allowance -= 1.0
                return

            # Not enough tokens: wait exactly until one full token exists.
            deficit = 1.0 - self.allowance
            wait_time = deficit * (self.per / self.rate)

            logger.debug("Rate limit: waiting %.2fs", wait_time)
            await asyncio.sleep(wait_time)

            # The token that accrued during the sleep is spent by this call.
            # Advance the reference time as well; otherwise the next caller
            # would be credited for the same interval a second time and the
            # effective rate would be roughly double the configured limit.
            self.allowance = 0.0
            self.last_check = time.monotonic()


class CircuitBreaker:
    """
    Circuit breaker to prevent cascading failures.

    CLOSED lets every call through and counts consecutive failures. After
    ``failure_threshold`` failures the breaker becomes OPEN and rejects calls
    until ``recovery_timeout`` seconds have passed since the last failure.
    It then becomes HALF_OPEN and admits up to ``half_open_max_calls`` probe
    calls: the first success closes the circuit, any failure reopens it.
    """

    def __init__(
        self,
        failure_threshold: int = 5,
        recovery_timeout: int = 60,
        half_open_max_calls: int = 3,
    ):
        """
        Initialize circuit breaker.

        Args:
            failure_threshold: Number of failures before opening circuit
            recovery_timeout: Seconds to wait before attempting recovery
            half_open_max_calls: Max calls allowed in half-open state
        """
        self.failure_threshold = failure_threshold
        self.recovery_timeout = recovery_timeout
        self.half_open_max_calls = half_open_max_calls

        self.failure_count = 0
        # Wall-clock timestamp (time.time()) because get_state() reports it
        # as an ISO date; None until the first failure.
        self.last_failure_time: float | None = None
        self.state = CircuitState.CLOSED
        self.half_open_calls = 0
        self._lock = asyncio.Lock()

    async def call(
        self, func: Callable[..., Awaitable[Any]], *args, **kwargs
    ) -> Any:
        """
        Execute function through circuit breaker.

        Args:
            func: Async function to call
            *args: Function arguments
            **kwargs: Function keyword arguments

        Returns:
            Function result

        Raises:
            CircuitOpenError: If the circuit is open, or half-open with all
                probe slots already used (``func`` is not called).
            Exception: Whatever ``func`` raises, re-raised after the failure
                has been recorded.
        """
        # Only the admission decision runs under the lock; func itself runs
        # outside it so calls can overlap and the outcome handlers can take
        # the (non-reentrant) lock again.
        async with self._lock:
            self._admit()

        try:
            result = await func(*args, **kwargs)
        except Exception:
            await self._on_failure()
            raise  # bare raise keeps the original traceback intact
        else:
            await self._on_success()
            return result

    def _admit(self) -> None:
        """Decide whether a call may proceed; must be called under the lock."""
        if self.state == CircuitState.OPEN:
            if not self._should_attempt_reset():
                if self.last_failure_time is None:
                    raise CircuitOpenError("Circuit breaker is OPEN")
                time_left = self.recovery_timeout - (
                    time.time() - self.last_failure_time
                )
                raise CircuitOpenError(
                    f"Circuit breaker is OPEN. Retry in {time_left:.0f} seconds"
                )

            self.state = CircuitState.HALF_OPEN
            # The call that triggers recovery is itself the first probe, so
            # it counts against half_open_max_calls (previously it did not,
            # which admitted one probe more than configured).
            self.half_open_calls = 1
            logger.info("Circuit breaker: attempting recovery (half-open)")

        elif self.state == CircuitState.HALF_OPEN:
            if self.half_open_calls >= self.half_open_max_calls:
                # Don't allow more calls in half-open state
                raise CircuitOpenError(
                    "Circuit breaker is HALF_OPEN. Max test calls reached"
                )
            self.half_open_calls += 1

    async def _on_success(self) -> None:
        """Handle successful call."""
        async with self._lock:
            if self.state == CircuitState.HALF_OPEN:
                # Recovery succeeded
                self.state = CircuitState.CLOSED
                logger.info("Circuit breaker: recovered (closed)")
            # Any success resets the failure streak.
            self.failure_count = 0

    async def _on_failure(self) -> None:
        """Handle failed call."""
        async with self._lock:
            self.failure_count += 1
            self.last_failure_time = time.time()

            if self.state == CircuitState.HALF_OPEN:
                # Recovery failed, reopen circuit
                self.state = CircuitState.OPEN
                logger.warning("Circuit breaker: recovery failed (open)")
            elif self.failure_count >= self.failure_threshold:
                # Too many failures, open circuit
                self.state = CircuitState.OPEN
                logger.warning(
                    "Circuit breaker: opened after %d failures",
                    self.failure_count,
                )

    def _should_attempt_reset(self) -> bool:
        """Check if enough time has passed to attempt reset."""
        return (
            self.last_failure_time is not None
            and time.time() - self.last_failure_time >= self.recovery_timeout
        )

    @property
    def is_closed(self) -> bool:
        """Check if circuit is closed (normal operation)."""
        return self.state == CircuitState.CLOSED

    @property
    def is_open(self) -> bool:
        """Check if circuit is open (blocking requests)."""
        return self.state == CircuitState.OPEN

    def get_state(self) -> dict[str, Any]:
        """Get current circuit breaker state.

        Returns:
            Dict with ``state`` (enum value string), ``failure_count`` and
            ``last_failure`` (ISO-8601 local time string, or None when no
            failure has been recorded).
        """
        return {
            "state": self.state.value,
            "failure_count": self.failure_count,
            "last_failure": (
                datetime.fromtimestamp(self.last_failure_time).isoformat()
                if self.last_failure_time is not None
                else None
            ),
        }


# Global instances
# Configure based on API key availability
_has_api_key = bool(os.environ.get("OPENFDA_API_KEY"))
_rate_limit = 240 if _has_api_key else 40  # per minute

# Create rate limiter (requests per 60-second window)
FDA_RATE_LIMITER = RateLimiter(rate=_rate_limit, per=60.0)

# Create circuit breaker
FDA_CIRCUIT_BREAKER = CircuitBreaker(
    failure_threshold=5, recovery_timeout=60, half_open_max_calls=3
)

# Semaphore for concurrent request limiting
FDA_SEMAPHORE = asyncio.Semaphore(10)  # Max 10 concurrent requests


async def rate_limited_request(
    func: Callable[..., Awaitable[Any]], *args, **kwargs
) -> Any:
    """
    Execute FDA API request with rate limiting and circuit breaker.

    Args:
        func: Async function to call
        *args: Function arguments
        **kwargs: Function keyword arguments

    Returns:
        Function result

    Raises:
        CircuitOpenError: If the shared circuit breaker rejects the call.
        Exception: Whatever ``func`` raises.
    """
    # Apply semaphore for concurrent limiting
    async with FDA_SEMAPHORE:
        # Apply rate limiting
        await FDA_RATE_LIMITER.acquire()

        # Apply circuit breaker
        return await FDA_CIRCUIT_BREAKER.call(func, *args, **kwargs)
from biomcp.organizations import get_organization, search_organizations 12 | from biomcp.trials.nci_getter import get_trial_nci 13 | from biomcp.trials.nci_search import convert_query_to_nci, search_trials_nci 14 | from biomcp.trials.search import TrialQuery 15 | 16 | 17 | class TestCTSAPIIntegration: 18 | """Test CTS API helper functions.""" 19 | 20 | @pytest.mark.asyncio 21 | async def test_make_cts_request_no_api_key(self): 22 | """Test that missing API key raises appropriate error.""" 23 | with ( 24 | patch.dict("os.environ", {}, clear=True), 25 | pytest.raises(CTSAPIError, match="NCI API key required"), 26 | ): 27 | await make_cts_request("https://example.com/api") 28 | 29 | @pytest.mark.asyncio 30 | async def test_make_cts_request_with_api_key(self): 31 | """Test successful request with API key.""" 32 | with patch("biomcp.integrations.cts_api.request_api") as mock_request: 33 | mock_request.return_value = ({"data": "test"}, None) 34 | 35 | result = await make_cts_request( 36 | "https://example.com/api", api_key="test-key" 37 | ) 38 | 39 | assert result == {"data": "test"} 40 | mock_request.assert_called_once() 41 | 42 | # Verify headers were included 43 | call_args = mock_request.call_args 44 | request_data = call_args.kwargs["request"] 45 | assert "_headers" in request_data 46 | 47 | 48 | class TestOrganizationsModule: 49 | """Test organizations module functions.""" 50 | 51 | @pytest.mark.asyncio 52 | async def test_search_organizations(self): 53 | """Test organization search.""" 54 | with patch( 55 | "biomcp.organizations.search.make_cts_request" 56 | ) as mock_request: 57 | mock_request.return_value = { 58 | "data": [{"id": "ORG001", "name": "Test Cancer Center"}], 59 | "total": 1, 60 | } 61 | 62 | result = await search_organizations( 63 | name="Cancer Center", api_key="test-key" 64 | ) 65 | 66 | assert result["total"] == 1 67 | assert len(result["organizations"]) == 1 68 | assert result["organizations"][0]["name"] == "Test Cancer Center" 69 | 70 | 
@pytest.mark.asyncio 71 | async def test_get_organization(self): 72 | """Test getting specific organization.""" 73 | with patch( 74 | "biomcp.organizations.getter.make_cts_request" 75 | ) as mock_request: 76 | mock_request.return_value = { 77 | "data": { 78 | "id": "ORG001", 79 | "name": "Test Cancer Center", 80 | "type": "Academic", 81 | } 82 | } 83 | 84 | result = await get_organization("ORG001", api_key="test-key") 85 | 86 | assert result["id"] == "ORG001" 87 | assert result["name"] == "Test Cancer Center" 88 | assert result["type"] == "Academic" 89 | 90 | 91 | class TestInterventionsModule: 92 | """Test interventions module functions.""" 93 | 94 | @pytest.mark.asyncio 95 | async def test_search_interventions(self): 96 | """Test intervention search.""" 97 | with patch( 98 | "biomcp.interventions.search.make_cts_request" 99 | ) as mock_request: 100 | mock_request.return_value = { 101 | "data": [ 102 | {"id": "INT001", "name": "Pembrolizumab", "type": "Drug"} 103 | ], 104 | "total": 1, 105 | } 106 | 107 | result = await search_interventions( 108 | name="Pembrolizumab", api_key="test-key" 109 | ) 110 | 111 | assert result["total"] == 1 112 | assert len(result["interventions"]) == 1 113 | assert result["interventions"][0]["name"] == "Pembrolizumab" 114 | 115 | 116 | class TestBiomarkersModule: 117 | """Test biomarkers module functions.""" 118 | 119 | @pytest.mark.asyncio 120 | async def test_search_biomarkers(self): 121 | """Test biomarker search.""" 122 | with patch( 123 | "biomcp.biomarkers.search.make_cts_request" 124 | ) as mock_request: 125 | mock_request.return_value = { 126 | "data": [{"id": "BIO001", "name": "PD-L1", "gene": "CD274"}], 127 | "total": 1, 128 | } 129 | 130 | result = await search_biomarkers(name="PD-L1", api_key="test-key") 131 | 132 | assert result["total"] == 1 133 | assert len(result["biomarkers"]) == 1 134 | assert result["biomarkers"][0]["name"] == "PD-L1" 135 | 136 | 137 | class TestDiseasesModule: 138 | """Test diseases module 
functions.""" 139 | 140 | @pytest.mark.asyncio 141 | async def test_search_diseases_nci(self): 142 | """Test disease search via NCI API.""" 143 | with patch("biomcp.diseases.search.make_cts_request") as mock_request: 144 | mock_request.return_value = { 145 | "data": [ 146 | { 147 | "id": "DIS001", 148 | "name": "Melanoma", 149 | "synonyms": ["Malignant Melanoma"], 150 | } 151 | ], 152 | "total": 1, 153 | } 154 | 155 | result = await search_diseases(name="Melanoma", api_key="test-key") 156 | 157 | assert result["total"] == 1 158 | assert len(result["diseases"]) == 1 159 | assert result["diseases"][0]["name"] == "Melanoma" 160 | 161 | 162 | class TestNCITrialIntegration: 163 | """Test NCI trial search and getter.""" 164 | 165 | @pytest.mark.asyncio 166 | async def test_convert_query_to_nci(self): 167 | """Test converting TrialQuery to NCI parameters.""" 168 | query = TrialQuery( 169 | conditions=["melanoma"], 170 | phase="PHASE2", 171 | recruiting_status="OPEN", 172 | allow_brain_mets=True, 173 | ) 174 | 175 | # Mock the disease/intervention lookups 176 | with ( 177 | patch("biomcp.trials.nci_search.search_diseases") as mock_diseases, 178 | patch( 179 | "biomcp.trials.nci_search.search_interventions" 180 | ) as mock_interventions, 181 | ): 182 | mock_diseases.return_value = {"diseases": []} 183 | mock_interventions.return_value = {"interventions": []} 184 | 185 | params = await convert_query_to_nci(query) 186 | 187 | assert params["diseases"] == ["melanoma"] 188 | assert params["phase"] == "II" 189 | assert params["recruitment_status"] == [ 190 | "recruiting", 191 | "enrolling_by_invitation", 192 | ] 193 | assert params["accepts_brain_mets"] is True 194 | 195 | @pytest.mark.asyncio 196 | async def test_search_trials_nci(self): 197 | """Test NCI trial search.""" 198 | query = TrialQuery(conditions=["melanoma"]) 199 | 200 | with ( 201 | patch( 202 | "biomcp.trials.nci_search.convert_query_to_nci" 203 | ) as mock_convert, 204 | 
"""BioMCP Command Line Interface for genetic variants."""

import asyncio
from typing import Annotated

import typer

from ..constants import DEFAULT_ASSEMBLY, SYSTEM_PAGE_SIZE
from ..variants import getter, search

variant_app = typer.Typer(help="Search and get variants from MyVariant.info.")

# Genome assemblies accepted by `variant get --assembly`.
_SUPPORTED_ASSEMBLIES = ("hg19", "hg38")


@variant_app.command("get")
def get_variant(
    variant_id: Annotated[
        str,
        typer.Argument(
            help="rsID (rs456) or MyVariant ID (chr1:g.1234A>G)",
        ),
    ],
    output_json: Annotated[
        bool,
        typer.Option(
            "--json",
            "-j",
            help="Render in JSON format",
            case_sensitive=False,
        ),
    ] = False,
    include_external: Annotated[
        bool,
        typer.Option(
            "--include-external/--no-external",
            help="Include annotations from external sources (TCGA, 1000 Genomes, cBioPortal)",
        ),
    ] = True,
    assembly: Annotated[
        str,
        typer.Option(
            "--assembly",
            help="Genome assembly (hg19 or hg38)",
            case_sensitive=False,
        ),
    ] = DEFAULT_ASSEMBLY,
):
    """
    Get detailed information about a specific genetic variant.

    Supports HGVS identifiers (e.g., 'chr7:g.140453136A>T') or dbSNP rsIDs.

    Examples:
        Get by HGVS: biomcp variant get "chr7:g.140453136A>T"
        Get by rsID: biomcp variant get rs113488022
        Get as JSON: biomcp variant get rs113488022 --json
        Get without external annotations: biomcp variant get rs113488022 --no-external
        Get with hg38 assembly: biomcp variant get rs113488022 --assembly hg38
    """
    if not variant_id:
        typer.echo("Error: A variant identifier must be provided.", err=True)
        raise typer.Exit(code=1)

    # The option is declared case-insensitive, but for a plain `str` option
    # that flag does not normalise the value, so "HG38" used to be rejected
    # by the membership test below. Normalise it here instead.
    normalized_assembly = assembly.strip().lower()
    if normalized_assembly not in _SUPPORTED_ASSEMBLIES:
        typer.echo(
            f"Error: Invalid assembly '{assembly}'. Must be 'hg19' or 'hg38'.",
            err=True,
        )
        raise typer.Exit(code=1)

    result = asyncio.run(
        getter.get_variant(
            variant_id,
            output_json=output_json,
            include_external=include_external,
            assembly=normalized_assembly,
        )
    )
    typer.echo(result)


@variant_app.command("search")
def search_variant_cmd(
    gene: Annotated[
        str | None,
        typer.Option(
            "--gene",
            help="Gene symbol (e.g., BRCA1)",
        ),
    ] = None,
    hgvsp: Annotated[
        str | None,
        typer.Option(
            "--hgvsp",
            help="Protein notation (e.g., p.Val600Glu).",
        ),
    ] = None,
    hgvsc: Annotated[
        str | None,
        typer.Option(
            "--hgvsc",
            help="cDNA notation (e.g., c.1799T>A).",
        ),
    ] = None,
    rsid: Annotated[
        str | None,
        typer.Option(
            "--rsid",
            help="dbSNP rsID (e.g., rs113488022)",
        ),
    ] = None,
    region: Annotated[
        str | None,
        typer.Option(
            "--region",
            help="Genomic region (e.g., chr1:69000-70000)",
        ),
    ] = None,
    significance: Annotated[
        search.ClinicalSignificance | None,
        typer.Option(
            "--significance",
            help="Clinical significance (e.g., pathogenic, likely benign)",
            case_sensitive=False,
        ),
    ] = None,
    min_frequency: Annotated[
        float | None,
        typer.Option(
            "--min-frequency",
            help="Minimum gnomAD exome allele frequency (0.0 to 1.0)",
            min=0.0,
            max=1.0,
        ),
    ] = None,
    max_frequency: Annotated[
        float | None,
        typer.Option(
            "--max-frequency",
            help="Maximum gnomAD exome allele frequency (0.0 to 1.0)",
            min=0.0,
            max=1.0,
        ),
    ] = None,
    cadd: Annotated[
        float | None,
        typer.Option(
            "--cadd",
            help="Minimum CADD phred score",
            min=0.0,
        ),
    ] = None,
    polyphen: Annotated[
        search.PolyPhenPrediction | None,
        typer.Option(
            "--polyphen",
            # The two literals are concatenated; the space after the comma
            # was missing, which rendered as "D,Possibly damaging".
            help="PolyPhen-2 prediction: Probably damaging = D, "
            "Possibly damaging = P, Benign = B",
            case_sensitive=False,
        ),
    ] = None,
    sift: Annotated[
        search.SiftPrediction | None,
        typer.Option(
            "--sift",
            help="SIFT prediction: D = Deleterious, T = Tolerated",
            case_sensitive=False,
        ),
    ] = None,
    size: Annotated[
        int,
        typer.Option(
            "--size",
            help="Maximum number of results to return",
            min=1,
            max=100,
        ),
    ] = SYSTEM_PAGE_SIZE,
    sources: Annotated[
        str | None,
        typer.Option(
            "--sources",
            help="Specific sources to include in results (comma-separated)",
        ),
    ] = None,
    output_json: Annotated[
        bool,
        typer.Option(
            "--json",
            "-j",
            help="Render in JSON format",
            case_sensitive=False,
        ),
    ] = False,
):
    """
    Search genetic variants using the filters given as options.

    Examples:
        Search by gene: biomcp variant search --gene BRAF
        Search by rsID as JSON: biomcp variant search --rsid rs113488022 --json
    """
    # Tolerate "a, b" and stray commas: strip each entry and drop empties so
    # the query never receives names with surrounding whitespace.
    source_list = (
        [name.strip() for name in sources.split(",") if name.strip()]
        if sources
        else []
    )

    query = search.VariantQuery(
        gene=gene,
        hgvsp=hgvsp,
        hgvsc=hgvsc,
        rsid=rsid,
        region=region,
        significance=significance,
        min_frequency=min_frequency,
        max_frequency=max_frequency,
        cadd=cadd,
        polyphen=polyphen,
        sift=sift,
        size=size,
        sources=source_list,
    )

    result = asyncio.run(search.search_variants(query, output_json))
    typer.echo(result)


@variant_app.command("predict")
def predict_variant_effects(
    chromosome: Annotated[
        str,
        typer.Argument(help="Chromosome (e.g., chr7, chrX)"),
    ],
    position: Annotated[
        int,
        typer.Argument(help="1-based genomic position"),
    ],
    reference: Annotated[
        str,
        typer.Argument(help="Reference allele(s) (e.g., A, ATG)"),
    ],
    alternate: Annotated[
        str,
        typer.Argument(help="Alternate allele(s) (e.g., T, A)"),
    ],
    interval_size: Annotated[
        int,
        typer.Option(
            "--interval",
            "-i",
            help="Analysis interval size in bp (max 1000000)",
            min=2000,
            max=1000000,
        ),
    ] = 131072,
    tissue: Annotated[
        list[str] | None,
        typer.Option(
            "--tissue",
            "-t",
            help="UBERON ontology terms for tissue-specific predictions",
        ),
    ] = None,
    threshold: Annotated[
        float,
        typer.Option(
            "--threshold",
            help="Significance threshold for log2 fold changes",
            min=0.0,
            max=5.0,
        ),
    ] = 0.5,
    api_key: Annotated[
        str | None,
        typer.Option(
            "--api-key",
            help="AlphaGenome API key (overrides ALPHAGENOME_API_KEY env var)",
            envvar="ALPHAGENOME_API_KEY",
        ),
    ] = None,
):
    """
    Predict variant effects using Google DeepMind's AlphaGenome:\n
    - Gene expression changes\n
    - Chromatin accessibility\n
    - Splicing alterations\n
    - Promoter activity\n
    \n
    Requires AlphaGenome API key via --api-key or ALPHAGENOME_API_KEY env var.\n
    \n
    Examples:
    \n\t# Predict BRAF V600E mutation
    \n\tbiomcp variant predict chr7 140753336 A T
    \n
    \n\t# With API key specified
    \n\tbiomcp variant predict chr7 140753336 A T --api-key YOUR_KEY
    \n
    \n\t# With tissue-specific predictions
    \n\tbiomcp variant predict chr7 140753336 A T --tissue UBERON:0002367
    \n
    \n\t# With larger analysis interval
    \n\tbiomcp variant predict chr7 140753336 A T --interval 500000
    """
    # Imported lazily inside the command, as in the original; aliased so the
    # local name no longer shadows this CLI function.
    from ..variants.alphagenome import (
        predict_variant_effects as _predict_variant_effects,
    )

    result = asyncio.run(
        _predict_variant_effects(
            chromosome=chromosome,
            position=position,
            reference=reference,
            alternate=alternate,
            interval_size=interval_size,
            tissue_types=tissue,
            significance_threshold=threshold,
            api_key=api_key,
        )
    )
    typer.echo(result)
-------------------------------------------------------------------------------- /tests/integration/test_variants_integration.py: -------------------------------------------------------------------------------- ```python 1 | """Integration tests for external variant data sources.""" 2 | 3 | import asyncio 4 | 5 | import pytest 6 | 7 | from biomcp.variants.external import ( 8 | ExternalVariantAggregator, 9 | TCGAClient, 10 | ThousandGenomesClient, 11 | ) 12 | from biomcp.variants.getter import get_variant 13 | 14 | 15 | class TestTCGAIntegration: 16 | """Integration tests for TCGA/GDC API.""" 17 | 18 | @pytest.mark.asyncio 19 | async def test_tcga_real_variant(self): 20 | """Test real TCGA API with known variant.""" 21 | client = TCGAClient() 22 | 23 | # Try with BRAF V600E - a well-known cancer mutation 24 | # TCGA can search by gene AA change format 25 | result = await client.get_variant_data("BRAF V600E") 26 | 27 | print(f"TCGA result: {result}") 28 | 29 | if result: 30 | print(f"COSMIC ID: {result.cosmic_id}") 31 | print(f"Tumor types: {result.tumor_types}") 32 | print(f"Affected cases: {result.affected_cases}") 33 | print(f"Consequence: {result.consequence_type}") 34 | else: 35 | print("No TCGA data found for this variant") 36 | 37 | 38 | class TestThousandGenomesIntegration: 39 | """Integration tests for 1000 Genomes via Ensembl.""" 40 | 41 | @pytest.mark.asyncio 42 | async def test_1000g_real_variant(self): 43 | """Test real 1000 Genomes API with known variant.""" 44 | client = ThousandGenomesClient() 45 | 46 | # Try with a known rsID 47 | result = await client.get_variant_data("rs7412") # APOE variant 48 | 49 | print(f"1000 Genomes result: {result}") 50 | 51 | if result: 52 | print(f"Global MAF: {result.global_maf}") 53 | print(f"EUR MAF: {result.eur_maf}") 54 | print(f"AFR MAF: {result.afr_maf}") 55 | print(f"Consequence: {result.most_severe_consequence}") 56 | print(f"Ancestral allele: {result.ancestral_allele}") 57 | 58 | # This variant should have 
frequency data 59 | assert result.global_maf is not None 60 | else: 61 | print("No 1000 Genomes data found") 62 | 63 | 64 | class TestExternalAggregatorIntegration: 65 | """Integration tests for the aggregator.""" 66 | 67 | @pytest.mark.asyncio 68 | async def test_aggregator_basic(self): 69 | """Test aggregator with basic functionality.""" 70 | aggregator = ExternalVariantAggregator() 71 | 72 | # Test with a known variant 73 | result = await aggregator.get_enhanced_annotations( 74 | "rs7412", # APOE variant 75 | include_tcga=True, 76 | include_1000g=True, 77 | ) 78 | 79 | print(f"Variant ID: {result.variant_id}") 80 | print(f"TCGA data: {'Present' if result.tcga else 'Not found'}") 81 | print( 82 | f"1000G data: {'Present' if result.thousand_genomes else 'Not found'}" 83 | ) 84 | print(f"Errors: {result.error_sources}") 85 | 86 | # Should still work 87 | assert result.variant_id == "rs7412" 88 | 89 | @pytest.mark.asyncio 90 | async def test_aggregator_partial_failures(self): 91 | """Test aggregator handles partial failures gracefully.""" 92 | aggregator = ExternalVariantAggregator() 93 | 94 | # Use a variant that might not be in all databases 95 | result = await aggregator.get_enhanced_annotations( 96 | "chr1:g.12345678A>G", # Arbitrary variant 97 | include_tcga=True, 98 | include_1000g=True, 99 | ) 100 | 101 | print("Results for arbitrary variant:") 102 | print(f"- TCGA: {'Found' if result.tcga else 'Not found'}") 103 | print( 104 | f"- 1000G: {'Found' if result.thousand_genomes else 'Not found'}" 105 | ) 106 | print(f"- Errors: {result.error_sources}") 107 | 108 | # Should complete without crashing 109 | assert result.variant_id == "chr1:g.12345678A>G" 110 | 111 | 112 | class TestAssemblyParameter: 113 | """Integration tests for assembly parameter.""" 114 | 115 | @pytest.mark.integration 116 | @pytest.mark.asyncio 117 | async def test_get_variant_hg19_assembly(self): 118 | """Test get_variant with hg19 assembly on real API.""" 119 | # Use a well-known variant: 
BRAF V600E 120 | variant_id = "rs113488022" 121 | 122 | result = await get_variant( 123 | variant_id, 124 | output_json=True, 125 | include_external=False, 126 | assembly="hg19", 127 | ) 128 | 129 | # Should return valid JSON 130 | assert result is not None 131 | assert len(result) > 0 132 | 133 | # Parse and check for hg19 data 134 | import json 135 | 136 | data = json.loads(result) 137 | if data and len(data) > 0: 138 | variant_data = data[0] 139 | # BRAF V600E should have hg19 coordinates 140 | if "hg19" in variant_data: 141 | print(f"hg19 coordinates: {variant_data['hg19']}") 142 | assert "start" in variant_data["hg19"] 143 | assert "end" in variant_data["hg19"] 144 | else: 145 | pytest.skip("hg19 data not available in API response") 146 | else: 147 | pytest.skip("No data returned from API") 148 | 149 | @pytest.mark.integration 150 | @pytest.mark.asyncio 151 | async def test_get_variant_hg38_assembly(self): 152 | """Test get_variant with hg38 assembly on real API.""" 153 | # Use the same variant but request hg38 154 | variant_id = "rs113488022" 155 | 156 | result = await get_variant( 157 | variant_id, 158 | output_json=True, 159 | include_external=False, 160 | assembly="hg38", 161 | ) 162 | 163 | # Should return valid JSON 164 | assert result is not None 165 | assert len(result) > 0 166 | 167 | # Parse and check for hg38 data 168 | import json 169 | 170 | data = json.loads(result) 171 | if data and len(data) > 0: 172 | variant_data = data[0] 173 | # Should have hg38 coordinates 174 | if "hg38" in variant_data: 175 | print(f"hg38 coordinates: {variant_data['hg38']}") 176 | assert "start" in variant_data["hg38"] 177 | assert "end" in variant_data["hg38"] 178 | else: 179 | pytest.skip("hg38 data not available in API response") 180 | else: 181 | pytest.skip("No data returned from API") 182 | 183 | @pytest.mark.integration 184 | @pytest.mark.asyncio 185 | async def test_assembly_coordinate_differences(self): 186 | """Test that hg19 and hg38 return different 
coordinates for same variant.""" 187 | variant_id = "rs113488022" # BRAF V600E 188 | 189 | # Get both assemblies 190 | result_hg19 = await get_variant( 191 | variant_id, 192 | output_json=True, 193 | include_external=False, 194 | assembly="hg19", 195 | ) 196 | 197 | result_hg38 = await get_variant( 198 | variant_id, 199 | output_json=True, 200 | include_external=False, 201 | assembly="hg38", 202 | ) 203 | 204 | import json 205 | 206 | data_hg19 = json.loads(result_hg19) 207 | data_hg38 = json.loads(result_hg38) 208 | 209 | # Both should return data 210 | if not data_hg19 or not data_hg38: 211 | pytest.skip("API did not return data for both assemblies") 212 | 213 | # Compare coordinates if available 214 | if len(data_hg19) > 0 and len(data_hg38) > 0: 215 | v19 = data_hg19[0] 216 | v38 = data_hg38[0] 217 | 218 | # BRAF V600E has different coordinates in hg19 vs hg38 219 | # hg19: chr7:140453136 220 | # hg38: chr7:140753336 221 | if "hg19" in v19 and "hg38" in v38: 222 | print(f"hg19 start: {v19['hg19']['start']}") 223 | print(f"hg38 start: {v38['hg38']['start']}") 224 | 225 | # Coordinates should differ (the same locus is numbered differently in hg19 and hg38) 226 | assert v19["hg19"]["start"] != v38["hg38"]["start"] 227 | else: 228 | pytest.skip("Assembly-specific coordinates not in response") 229 | 230 | 231 | if __name__ == "__main__": 232 | print("Testing TCGA/GDC...") 233 | asyncio.run(TestTCGAIntegration().test_tcga_real_variant()) 234 | 235 | print("\n" + "=" * 50 + "\n") 236 | print("Testing 1000 Genomes...") 237 | asyncio.run(TestThousandGenomesIntegration().test_1000g_real_variant()) 238 | 239 | print("\n" + "=" * 50 + "\n") 240 | print("Testing aggregator...") 241 | asyncio.run(TestExternalAggregatorIntegration().test_aggregator_basic()) 242 | 243 | print("\n" + "=" * 50 + "\n") 244 | print("Testing aggregator with partial failures...") 245 | asyncio.run( 246 | TestExternalAggregatorIntegration().test_aggregator_partial_failures() 247 | ) 248 | 249 | print("\n" + "=" * 50 +
"\n") 250 | print("Testing assembly parameter...") 251 | asyncio.run(TestAssemblyParameter().test_get_variant_hg19_assembly()) 252 | asyncio.run(TestAssemblyParameter().test_get_variant_hg38_assembly()) 253 | asyncio.run(TestAssemblyParameter().test_assembly_coordinate_differences()) 254 | ``` -------------------------------------------------------------------------------- /tests/tdd/trials/test_backward_compatibility.py: -------------------------------------------------------------------------------- ```python 1 | """Test backward compatibility for trial search and getter functions.""" 2 | 3 | from unittest.mock import patch 4 | 5 | import pytest 6 | 7 | from biomcp.trials.getter import Module, get_trial, get_trial_unified 8 | from biomcp.trials.search import ( 9 | TrialQuery, 10 | search_trials, 11 | search_trials_unified, 12 | ) 13 | 14 | 15 | class TestTrialSearchBackwardCompatibility: 16 | """Test that existing trial search functionality remains unchanged.""" 17 | 18 | @pytest.mark.asyncio 19 | async def test_search_trials_defaults_to_clinicaltrials(self): 20 | """Test that search_trials still defaults to ClinicalTrials.gov.""" 21 | query = TrialQuery(conditions=["diabetes"]) 22 | 23 | with patch("biomcp.http_client.request_api") as mock_request: 24 | mock_request.return_value = ( 25 | { 26 | "studies": [ 27 | { 28 | "protocolSection": { 29 | "identificationModule": {"nctId": "NCT12345"} 30 | } 31 | } 32 | ] 33 | }, 34 | None, 35 | ) 36 | 37 | await search_trials(query, output_json=True) 38 | 39 | # Verify it called the ClinicalTrials.gov API 40 | assert mock_request.called 41 | call_args = mock_request.call_args 42 | # Check the URL argument 43 | url_arg = call_args.kwargs.get("url") 44 | assert url_arg is not None 45 | assert "clinicaltrials.gov" in url_arg 46 | 47 | @pytest.mark.asyncio 48 | async def test_search_trials_no_source_parameter(self): 49 | """Test that search_trials function signature hasn't changed.""" 50 | # This test ensures the function can 
still be called without source 51 | query = TrialQuery(conditions=["cancer"]) 52 | 53 | with patch("biomcp.http_client.request_api") as mock_request: 54 | mock_request.return_value = ({"studies": []}, None) 55 | 56 | # Should not raise TypeError about unexpected keyword argument 57 | await search_trials(query) 58 | assert mock_request.called 59 | 60 | @pytest.mark.asyncio 61 | async def test_search_trials_unified_with_source(self): 62 | """Test unified function supports source parameter.""" 63 | query = TrialQuery(conditions=["melanoma"]) 64 | 65 | # Test with ClinicalTrials.gov 66 | with patch("biomcp.trials.search.search_trials") as mock_ct: 67 | mock_ct.return_value = "CT results" 68 | 69 | result = await search_trials_unified( 70 | query, source="clinicaltrials" 71 | ) 72 | assert result == "CT results" 73 | mock_ct.assert_called_once_with(query, False) 74 | 75 | # Test with NCI 76 | with ( 77 | patch("biomcp.trials.nci_search.search_trials_nci") as mock_nci, 78 | patch( 79 | "biomcp.trials.nci_search.format_nci_trial_results" 80 | ) as mock_format, 81 | ): 82 | mock_nci.return_value = {"source": "nci", "trials": []} 83 | mock_format.return_value = "NCI formatted results" 84 | 85 | result = await search_trials_unified( 86 | query, source="nci", api_key="test-key" 87 | ) 88 | assert result == "NCI formatted results" 89 | mock_nci.assert_called_once_with(query, "test-key") 90 | 91 | 92 | class TestTrialGetterBackwardCompatibility: 93 | """Test that existing trial getter functionality remains unchanged.""" 94 | 95 | @pytest.mark.asyncio 96 | async def test_get_trial_defaults_to_clinicaltrials(self): 97 | """Test that get_trial still defaults to ClinicalTrials.gov.""" 98 | with patch("biomcp.http_client.request_api") as mock_request: 99 | mock_request.return_value = ( 100 | { 101 | "protocolSection": { 102 | "identificationModule": {"nctId": "NCT12345"} 103 | } 104 | }, 105 | None, 106 | ) 107 | 108 | await get_trial("NCT12345", Module.PROTOCOL) 109 | 110 | # 
Verify it called the ClinicalTrials.gov API 111 | assert mock_request.called 112 | call_args = mock_request.call_args 113 | # Check the URL argument 114 | url_arg = call_args.kwargs.get("url") 115 | assert url_arg is not None 116 | assert "clinicaltrials.gov" in url_arg 117 | # NCT ID would be in the request params, not the URL 118 | 119 | @pytest.mark.asyncio 120 | async def test_get_trial_no_source_parameter(self): 121 | """Test that get_trial function signature hasn't changed.""" 122 | with patch("biomcp.http_client.request_api") as mock_request: 123 | mock_request.return_value = ( 124 | { 125 | "protocolSection": { 126 | "identificationModule": {"nctId": "NCT99999"} 127 | } 128 | }, 129 | None, 130 | ) 131 | 132 | # Should not raise TypeError about unexpected keyword argument 133 | await get_trial("NCT99999", Module.PROTOCOL, output_json=True) 134 | assert mock_request.called 135 | 136 | @pytest.mark.asyncio 137 | async def test_get_trial_unified_with_source(self): 138 | """Test unified function supports source parameter.""" 139 | # Test with ClinicalTrials.gov - uses private functions 140 | with patch("biomcp.trials.getter._trial_protocol") as mock_protocol: 141 | mock_protocol.return_value = "CT trial details" 142 | 143 | result = await get_trial_unified( 144 | "NCT12345", source="clinicaltrials", sections=["protocol"] 145 | ) 146 | assert result == "CT trial details" 147 | mock_protocol.assert_called_once_with( 148 | nct_id="NCT12345", 149 | call_benefit="Getting protocol information for trial NCT12345", 150 | ) 151 | 152 | # Test with NCI 153 | with ( 154 | patch("biomcp.trials.nci_getter.get_trial_nci") as mock_nci, 155 | patch( 156 | "biomcp.trials.nci_getter.format_nci_trial_details" 157 | ) as mock_format, 158 | ): 159 | mock_nci.return_value = {"nct_id": "NCT12345", "source": "nci"} 160 | mock_format.return_value = "NCI formatted trial" 161 | 162 | result = await get_trial_unified( 163 | "NCT12345", source="nci", api_key="test-key" 164 | ) 165 | assert 
result == "NCI formatted trial" 166 | mock_nci.assert_called_once_with("NCT12345", "test-key") 167 | 168 | @pytest.mark.asyncio 169 | async def test_get_trial_all_modules_still_work(self): 170 | """Test that all existing Module options still work.""" 171 | modules_to_test = [ 172 | Module.PROTOCOL, 173 | Module.LOCATIONS, 174 | Module.REFERENCES, 175 | Module.OUTCOMES, 176 | ] 177 | 178 | for module in modules_to_test: 179 | with patch("biomcp.http_client.request_api") as mock_request: 180 | mock_request.return_value = ( 181 | { 182 | "protocolSection": { 183 | "identificationModule": {"nctId": "NCT12345"} 184 | } 185 | }, 186 | None, 187 | ) 188 | 189 | await get_trial("NCT12345", module) 190 | assert mock_request.called 191 | # Reset for next iteration 192 | mock_request.reset_mock() 193 | 194 | 195 | class TestCLIBackwardCompatibility: 196 | """Test that CLI commands maintain backward compatibility.""" 197 | 198 | def test_cli_imports_exist(self): 199 | """Test that CLI still imports the expected functions.""" 200 | # These imports should not raise ImportError 201 | from biomcp.cli.trials import get_trial_cli, search_trials_cli 202 | 203 | assert search_trials_cli is not None 204 | assert get_trial_cli is not None 205 | 206 | def test_search_defaults_without_source(self): 207 | """Test CLI search works without source parameter.""" 208 | from typer.testing import CliRunner 209 | 210 | from biomcp.cli.main import app 211 | 212 | runner = CliRunner() 213 | 214 | with patch("biomcp.cli.trials.asyncio.run") as mock_run: 215 | mock_run.return_value = None 216 | 217 | # Run CLI command without --source 218 | result = runner.invoke( 219 | app, ["trial", "search", "--condition", "diabetes"] 220 | ) 221 | 222 | # Should succeed 223 | assert result.exit_code == 0 224 | 225 | # Verify asyncio.run was called at least once 226 | mock_run.assert_called() 227 | args = mock_run.call_args[0][0] 228 | # Weak sanity check: the object passed to asyncio.run exposes __name__ or func (the specific function is not verified) 229 |
assert hasattr(args, "__name__") or hasattr(args, "func") 230 | ``` -------------------------------------------------------------------------------- /docs/reference/architecture-diagrams.md: -------------------------------------------------------------------------------- ```markdown 1 | # BioMCP Architecture Diagrams 2 | 3 | This page describes BioMCP's architecture, data flows, and workflows. 4 | 5 | ## System Architecture Overview 6 | 7 | BioMCP consists of three main layers: 8 | 9 | ### Client Layer 10 | 11 | - **CLI Interface**: Command-line tool for direct interaction 12 | - **Claude Desktop**: AI assistant integration via MCP protocol 13 | - **Python SDK**: Programmatic access for custom applications 14 | - **Custom MCP Clients**: Any MCP-compatible client 15 | 16 | ### BioMCP Core 17 | 18 | - **MCP Server**: Handles protocol communication 19 | - **Request Router**: Directs queries to appropriate handlers 20 | - **Cache Layer**: Intelligent caching for API responses 21 | - **Domain Handlers**: Specialized processors for each data type 22 | - Articles Handler (PubMed/PubTator3) 23 | - Trials Handler (ClinicalTrials.gov, NCI) 24 | - Variants Handler (MyVariant.info) 25 | - Genes Handler (MyGene.info) 26 | 27 | ### External APIs 28 | 29 | - **PubMed/PubTator3**: Biomedical literature 30 | - **ClinicalTrials.gov**: US clinical trials registry 31 | - **NCI CTS API**: National Cancer Institute trials 32 | - **MyVariant.info**: Genetic variant annotations 33 | - **MyGene.info**: Gene information 34 | - **cBioPortal**: Cancer genomics data 35 | - **AlphaGenome**: Variant effect predictions 36 | 37 | ## Data Flow Architecture 38 | 39 | 1. **User Request**: Query submitted via CLI, Claude, or SDK 40 | 2. **Cache Check**: System checks for cached results 41 | 3. **API Request**: If cache miss, fetch from external API 42 | 4. **Result Processing**: Normalize and enrich data 43 | 5. **Cache Storage**: Store results for future use 44 | 6. 
**Response Delivery**: Return formatted results to user 45 | 46 | ## Key Workflows 47 | 48 | ### Search Workflow 49 | 50 | 1. **Think Tool**: Plan search strategy 51 | 2. **Execute Search**: Query relevant data sources 52 | 3. **Enrich Results**: Add contextual information 53 | 4. **Combine Data**: Merge results from multiple sources 54 | 5. **Format Output**: Present in user-friendly format 55 | 56 | ### Article Search Pipeline 57 | 58 | 1. **Query Processing**: Parse user input 59 | 2. **Entity Recognition**: Normalize gene/disease names 60 | 3. **PubTator3 Search**: Query literature database 61 | 4. **Preprint Integration**: Include bioRxiv/medRxiv if enabled 62 | 5. **cBioPortal Enrichment**: Add cancer genomics data for genes 63 | 6. **Result Merging**: Combine all data sources 64 | 65 | ### Clinical Trial Matching 66 | 67 | 1. **Patient Profile**: Parse eligibility criteria 68 | 2. **Location Filter**: Geographic constraints 69 | 3. **Molecular Profile**: Mutation requirements 70 | 4. **Prior Treatments**: Treatment history matching 71 | 5. **Scoring Algorithm**: Rank trials by relevance 72 | 6. **Contact Extraction**: Retrieve site information 73 | 74 | ### Variant Interpretation 75 | 76 | 1. **Input Parsing**: Process VCF/MAF files 77 | 2. **Batch Processing**: Group variants efficiently 78 | 3. **Annotation Gathering**: 79 | - Clinical significance from MyVariant.info 80 | - Population frequency data 81 | - In silico predictions 82 | - Literature evidence 83 | - Clinical trial associations 84 | 4. **AlphaGenome Integration**: Regulatory predictions (optional) 85 | 5. **Tier Classification**: Categorize by clinical relevance 86 | 6. 
**Report Generation**: Create interpretation summary 87 | 88 | ## Architecture Patterns 89 | 90 | ### Caching Strategy 91 | 92 | - **Multi-tier Cache**: Memory → Disk → External 93 | - **Smart TTL**: Domain-specific expiration times 94 | - **Cache Key Generation**: Include all query parameters 95 | - **Invalidation Logic**: Clear on errors or updates 96 | 97 | ### Error Handling 98 | 99 | - **Retry Logic**: Exponential backoff for transient errors 100 | - **Rate Limiting**: Respect API limits with queuing 101 | - **Graceful Degradation**: Return partial results when possible 102 | - **Clear Error Messages**: Help users troubleshoot issues 103 | 104 | ### Authentication Flow 105 | 106 | 1. Check for user-provided API key 107 | 2. Fall back to environment variable 108 | 3. Use public access if no key available 109 | 4. Handle authentication errors gracefully 110 | 111 | ### Performance Optimization 112 | 113 | - **Request Batching**: Combine multiple queries 114 | - **Parallel Execution**: Concurrent API calls 115 | - **Connection Pooling**: Reuse HTTP connections 116 | - **Result Streaming**: Return data as available 117 | 118 | ## Deployment Options 119 | 120 | ### Local Development 121 | 122 | - Single process with in-memory cache 123 | - Direct file system access 124 | - Simple configuration 125 | 126 | ### Docker Deployment 127 | 128 | - Containerized application 129 | - Volume-mounted cache 130 | - Environment-based configuration 131 | 132 | ### Cloud Deployment 133 | 134 | - Load-balanced instances 135 | - Shared Redis cache 136 | - Auto-scaling capabilities 137 | - Monitoring integration 138 | 139 | ## Creating Documentation Diagrams 140 | 141 | For visual diagrams, we recommend: 142 | 143 | 1. **ASCII Art**: Universal compatibility 144 | 145 | - Use tools like asciiflow.com 146 | - Store in `docs/assets/` directory 147 | 148 | 2. **Screenshots**: For complex UIs 149 | 150 | - Annotate with arrows/labels 151 | - Save as PNG in `docs/assets/` 152 | 153 | 3. 
**External Tools**: 154 | - draw.io for flowcharts 155 | - Lucidchart for professional diagrams 156 | - Export as static images 157 | 158 | ## ASCII System Architecture 159 | 160 | ``` 161 | ┌─────────────────────────────────────────────────────────────────────────┐ 162 | │ USER INTERFACES │ 163 | ├────────────────┬───────────────────┬───────────────┬───────────────────┤ 164 | │ │ │ │ │ 165 | │ CLI Tool │ Claude Desktop │ Python SDK │ Custom Client │ 166 | │ (biomcp) │ (MCP Client) │ (async) │ (your app) │ 167 | │ │ │ │ │ 168 | └────────┬───────┴─────────┬─────────┴───────┬───────┴───────────┬───────┘ 169 | │ │ │ │ 170 | └─────────────────┴─────────────────┴───────────────────┘ 171 | │ 172 | ▼ 173 | ┌─────────────────────────────────────────────────────────────────────────┐ 174 | │ BioMCP CORE SERVER │ 175 | ├─────────────────────────────────────────────────────────────────────────┤ 176 | │ │ 177 | │ ┌─────────────┐ ┌──────────────┐ ┌──────────────┐ ┌────────────┐ │ 178 | │ │ Router │ │ Rate Limiter │ │ Cache Manager│ │ Logger │ │ 179 | │ │ │ │ │ │ │ │ │ │ 180 | │ └──────┬──────┘ └──────────────┘ └──────────────┘ └────────────┘ │ 181 | │ │ │ 182 | │ ▼ │ 183 | │ ┌─────────────────────────────────────────────────────────────────┐ │ 184 | │ │ Domain Handlers │ │ 185 | │ ├─────────────┬─────────────┬─────────────┬──────────────────────┤ │ 186 | │ │ Articles │ Trials │ Variants │ Genes/Drugs/Disease │ │ 187 | │ │ Handler │ Handler │ Handler │ Handler │ │ 188 | │ └──────┬──────┴──────┬──────┴──────┬──────┴──────────┬───────────┘ │ 189 | │ │ │ │ │ │ 190 | └─────────┼─────────────┼─────────────┼─────────────────┼─────────────────┘ 191 | │ │ │ │ 192 | ▼ ▼ ▼ ▼ 193 | ┌─────────────────────────────────────────────────────────────────────────┐ 194 | │ EXTERNAL DATA SOURCES │ 195 | ├─────────────┬─────────────┬─────────────┬──────────────────────────────┤ 196 | │ │ │ │ │ 197 | │ PubMed/ │ Clinical │ MyVariant │ BioThings Suite │ 198 | │ PubTator3 │ Trials.gov │ .info │ 
(MyGene/MyDisease/MyChem) │ 199 | │ │ + NCI │ │ │ 200 | │ │ │ │ │ 201 | ├─────────────┴─────────────┴─────────────┴──────────────────────────────┤ 202 | │ │ 203 | │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ 204 | │ │ cBioPortal │ │ AlphaGenome │ │ Europe PMC │ │ 205 | │ │ (Cancer) │ │ (Predictions)│ │ (Preprints) │ │ 206 | │ └──────────────┘ └──────────────┘ └──────────────┘ │ 207 | │ │ 208 | └─────────────────────────────────────────────────────────────────────────┘ 209 | ``` 210 | 211 | See also: [Quick Architecture Reference](quick-architecture.md) 212 | 213 | ## Next Steps 214 | 215 | - View the [Quick Architecture Guide](quick-architecture.md) for a concise overview 216 | - Check [Developer Guides](../developer-guides/01-server-deployment.md) for implementation details 217 | - See [API Reference](../apis/overview.md) for detailed specifications 218 | ``` -------------------------------------------------------------------------------- /tests/tdd/test_circuit_breaker.py: -------------------------------------------------------------------------------- ```python 1 | """Tests for circuit breaker pattern.""" 2 | 3 | import asyncio 4 | 5 | import pytest 6 | 7 | from biomcp.circuit_breaker import ( 8 | CircuitBreaker, 9 | CircuitBreakerConfig, 10 | CircuitBreakerError, 11 | CircuitState, 12 | circuit_breaker, 13 | get_circuit_breaker, 14 | ) 15 | 16 | 17 | class CircuitBreakerTestException(Exception): 18 | """Test exception for circuit breaker tests.""" 19 | 20 | pass 21 | 22 | 23 | class IgnoredException(Exception): 24 | """Exception that should be ignored by circuit breaker.""" 25 | 26 | pass 27 | 28 | 29 | @pytest.mark.asyncio 30 | async def test_circuit_breaker_closed_state(): 31 | """Test circuit breaker in closed state allows calls.""" 32 | breaker = CircuitBreaker("test_closed") 33 | call_count = 0 34 | 35 | async def test_func(): 36 | nonlocal call_count 37 | call_count += 1 38 | return "success" 39 | 40 | # Should allow calls in closed state 41 | 
assert breaker.is_closed 42 | result = await breaker.call(test_func) 43 | assert result == "success" 44 | assert call_count == 1 45 | 46 | 47 | @pytest.mark.asyncio 48 | async def test_circuit_breaker_opens_on_threshold(): 49 | """Test circuit breaker opens after failure threshold.""" 50 | config = CircuitBreakerConfig( 51 | failure_threshold=3, 52 | expected_exception=CircuitBreakerTestException, 53 | ) 54 | breaker = CircuitBreaker("test_threshold", config) 55 | 56 | async def failing_func(): 57 | raise CircuitBreakerTestException("Test failure") 58 | 59 | # First 2 failures should pass through 60 | for _i in range(2): 61 | with pytest.raises(CircuitBreakerTestException): 62 | await breaker.call(failing_func) 63 | assert breaker.is_closed 64 | 65 | # Third failure should open the circuit 66 | with pytest.raises(CircuitBreakerTestException): 67 | await breaker.call(failing_func) 68 | assert breaker.is_open 69 | 70 | # Subsequent calls should fail fast 71 | with pytest.raises(CircuitBreakerError): 72 | await breaker.call(failing_func) 73 | 74 | 75 | @pytest.mark.asyncio 76 | async def test_circuit_breaker_half_open_recovery(): 77 | """Test circuit breaker recovery through half-open state.""" 78 | config = CircuitBreakerConfig( 79 | failure_threshold=2, 80 | recovery_timeout=0.1, # 100ms for testing 81 | success_threshold=2, 82 | ) 83 | breaker = CircuitBreaker("test_recovery", config) 84 | 85 | call_count = 0 86 | should_fail = True 87 | 88 | async def test_func(): 89 | nonlocal call_count 90 | call_count += 1 91 | if should_fail: 92 | raise CircuitBreakerTestException("Failure") 93 | return "success" 94 | 95 | # Open the circuit 96 | for _ in range(2): 97 | with pytest.raises(CircuitBreakerTestException): 98 | await breaker.call(test_func) 99 | assert breaker.is_open 100 | 101 | # Wait for recovery timeout 102 | await asyncio.sleep(0.15) 103 | 104 | # Next call should attempt (half-open state) 105 | should_fail = False 106 | result = await breaker.call(test_func) 
107 | assert result == "success" 108 | assert breaker.state == CircuitState.HALF_OPEN 109 | 110 | # Need one more success to close 111 | result = await breaker.call(test_func) 112 | assert result == "success" 113 | assert breaker.is_closed 114 | 115 | 116 | @pytest.mark.asyncio 117 | async def test_circuit_breaker_half_open_failure(): 118 | """Test circuit breaker reopens on failure in half-open state.""" 119 | config = CircuitBreakerConfig( 120 | failure_threshold=2, 121 | recovery_timeout=0.1, 122 | ) 123 | breaker = CircuitBreaker("test_half_open_fail", config) 124 | 125 | async def failing_func(): 126 | raise CircuitBreakerTestException("Failure") 127 | 128 | # Open the circuit 129 | for _ in range(2): 130 | with pytest.raises(CircuitBreakerTestException): 131 | await breaker.call(failing_func) 132 | assert breaker.is_open 133 | 134 | # Wait for recovery timeout 135 | await asyncio.sleep(0.15) 136 | 137 | # Failure in half-open should reopen immediately 138 | with pytest.raises(CircuitBreakerTestException): 139 | await breaker.call(failing_func) 140 | assert breaker.is_open 141 | 142 | 143 | @pytest.mark.asyncio 144 | async def test_circuit_breaker_ignored_exceptions(): 145 | """Test that certain exceptions don't trigger circuit breaker.""" 146 | config = CircuitBreakerConfig( 147 | failure_threshold=2, 148 | expected_exception=Exception, 149 | exclude_exceptions=(IgnoredException,), 150 | ) 151 | breaker = CircuitBreaker("test_ignored", config) 152 | 153 | async def func_with_ignored_exception(): 154 | raise IgnoredException("Should be ignored") 155 | 156 | # These exceptions shouldn't count 157 | for _ in range(5): 158 | with pytest.raises(IgnoredException): 159 | await breaker.call(func_with_ignored_exception) 160 | assert breaker.is_closed 161 | 162 | 163 | @pytest.mark.asyncio 164 | async def test_circuit_breaker_reset(): 165 | """Test manual reset of circuit breaker.""" 166 | config = CircuitBreakerConfig(failure_threshold=1) 167 | breaker = 
CircuitBreaker("test_reset", config) 168 | 169 | async def failing_func(): 170 | raise CircuitBreakerTestException("Failure") 171 | 172 | # Open the circuit 173 | with pytest.raises(CircuitBreakerTestException): 174 | await breaker.call(failing_func) 175 | assert breaker.is_open 176 | 177 | # Manual reset 178 | await breaker.reset() 179 | assert breaker.is_closed 180 | 181 | # Should allow calls again 182 | async def success_func(): 183 | return "success" 184 | 185 | result = await breaker.call(success_func) 186 | assert result == "success" 187 | 188 | 189 | @pytest.mark.asyncio 190 | async def test_circuit_breaker_decorator(): 191 | """Test circuit breaker decorator.""" 192 | call_count = 0 193 | 194 | @circuit_breaker( 195 | "test_decorator", CircuitBreakerConfig(failure_threshold=2) 196 | ) 197 | async def decorated_func(should_fail=False): 198 | nonlocal call_count 199 | call_count += 1 200 | if should_fail: 201 | raise CircuitBreakerTestException("Failure") 202 | return "success" 203 | 204 | # Success calls 205 | result = await decorated_func() 206 | assert result == "success" 207 | 208 | # Open circuit with failures 209 | for _ in range(2): 210 | with pytest.raises(CircuitBreakerTestException): 211 | await decorated_func(should_fail=True) 212 | 213 | # Circuit should be open 214 | with pytest.raises(CircuitBreakerError): 215 | await decorated_func() 216 | 217 | 218 | def test_get_circuit_breaker(): 219 | """Test getting circuit breaker from registry.""" 220 | # First call creates breaker 221 | breaker1 = get_circuit_breaker("test_registry") 222 | assert breaker1.name == "test_registry" 223 | 224 | # Second call returns same instance 225 | breaker2 = get_circuit_breaker("test_registry") 226 | assert breaker1 is breaker2 227 | 228 | # Different name creates different breaker 229 | breaker3 = get_circuit_breaker("test_registry_2") 230 | assert breaker3 is not breaker1 231 | 232 | 233 | @pytest.mark.asyncio 234 | async def test_circuit_breaker_concurrent_calls(): 
235 | """Test circuit breaker handles concurrent calls correctly.""" 236 | config = CircuitBreakerConfig( 237 | failure_threshold=5, 238 | expected_exception=CircuitBreakerTestException, 239 | ) 240 | breaker = CircuitBreaker("test_concurrent", config) 241 | 242 | failure_count = 0 243 | 244 | async def failing_func(): 245 | nonlocal failure_count 246 | failure_count += 1 247 | if failure_count <= 5: 248 | raise CircuitBreakerTestException("Failure") 249 | return "success" 250 | 251 | # Run concurrent failing calls 252 | tasks = [] 253 | for _ in range(10): 254 | tasks.append(breaker.call(failing_func)) 255 | 256 | results = await asyncio.gather(*tasks, return_exceptions=True) 257 | 258 | # Should have some CircuitBreakerTestExceptions and some CircuitBreakerErrors 259 | test_exceptions = sum( 260 | 1 for r in results if isinstance(r, CircuitBreakerTestException) 261 | ) 262 | breaker_errors = sum( 263 | 1 for r in results if isinstance(r, CircuitBreakerError) 264 | ) 265 | 266 | # At least failure_threshold CircuitBreakerTestExceptions 267 | assert test_exceptions >= config.failure_threshold 268 | # Some calls should have been blocked 269 | assert breaker_errors > 0 270 | # Circuit should be open 271 | assert breaker.is_open 272 | 273 | 274 | @pytest.mark.asyncio 275 | async def test_circuit_breaker_success_resets_failures(): 276 | """Test that successes reset failure count in closed state.""" 277 | config = CircuitBreakerConfig(failure_threshold=3) 278 | breaker = CircuitBreaker("test_success_reset", config) 279 | 280 | async def sometimes_failing_func(fail=False): 281 | if fail: 282 | raise CircuitBreakerTestException("Failure") 283 | return "success" 284 | 285 | # Two failures 286 | for _ in range(2): 287 | with pytest.raises(CircuitBreakerTestException): 288 | await breaker.call(sometimes_failing_func, fail=True) 289 | 290 | # Success should reset failure count 291 | result = await breaker.call(sometimes_failing_func, fail=False) 292 | assert result == 
"success" 293 | assert breaker.is_closed 294 | 295 | # Can now fail 2 more times without opening 296 | for _ in range(2): 297 | with pytest.raises(CircuitBreakerTestException): 298 | await breaker.call(sometimes_failing_func, fail=True) 299 | assert breaker.is_closed 300 | ``` -------------------------------------------------------------------------------- /tests/tdd/test_drug_recalls.py: -------------------------------------------------------------------------------- ```python 1 | """Tests for FDA drug recalls module.""" 2 | 3 | import json 4 | from pathlib import Path 5 | from unittest.mock import AsyncMock, patch 6 | 7 | import pytest 8 | 9 | from biomcp.openfda.drug_recalls import ( 10 | get_drug_recall, 11 | search_drug_recalls, 12 | ) 13 | 14 | # Load mock data 15 | MOCK_DIR = Path(__file__).parent.parent / "data" / "openfda" 16 | MOCK_RECALLS_SEARCH = json.loads( 17 | (MOCK_DIR / "enforcement_search.json").read_text() 18 | ) 19 | MOCK_RECALL_DETAIL = json.loads( 20 | (MOCK_DIR / "enforcement_detail.json").read_text() 21 | ) 22 | 23 | 24 | class TestDrugRecalls: 25 | """Test drug recalls functionality.""" 26 | 27 | @pytest.mark.asyncio 28 | async def test_search_drug_recalls_success(self): 29 | """Test successful drug recall search.""" 30 | with patch( 31 | "biomcp.openfda.drug_recalls.make_openfda_request", 32 | new_callable=AsyncMock, 33 | ) as mock_request: 34 | mock_request.return_value = (MOCK_RECALLS_SEARCH, None) 35 | 36 | result = await search_drug_recalls( 37 | drug="valsartan", 38 | limit=10, 39 | ) 40 | 41 | assert "Drug Recall" in result or "FDA Drug Recall" in result 42 | assert "valsartan" in result.lower() 43 | # Check for presence of key recall info 44 | assert "Recall" in result or "recall" in result.lower() 45 | mock_request.assert_called_once() 46 | 47 | @pytest.mark.asyncio 48 | async def test_search_drug_recalls_with_filters(self): 49 | """Test drug recall search with multiple filters.""" 50 | with patch( 51 | 
"biomcp.openfda.drug_recalls.make_openfda_request", 52 | new_callable=AsyncMock, 53 | ) as mock_request: 54 | mock_request.return_value = (MOCK_RECALLS_SEARCH, None) 55 | 56 | result = await search_drug_recalls( 57 | drug="metformin", 58 | recall_class="2", 59 | status="ongoing", 60 | reason="contamination", 61 | since_date="20230101", 62 | limit=5, 63 | api_key="test-key", 64 | ) 65 | 66 | assert "Drug Recall" in result or "FDA Drug Recall" in result 67 | # Verify API key was passed as the 4th positional argument 68 | call_args = mock_request.call_args 69 | assert ( 70 | call_args[0][3] == "test-key" 71 | ) # api_key is 4th positional arg 72 | 73 | @pytest.mark.asyncio 74 | async def test_search_drug_recalls_no_results(self): 75 | """Test drug recall search with no results.""" 76 | with patch( 77 | "biomcp.openfda.drug_recalls.make_openfda_request", 78 | new_callable=AsyncMock, 79 | ) as mock_request: 80 | mock_request.return_value = ({"results": []}, None) 81 | 82 | result = await search_drug_recalls(drug="nonexistent-drug") 83 | 84 | assert "No drug recall records found" in result 85 | 86 | @pytest.mark.asyncio 87 | async def test_search_drug_recalls_api_error(self): 88 | """Test drug recall search with API error.""" 89 | with patch( 90 | "biomcp.openfda.drug_recalls.make_openfda_request", 91 | new_callable=AsyncMock, 92 | ) as mock_request: 93 | mock_request.return_value = (None, "API rate limit exceeded") 94 | 95 | result = await search_drug_recalls(drug="test") 96 | 97 | assert "Error searching drug recalls" in result 98 | assert "API rate limit exceeded" in result 99 | 100 | @pytest.mark.asyncio 101 | async def test_get_drug_recall_success(self): 102 | """Test getting specific drug recall details.""" 103 | with patch( 104 | "biomcp.openfda.drug_recalls.make_openfda_request", 105 | new_callable=AsyncMock, 106 | ) as mock_request: 107 | mock_request.return_value = (MOCK_RECALL_DETAIL, None) 108 | 109 | result = await get_drug_recall("D-0001-2023") 110 | 111 | 
assert "Drug Recall" in result or "D-0001-2023" in result 112 | assert "D-0001-2023" in result 113 | # Check for key details in the output (formats may vary) 114 | assert "product" in result.lower() or "valsartan" in result.lower() 115 | 116 | @pytest.mark.asyncio 117 | async def test_get_drug_recall_not_found(self): 118 | """Test getting drug recall that doesn't exist.""" 119 | with patch( 120 | "biomcp.openfda.drug_recalls.make_openfda_request", 121 | new_callable=AsyncMock, 122 | ) as mock_request: 123 | mock_request.return_value = ({"results": []}, None) 124 | 125 | result = await get_drug_recall("INVALID-RECALL") 126 | 127 | assert "No recall record found" in result 128 | assert "INVALID-RECALL" in result 129 | 130 | @pytest.mark.asyncio 131 | async def test_get_drug_recall_with_api_key(self): 132 | """Test getting drug recall with API key.""" 133 | with patch( 134 | "biomcp.openfda.drug_recalls.make_openfda_request", 135 | new_callable=AsyncMock, 136 | ) as mock_request: 137 | mock_request.return_value = (MOCK_RECALL_DETAIL, None) 138 | 139 | result = await get_drug_recall( 140 | "D-0001-2023", 141 | api_key="test-api-key", 142 | ) 143 | 144 | assert "Drug Recall" in result or "D-0001-2023" in result 145 | # Verify API key was passed as the 4th positional argument 146 | call_args = mock_request.call_args 147 | assert ( 148 | call_args[0][3] == "test-api-key" 149 | ) # api_key is 4th positional arg 150 | 151 | @pytest.mark.asyncio 152 | async def test_recall_class_validation(self): 153 | """Test that recall class is validated.""" 154 | with patch( 155 | "biomcp.openfda.drug_recalls.make_openfda_request", 156 | new_callable=AsyncMock, 157 | ) as mock_request: 158 | mock_request.return_value = (MOCK_RECALLS_SEARCH, None) 159 | 160 | # Valid recall classes 161 | for recall_class in ["1", "2", "3"]: 162 | result = await search_drug_recalls(recall_class=recall_class) 163 | assert "Drug Recall" in result or "FDA Drug Recall" in result 164 | 165 | # Test with Class 
I, II, III format 166 | result = await search_drug_recalls(recall_class="Class I") 167 | call_args = mock_request.call_args 168 | params = call_args[0][1] # params is 2nd positional arg 169 | assert 'classification:"Class I"' in params["search"] 170 | 171 | @pytest.mark.asyncio 172 | async def test_recall_status_mapping(self): 173 | """Test that recall status is properly mapped.""" 174 | with patch( 175 | "biomcp.openfda.drug_recalls.make_openfda_request", 176 | new_callable=AsyncMock, 177 | ) as mock_request: 178 | mock_request.return_value = (MOCK_RECALLS_SEARCH, None) 179 | 180 | # Test ongoing status 181 | await search_drug_recalls(status="ongoing") 182 | call_args = mock_request.call_args 183 | params = call_args[0][1] # params is 2nd positional arg 184 | assert "Ongoing" in params["search"] 185 | 186 | # Test completed status 187 | await search_drug_recalls(status="completed") 188 | call_args = mock_request.call_args 189 | params = call_args[0][1] # params is 2nd positional arg 190 | assert "Completed" in params["search"] 191 | 192 | @pytest.mark.asyncio 193 | async def test_search_drug_recalls_pagination(self): 194 | """Test drug recall search pagination.""" 195 | with patch( 196 | "biomcp.openfda.drug_recalls.make_openfda_request", 197 | new_callable=AsyncMock, 198 | ) as mock_request: 199 | mock_response = { 200 | "meta": {"results": {"total": 150}}, 201 | "results": MOCK_RECALLS_SEARCH["results"], 202 | } 203 | mock_request.return_value = (mock_response, None) 204 | 205 | result = await search_drug_recalls( 206 | drug="aspirin", 207 | limit=10, 208 | skip=30, 209 | ) 210 | 211 | # Check for total count instead of specific pagination format 212 | assert "150" in result 213 | # Verify skip parameter was passed 214 | call_args = mock_request.call_args 215 | assert ( 216 | call_args[0][1]["skip"] == "30" 217 | ) # params is 2nd positional arg, value is string 218 | 219 | @pytest.mark.asyncio 220 | async def test_date_filtering(self): 221 | """Test that date 
filtering works correctly.""" 222 | with patch( 223 | "biomcp.openfda.drug_recalls.make_openfda_request", 224 | new_callable=AsyncMock, 225 | ) as mock_request: 226 | mock_request.return_value = (MOCK_RECALLS_SEARCH, None) 227 | 228 | await search_drug_recalls( 229 | since_date="20230615", 230 | ) 231 | 232 | # Check that date was properly formatted in query 233 | call_args = mock_request.call_args 234 | params = call_args[0][1] # params is 2nd positional arg 235 | assert "recall_initiation_date" in params["search"] 236 | assert "[2023-06-15 TO *]" in params["search"] 237 | ``` -------------------------------------------------------------------------------- /src/biomcp/openfda/validation.py: -------------------------------------------------------------------------------- ```python 1 | """Validation functions for OpenFDA API responses.""" 2 | 3 | import logging 4 | from typing import Any 5 | 6 | from .exceptions import OpenFDAValidationError 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def validate_fda_response( 12 | response: dict[str, Any], 13 | required_fields: list[str] | None = None, 14 | response_type: str = "generic", 15 | ) -> bool: 16 | """ 17 | Validate FDA API response structure. 
18 | 19 | Args: 20 | response: The FDA API response dictionary 21 | required_fields: List of required top-level fields 22 | response_type: Type of response for specific validation 23 | 24 | Returns: 25 | True if valid 26 | 27 | Raises: 28 | OpenFDAValidationError: If validation fails 29 | """ 30 | if not isinstance(response, dict): 31 | raise OpenFDAValidationError( 32 | f"Expected dict response, got {type(response).__name__}" 33 | ) 34 | 35 | # Default required fields for most FDA responses 36 | if required_fields is None: 37 | required_fields = ["results"] if "results" in response else [] 38 | 39 | # Check required fields 40 | missing_fields = [ 41 | field for field in required_fields if field not in response 42 | ] 43 | if missing_fields: 44 | raise OpenFDAValidationError( 45 | f"Missing required fields in FDA response: {', '.join(missing_fields)}" 46 | ) 47 | 48 | # Type-specific validation 49 | if response_type == "search": 50 | validate_search_response(response) 51 | elif response_type == "detail": 52 | validate_detail_response(response) 53 | 54 | return True 55 | 56 | 57 | def validate_search_response(response: dict[str, Any]) -> bool: 58 | """ 59 | Validate FDA search response structure. 60 | 61 | Args: 62 | response: FDA search response 63 | 64 | Returns: 65 | True if valid 66 | 67 | Raises: 68 | OpenFDAValidationError: If validation fails 69 | """ 70 | # Search responses should have results array 71 | if "results" not in response: 72 | raise OpenFDAValidationError("Search response missing 'results' field") 73 | 74 | if not isinstance(response["results"], list): 75 | raise OpenFDAValidationError( 76 | f"Expected 'results' to be a list, got {type(response['results']).__name__}" 77 | ) 78 | 79 | # If meta is present, validate it 80 | if "meta" in response: 81 | validate_meta_field(response["meta"]) 82 | 83 | return True 84 | 85 | 86 | def validate_detail_response(response: dict[str, Any]) -> bool: 87 | """ 88 | Validate FDA detail response structure. 
89 | 90 | Args: 91 | response: FDA detail response 92 | 93 | Returns: 94 | True if valid 95 | 96 | Raises: 97 | OpenFDAValidationError: If validation fails 98 | """ 99 | # Detail responses usually have a single result 100 | if "results" in response: 101 | if not isinstance(response["results"], list): 102 | raise OpenFDAValidationError( 103 | f"Expected 'results' to be a list, got {type(response['results']).__name__}" 104 | ) 105 | 106 | if len(response["results"]) == 0: 107 | # Empty results is valid (not found) 108 | return True 109 | 110 | if len(response["results"]) > 1: 111 | logger.warning( 112 | f"Detail response contains {len(response['results'])} results, expected 1" 113 | ) 114 | 115 | return True 116 | 117 | 118 | def validate_meta_field(meta: dict[str, Any]) -> bool: 119 | """ 120 | Validate FDA response meta field. 121 | 122 | Args: 123 | meta: Meta field from FDA response 124 | 125 | Returns: 126 | True if valid 127 | 128 | Raises: 129 | OpenFDAValidationError: If validation fails 130 | """ 131 | if not isinstance(meta, dict): 132 | raise OpenFDAValidationError( 133 | f"Expected 'meta' to be a dict, got {type(meta).__name__}" 134 | ) 135 | 136 | # Check for results metadata 137 | if "results" in meta: 138 | results_meta = meta["results"] 139 | if not isinstance(results_meta, dict): 140 | raise OpenFDAValidationError( 141 | f"Expected 'meta.results' to be a dict, got {type(results_meta).__name__}" 142 | ) 143 | 144 | # Validate pagination fields if present 145 | for field in ["skip", "limit", "total"]: 146 | if field in results_meta and not isinstance( 147 | results_meta[field], int | float 148 | ): 149 | raise OpenFDAValidationError( 150 | f"Expected 'meta.results.{field}' to be numeric, " 151 | f"got {type(results_meta[field]).__name__}" 152 | ) 153 | 154 | return True 155 | 156 | 157 | def validate_adverse_event(event: dict[str, Any]) -> bool: 158 | """ 159 | Validate an adverse event record. 
160 | 161 | Args: 162 | event: Adverse event record 163 | 164 | Returns: 165 | True if valid 166 | 167 | Raises: 168 | OpenFDAValidationError: If validation fails 169 | """ 170 | if not isinstance(event, dict): 171 | raise OpenFDAValidationError( 172 | f"Expected adverse event to be a dict, got {type(event).__name__}" 173 | ) 174 | 175 | # Key fields that should be present (but may be null) 176 | important_fields = ["patient", "safetyreportid"] 177 | 178 | for field in important_fields: 179 | if field not in event: 180 | logger.warning(f"Adverse event missing expected field: {field}") 181 | 182 | return True 183 | 184 | 185 | def validate_drug_label(label: dict[str, Any]) -> bool: 186 | """ 187 | Validate a drug label record. 188 | 189 | Args: 190 | label: Drug label record 191 | 192 | Returns: 193 | True if valid 194 | 195 | Raises: 196 | OpenFDAValidationError: If validation fails 197 | """ 198 | if not isinstance(label, dict): 199 | raise OpenFDAValidationError( 200 | f"Expected drug label to be a dict, got {type(label).__name__}" 201 | ) 202 | 203 | # Labels should have OpenFDA section 204 | if "openfda" not in label: 205 | logger.warning("Drug label missing 'openfda' section") 206 | 207 | # Should have at least one section 208 | label_sections = [ 209 | "indications_and_usage", 210 | "contraindications", 211 | "warnings_and_precautions", 212 | "adverse_reactions", 213 | "dosage_and_administration", 214 | ] 215 | 216 | has_section = any(section in label for section in label_sections) 217 | if not has_section: 218 | logger.warning("Drug label has no standard sections") 219 | 220 | return True 221 | 222 | 223 | def validate_device_event(event: dict[str, Any]) -> bool: 224 | """ 225 | Validate a device event record. 
226 | 227 | Args: 228 | event: Device event record 229 | 230 | Returns: 231 | True if valid 232 | 233 | Raises: 234 | OpenFDAValidationError: If validation fails 235 | """ 236 | if not isinstance(event, dict): 237 | raise OpenFDAValidationError( 238 | f"Expected device event to be a dict, got {type(event).__name__}" 239 | ) 240 | 241 | # Device events should have MDR report key 242 | if "mdr_report_key" not in event: 243 | logger.warning("Device event missing 'mdr_report_key'") 244 | 245 | # Should have device information 246 | if "device" not in event and "devices" not in event: 247 | logger.warning("Device event missing device information") 248 | 249 | return True 250 | 251 | 252 | def validate_recall(recall: dict[str, Any]) -> bool: 253 | """ 254 | Validate a recall record. 255 | 256 | Args: 257 | recall: Recall record 258 | 259 | Returns: 260 | True if valid 261 | 262 | Raises: 263 | OpenFDAValidationError: If validation fails 264 | """ 265 | if not isinstance(recall, dict): 266 | raise OpenFDAValidationError( 267 | f"Expected recall to be a dict, got {type(recall).__name__}" 268 | ) 269 | 270 | # Required fields for recalls 271 | required = ["recall_number", "classification", "product_description"] 272 | 273 | for field in required: 274 | if field not in recall: 275 | logger.warning(f"Recall missing required field: {field}") 276 | 277 | # Validate classification if present 278 | if "classification" in recall: 279 | valid_classes = ["Class I", "Class II", "Class III", "1", "2", "3"] 280 | if recall["classification"] not in valid_classes: 281 | logger.warning( 282 | f"Invalid recall classification: {recall['classification']}" 283 | ) 284 | 285 | return True 286 | 287 | 288 | def sanitize_response(response: dict[str, Any]) -> dict[str, Any]: 289 | """ 290 | Sanitize FDA response to handle common issues. 
291 | 292 | Args: 293 | response: Raw FDA response 294 | 295 | Returns: 296 | Sanitized response 297 | """ 298 | if not response: 299 | return {} 300 | 301 | # Handle fields that can be string or list 302 | if "results" in response and isinstance(response["results"], list): 303 | for result in response["results"]: 304 | if isinstance(result, dict): 305 | # Fields that can be string or list 306 | polymorphic_fields = [ 307 | "source_type", 308 | "remedial_action", 309 | "medical_specialty_description", 310 | "manufacturer_name", 311 | "brand_name", 312 | "generic_name", 313 | ] 314 | 315 | for field in polymorphic_fields: 316 | if field in result: 317 | value = result[field] 318 | # Ensure consistent list format 319 | if not isinstance(value, list): 320 | result[field] = [value] if value else [] 321 | 322 | return response 323 | ``` -------------------------------------------------------------------------------- /src/biomcp/openfda/input_validation.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Input validation and sanitization for OpenFDA API requests. 3 | 4 | This module provides security-focused input validation to prevent injection attacks 5 | and ensure data integrity for all FDA API requests. 
6 | """ 7 | 8 | import logging 9 | import re 10 | from typing import Any 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | # Maximum lengths for different input types 15 | MAX_DRUG_NAME_LENGTH = 100 16 | MAX_REACTION_LENGTH = 200 17 | MAX_GENERAL_QUERY_LENGTH = 500 18 | MAX_DATE_LENGTH = 10 19 | 20 | # Patterns for validation 21 | SAFE_CHARS_PATTERN = re.compile(r"^[a-zA-Z0-9\s\-\.\,\(\)\/\*]+$") 22 | DATE_PATTERN = re.compile(r"^\d{4}-\d{2}-\d{2}$") 23 | # Include SQL comment pattern -- and other injection patterns 24 | INJECTION_CHARS = re.compile(r"[<>\"\';&|\\`${}]|--") 25 | 26 | 27 | def sanitize_input( 28 | value: str | None, max_length: int = MAX_GENERAL_QUERY_LENGTH 29 | ) -> str | None: 30 | """ 31 | Sanitize user input to prevent injection attacks. 32 | 33 | Args: 34 | value: Input string to sanitize 35 | max_length: Maximum allowed length 36 | 37 | Returns: 38 | Sanitized string or None if input is invalid 39 | """ 40 | if not value: 41 | return None 42 | 43 | # Convert to string and strip whitespace 44 | value = str(value).strip() 45 | 46 | # Check length 47 | if len(value) > max_length: 48 | logger.warning( 49 | f"Input truncated from {len(value)} to {max_length} characters" 50 | ) 51 | value = value[:max_length] 52 | 53 | # Remove potential injection characters 54 | cleaned = INJECTION_CHARS.sub("", value) 55 | 56 | # Warn if characters were removed 57 | if cleaned != value: 58 | logger.warning("Removed potentially dangerous characters from input") 59 | 60 | # Normalize whitespace 61 | cleaned = " ".join(cleaned.split()) 62 | 63 | return cleaned if cleaned else None 64 | 65 | 66 | def validate_drug_name(drug: str | None) -> str | None: 67 | """ 68 | Validate and sanitize drug name input. 
69 | 70 | Args: 71 | drug: Drug name to validate 72 | 73 | Returns: 74 | Validated drug name or None 75 | """ 76 | if not drug: 77 | return None 78 | 79 | sanitized = sanitize_input(drug, MAX_DRUG_NAME_LENGTH) 80 | 81 | if not sanitized: 82 | return None 83 | 84 | # Drug names should only contain alphanumerics, spaces, hyphens, slashes, and parentheses 85 | if not re.match(r"^[a-zA-Z0-9\s\-\/\(\)]+$", sanitized): 86 | logger.warning(f"Invalid drug name format: {sanitized[:20]}...") 87 | return None 88 | 89 | return sanitized 90 | 91 | 92 | def validate_date(date_str: str | None) -> str | None: 93 | """ 94 | Validate date string format. 95 | 96 | Args: 97 | date_str: Date string in YYYY-MM-DD format 98 | 99 | Returns: 100 | Validated date string or None 101 | """ 102 | if not date_str: 103 | return None 104 | 105 | sanitized = sanitize_input(date_str, MAX_DATE_LENGTH) 106 | 107 | if not sanitized: 108 | return None 109 | 110 | # Check date format 111 | if not DATE_PATTERN.match(sanitized): 112 | logger.warning(f"Invalid date format: {sanitized}") 113 | return None 114 | 115 | # Basic date validation 116 | try: 117 | year, month, day = map(int, sanitized.split("-")) 118 | if not (1900 <= year <= 2100 and 1 <= month <= 12 and 1 <= day <= 31): 119 | logger.warning(f"Date out of valid range: {sanitized}") 120 | return None 121 | except (ValueError, IndexError): 122 | logger.warning(f"Cannot parse date: {sanitized}") 123 | return None 124 | 125 | return sanitized 126 | 127 | 128 | def validate_limit(limit: int | None, max_limit: int = 100) -> int: 129 | """ 130 | Validate and constrain limit parameter. 
131 | 132 | Args: 133 | limit: Requested limit 134 | max_limit: Maximum allowed limit 135 | 136 | Returns: 137 | Valid limit value 138 | """ 139 | if limit is None: 140 | return 25 # Default 141 | 142 | try: 143 | limit = int(limit) 144 | except (ValueError, TypeError): 145 | logger.warning(f"Invalid limit value: {limit}") 146 | return 25 147 | 148 | if limit < 1: 149 | return 1 150 | elif limit > max_limit: 151 | logger.warning(f"Limit {limit} exceeds maximum {max_limit}") 152 | return max_limit 153 | 154 | return limit 155 | 156 | 157 | def validate_skip(skip: int | None, max_skip: int = 10000) -> int: 158 | """ 159 | Validate and constrain skip/offset parameter. 160 | 161 | Args: 162 | skip: Requested skip/offset 163 | max_skip: Maximum allowed skip 164 | 165 | Returns: 166 | Valid skip value 167 | """ 168 | if skip is None: 169 | return 0 170 | 171 | try: 172 | skip = int(skip) 173 | except (ValueError, TypeError): 174 | logger.warning(f"Invalid skip value: {skip}") 175 | return 0 176 | 177 | if skip < 0: 178 | return 0 179 | elif skip > max_skip: 180 | logger.warning(f"Skip {skip} exceeds maximum {max_skip}") 181 | return max_skip 182 | 183 | return skip 184 | 185 | 186 | def validate_classification(classification: str | None) -> str | None: 187 | """ 188 | Validate recall classification. 
189 | 190 | Args: 191 | classification: Classification string (Class I, II, or III) 192 | 193 | Returns: 194 | Validated classification or None 195 | """ 196 | if not classification: 197 | return None 198 | 199 | sanitized = sanitize_input(classification, 20) 200 | 201 | if not sanitized: 202 | return None 203 | 204 | # Normalize classification format 205 | sanitized = sanitized.upper() 206 | 207 | # Check valid classifications 208 | valid_classes = [ 209 | "CLASS I", 210 | "CLASS II", 211 | "CLASS III", 212 | "I", 213 | "II", 214 | "III", 215 | "1", 216 | "2", 217 | "3", 218 | ] 219 | 220 | if sanitized not in valid_classes: 221 | logger.warning(f"Invalid classification: {sanitized}") 222 | return None 223 | 224 | # Normalize to standard format 225 | if sanitized in ["I", "1"]: 226 | return "Class I" 227 | elif sanitized in ["II", "2"]: 228 | return "Class II" 229 | elif sanitized in ["III", "3"]: 230 | return "Class III" 231 | 232 | return sanitized.title() # "CLASS I" -> "Class I"; NOTE(review): title() lowercases later numeral letters, so "CLASS II" -> "Class Ii" and "CLASS III" -> "Class Iii" 233 | 234 | 235 | def validate_status(status: str | None) -> str | None: 236 | """ 237 | Validate status parameter. 238 | 239 | Args: 240 | status: Status string 241 | 242 | Returns: 243 | Validated status or None 244 | """ 245 | if not status: 246 | return None 247 | 248 | sanitized = sanitize_input(status, 50) 249 | 250 | if not sanitized: 251 | return None 252 | 253 | # Normalize status 254 | sanitized = sanitized.lower() 255 | 256 | # Check valid statuses 257 | valid_statuses = [ 258 | "ongoing", 259 | "terminated", 260 | "completed", 261 | "current", 262 | "resolved", 263 | ] 264 | 265 | if sanitized not in valid_statuses: 266 | logger.warning(f"Invalid status: {sanitized}") 267 | return None 268 | 269 | return sanitized.title() # "ongoing" -> "Ongoing" 270 | 271 | 272 | def validate_boolean(value: Any) -> bool | None: 273 | """ 274 | Validate boolean parameter. 
275 | 276 | Args: 277 | value: Boolean-like value 278 | 279 | Returns: 280 | Boolean value or None 281 | """ 282 | if value is None: 283 | return None 284 | 285 | if isinstance(value, bool): 286 | return value 287 | 288 | if isinstance(value, str): 289 | value = value.lower().strip() 290 | if value in ["true", "1", "yes", "y"]: 291 | return True 292 | elif value in ["false", "0", "no", "n"]: 293 | return False 294 | 295 | return None 296 | 297 | 298 | def validate_api_key(api_key: str | None) -> str | None: 299 | """ 300 | Validate API key format. 301 | 302 | Args: 303 | api_key: API key string 304 | 305 | Returns: 306 | Validated API key or None 307 | """ 308 | if not api_key: 309 | return None 310 | 311 | # API keys should be alphanumeric with possible hyphens or underscores 312 | if not re.match(r"^[a-zA-Z0-9\-_]+$", api_key): 313 | logger.warning("Invalid API key format") 314 | return None 315 | 316 | # Check reasonable length 317 | if len(api_key) < 10 or len(api_key) > 100: 318 | logger.warning("API key length out of expected range") 319 | return None 320 | 321 | return api_key 322 | 323 | 324 | def _validate_parameter(key: str, value: Any) -> Any: 325 | """Validate a single parameter based on its key.""" 326 | if key in ["drug", "brand", "generic"]: 327 | return validate_drug_name(value) 328 | elif key in ["limit"]: 329 | return validate_limit(value) 330 | elif key in ["skip", "offset"]: 331 | return validate_skip(value) 332 | elif key in ["classification"]: 333 | return validate_classification(value) 334 | elif key in ["status"]: 335 | return validate_status(value) 336 | elif key in ["serious", "death", "ongoing"]: 337 | return validate_boolean(value) 338 | elif key in ["api_key"]: 339 | return validate_api_key(value) 340 | elif "date" in key.lower(): 341 | return validate_date(value) 342 | else: 343 | return sanitize_input(value) 344 | 345 | 346 | def build_safe_query(params: dict[str, Any]) -> dict[str, Any]: 347 | """ 348 | Build a safe query dictionary with validated 
parameters. 349 | 350 | Args: 351 | params: Raw parameters dictionary 352 | 353 | Returns: 354 | Dictionary with validated parameters 355 | """ 356 | safe_params = {} 357 | 358 | for key, value in params.items(): 359 | if value is None: 360 | continue 361 | 362 | # Validate key name 363 | if not re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", key): 364 | logger.warning(f"Skipping invalid parameter key: {key}") 365 | continue 366 | 367 | # Validate parameter value 368 | validated = _validate_parameter(key, value) 369 | 370 | if validated is not None: 371 | safe_params[key] = validated 372 | 373 | return safe_params 374 | ``` -------------------------------------------------------------------------------- /tests/tdd/openfda/test_device_events.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Unit tests for OpenFDA device events integration. 3 | """ 4 | 5 | from unittest.mock import patch 6 | 7 | import pytest 8 | 9 | from biomcp.openfda.device_events import get_device_event, search_device_events 10 | 11 | 12 | @pytest.mark.asyncio 13 | async def test_search_device_events_by_device(): 14 | """Test searching device events by device name.""" 15 | mock_response = { 16 | "meta": {"results": {"total": 3}}, 17 | "results": [ 18 | { 19 | "event_type": "M", 20 | "date_received": "2024-01-15", 21 | "device": [ 22 | { 23 | "brand_name": "FoundationOne CDx", 24 | "manufacturer_d_name": "Foundation Medicine", 25 | "model_number": "F1CDX", 26 | "device_problem_text": ["False negative result"], 27 | "openfda": { 28 | "device_class": "2", 29 | "medical_specialty_description": ["Pathology"], 30 | "product_code": "PQP", 31 | }, 32 | } 33 | ], 34 | "event_description": "Device failed to detect known mutation", 35 | "mdr_report_key": "MDR123456", 36 | } 37 | ], 38 | } 39 | 40 | with patch( 41 | "biomcp.openfda.device_events.make_openfda_request" 42 | ) as mock_request: 43 | mock_request.return_value = (mock_response, None) 44 | 45 | result 
= await search_device_events(device="FoundationOne", limit=10) 46 | 47 | # Verify request 48 | mock_request.assert_called_once() 49 | call_args = mock_request.call_args 50 | assert "FoundationOne" in call_args[0][1]["search"] 51 | # When searching for a specific device, genomic filter is not needed 52 | # The device search itself is sufficient 53 | 54 | # Check output 55 | assert "FDA Device Adverse Event Reports" in result 56 | assert "FoundationOne CDx" in result 57 | assert "Foundation Medicine" in result 58 | assert "False negative result" in result 59 | assert "Malfunction" in result 60 | assert "MDR123456" in result 61 | 62 | 63 | @pytest.mark.asyncio 64 | async def test_search_device_events_genomics_filter(): 65 | """Test that genomics filter is applied by default.""" 66 | mock_response = {"meta": {"results": {"total": 5}}, "results": []} 67 | 68 | with patch( 69 | "biomcp.openfda.device_events.make_openfda_request" 70 | ) as mock_request: 71 | mock_request.return_value = (mock_response, None) 72 | 73 | await search_device_events(manufacturer="Illumina", genomics_only=True) 74 | 75 | # Verify genomic device codes are in search 76 | call_args = mock_request.call_args 77 | search_query = call_args[0][1]["search"] 78 | # Should contain at least one genomic product code 79 | assert any( 80 | code in search_query for code in ["OOI", "PQP", "OYD", "NYE"] 81 | ) 82 | 83 | 84 | @pytest.mark.asyncio 85 | async def test_search_device_events_no_genomics_filter(): 86 | """Test searching without genomics filter.""" 87 | mock_response = {"meta": {"results": {"total": 10}}, "results": []} 88 | 89 | with patch( 90 | "biomcp.openfda.device_events.make_openfda_request" 91 | ) as mock_request: 92 | mock_request.return_value = (mock_response, None) 93 | 94 | await search_device_events(device="pacemaker", genomics_only=False) 95 | 96 | # Verify no genomic product codes in search 97 | call_args = mock_request.call_args 98 | search_query = call_args[0][1]["search"] 99 | # Should 
not contain genomic product codes 100 | assert not any(code in search_query for code in ["OOI", "PQP", "OYD"]) 101 | 102 | 103 | @pytest.mark.asyncio 104 | async def test_search_device_events_by_problem(): 105 | """Test searching device events by problem description.""" 106 | mock_response = { 107 | "meta": {"results": {"total": 8}}, 108 | "results": [ 109 | { 110 | "event_type": "IN", 111 | "device": [ 112 | { 113 | "brand_name": "Test Device", 114 | "device_problem_text": [ 115 | "Software malfunction", 116 | "Data loss", 117 | ], 118 | } 119 | ], 120 | "mdr_report_key": "MDR789", 121 | } 122 | ], 123 | } 124 | 125 | with patch( 126 | "biomcp.openfda.device_events.make_openfda_request" 127 | ) as mock_request: 128 | mock_request.return_value = (mock_response, None) 129 | 130 | result = await search_device_events(problem="software malfunction") 131 | 132 | # Verify request 133 | call_args = mock_request.call_args 134 | assert "software malfunction" in call_args[0][1]["search"].lower() 135 | 136 | # Check output 137 | assert "Software malfunction" in result 138 | assert "Data loss" in result 139 | assert "Injury" in result # IN = Injury 140 | 141 | 142 | @pytest.mark.asyncio 143 | async def test_search_device_events_no_params(): 144 | """Test that searching without parameters returns helpful message.""" 145 | result = await search_device_events() 146 | 147 | assert "Please specify" in result 148 | assert "device name, manufacturer, or problem" in result 149 | assert "Examples:" in result 150 | 151 | 152 | @pytest.mark.asyncio 153 | async def test_get_device_event_detail(): 154 | """Test getting detailed device event report.""" 155 | mock_response = { 156 | "results": [ 157 | { 158 | "mdr_report_key": "MDR999888", 159 | "event_type": "D", 160 | "date_received": "2024-02-01", 161 | "date_of_event": "2024-01-20", 162 | "source_type": "M", 163 | "device": [ 164 | { 165 | "brand_name": "Genomic Sequencer X", 166 | "manufacturer_d_name": "GenTech Corp", 167 | 
"model_number": "GSX-2000", 168 | "catalog_number": "CAT123", 169 | "lot_number": "LOT456", 170 | "expiration_date_of_device": "2025-12-31", 171 | "device_problem_text": [ 172 | "Critical failure", 173 | "Sample contamination", 174 | ], 175 | "device_evaluated_by_manufacturer": "Y", 176 | "openfda": { 177 | "device_class": "3", 178 | "medical_specialty_description": [ 179 | "Clinical Chemistry" 180 | ], 181 | "product_code": "OOI", 182 | }, 183 | } 184 | ], 185 | "event_description": "Device failure led to incorrect cancer diagnosis", 186 | "manufacturer_narrative": "Investigation revealed component failure", 187 | "patient": [ 188 | { 189 | "patient_age": "65", 190 | "patient_sex": "F", 191 | "date_of_death": "2024-01-25", 192 | "life_threatening": "Y", 193 | } 194 | ], 195 | "remedial_action": "Device recall initiated", 196 | } 197 | ] 198 | } 199 | 200 | with patch( 201 | "biomcp.openfda.device_events.make_openfda_request" 202 | ) as mock_request: 203 | mock_request.return_value = (mock_response, None) 204 | 205 | result = await get_device_event("MDR999888") 206 | 207 | # Verify request 208 | mock_request.assert_called_once() 209 | call_args = mock_request.call_args 210 | assert "MDR999888" in call_args[0][1]["search"] 211 | 212 | # Check detailed output 213 | assert "MDR999888" in result 214 | assert "Death" in result 215 | assert "Genomic Sequencer X" in result 216 | assert "GenTech Corp" in result 217 | assert "GSX-2000" in result 218 | assert "Critical failure" in result 219 | assert "Sample contamination" in result 220 | assert "Class III" in result 221 | assert "65 years" in result 222 | assert "Female" in result 223 | assert "2024-01-25" in result 224 | assert "Life-threatening" in result 225 | assert "Device recall initiated" in result 226 | assert "Investigation revealed component failure" in result 227 | 228 | 229 | @pytest.mark.asyncio 230 | async def test_get_device_event_not_found(): 231 | """Test handling when device event report is not found.""" 
232 | with patch( 233 | "biomcp.openfda.device_events.make_openfda_request" 234 | ) as mock_request: 235 | mock_request.return_value = ({"results": []}, None) 236 | 237 | result = await get_device_event("NOTFOUND789") 238 | 239 | assert "NOTFOUND789" in result 240 | assert "not found" in result 241 | 242 | 243 | @pytest.mark.asyncio 244 | async def test_search_device_events_error(): 245 | """Test error handling in device event search.""" 246 | with patch( 247 | "biomcp.openfda.device_events.make_openfda_request" 248 | ) as mock_request: 249 | mock_request.return_value = (None, "Network timeout") 250 | 251 | result = await search_device_events(device="test") 252 | 253 | assert "Error searching device events" in result 254 | assert "Network timeout" in result 255 | ``` -------------------------------------------------------------------------------- /docs/reference/quick-reference.md: -------------------------------------------------------------------------------- ```markdown 1 | # BioMCP Quick Reference 2 | 3 | ## Command Cheat Sheet 4 | 5 | ### Installation 6 | 7 | ```bash 8 | # Install BioMCP 9 | uv tool install biomcp 10 | 11 | # Update to latest version 12 | uv tool install biomcp --force 13 | 14 | # Check version 15 | biomcp --version 16 | ``` 17 | 18 | ### Article Search Commands 19 | 20 | ```bash 21 | # Basic gene search 22 | biomcp article search --gene BRAF 23 | 24 | # Multiple filters 25 | biomcp article search \ 26 | --gene EGFR --disease "lung cancer" \ 27 | --chemical erlotinib 28 | 29 | # Exclude preprints 30 | biomcp article search --gene TP53 --no-preprints 31 | 32 | # OR logic in keywords 33 | biomcp article search --gene PTEN \ 34 | --keyword "R173|Arg173|p.R173" 35 | 36 | # Get specific article 37 | biomcp article get 38768446 # PMID 38 | biomcp article get "10.1101/2024.01.20.23288905" # DOI 39 | ``` 40 | 41 | ### Trial Search Commands 42 | 43 | ```bash 44 | # Basic disease search 45 | biomcp trial search \ 46 | --condition melanoma --status 
RECRUITING 47 | 48 | # Location-based search (requires coordinates) 49 | biomcp trial search --condition cancer \ 50 | --latitude 40.7128 --longitude -74.0060 --distance 50 51 | 52 | # Phase-specific search 53 | biomcp trial search \ 54 | --condition "breast cancer" --phase PHASE3 55 | 56 | # Using NCI source (requires API key) 57 | biomcp trial search --condition melanoma --source nci \ 58 | --required-mutations "BRAF V600E" --api-key $NCI_API_KEY 59 | ``` 60 | 61 | ### Variant Commands 62 | 63 | ```bash 64 | # Search by gene 65 | biomcp variant search \ 66 | --gene BRCA1 --significance pathogenic 67 | 68 | # Search by HGVS 69 | biomcp variant search --hgvs "NM_007294.4:c.5266dupC" 70 | 71 | # Search by frequency 72 | biomcp variant search --gene TP53 \ 73 | --max-frequency 0.01 --min-cadd 20 74 | 75 | # Get variant details 76 | biomcp variant get rs121913529 77 | biomcp variant get "NM_007294.4:c.5266dupC" 78 | 79 | # Predict effects (requires AlphaGenome key) 80 | biomcp variant predict chr7 140753336 A T --tissue UBERON:0002367 81 | ``` 82 | 83 | ### Gene/Drug/Disease Commands 84 | 85 | ```bash 86 | # Get gene information 87 | biomcp gene get TP53 88 | biomcp gene get BRAF 89 | 90 | # Get drug information 91 | biomcp drug get imatinib 92 | biomcp drug get pembrolizumab 93 | 94 | # Get disease information 95 | biomcp disease get melanoma 96 | biomcp disease get "non-small cell lung cancer" 97 | ``` 98 | 99 | ### NCI Commands (Require API Key) 100 | 101 | ```bash 102 | # Search organizations 103 | biomcp organization search --name "MD Anderson" \ 104 | --city Houston --state TX --api-key $NCI_API_KEY 105 | 106 | # Search interventions 107 | biomcp intervention search --name pembrolizumab \ 108 | --intervention-type Drug --api-key $NCI_API_KEY 109 | 110 | # Search biomarkers 111 | biomcp biomarker search --gene EGFR \ 112 | --biomarker-type mutation --api-key $NCI_API_KEY 113 | ``` 114 | 115 | ### Health Check 116 | 117 | ```bash 118 | # Full health check 119 | 
biomcp health check 120 | 121 | # Check APIs only 122 | biomcp health check --apis-only 123 | 124 | # Verbose output 125 | biomcp health check --verbose 126 | ``` 127 | 128 | ## Common Parameter Reference 129 | 130 | ### Search Parameters 131 | 132 | | Parameter | Description | Example | 133 | | ---------- | ------------- | --------------- | 134 | | `--limit` | Max results | `--limit 20` | 135 | | `--page` | Page number | `--page 2` | 136 | | `--format` | Output format | `--format json` | 137 | 138 | ### Trial Status Values 139 | 140 | | Status | Description | 141 | | ----------------------- | ---------------------- | 142 | | `RECRUITING` | Currently enrolling | 143 | | `ACTIVE_NOT_RECRUITING` | Ongoing, not enrolling | 144 | | `NOT_YET_RECRUITING` | Will start recruiting | 145 | | `COMPLETED` | Trial has ended | 146 | | `SUSPENDED` | Temporarily halted | 147 | | `TERMINATED` | Stopped early | 148 | 149 | ### Trial Phase Values 150 | 151 | | Phase | Description | 152 | | -------------- | ------------- | 153 | | `EARLY_PHASE1` | Early Phase 1 | 154 | | `PHASE1` | Phase 1 | 155 | | `PHASE2` | Phase 2 | 156 | | `PHASE3` | Phase 3 | 157 | | `PHASE4` | Phase 4 | 158 | 159 | ### Clinical Significance 160 | 161 | | Value | Description | 162 | | ------------------------ | ----------------------- | 163 | | `pathogenic` | Causes disease | 164 | | `likely_pathogenic` | Probably causes disease | 165 | | `uncertain_significance` | Unknown impact | 166 | | `likely_benign` | Probably harmless | 167 | | `benign` | Does not cause disease | 168 | 169 | ## Gene Symbol Quick Lookup 170 | 171 | ### Common Gene Aliases 172 | 173 | | Common Name | Official Symbol | 174 | | ----------- | --------------- | 175 | | HER2 | ERBB2 | 176 | | HER3 | ERBB3 | 177 | | EGFR | EGFR | 178 | | ALK | ALK | 179 | | c-MET | MET | 180 | | PD-1 | PDCD1 | 181 | | PD-L1 | CD274 | 182 | | CTLA-4 | CTLA4 | 183 | 184 | ## Location Coordinates 185 | 186 | ### Major US Cities 187 | 188 | | City | Latitude | 
Longitude | 189 | | ------------- | -------- | --------- | 190 | | New York | 40.7128 | -74.0060 | 191 | | Los Angeles | 34.0522 | -118.2437 | 192 | | Chicago | 41.8781 | -87.6298 | 193 | | Houston | 29.7604 | -95.3698 | 194 | | Philadelphia | 39.9526 | -75.1652 | 195 | | Boston | 42.3601 | -71.0589 | 196 | | Atlanta | 33.7490 | -84.3880 | 197 | | Miami | 25.7617 | -80.1918 | 198 | | Seattle | 47.6062 | -122.3321 | 199 | | San Francisco | 37.7749 | -122.4194 | 200 | 201 | ## Environment Variables 202 | 203 | ```bash 204 | # API Keys 205 | export NCI_API_KEY="your-nci-key" 206 | export ALPHAGENOME_API_KEY="your-alphagenome-key" 207 | export CBIO_TOKEN="your-cbioportal-token" 208 | 209 | # Configuration 210 | export BIOMCP_LOG_LEVEL="DEBUG" 211 | export BIOMCP_CACHE_DIR="/path/to/cache" 212 | export BIOMCP_TIMEOUT=300 213 | export BIOMCP_MAX_CONCURRENT=5 214 | ``` 215 | 216 | ## Output Format Examples 217 | 218 | ### JSON Output 219 | 220 | ```bash 221 | biomcp article search --gene BRAF --format json | jq '.articles[0]' 222 | ``` 223 | 224 | ### Extract Specific Fields 225 | 226 | ```bash 227 | # Get PMIDs only 228 | biomcp article search --gene TP53 --format json | \ 229 | jq -r '.articles[].pmid' 230 | 231 | # Get trial NCT IDs 232 | biomcp trial search --condition melanoma --format json | \ 233 | jq -r '.trials[].nct_id' 234 | ``` 235 | 236 | ### Save to File 237 | 238 | ```bash 239 | biomcp article search --gene BRCA1 --format json > results.json 240 | ``` 241 | 242 | ## MCP Tool Names 243 | 244 | ### Core Tools 245 | 246 | - `search` - Unified search 247 | - `fetch` - Get details 248 | - `think` - Sequential thinking 249 | 250 | ### Article Tools 251 | 252 | - `article_searcher` 253 | - `article_getter` 254 | 255 | ### Trial Tools 256 | 257 | - `trial_searcher` 258 | - `trial_getter` 259 | - `trial_protocol_getter` 260 | - `trial_references_getter` 261 | - `trial_outcomes_getter` 262 | - `trial_locations_getter` 263 | 264 | ### Variant Tools 265 | 266 | - 
`variant_searcher` 267 | - `variant_getter` 268 | - `alphagenome_predictor` 269 | 270 | ### BioThings Tools 271 | 272 | - `gene_getter` 273 | - `disease_getter` 274 | - `drug_getter` 275 | 276 | ### NCI Tools 277 | 278 | - `nci_organization_searcher` 279 | - `nci_organization_getter` 280 | - `nci_intervention_searcher` 281 | - `nci_intervention_getter` 282 | - `nci_biomarker_searcher` 283 | - `nci_disease_searcher` 284 | 285 | ## Query Language Syntax 286 | 287 | ### Unified Search Examples 288 | 289 | ``` 290 | gene:BRAF AND disease:melanoma 291 | gene:EGFR AND (mutation OR variant) 292 | drugs.tradename:gleevec 293 | diseases.name:"lung cancer" 294 | chemicals.mesh:D000069439 295 | ``` 296 | 297 | ### Field Prefixes 298 | 299 | - `gene:` - Gene symbol 300 | - `disease:` - Disease/condition 301 | - `chemical:` - Drug/chemical 302 | - `variant:` - Genetic variant 303 | - `pmid:` - PubMed ID 304 | - `doi:` - Digital Object Identifier (DOI) 305 | 306 | ## Common Workflows 307 | 308 | ### Find Articles About a Mutation 309 | 310 | ```bash 311 | # Step 1: Search articles 312 | biomcp article search --gene BRAF --keyword "V600E|p.V600E" 313 | 314 | # Step 2: Get full article 315 | biomcp article get [PMID] 316 | ``` 317 | 318 | ### Check Trial Eligibility 319 | 320 | ```bash 321 | # Step 1: Search trials 322 | biomcp trial search --condition melanoma --status RECRUITING 323 | 324 | # Step 2: Get trial details 325 | biomcp trial get NCT03006926 326 | ``` 327 | 328 | ### Variant Analysis 329 | 330 | ```bash 331 | # Step 1: Search variant 332 | biomcp variant search --gene BRCA1 --significance pathogenic 333 | 334 | # Step 2: Get variant details 335 | biomcp variant get rs80357906 336 | 337 | # Step 3: Search related articles 338 | biomcp article search --gene BRCA1 --variant rs80357906 339 | ``` 340 | 341 | ## Error Code Quick Reference 342 | 343 | ### Common HTTP Codes 344 | 345 | - `400` - Bad request (check parameters) 346 | - `401` - Unauthorized (check API key) 347 | - `404` - Not 
found (verify ID) 348 | - `429` - Rate limited (wait and retry) 349 | - `500` - Server error (retry later) 350 | 351 | ### BioMCP Error Patterns 352 | 353 | - `1xxx` - Article errors 354 | - `2xxx` - Trial errors 355 | - `3xxx` - Variant errors 356 | - `4xxx` - Gene/drug/disease errors 357 | - `5xxx` - Authentication errors 358 | - `6xxx` - Rate limit errors 359 | - `7xxx` - Validation errors 360 | 361 | ## Tips and Tricks 362 | 363 | ### 1. Use Official Gene Symbols 364 | 365 | ```bash 366 | # Wrong 367 | biomcp article search --gene HER2 # ❌ 368 | 369 | # Right 370 | biomcp article search --gene ERBB2 # ✅ 371 | ``` 372 | 373 | ### 2. Combine Multiple Searches 374 | 375 | ```bash 376 | # Search multiple databases in parallel 377 | ( 378 | biomcp article search --gene BRAF --format json > articles.json & 379 | biomcp trial search --condition melanoma --format json > trials.json & 380 | biomcp variant search --gene BRAF --format json > variants.json & 381 | wait 382 | ) 383 | ``` 384 | 385 | ### 3. Process Large Results 386 | 387 | ```bash 388 | # Paginate through results 389 | for page in {1..10}; do 390 | biomcp article search --gene TP53 --page $page --limit 100 391 | done 392 | ``` 393 | 394 | ### 4. 
Debug API Issues 395 | 396 | ```bash 397 | # Enable debug logging 398 | export BIOMCP_LOG_LEVEL=DEBUG 399 | biomcp article search --gene BRAF --verbose 400 | ``` 401 | 402 | ## Getting Help 403 | 404 | ```bash 405 | # General help 406 | biomcp --help 407 | 408 | # Command help 409 | biomcp article search --help 410 | 411 | # Check documentation 412 | open https://biomcp.org/ 413 | 414 | # Report issues 415 | open https://github.com/genomoncology/biomcp/issues 416 | ``` 417 | ``` -------------------------------------------------------------------------------- /tests/tdd/test_retry.py: -------------------------------------------------------------------------------- ```python 1 | """Tests for retry logic with exponential backoff.""" 2 | 3 | import asyncio 4 | from unittest.mock import AsyncMock, MagicMock, patch 5 | 6 | import httpx 7 | import pytest 8 | 9 | from biomcp.retry import ( 10 | RetryableHTTPError, 11 | RetryConfig, 12 | calculate_delay, 13 | is_retryable_exception, 14 | is_retryable_status, 15 | retry_with_backoff, 16 | with_retry, 17 | ) 18 | 19 | 20 | def test_calculate_delay_exponential_backoff(): 21 | """Test that delay increases exponentially.""" 22 | config = RetryConfig(initial_delay=1.0, exponential_base=2.0, jitter=False) 23 | 24 | # Test exponential increase 25 | assert calculate_delay(0, config) == 1.0 # 1 * 2^0 26 | assert calculate_delay(1, config) == 2.0 # 1 * 2^1 27 | assert calculate_delay(2, config) == 4.0 # 1 * 2^2 28 | assert calculate_delay(3, config) == 8.0 # 1 * 2^3 29 | 30 | 31 | def test_calculate_delay_max_cap(): 32 | """Test that delay is capped at max_delay.""" 33 | config = RetryConfig( 34 | initial_delay=1.0, exponential_base=2.0, max_delay=5.0, jitter=False 35 | ) 36 | 37 | # Test that delay is capped 38 | assert calculate_delay(0, config) == 1.0 39 | assert calculate_delay(1, config) == 2.0 40 | assert calculate_delay(2, config) == 4.0 41 | assert calculate_delay(3, config) == 5.0 # Capped at max_delay 42 | assert 
calculate_delay(10, config) == 5.0 # Still capped 43 | 44 | 45 | def test_calculate_delay_with_jitter(): 46 | """Test that jitter adds randomness to delay.""" 47 | config = RetryConfig(initial_delay=10.0, jitter=True) 48 | 49 | # Generate multiple delays and check they're different 50 | delays = [calculate_delay(1, config) for _ in range(10)] 51 | 52 | # All should be around 20.0 (10 * 2^1) with jitter 53 | for delay in delays: 54 | assert 18.0 <= delay <= 22.0 # Within 10% jitter range 55 | 56 | # Should have some variation 57 | assert len(set(delays)) > 1 58 | 59 | 60 | def test_is_retryable_exception(): 61 | """Test exception retryability check.""" 62 | config = RetryConfig(retryable_exceptions=(ConnectionError, TimeoutError)) 63 | 64 | # Retryable exceptions 65 | assert is_retryable_exception(ConnectionError("test"), config) 66 | assert is_retryable_exception(TimeoutError("test"), config) 67 | 68 | # Non-retryable exceptions 69 | assert not is_retryable_exception(ValueError("test"), config) 70 | assert not is_retryable_exception(KeyError("test"), config) 71 | 72 | 73 | def test_is_retryable_status(): 74 | """Test HTTP status code retryability check.""" 75 | config = RetryConfig(retryable_status_codes=(429, 502, 503, 504)) 76 | 77 | # Retryable status codes 78 | assert is_retryable_status(429, config) 79 | assert is_retryable_status(502, config) 80 | assert is_retryable_status(503, config) 81 | assert is_retryable_status(504, config) 82 | 83 | # Non-retryable status codes 84 | assert not is_retryable_status(200, config) 85 | assert not is_retryable_status(404, config) 86 | assert not is_retryable_status(500, config) 87 | 88 | 89 | @pytest.mark.asyncio 90 | async def test_with_retry_decorator_success(): 91 | """Test retry decorator with successful call.""" 92 | call_count = 0 93 | 94 | @with_retry(RetryConfig(max_attempts=3)) 95 | async def test_func(): 96 | nonlocal call_count 97 | call_count += 1 98 | return "success" 99 | 100 | result = await test_func() 101 | 
assert result == "success" 102 | assert call_count == 1 # Should succeed on first try 103 | 104 | 105 | @pytest.mark.asyncio 106 | async def test_with_retry_decorator_eventual_success(): 107 | """Test retry decorator with eventual success.""" 108 | call_count = 0 109 | 110 | @with_retry( 111 | RetryConfig( 112 | max_attempts=3, 113 | initial_delay=0.01, # Fast for testing 114 | retryable_exceptions=(ValueError,), 115 | ) 116 | ) 117 | async def test_func(): 118 | nonlocal call_count 119 | call_count += 1 120 | if call_count < 3: 121 | raise ValueError("Transient error") 122 | return "success" 123 | 124 | result = await test_func() 125 | assert result == "success" 126 | assert call_count == 3 127 | 128 | 129 | @pytest.mark.asyncio 130 | async def test_with_retry_decorator_max_attempts_exceeded(): 131 | """Test retry decorator when max attempts exceeded.""" 132 | call_count = 0 133 | 134 | @with_retry( 135 | RetryConfig( 136 | max_attempts=3, 137 | initial_delay=0.01, 138 | retryable_exceptions=(ConnectionError,), 139 | ) 140 | ) 141 | async def test_func(): 142 | nonlocal call_count 143 | call_count += 1 144 | raise ConnectionError("Persistent error") 145 | 146 | with pytest.raises(ConnectionError, match="Persistent error"): 147 | await test_func() 148 | 149 | assert call_count == 3 150 | 151 | 152 | @pytest.mark.asyncio 153 | async def test_with_retry_non_retryable_exception(): 154 | """Test retry decorator with non-retryable exception.""" 155 | call_count = 0 156 | 157 | @with_retry( 158 | RetryConfig(max_attempts=3, retryable_exceptions=(ConnectionError,)) 159 | ) 160 | async def test_func(): 161 | nonlocal call_count 162 | call_count += 1 163 | raise ValueError("Non-retryable error") 164 | 165 | with pytest.raises(ValueError, match="Non-retryable error"): 166 | await test_func() 167 | 168 | assert call_count == 1 # Should not retry 169 | 170 | 171 | @pytest.mark.asyncio 172 | async def test_retry_with_backoff_function(): 173 | """Test retry_with_backoff 
function.""" 174 | call_count = 0 175 | 176 | async def test_func(value): 177 | nonlocal call_count 178 | call_count += 1 179 | if call_count < 2: 180 | raise TimeoutError("Timeout") 181 | return f"result: {value}" 182 | 183 | config = RetryConfig( 184 | max_attempts=3, 185 | initial_delay=0.01, 186 | retryable_exceptions=(TimeoutError,), 187 | ) 188 | 189 | result = await retry_with_backoff(test_func, "test", config=config) 190 | assert result == "result: test" 191 | assert call_count == 2 192 | 193 | 194 | def test_retryable_http_error(): 195 | """Test RetryableHTTPError.""" 196 | error = RetryableHTTPError(503, "Service Unavailable") 197 | assert error.status_code == 503 198 | assert error.message == "Service Unavailable" 199 | assert str(error) == "HTTP 503: Service Unavailable" 200 | 201 | 202 | @pytest.mark.asyncio 203 | async def test_retry_with_delay_progression(): 204 | """Test that retries happen with correct delay progression.""" 205 | call_times = [] 206 | 207 | @with_retry( 208 | RetryConfig( 209 | max_attempts=3, 210 | initial_delay=0.1, 211 | exponential_base=2.0, 212 | jitter=False, 213 | retryable_exceptions=(ValueError,), 214 | ) 215 | ) 216 | async def test_func(): 217 | call_times.append(asyncio.get_event_loop().time()) 218 | if len(call_times) < 3: 219 | raise ValueError("Retry me") 220 | return "success" 221 | 222 | asyncio.get_event_loop().time() 223 | result = await test_func() 224 | 225 | assert result == "success" 226 | assert len(call_times) == 3 227 | 228 | # Check delays between attempts (allowing some tolerance) 229 | first_delay = call_times[1] - call_times[0] 230 | second_delay = call_times[2] - call_times[1] 231 | 232 | assert 0.08 <= first_delay <= 0.12 # ~0.1s 233 | assert 0.18 <= second_delay <= 0.22 # ~0.2s 234 | 235 | 236 | @pytest.mark.asyncio 237 | async def test_integration_with_http_client(monkeypatch): 238 | """Test retry integration with HTTP client.""" 239 | from biomcp.http_client import call_http 240 | 241 | # Disable 
connection pooling for this test 242 | monkeypatch.setenv("BIOMCP_USE_CONNECTION_POOL", "false") 243 | 244 | # Test 1: Connection error retry 245 | with patch( 246 | "biomcp.http_client_simple.httpx.AsyncClient" 247 | ) as mock_client_class: 248 | mock_client = AsyncMock() 249 | mock_client_class.return_value = mock_client 250 | mock_client.aclose = AsyncMock() # Mock aclose method 251 | 252 | # Simulate connection errors then success 253 | call_count = 0 254 | 255 | async def mock_get(*args, **kwargs): 256 | nonlocal call_count 257 | call_count += 1 258 | if call_count < 3: 259 | raise httpx.ConnectError("Connection failed") 260 | # Return success on third try 261 | mock_response = MagicMock() 262 | mock_response.status_code = 200 263 | mock_response.text = '{"result": "success"}' 264 | return mock_response 265 | 266 | mock_client.get = mock_get 267 | 268 | config = RetryConfig( 269 | max_attempts=3, 270 | initial_delay=0.01, 271 | ) 272 | 273 | status, content = await call_http( 274 | "GET", "https://api.example.com/test", {}, retry_config=config 275 | ) 276 | 277 | assert status == 200 278 | assert content == '{"result": "success"}' 279 | assert call_count == 3 280 | 281 | # Test 2: Timeout error retry 282 | with patch( 283 | "biomcp.http_client_simple.httpx.AsyncClient" 284 | ) as mock_client_class: 285 | mock_client = AsyncMock() 286 | mock_client_class.return_value = mock_client 287 | mock_client.aclose = AsyncMock() # Mock aclose method 288 | 289 | # Simulate timeout errors 290 | mock_client.get.side_effect = httpx.TimeoutException( 291 | "Request timed out" 292 | ) 293 | 294 | config = RetryConfig( 295 | max_attempts=2, 296 | initial_delay=0.01, 297 | ) 298 | 299 | # This should raise TimeoutError after retries fail 300 | with pytest.raises(TimeoutError): 301 | await call_http( 302 | "GET", "https://api.example.com/test", {}, retry_config=config 303 | ) 304 | 305 | assert mock_client.get.call_count == 2 306 | ``` 
-------------------------------------------------------------------------------- /src/biomcp/circuit_breaker.py: -------------------------------------------------------------------------------- ```python 1 | """Circuit breaker pattern implementation for fault tolerance.""" 2 | 3 | import asyncio 4 | import enum 5 | import logging 6 | from collections.abc import Callable 7 | from dataclasses import dataclass, field 8 | from datetime import datetime 9 | from typing import Any 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class CircuitState(enum.Enum): 15 | """Circuit breaker states.""" 16 | 17 | CLOSED = "closed" # Normal operation, requests pass through 18 | OPEN = "open" # Circuit tripped, requests fail fast 19 | HALF_OPEN = "half_open" # Testing if service recovered 20 | 21 | 22 | @dataclass 23 | class CircuitBreakerConfig: 24 | """Configuration for circuit breaker behavior.""" 25 | 26 | failure_threshold: int = 5 27 | """Number of failures before opening circuit""" 28 | 29 | recovery_timeout: float = 60.0 30 | """Seconds to wait before attempting recovery""" 31 | 32 | success_threshold: int = 2 33 | """Successes needed in half-open state to close circuit""" 34 | 35 | expected_exception: type[Exception] | tuple[type[Exception], ...] = ( 36 | Exception 37 | ) 38 | """Exception types that count as failures""" 39 | 40 | exclude_exceptions: tuple[type[Exception], ...] 
= () 41 | """Exception types that don't count as failures""" 42 | 43 | 44 | @dataclass 45 | class CircuitBreakerState: 46 | """Mutable state for a circuit breaker.""" 47 | 48 | state: CircuitState = CircuitState.CLOSED 49 | failure_count: int = 0 50 | success_count: int = 0 51 | last_failure_time: datetime | None = None 52 | last_state_change: datetime = field(default_factory=datetime.now) 53 | _lock: asyncio.Lock = field(default_factory=asyncio.Lock) 54 | 55 | 56 | class CircuitBreakerError(Exception): 57 | """Raised when circuit breaker is open.""" 58 | 59 | def __init__( 60 | self, message: str, last_failure_time: datetime | None = None 61 | ): 62 | super().__init__(message) 63 | self.last_failure_time = last_failure_time 64 | 65 | 66 | class CircuitBreaker: 67 | """Circuit breaker implementation.""" 68 | 69 | def __init__( 70 | self, 71 | name: str, 72 | config: CircuitBreakerConfig | None = None, 73 | ): 74 | """Initialize circuit breaker. 75 | 76 | Args: 77 | name: Circuit breaker name for logging 78 | config: Configuration (uses defaults if not provided) 79 | """ 80 | self.name = name 81 | self.config = config or CircuitBreakerConfig() 82 | self._state = CircuitBreakerState() 83 | 84 | async def call( 85 | self, 86 | func: Callable[..., Any], 87 | *args: Any, 88 | **kwargs: Any, 89 | ) -> Any: 90 | """Execute function through circuit breaker. 
91 | 92 | Args: 93 | func: Async function to execute 94 | *args: Positional arguments for func 95 | **kwargs: Keyword arguments for func 96 | 97 | Returns: 98 | Result of function call 99 | 100 | Raises: 101 | CircuitBreakerError: If circuit is open 102 | Exception: If function raises exception 103 | """ 104 | async with self._state._lock: 105 | # Check if we should transition from open to half-open 106 | if self._state.state == CircuitState.OPEN: 107 | if self._should_attempt_reset(): 108 | self._state.state = CircuitState.HALF_OPEN 109 | self._state.success_count = 0 110 | self._state.last_state_change = datetime.now() 111 | logger.info( 112 | f"Circuit breaker '{self.name}' entering half-open state" 113 | ) 114 | else: 115 | raise CircuitBreakerError( 116 | f"Circuit breaker '{self.name}' is open", 117 | self._state.last_failure_time, 118 | ) 119 | 120 | # Execute the function 121 | try: 122 | result = await func(*args, **kwargs) 123 | await self._on_success() 124 | return result 125 | except Exception as exc: 126 | if await self._on_failure(exc): 127 | raise 128 | # If exception doesn't count as failure, re-raise it 129 | raise 130 | 131 | async def _on_success(self) -> None: 132 | """Handle successful call.""" 133 | async with self._state._lock: 134 | if self._state.state == CircuitState.HALF_OPEN: 135 | self._state.success_count += 1 136 | if self._state.success_count >= self.config.success_threshold: 137 | self._state.state = CircuitState.CLOSED 138 | self._state.failure_count = 0 139 | self._state.success_count = 0 140 | self._state.last_state_change = datetime.now() 141 | logger.info( 142 | f"Circuit breaker '{self.name}' closed after recovery" 143 | ) 144 | elif self._state.state == CircuitState.CLOSED: 145 | # Reset failure count on success 146 | self._state.failure_count = 0 147 | 148 | async def _on_failure(self, exc: Exception) -> bool: 149 | """Handle failed call. 
150 | 151 | Args: 152 | exc: The exception that was raised 153 | 154 | Returns: 155 | True if exception counts as failure 156 | """ 157 | # Check if exception should be counted 158 | if not self._is_counted_exception(exc): 159 | return False 160 | 161 | async with self._state._lock: 162 | self._state.failure_count += 1 163 | self._state.last_failure_time = datetime.now() 164 | 165 | if self._state.state == CircuitState.HALF_OPEN: 166 | # Single failure in half-open state reopens circuit 167 | self._state.state = CircuitState.OPEN 168 | self._state.last_state_change = datetime.now() 169 | logger.warning( 170 | f"Circuit breaker '{self.name}' reopened due to failure in half-open state" 171 | ) 172 | elif ( 173 | self._state.state == CircuitState.CLOSED 174 | and self._state.failure_count >= self.config.failure_threshold 175 | ): 176 | # Threshold exceeded, open circuit 177 | self._state.state = CircuitState.OPEN 178 | self._state.last_state_change = datetime.now() 179 | logger.error( 180 | f"Circuit breaker '{self.name}' opened after {self._state.failure_count} failures" 181 | ) 182 | 183 | return True 184 | 185 | def _should_attempt_reset(self) -> bool: 186 | """Check if enough time has passed to attempt reset.""" 187 | if self._state.last_failure_time is None: 188 | return True 189 | 190 | time_since_failure = datetime.now() - self._state.last_failure_time 191 | return ( 192 | time_since_failure.total_seconds() >= self.config.recovery_timeout 193 | ) 194 | 195 | def _is_counted_exception(self, exc: Exception) -> bool: 196 | """Check if exception should count as failure.""" 197 | # Check excluded exceptions first 198 | if isinstance(exc, self.config.exclude_exceptions): 199 | return False 200 | 201 | # Check expected exceptions 202 | return isinstance(exc, self.config.expected_exception) 203 | 204 | @property 205 | def state(self) -> CircuitState: 206 | """Get current circuit state.""" 207 | return self._state.state 208 | 209 | @property 210 | def is_open(self) -> 
bool: 211 | """Check if circuit is open.""" 212 | return self._state.state == CircuitState.OPEN 213 | 214 | @property 215 | def is_closed(self) -> bool: 216 | """Check if circuit is closed.""" 217 | return self._state.state == CircuitState.CLOSED 218 | 219 | async def reset(self) -> None: 220 | """Manually reset circuit to closed state.""" 221 | async with self._state._lock: 222 | self._state.state = CircuitState.CLOSED 223 | self._state.failure_count = 0 224 | self._state.success_count = 0 225 | self._state.last_failure_time = None 226 | self._state.last_state_change = datetime.now() 227 | logger.info(f"Circuit breaker '{self.name}' manually reset") 228 | 229 | 230 | # Global registry of circuit breakers 231 | _circuit_breakers: dict[str, CircuitBreaker] = {} 232 | 233 | 234 | def get_circuit_breaker( 235 | name: str, 236 | config: CircuitBreakerConfig | None = None, 237 | ) -> CircuitBreaker: 238 | """Get or create a circuit breaker. 239 | 240 | Args: 241 | name: Circuit breaker name 242 | config: Configuration (used only on creation) 243 | 244 | Returns: 245 | Circuit breaker instance 246 | """ 247 | if name not in _circuit_breakers: 248 | _circuit_breakers[name] = CircuitBreaker(name, config) 249 | return _circuit_breakers[name] 250 | 251 | 252 | def circuit_breaker( 253 | name: str | None = None, 254 | config: CircuitBreakerConfig | None = None, 255 | ): 256 | """Decorator to apply circuit breaker to function. 
257 | 258 | Args: 259 | name: Circuit breaker name (defaults to function name) 260 | config: Circuit breaker configuration 261 | 262 | Returns: 263 | Decorated function 264 | """ 265 | 266 | def decorator(func): 267 | breaker_name = name or f"{func.__module__}.{func.__name__}" 268 | breaker = get_circuit_breaker(breaker_name, config) 269 | 270 | async def wrapper(*args, **kwargs): 271 | return await breaker.call(func, *args, **kwargs) 272 | 273 | # Preserve function metadata 274 | wrapper.__name__ = func.__name__ 275 | wrapper.__doc__ = func.__doc__ 276 | wrapper._circuit_breaker = breaker # Expose breaker for testing 277 | 278 | return wrapper 279 | 280 | return decorator 281 | ```