This is page 9 of 19. Use http://codebase.md/genomoncology/biomcp?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .github │ ├── actions │ │ └── setup-python-env │ │ └── action.yml │ ├── dependabot.yml │ └── workflows │ ├── ci.yml │ ├── deploy-docs.yml │ ├── main.yml.disabled │ ├── on-release-main.yml │ └── validate-codecov-config.yml ├── .gitignore ├── .pre-commit-config.yaml ├── BIOMCP_DATA_FLOW.md ├── CHANGELOG.md ├── CNAME ├── codecov.yaml ├── docker-compose.yml ├── Dockerfile ├── docs │ ├── apis │ │ ├── error-codes.md │ │ ├── overview.md │ │ └── python-sdk.md │ ├── assets │ │ ├── biomcp-cursor-locations.png │ │ ├── favicon.ico │ │ ├── icon.png │ │ ├── logo.png │ │ ├── mcp_architecture.txt │ │ └── remote-connection │ │ ├── 00_connectors.png │ │ ├── 01_add_custom_connector.png │ │ ├── 02_connector_enabled.png │ │ ├── 03_connect_to_biomcp.png │ │ ├── 04_select_google_oauth.png │ │ └── 05_success_connect.png │ ├── backend-services-reference │ │ ├── 01-overview.md │ │ ├── 02-biothings-suite.md │ │ ├── 03-cbioportal.md │ │ ├── 04-clinicaltrials-gov.md │ │ ├── 05-nci-cts-api.md │ │ ├── 06-pubtator3.md │ │ └── 07-alphagenome.md │ ├── blog │ │ ├── ai-assisted-clinical-trial-search-analysis.md │ │ ├── images │ │ │ ├── deep-researcher-video.png │ │ │ ├── researcher-announce.png │ │ │ ├── researcher-drop-down.png │ │ │ ├── researcher-prompt.png │ │ │ ├── trial-search-assistant.png │ │ │ └── what_is_biomcp_thumbnail.png │ │ └── researcher-persona-resource.md │ ├── changelog.md │ ├── CNAME │ ├── concepts │ │ ├── 01-what-is-biomcp.md │ │ ├── 02-the-deep-researcher-persona.md │ │ └── 03-sequential-thinking-with-the-think-tool.md │ ├── developer-guides │ │ ├── 01-server-deployment.md │ │ ├── 02-contributing-and-testing.md │ │ ├── 03-third-party-endpoints.md │ │ ├── 04-transport-protocol.md │ │ ├── 05-error-handling.md │ │ ├── 06-http-client-and-caching.md │ │ ├── 07-performance-optimizations.md │ │ └── generate_endpoints.py │ ├── faq-condensed.md │ ├── FDA_SECURITY.md │ ├── genomoncology.md │ ├── getting-started │ │ ├── 01-quickstart-cli.md │ │ ├── 02-claude-desktop-integration.md │ │ └── 03-authentication-and-api-keys.md │ ├── how-to-guides │ │ ├── 01-find-articles-and-cbioportal-data.md │ │ ├── 02-find-trials-with-nci-and-biothings.md │ │ ├── 03-get-comprehensive-variant-annotations.md │ │ ├── 04-predict-variant-effects-with-alphagenome.md │ │ ├── 05-logging-and-monitoring-with-bigquery.md │ │ └── 06-search-nci-organizations-and-interventions.md │ ├── index.md │ ├── policies.md │ ├── reference │ │ ├── architecture-diagrams.md │ │ ├── quick-architecture.md │ │ ├── quick-reference.md │ │ └── visual-architecture.md │ ├── robots.txt │ ├── stylesheets │ │ ├── announcement.css │ │ └── extra.css │ ├── troubleshooting.md │ ├── tutorials │ │ ├── biothings-prompts.md │ │ ├── claude-code-biomcp-alphagenome.md │ │ ├── nci-prompts.md │ │ ├── openfda-integration.md │ │ ├── openfda-prompts.md │ │ ├── pydantic-ai-integration.md │ │ └── remote-connection.md │ ├── user-guides │ │ ├── 01-command-line-interface.md │ │ ├── 02-mcp-tools-reference.md │ │ └── 03-integrating-with-ides-and-clients.md │ └── workflows │ └── all-workflows.md ├── example_scripts │ ├── mcp_integration.py │ └── python_sdk.py ├── glama.json ├── LICENSE ├── lzyank.toml ├── Makefile ├── mkdocs.yml ├── package-lock.json ├── package.json ├── pyproject.toml ├── README.md ├── scripts │ ├── check_docs_in_mkdocs.py │ ├── check_http_imports.py │ └── generate_endpoints_doc.py ├── smithery.yaml ├── src │ └── biomcp │ ├── __init__.py 
│ ├── __main__.py │ ├── articles │ │ ├── __init__.py │ │ ├── autocomplete.py │ │ ├── fetch.py │ │ ├── preprints.py │ │ ├── search_optimized.py │ │ ├── search.py │ │ └── unified.py │ ├── biomarkers │ │ ├── __init__.py │ │ └── search.py │ ├── cbioportal_helper.py │ ├── circuit_breaker.py │ ├── cli │ │ ├── __init__.py │ │ ├── articles.py │ │ ├── biomarkers.py │ │ ├── diseases.py │ │ ├── health.py │ │ ├── interventions.py │ │ ├── main.py │ │ ├── openfda.py │ │ ├── organizations.py │ │ ├── server.py │ │ ├── trials.py │ │ └── variants.py │ ├── connection_pool.py │ ├── constants.py │ ├── core.py │ ├── diseases │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── domain_handlers.py │ ├── drugs │ │ ├── __init__.py │ │ └── getter.py │ ├── exceptions.py │ ├── genes │ │ ├── __init__.py │ │ └── getter.py │ ├── http_client_simple.py │ ├── http_client.py │ ├── individual_tools.py │ ├── integrations │ │ ├── __init__.py │ │ ├── biothings_client.py │ │ └── cts_api.py │ ├── interventions │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── logging_filter.py │ ├── metrics_handler.py │ ├── metrics.py │ ├── openfda │ │ ├── __init__.py │ │ ├── adverse_events_helpers.py │ │ ├── adverse_events.py │ │ ├── cache.py │ │ ├── constants.py │ │ ├── device_events_helpers.py │ │ ├── device_events.py │ │ ├── drug_approvals.py │ │ ├── drug_labels_helpers.py │ │ ├── drug_labels.py │ │ ├── drug_recalls_helpers.py │ │ ├── drug_recalls.py │ │ ├── drug_shortages_detail_helpers.py │ │ ├── drug_shortages_helpers.py │ │ ├── drug_shortages.py │ │ ├── exceptions.py │ │ ├── input_validation.py │ │ ├── rate_limiter.py │ │ ├── utils.py │ │ └── validation.py │ ├── organizations │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── parameter_parser.py │ ├── prefetch.py │ ├── query_parser.py │ ├── query_router.py │ ├── rate_limiter.py │ ├── render.py │ ├── request_batcher.py │ ├── resources │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── instructions.md │ │ └── researcher.md │ ├── retry.py │ ├── router_handlers.py │ ├── router.py │ ├── shared_context.py │ ├── thinking │ │ ├── __init__.py │ │ ├── sequential.py │ │ └── session.py │ ├── thinking_tool.py │ ├── thinking_tracker.py │ ├── trials │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── nci_getter.py │ │ ├── nci_search.py │ │ └── search.py │ ├── utils │ │ ├── __init__.py │ │ ├── cancer_types_api.py │ │ ├── cbio_http_adapter.py │ │ ├── endpoint_registry.py │ │ ├── gene_validator.py │ │ ├── metrics.py │ │ ├── mutation_filter.py │ │ ├── query_utils.py │ │ ├── rate_limiter.py │ │ └── request_cache.py │ ├── variants │ │ ├── __init__.py │ │ ├── alphagenome.py │ │ ├── cancer_types.py │ │ ├── cbio_external_client.py │ │ ├── cbioportal_mutations.py │ │ ├── cbioportal_search_helpers.py │ │ ├── cbioportal_search.py │ │ ├── constants.py │ │ ├── external.py │ │ ├── filters.py │ │ ├── getter.py │ │ ├── links.py │ │ └── search.py │ └── workers │ ├── __init__.py │ ├── worker_entry_stytch.js │ ├── worker_entry.js │ └── worker.py ├── tests │ ├── bdd │ │ ├── cli_help │ │ │ ├── help.feature │ │ │ └── test_help.py │ │ ├── conftest.py │ │ ├── features │ │ │ └── alphagenome_integration.feature │ │ ├── fetch_articles │ │ │ ├── fetch.feature │ │ │ └── test_fetch.py │ │ ├── get_trials │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── get_variants │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── search_articles │ │ │ ├── autocomplete.feature │ │ │ ├── search.feature │ │ │ ├── test_autocomplete.py │ │ │ └── test_search.py │ │ ├── search_trials │ │ │ ├── search.feature │ │ │ └── test_search.py 
│ │ ├── search_variants │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ └── steps │ │ └── test_alphagenome_steps.py │ ├── config │ │ └── test_smithery_config.py │ ├── conftest.py │ ├── data │ │ ├── ct_gov │ │ │ ├── clinical_trials_api_v2.yaml │ │ │ ├── trials_NCT04280705.json │ │ │ └── trials_NCT04280705.txt │ │ ├── myvariant │ │ │ ├── myvariant_api.yaml │ │ │ ├── myvariant_field_descriptions.csv │ │ │ ├── variants_full_braf_v600e.json │ │ │ ├── variants_full_braf_v600e.txt │ │ │ └── variants_part_braf_v600_multiple.json │ │ ├── openfda │ │ │ ├── drugsfda_detail.json │ │ │ ├── drugsfda_search.json │ │ │ ├── enforcement_detail.json │ │ │ └── enforcement_search.json │ │ └── pubtator │ │ ├── pubtator_autocomplete.json │ │ └── pubtator3_paper.txt │ ├── integration │ │ ├── test_openfda_integration.py │ │ ├── test_preprints_integration.py │ │ ├── test_simple.py │ │ └── test_variants_integration.py │ ├── tdd │ │ ├── articles │ │ │ ├── test_autocomplete.py │ │ │ ├── test_cbioportal_integration.py │ │ │ ├── test_fetch.py │ │ │ ├── test_preprints.py │ │ │ ├── test_search.py │ │ │ └── test_unified.py │ │ ├── conftest.py │ │ ├── drugs │ │ │ ├── __init__.py │ │ │ └── test_drug_getter.py │ │ ├── openfda │ │ │ ├── __init__.py │ │ │ ├── test_adverse_events.py │ │ │ ├── test_device_events.py │ │ │ ├── test_drug_approvals.py │ │ │ ├── test_drug_labels.py │ │ │ ├── test_drug_recalls.py │ │ │ ├── test_drug_shortages.py │ │ │ └── test_security.py │ │ ├── test_biothings_integration_real.py │ │ ├── test_biothings_integration.py │ │ ├── test_circuit_breaker.py │ │ ├── test_concurrent_requests.py │ │ ├── test_connection_pool.py │ │ ├── test_domain_handlers.py │ │ ├── test_drug_approvals.py │ │ ├── test_drug_recalls.py │ │ ├── test_drug_shortages.py │ │ ├── test_endpoint_documentation.py │ │ ├── test_error_scenarios.py │ │ ├── test_europe_pmc_fetch.py │ │ ├── test_mcp_integration.py │ │ ├── test_mcp_tools.py │ │ ├── test_metrics.py │ │ ├── test_nci_integration.py │ │ ├── test_nci_mcp_tools.py │ │ ├── test_network_policies.py │ │ ├── test_offline_mode.py │ │ ├── test_openfda_unified.py │ │ ├── test_pten_r173_search.py │ │ ├── test_render.py │ │ ├── test_request_batcher.py.disabled │ │ ├── test_retry.py │ │ ├── test_router.py │ │ ├── test_shared_context.py.disabled │ │ ├── test_unified_biothings.py │ │ ├── thinking │ │ │ ├── __init__.py │ │ │ └── test_sequential.py │ │ ├── trials │ │ │ ├── test_backward_compatibility.py │ │ │ ├── test_getter.py │ │ │ └── test_search.py │ │ ├── utils │ │ │ ├── test_gene_validator.py │ │ │ ├── test_mutation_filter.py │ │ │ ├── test_rate_limiter.py │ │ │ └── test_request_cache.py │ │ ├── variants │ │ │ ├── constants.py │ │ │ ├── test_alphagenome_api_key.py │ │ │ ├── test_alphagenome_comprehensive.py │ │ │ ├── test_alphagenome.py │ │ │ ├── test_cbioportal_mutations.py │ │ │ ├── test_cbioportal_search.py │ │ │ ├── test_external_integration.py │ │ │ ├── test_external.py │ │ │ ├── test_extract_gene_aa_change.py │ │ │ ├── test_filters.py │ │ │ ├── test_getter.py │ │ │ ├── test_links.py │ │ │ └── test_search.py │ │ └── workers │ │ └── test_worker_sanitization.js │ └── test_pydantic_ai_integration.py ├── THIRD_PARTY_ENDPOINTS.md ├── tox.ini ├── uv.lock └── wrangler.toml ``` # Files -------------------------------------------------------------------------------- /docs/how-to-guides/02-find-trials-with-nci-and-biothings.md: -------------------------------------------------------------------------------- ```markdown 1 | # How to Find Trials with NCI and BioThings 2 | 3 | This guide 
demonstrates how to search for clinical trials using BioMCP's dual data sources and automatic disease synonym expansion. 4 | 5 | ## Overview 6 | 7 | BioMCP provides access to clinical trials through: 8 | 9 | - **ClinicalTrials.gov**: Default source with comprehensive U.S. and international trials ([API Reference](../backend-services-reference/04-clinicaltrials-gov.md)) 10 | - **NCI CTS API**: Advanced cancer trial search with biomarker filtering (requires API key) ([API Reference](../backend-services-reference/05-nci-cts-api.md)) 11 | - **BioThings Integration**: Automatic disease synonym expansion for better coverage ([BioThings Reference](../backend-services-reference/02-biothings-suite.md)) 12 | 13 | ## Basic Trial Search 14 | 15 | ### Simple Disease Search 16 | 17 | Find trials for a specific condition: 18 | 19 | ```bash 20 | # CLI 21 | biomcp trial search --condition melanoma --status RECRUITING 22 | 23 | # Python 24 | trials = await client.trials.search( 25 | conditions=["melanoma"], 26 | recruiting_status="RECRUITING" 27 | ) 28 | 29 | # MCP Tool 30 | trial_searcher( 31 | conditions=["melanoma"], 32 | recruiting_status="OPEN" 33 | ) 34 | ``` 35 | 36 | ### Search by Intervention 37 | 38 | Find trials testing specific drugs: 39 | 40 | ```bash 41 | # CLI 42 | biomcp trial search --intervention pembrolizumab --phase PHASE3 43 | 44 | # Python 45 | trials = await client.trials.search( 46 | interventions=["pembrolizumab"], 47 | phase="PHASE3" 48 | ) 49 | ``` 50 | 51 | ## Location-Based Search 52 | 53 | ### Finding Nearby Trials 54 | 55 | **Important**: Location searches require latitude and longitude coordinates. 56 | 57 | ```python 58 | # Find trials near Cleveland, Ohio 59 | trials = await trial_searcher( 60 | conditions=["lung cancer"], 61 | lat=41.4993, 62 | long=-81.6944, 63 | distance=50 # 50 miles radius 64 | ) 65 | 66 | # Find trials near Boston 67 | trials = await trial_searcher( 68 | conditions=["breast cancer"], 69 | lat=42.3601, 70 | long=-71.0589, 71 | distance=25 72 | ) 73 | ``` 74 | 75 | ### Getting Coordinates 76 | 77 | For common locations: 78 | 79 | - Cleveland: lat=41.4993, long=-81.6944 80 | - Boston: lat=42.3601, long=-71.0589 81 | - New York: lat=40.7128, long=-74.0060 82 | - Los Angeles: lat=34.0522, long=-118.2437 83 | - Houston: lat=29.7604, long=-95.3698 84 | 85 | ## Advanced Filtering 86 | 87 | ### Multiple Criteria 88 | 89 | Combine multiple filters for precise results: 90 | 91 | ```python 92 | # Complex search example 93 | trials = await trial_searcher( 94 | conditions=["non-small cell lung cancer", "NSCLC"], 95 | interventions=["pembrolizumab", "immunotherapy"], 96 | phase="PHASE3", 97 | recruiting_status="OPEN", 98 | age_group="ADULT", 99 | study_type="INTERVENTIONAL", 100 | funder_type="INDUSTRY" 101 | ) 102 | ``` 103 | 104 | ### Date-Based Filtering 105 | 106 | Find recently started trials: 107 | 108 | ```bash 109 | # CLI - Trials started in 2024 110 | biomcp trial search \ 111 | --condition cancer \ 112 | --start-date 2024-01-01 \ 113 | --status RECRUITING 114 | ``` 115 | 116 | ## Using NCI API Advanced Features 117 | 118 | ### Setup NCI API Key 119 | 120 | Get your key from [api.cancer.gov](https://api.cancer.gov). 
For detailed setup instructions, see [Authentication and API Keys](../getting-started/03-authentication-and-api-keys.md#nci-clinical-trials-api): 121 | 122 | ```bash 123 | export NCI_API_KEY="your-key-here" 124 | ``` 125 | 126 | ### Biomarker-Based Search 127 | 128 | Find trials for specific mutations: 129 | 130 | ```python 131 | # Search using NCI source 132 | trials = await search( 133 | domain="trial", 134 | source="nci", 135 | conditions=["melanoma"], 136 | required_mutations=["BRAF V600E"], 137 | allow_brain_mets=True, 138 | api_key="your-key" 139 | ) 140 | ``` 141 | 142 | ### NCI-Specific Parameters 143 | 144 | ```python 145 | # Advanced NCI search 146 | trials = await trial_searcher( 147 | source="nci", 148 | conditions=["lung cancer"], 149 | required_mutations=["EGFR L858R", "EGFR exon 19 deletion"], 150 | prior_therapy_required=False, 151 | allow_brain_mets=True, 152 | allow_prior_immunotherapy=False, 153 | api_key="your-key" 154 | ) 155 | ``` 156 | 157 | ## BioThings Integration for Enhanced Search 158 | 159 | For technical details on the BioThings APIs, see: 160 | 161 | - [BioThings Suite Reference](../backend-services-reference/02-biothings-suite.md) 162 | 163 | ### Automatic Disease Synonym Expansion 164 | 165 | BioMCP automatically expands disease terms using MyDisease.info: 166 | 167 | ```python 168 | # Searching for "GIST" automatically includes: 169 | # - "gastrointestinal stromal tumor" 170 | # - "gastrointestinal stromal tumour" 171 | # - "GI stromal tumor" 172 | trials = await trial_searcher(conditions=["GIST"]) 173 | ``` 174 | 175 | ### Manual Disease Lookup 176 | 177 | Get all synonyms for a disease: 178 | 179 | ```python 180 | # Get disease information 181 | disease_info = await disease_getter("melanoma") 182 | 183 | # Extract synonyms 184 | synonyms = disease_info.synonyms 185 | # Returns: ["malignant melanoma", "melanoma, malignant", ...] 
186 | 187 | # Use in trial search 188 | trials = await trial_searcher(conditions=synonyms) 189 | 190 | ## Practical Workflows 191 | 192 | ### Workflow 1: Patient-Centric Trial Search 193 | 194 | Find trials for a specific patient profile: 195 | 196 | ```python 197 | async def find_trials_for_patient( 198 | disease: str, 199 | mutations: list[str], 200 | location: tuple[float, float], 201 | prior_treatments: list[str] 202 | ): 203 | # Step 1: Think about the search 204 | await think( 205 | thought=f"Searching trials for {disease} with {mutations}", 206 | thoughtNumber=1 207 | ) 208 | 209 | # Step 2: Get disease synonyms 210 | disease_info = await disease_getter(disease) 211 | all_conditions = [disease] + disease_info.synonyms 212 | 213 | # Step 3: Search both sources 214 | # ClinicalTrials.gov 215 | ctgov_trials = await trial_searcher( 216 | conditions=all_conditions, 217 | other_terms=mutations, 218 | lat=location[0], 219 | long=location[1], 220 | distance=100, 221 | recruiting_status="OPEN" 222 | ) 223 | 224 | nci_trials = None # NCI (only if API key available) 225 | if os.getenv("NCI_API_KEY"): 226 | nci_trials = await trial_searcher( 227 | source="nci", 228 | conditions=all_conditions, 229 | required_mutations=mutations, 230 | exclude_prior_therapy=prior_treatments, 231 | api_key=os.getenv("NCI_API_KEY") 232 | ) 233 | 234 | return { 235 | "clinicaltrials_gov": ctgov_trials, 236 | "nci": nci_trials 237 | } 238 | 239 | # Example usage 240 | trials = await find_trials_for_patient( 241 | disease="melanoma", 242 | mutations=["BRAF V600E"], 243 | location=(40.7128, -74.0060), # New York 244 | prior_treatments=["vemurafenib"] 245 | ) 246 | ``` 247 | 248 | ### Workflow 2: Research Landscape Analysis 249 | 250 | Understand ongoing research in a field: 251 | 252 | ```python 253 | async def analyze_research_landscape(gene: str, disease: str): 254 | # Get gene information 255 | gene_info = await gene_getter(gene) 256 | 257 | # Find all active trials 258 | all_trials = await trial_searcher( 259 | conditions=[disease], 260 | other_terms=[gene, f"{gene} mutation", f"{gene} positive"], 261 | recruiting_status="OPEN", 262 | page_size=50 263 | ) 264 | 265 | # Categorize by phase 266 | phase_distribution = {} 267 | for trial in all_trials: 268 | phase = trial.phase or "Not specified" 269 | phase_distribution[phase] = phase_distribution.get(phase, 0) + 1 270 | 271 | # Extract unique interventions 272 | interventions = set() 273 | for trial in all_trials: 274 | if trial.interventions: 275 | interventions.update(trial.interventions) 276 | 277 | return { 278 | "total_trials": len(all_trials), 279 | "phase_distribution": phase_distribution, 280 | "unique_interventions": list(interventions), 281 | "gene_info": gene_info 282 | } 283 | 284 | # Example 285 | landscape = await analyze_research_landscape("ALK", "lung cancer") 286 | ``` 287 | 288 | ### Workflow 3: Biomarker-Driven Search 289 | 290 | Find trials based on specific biomarkers: 291 | 292 | ```python 293 | async def biomarker_trial_search(biomarkers: list[str], cancer_type: str): 294 | # Search NCI biomarker database 295 | biomarker_results = [] 296 | for biomarker in biomarkers: 297 | result = await nci_biomarker_searcher( 298 | name=biomarker, 299 | api_key=os.getenv("NCI_API_KEY") 300 | ) 301 | biomarker_results.extend(result) 302 | 303 | # Extract associated trials 304 | trial_ids = set() 305 | for bio in biomarker_results: 306 | if bio.get("associated_trials"): 307 | trial_ids.update(bio["associated_trials"]) 308 | 309 | # Get trial details 310 | trials = [] 311 | 312
| for nct_id in trial_ids: 313 | trial = await trial_getter(nct_id) 314 | trials.append(trial) 315 | 316 | return trials 317 | 318 | # Example 319 | trials = await biomarker_trial_search( 320 | biomarkers=["PD-L1", "TMB-high", "MSI-H"], 321 | cancer_type="colorectal cancer" 322 | ) 323 | ``` 324 | 325 | ## Working with Trial Results 326 | 327 | ### Extracting Key Information 328 | 329 | ```python 330 | # Process trial results 331 | for trial in trials: 332 | print(f"NCT ID: {trial.nct_id}") 333 | print(f"Title: {trial.title}") 334 | print(f"Status: {trial.status}") 335 | print(f"Phase: {trial.phase}") 336 | 337 | # Locations 338 | if trial.locations: 339 | print("Locations:") 340 | for loc in trial.locations: 341 | print(f" - {loc.facility}, {loc.city}, {loc.state}") 342 | 343 | # Eligibility 344 | if trial.eligibility: 345 | print(f"Age: {trial.eligibility.minimum_age} - {trial.eligibility.maximum_age}") 346 | print(f"Sex: {trial.eligibility.sex}") 347 | ``` 348 | 349 | ### Getting Detailed Trial Information 350 | 351 | ```python 352 | # Get complete trial details 353 | full_trial = await trial_getter("NCT03006926") 354 | 355 | # Get specific sections 356 | protocol = await trial_protocol_getter("NCT03006926") 357 | locations = await trial_locations_getter("NCT03006926") 358 | outcomes = await trial_outcomes_getter("NCT03006926") 359 | references = await trial_references_getter("NCT03006926") 360 | ``` 361 | 362 | ## Tips for Effective Trial Searches 363 | 364 | ### 1. Use Multiple Search Terms 365 | 366 | ```python 367 | # Cover variations 368 | trials = await trial_searcher( 369 | conditions=["NSCLC", "non-small cell lung cancer", "lung adenocarcinoma"], 370 | interventions=["anti-PD-1", "pembrolizumab", "Keytruda"] 371 | ) 372 | ``` 373 | 374 | ### 2. Check Both Data Sources 375 | 376 | ```python 377 | # Some trials may only be in one database 378 | ctgov_count = len(await trial_searcher(source="ctgov", conditions=["melanoma"])) 379 | nci_count = len(await trial_searcher(source="nci", conditions=["melanoma"])) 380 | ``` 381 | 382 | ### 3. Use Appropriate Filters 383 | 384 | - **recruiting_status**: Focus on trials accepting patients 385 | - **phase**: Later phases for established treatments 386 | - **age_group**: Match patient demographics 387 | - **study_type**: INTERVENTIONAL vs OBSERVATIONAL 388 | 389 | ### 4. Leverage Location Search 390 | 391 | Always include location for patient-specific searches: 392 | 393 | ```python 394 | # Bad - no location 395 | trials = await trial_searcher(conditions=["cancer"]) 396 | 397 | # Good - includes location 398 | trials = await trial_searcher( 399 | conditions=["cancer"], 400 | lat=40.7128, 401 | long=-74.0060, 402 | distance=50 403 | ) 404 | ``` 405 | 406 | ## Troubleshooting 407 | 408 | ### No Results Found 409 | 410 | 1. **Broaden search terms**: Remove specific filters 411 | 2. **Check synonyms**: Use disease_getter to find alternatives 412 | 3. **Expand location**: Increase distance parameter 413 | 4. 
**Try both sources**: Some trials only in NCI or ClinicalTrials.gov 414 | 415 | ### Location Search Issues 416 | 417 | - Ensure both latitude AND longitude are provided 418 | - Use decimal degrees (not degrees/minutes/seconds) 419 | - Check coordinate signs (negative for West/South) 420 | 421 | ### NCI API Errors 422 | 423 | - Verify API key is valid 424 | - Check rate limits (1000 requests/day with key) 425 | - Some features require specific API key permissions 426 | 427 | ## Next Steps 428 | 429 | - Learn about [variant annotations](03-get-comprehensive-variant-annotations.md) 430 | - Explore [AlphaGenome predictions](04-predict-variant-effects-with-alphagenome.md) 431 | - Set up [monitoring and logging](05-logging-and-monitoring-with-bigquery.md) 432 | ``` -------------------------------------------------------------------------------- /src/biomcp/variants/search.py: -------------------------------------------------------------------------------- ```python 1 | import json 2 | import logging 3 | from typing import Annotated, Any 4 | 5 | from pydantic import BaseModel, Field, model_validator 6 | 7 | from .. import StrEnum, ensure_list, http_client, render 8 | from ..constants import MYVARIANT_QUERY_URL, SYSTEM_PAGE_SIZE 9 | from .filters import filter_variants 10 | from .links import inject_links 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | class ClinicalSignificance(StrEnum): 16 | PATHOGENIC = "pathogenic" 17 | LIKELY_PATHOGENIC = "likely pathogenic" 18 | UNCERTAIN_SIGNIFICANCE = "uncertain significance" 19 | LIKELY_BENIGN = "likely benign" 20 | BENIGN = "benign" 21 | 22 | 23 | class PolyPhenPrediction(StrEnum): 24 | PROBABLY_DAMAGING = "D" 25 | POSSIBLY_DAMAGING = "P" 26 | BENIGN = "B" 27 | 28 | 29 | class SiftPrediction(StrEnum): 30 | DELETERIOUS = "D" 31 | TOLERATED = "T" 32 | 33 | 34 | class VariantSources(StrEnum): 35 | CADD = "cadd" 36 | CGI = "cgi" 37 | CIVIC = "civic" 38 | CLINVAR = "clinvar" 39 | COSMIC = "cosmic" 40 | DBNSFP = "dbnsfp" 41 | DBSNP = "dbsnp" 42 | DOCM = "docm" 43 | EMV = "evm" 44 | EXAC = "exac" 45 | GNOMAD_EXOME = "gnomad_exome" 46 | HG19 = "hg19" 47 | MUTDB = "mutdb" 48 | SNPEFF = "snpeff" 49 | VCF = "vcf" 50 | 51 | 52 | MYVARIANT_FIELDS = [ 53 | "_id", 54 | "chrom", 55 | "vcf.position", 56 | "vcf.ref", 57 | "vcf.alt", 58 | "cadd.phred", 59 | "civic.id", 60 | "civic.openCravatUrl", 61 | "clinvar.rcv.clinical_significance", 62 | "clinvar.variant_id", 63 | "cosmic.cosmic_id", 64 | "dbnsfp.genename", 65 | "dbnsfp.hgvsc", 66 | "dbnsfp.hgvsp", 67 | "dbnsfp.polyphen2.hdiv.pred", 68 | "dbnsfp.polyphen2.hdiv.score", 69 | "dbnsfp.sift.pred", 70 | "dbnsfp.sift.score", 71 | "dbsnp.rsid", 72 | "exac.af", 73 | "gnomad_exome.af.af", 74 | ] 75 | 76 | 77 | class VariantQuery(BaseModel): 78 | """Search parameters for querying variant data from MyVariant.info.""" 79 | 80 | gene: str | None = Field( 81 | default=None, 82 | description="Gene symbol to search for (e.g. BRAF, TP53)", 83 | ) 84 | hgvsp: str | None = Field( 85 | default=None, 86 | description="Protein change notation (e.g., p.V600E, p.Arg557His)", 87 | ) 88 | hgvsc: str | None = Field( 89 | default=None, 90 | description="cDNA notation (e.g., c.1799T>A)", 91 | ) 92 | rsid: str | None = Field( 93 | default=None, 94 | description="dbSNP rsID (e.g., rs113488022)", 95 | ) 96 | region: str | None = Field( 97 | default=None, 98 | description="Genomic region as chr:start-end (e.g. 
chr1:12345-67890)", 99 | ) 100 | significance: ClinicalSignificance | None = Field( 101 | default=None, 102 | description="ClinVar clinical significance", 103 | ) 104 | max_frequency: float | None = Field( 105 | default=None, 106 | description="Maximum population allele frequency threshold", 107 | ) 108 | min_frequency: float | None = Field( 109 | default=None, 110 | description="Minimum population allele frequency threshold", 111 | ) 112 | cadd: float | None = Field( 113 | default=None, 114 | description="Minimum CADD phred score", 115 | ) 116 | polyphen: PolyPhenPrediction | None = Field( 117 | default=None, 118 | description="PolyPhen-2 prediction", 119 | ) 120 | sift: SiftPrediction | None = Field( 121 | default=None, 122 | description="SIFT prediction", 123 | ) 124 | sources: list[VariantSources] = Field( 125 | description="Include only specific data sources", 126 | default_factory=list, 127 | ) 128 | size: int = Field( 129 | default=SYSTEM_PAGE_SIZE, 130 | description="Number of results to return", 131 | ) 132 | offset: int = Field( 133 | default=0, 134 | description="Result offset for pagination", 135 | ) 136 | 137 | @model_validator(mode="after") 138 | def validate_query_params(self) -> "VariantQuery": 139 | if not self.model_dump(exclude_none=True, exclude_defaults=True): 140 | raise ValueError("At least one search parameter is required") 141 | return self 142 | 143 | 144 | def _construct_query_part( 145 | field: str, 146 | val: Any | None, 147 | operator: str | None = None, 148 | quoted: bool = False, 149 | ) -> str | None: 150 | if val is not None: 151 | val = str(val) 152 | val = f'"{val}"' if quoted else val 153 | operator = operator or "" 154 | val = f"{field}:{operator}{val}" 155 | return val 156 | 157 | 158 | def build_query_string(query: VariantQuery) -> str: 159 | query_parts: list[str] = list(filter(None, [query.region, query.rsid])) 160 | 161 | query_params = [ 162 | ("dbnsfp.genename", query.gene, None, True), 163 | ("dbnsfp.hgvsp", query.hgvsp, None, True), 164 | ("dbnsfp.hgvsc", query.hgvsc, None, True), 165 | ("dbsnp.rsid", query.rsid, None, True), 166 | ("clinvar.rcv.clinical_significance", query.significance, None, True), 167 | ("gnomad_exome.af.af", query.max_frequency, "<=", False), 168 | ("gnomad_exome.af.af", query.min_frequency, ">=", False), 169 | ("cadd.phred", query.cadd, ">=", False), 170 | ("dbnsfp.polyphen2.hdiv.pred", query.polyphen, None, True), 171 | ("dbnsfp.sift.pred", query.sift, None, True), 172 | ] 173 | 174 | for field, val, operator, quoted in query_params: 175 | part = _construct_query_part(field, val, operator, quoted) 176 | if part is not None: 177 | query_parts.append(part) 178 | 179 | return " AND ".join(query_parts) if query_parts else "*" 180 | 181 | 182 | async def convert_query(query: VariantQuery) -> dict[str, Any]: 183 | """Convert a VariantQuery to parameters for the MyVariant.info API.""" 184 | fields = MYVARIANT_FIELDS[:] + [f"{s}.*" for s in query.sources] 185 | 186 | # Optimize common queries to prevent timeouts 187 | query_string = build_query_string(query) 188 | 189 | # Special handling for common BRAF V600E query 190 | if query.gene == "BRAF" and query.hgvsp == "V600E": 191 | # Use a more specific query that performs better 192 | query_string = 'dbnsfp.genename:"BRAF" AND (dbnsfp.aaref:"V" AND dbnsfp.aapos:600 AND dbnsfp.aaalt:"E")' 193 | 194 | return { 195 | "q": query_string, 196 | "size": query.size, 197 | "from": query.offset, 198 | "fields": ",".join(fields), 199 | } 200 | 201 | 202 | async def search_variants( 203 | 
query: VariantQuery, 204 | output_json: bool = False, 205 | include_cbioportal: bool = True, 206 | ) -> str: 207 | """Search variants using the MyVariant.info API with optional cBioPortal summary.""" 208 | 209 | params = await convert_query(query) 210 | 211 | response, error = await http_client.request_api( 212 | url=MYVARIANT_QUERY_URL, 213 | request=params, 214 | method="GET", 215 | domain="myvariant", 216 | ) 217 | data: list = response.get("hits", []) if response else [] 218 | 219 | if error: 220 | # Provide more specific error messages for common issues 221 | if "timed out" in error.message.lower(): 222 | error_msg = ( 223 | "MyVariant.info API request timed out. This can happen with complex queries. " 224 | "Try narrowing your search criteria or searching by specific identifiers (rsID, HGVS)." 225 | ) 226 | else: 227 | error_msg = f"Error {error.code}: {error.message}" 228 | data = [{"error": error_msg}] 229 | else: 230 | data = inject_links(data) 231 | data = filter_variants(data) 232 | 233 | # Get cBioPortal summary if searching by gene 234 | cbioportal_summary = None 235 | if include_cbioportal and query.gene and not error: 236 | try: 237 | from .cbioportal_search import ( 238 | CBioPortalSearchClient, 239 | format_cbioportal_search_summary, 240 | ) 241 | 242 | client = CBioPortalSearchClient() 243 | summary = await client.get_gene_search_summary(query.gene) 244 | if summary: 245 | cbioportal_summary = format_cbioportal_search_summary(summary) 246 | except Exception as e: 247 | logger.warning(f"Failed to get cBioPortal summary: {e}") 248 | 249 | if not output_json: 250 | result = render.to_markdown(data) 251 | if cbioportal_summary: 252 | result = cbioportal_summary + "\n\n" + result 253 | return result 254 | else: 255 | if cbioportal_summary: 256 | return json.dumps( 257 | {"cbioportal_summary": cbioportal_summary, "variants": data}, 258 | indent=2, 259 | ) 260 | return json.dumps(data, indent=2) 261 | 262 | 263 | async def _variant_searcher( 264 | call_benefit: Annotated[ 265 | str, 266 | "Define and summarize why this function is being called and the intended benefit", 267 | ], 268 | gene: Annotated[ 269 | str | None, "Gene symbol to search for (e.g. BRAF, TP53)" 270 | ] = None, 271 | hgvsp: Annotated[ 272 | str | None, "Protein change notation (e.g., p.V600E, p.Arg557His)" 273 | ] = None, 274 | hgvsc: Annotated[str | None, "cDNA notation (e.g., c.1799T>A)"] = None, 275 | rsid: Annotated[str | None, "dbSNP rsID (e.g., rs113488022)"] = None, 276 | region: Annotated[ 277 | str | None, "Genomic region as chr:start-end (e.g. 
chr1:12345-67890)" 278 | ] = None, 279 | significance: Annotated[ 280 | ClinicalSignificance | str | None, "ClinVar clinical significance" 281 | ] = None, 282 | max_frequency: Annotated[ 283 | float | None, "Maximum population allele frequency threshold" 284 | ] = None, 285 | min_frequency: Annotated[ 286 | float | None, "Minimum population allele frequency threshold" 287 | ] = None, 288 | cadd: Annotated[float | None, "Minimum CADD phred score"] = None, 289 | polyphen: Annotated[ 290 | PolyPhenPrediction | str | None, "PolyPhen-2 prediction" 291 | ] = None, 292 | sift: Annotated[SiftPrediction | str | None, "SIFT prediction"] = None, 293 | sources: Annotated[ 294 | list[VariantSources] | list[str] | str | None, 295 | "Include only specific data sources (list or comma-separated string)", 296 | ] = None, 297 | size: Annotated[int, "Number of results to return"] = SYSTEM_PAGE_SIZE, 298 | offset: Annotated[int, "Result offset for pagination"] = 0, 299 | ) -> str: 300 | """ 301 | Searches for genetic variants based on specified criteria. 302 | 303 | Parameters: 304 | - call_benefit: Define and summarize why this function is being called and the intended benefit 305 | - gene: Gene symbol to search for (e.g. BRAF, TP53) 306 | - hgvsp: Protein change notation (e.g., p.V600E, p.Arg557His) 307 | - hgvsc: cDNA notation (e.g., c.1799T>A) 308 | - rsid: dbSNP rsID (e.g., rs113488022) 309 | - region: Genomic region as chr:start-end (e.g. chr1:12345-67890) 310 | - significance: ClinVar clinical significance 311 | - max_frequency: Maximum population allele frequency threshold 312 | - min_frequency: Minimum population allele frequency threshold 313 | - cadd: Minimum CADD phred score 314 | - polyphen: PolyPhen-2 prediction 315 | - sift: SIFT prediction 316 | - sources: Include only specific data sources (list or comma-separated string) 317 | - size: Number of results to return (default: 10) 318 | - offset: Result offset for pagination (default: 0) 319 | 320 | Returns: 321 | Markdown formatted list of matching variants with key annotations 322 | """ 323 | # Convert individual parameters to a VariantQuery object 324 | query = VariantQuery( 325 | gene=gene, 326 | hgvsp=hgvsp, 327 | hgvsc=hgvsc, 328 | rsid=rsid, 329 | region=region, 330 | significance=significance, 331 | max_frequency=max_frequency, 332 | min_frequency=min_frequency, 333 | cadd=cadd, 334 | polyphen=polyphen, 335 | sift=sift, 336 | sources=ensure_list(sources, split_strings=True), 337 | size=size, 338 | offset=offset, 339 | ) 340 | return await search_variants( 341 | query, output_json=False, include_cbioportal=True 342 | ) 343 | ``` -------------------------------------------------------------------------------- /tests/tdd/test_mcp_integration.py: -------------------------------------------------------------------------------- ```python 1 | """Integration tests for MCP server functionality.""" 2 | 3 | import json 4 | from unittest.mock import patch 5 | 6 | import pytest 7 | 8 | from biomcp.core import mcp_app 9 | 10 | 11 | @pytest.mark.asyncio 12 | class TestMCPIntegration: 13 | """Integration tests for the MCP server.""" 14 | 15 | async def test_mcp_server_tools_registered(self): 16 | """Test that MCP tools are properly registered.""" 17 | # Get the registered tools 18 | tools = await mcp_app.list_tools() 19 | 20 | # Should have 35 tools (2 unified + 1 think + 32 individual including OpenFDA) 21 | assert len(tools) == 35 22 | 23 | # Check tool names 24 | tool_names = [tool.name for tool in tools] 25 | # Unified tools 26 | assert "search" in 
tool_names 27 | assert "fetch" in tool_names 28 | assert "think" in tool_names 29 | # Individual tools 30 | assert "article_searcher" in tool_names 31 | assert "article_getter" in tool_names 32 | assert "trial_searcher" in tool_names 33 | assert "trial_getter" in tool_names 34 | assert "trial_protocol_getter" in tool_names 35 | assert "trial_references_getter" in tool_names 36 | assert "trial_outcomes_getter" in tool_names 37 | assert "trial_locations_getter" in tool_names 38 | assert "variant_searcher" in tool_names 39 | assert "variant_getter" in tool_names 40 | assert "alphagenome_predictor" in tool_names 41 | assert "gene_getter" in tool_names 42 | assert "drug_getter" in tool_names 43 | assert "disease_getter" in tool_names 44 | # OpenFDA tools 45 | assert "openfda_adverse_searcher" in tool_names 46 | assert "openfda_adverse_getter" in tool_names 47 | assert "openfda_label_searcher" in tool_names 48 | assert "openfda_label_getter" in tool_names 49 | assert "openfda_device_searcher" in tool_names 50 | assert "openfda_device_getter" in tool_names 51 | assert "openfda_approval_searcher" in tool_names 52 | assert "openfda_approval_getter" in tool_names 53 | assert "openfda_recall_searcher" in tool_names 54 | assert "openfda_recall_getter" in tool_names 55 | assert "openfda_shortage_searcher" in tool_names 56 | assert "openfda_shortage_getter" in tool_names 57 | 58 | async def test_mcp_search_tool_schema(self): 59 | """Test the search tool schema.""" 60 | tools = await mcp_app.list_tools() 61 | search_tool = next(t for t in tools if t.name == "search") 62 | 63 | # Check required parameters 64 | assert "query" in search_tool.inputSchema["properties"] 65 | assert "domain" in search_tool.inputSchema["properties"] 66 | assert "call_benefit" in search_tool.inputSchema["properties"] 67 | # Verify query is required (no default value) 68 | assert "query" in search_tool.inputSchema.get("required", []) 69 | # Verify call_benefit is optional 70 | assert "call_benefit" not in search_tool.inputSchema.get( 71 | "required", [] 72 | ) 73 | 74 | # Check domain enum values 75 | domain_schema = search_tool.inputSchema["properties"]["domain"] 76 | # The enum is nested in anyOf 77 | enum_values = domain_schema["anyOf"][0]["enum"] 78 | assert "article" in enum_values 79 | assert "trial" in enum_values 80 | assert "variant" in enum_values 81 | # thinking domain was removed from search tool 82 | # assert "thinking" in enum_values 83 | 84 | async def test_mcp_fetch_tool_schema(self): 85 | """Test the fetch tool schema.""" 86 | tools = await mcp_app.list_tools() 87 | fetch_tool = next(t for t in tools if t.name == "fetch") 88 | 89 | # Check required parameters - only id should be required 90 | required = fetch_tool.inputSchema["required"] 91 | assert "id" in required 92 | assert len(required) == 1 # Only id should be required 93 | # Check optional parameters are present 94 | assert "domain" in fetch_tool.inputSchema["properties"] 95 | assert "call_benefit" in fetch_tool.inputSchema["properties"] 96 | assert "detail" in fetch_tool.inputSchema["properties"] 97 | 98 | # Check domain enum values (no thinking for fetch) 99 | domain_schema = fetch_tool.inputSchema["properties"]["domain"] 100 | # For required enums, the structure is different 101 | if "enum" in domain_schema: 102 | enum_values = domain_schema["enum"] 103 | else: 104 | # Check if it's in anyOf structure 105 | enum_values = domain_schema.get("anyOf", [{}])[0].get("enum", []) 106 | assert "article" in enum_values 107 | assert "trial" in enum_values 108 | 
assert "variant" in enum_values 109 | assert "thinking" not in enum_values 110 | 111 | async def test_mcp_search_article_integration(self): 112 | """Test end-to-end article search through MCP.""" 113 | mock_result = json.dumps([ 114 | { 115 | "pmid": "12345", 116 | "title": "Test Article", 117 | "abstract": "Test abstract", 118 | } 119 | ]) 120 | 121 | with patch( 122 | "biomcp.articles.unified.search_articles_unified" 123 | ) as mock_search: 124 | mock_search.return_value = mock_result 125 | 126 | # Import search function directly since we can't test through MCP without Context 127 | from biomcp.router import search 128 | 129 | # Call the search function 130 | result = await search( 131 | query="", 132 | domain="article", 133 | genes="BRAF", 134 | page_size=10, 135 | ) 136 | 137 | # Verify the result structure 138 | assert "results" in result 139 | # May include thinking reminder as first result 140 | actual_results = [ 141 | r for r in result["results"] if r["id"] != "thinking-reminder" 142 | ] 143 | assert len(actual_results) == 1 144 | assert actual_results[0]["id"] == "12345" 145 | 146 | async def test_mcp_fetch_variant_integration(self): 147 | """Test end-to-end variant fetch through MCP.""" 148 | mock_result = json.dumps([ 149 | { 150 | "_id": "rs121913529", 151 | "gene": {"symbol": "BRAF"}, 152 | "clinvar": {"clinical_significance": "Pathogenic"}, 153 | } 154 | ]) 155 | 156 | with patch("biomcp.variants.getter.get_variant") as mock_get: 157 | mock_get.return_value = mock_result 158 | 159 | from biomcp.router import fetch 160 | 161 | # Call the fetch function 162 | result = await fetch( 163 | domain="variant", 164 | id="rs121913529", 165 | ) 166 | 167 | # Verify the result structure 168 | assert result["id"] == "rs121913529" 169 | assert "title" in result 170 | assert "text" in result 171 | assert "url" in result 172 | assert "metadata" in result 173 | 174 | async def test_mcp_unified_query_integration(self): 175 | """Test unified query through MCP.""" 176 | with patch("biomcp.query_router.execute_routing_plan") as mock_execute: 177 | mock_execute.return_value = { 178 | "articles": json.dumps([ 179 | {"pmid": "111", "title": "Article 1"} 180 | ]), 181 | "variants": json.dumps([ 182 | {"_id": "rs222", "gene": {"symbol": "TP53"}} 183 | ]), 184 | } 185 | 186 | from biomcp.router import search 187 | 188 | # Call search with unified query 189 | result = await search( 190 | query="gene:BRAF AND disease:cancer", 191 | max_results_per_domain=10, 192 | ) 193 | 194 | # Should get results from multiple domains 195 | assert "results" in result 196 | # May include thinking reminder 197 | actual_results = [ 198 | r for r in result["results"] if r["id"] != "thinking-reminder" 199 | ] 200 | assert len(actual_results) >= 2 201 | 202 | async def test_mcp_thinking_integration(self): 203 | """Test sequential thinking through MCP.""" 204 | with patch( 205 | "biomcp.thinking.sequential._sequential_thinking" 206 | ) as mock_think: 207 | mock_think.return_value = { 208 | "thought": "Processed thought", 209 | "analysis": "Test analysis", 210 | } 211 | 212 | from biomcp.thinking_tool import think 213 | 214 | # Call the think tool directly 215 | result = await think( 216 | thought="Test thought", 217 | thoughtNumber=1, 218 | totalThoughts=3, 219 | nextThoughtNeeded=True, 220 | ) 221 | 222 | # Verify thinking result 223 | assert result["domain"] == "thinking" 224 | assert result["thoughtNumber"] == 1 225 | assert result["nextThoughtNeeded"] is True 226 | 227 | async def test_mcp_error_handling(self): 228 | 
"""Test MCP error handling.""" 229 | from biomcp.exceptions import InvalidDomainError 230 | from biomcp.router import search 231 | 232 | # Test with invalid domain 233 | with pytest.raises(InvalidDomainError) as exc_info: 234 | await search( 235 | query="", 236 | domain="invalid_domain", 237 | ) 238 | 239 | assert "Unknown domain" in str(exc_info.value) 240 | 241 | async def test_mcp_fetch_all_trial_sections(self): 242 | """Test fetching trial with all sections through MCP.""" 243 | mock_protocol = {"title": "Test Trial", "nct_id": "NCT123"} 244 | mock_locations = {"locations": [{"city": "Boston"}]} 245 | 246 | with ( 247 | patch("biomcp.trials.getter._trial_protocol") as mock_p, 248 | patch("biomcp.trials.getter._trial_locations") as mock_l, 249 | patch("biomcp.trials.getter._trial_outcomes") as mock_o, 250 | patch("biomcp.trials.getter._trial_references") as mock_r, 251 | ): 252 | mock_p.return_value = json.dumps(mock_protocol) 253 | mock_l.return_value = json.dumps(mock_locations) 254 | mock_o.return_value = json.dumps({"outcomes": {}}) 255 | mock_r.return_value = json.dumps({"references": []}) 256 | 257 | from biomcp.router import fetch 258 | 259 | result = await fetch( 260 | domain="trial", 261 | id="NCT123", 262 | detail="all", 263 | ) 264 | 265 | # Verify all sections are included 266 | assert result["id"] == "NCT123" 267 | assert "locations" in result["metadata"] 268 | assert "outcomes" in result["metadata"] 269 | assert "references" in result["metadata"] 270 | 271 | async def test_mcp_parameter_parsing(self): 272 | """Test parameter parsing through MCP.""" 273 | mock_result = json.dumps([]) 274 | 275 | with patch( 276 | "biomcp.articles.unified.search_articles_unified" 277 | ) as mock_search: 278 | mock_search.return_value = mock_result 279 | 280 | from biomcp.router import search 281 | 282 | # Test with various parameter formats 283 | await search( 284 | query="", 285 | domain="article", 286 | genes='["BRAF", "KRAS"]', # JSON string 287 | diseases="cancer,melanoma", # Comma-separated 288 | keywords=["test1", "test2"], # Already a list 289 | ) 290 | 291 | # Verify parameters were parsed correctly 292 | call_args = mock_search.call_args[0][0] 293 | assert call_args.genes == ["BRAF", "KRAS"] 294 | assert call_args.diseases == ["cancer", "melanoma"] 295 | assert call_args.keywords == ["test1", "test2"] 296 | ``` -------------------------------------------------------------------------------- /tests/tdd/test_biothings_integration_real.py: -------------------------------------------------------------------------------- ```python 1 | """Integration tests for BioThings API - calls real APIs.""" 2 | 3 | import pytest 4 | 5 | from biomcp.integrations import BioThingsClient 6 | 7 | 8 | @pytest.mark.integration 9 | class TestRealBioThingsAPIs: 10 | """Integration tests that call real BioThings APIs.""" 11 | 12 | @pytest.fixture 13 | def client(self): 14 | """Create a real BioThings client.""" 15 | return BioThingsClient() 16 | 17 | @pytest.mark.asyncio 18 | async def test_mygene_tp53(self, client): 19 | """Test real MyGene.info API with TP53.""" 20 | result = await client.get_gene_info("TP53") 21 | 22 | assert result is not None 23 | assert result.symbol == "TP53" 24 | assert result.name == "tumor protein p53" 25 | assert result.entrezgene in ["7157", 7157] 26 | assert "tumor suppressor" in result.summary.lower() 27 | # Check for either lowercase or uppercase P53 in aliases 28 | assert any("p53" in alias.lower() for alias in result.alias) 29 | 30 | @pytest.mark.asyncio 31 | async def 
test_mygene_braf(self, client): 32 | """Test real MyGene.info API with BRAF.""" 33 | result = await client.get_gene_info("BRAF") 34 | 35 | assert result is not None 36 | assert result.symbol == "BRAF" 37 | assert "proto-oncogene" in result.name.lower() 38 | assert result.type_of_gene == "protein-coding" 39 | 40 | @pytest.mark.asyncio 41 | async def test_mygene_by_entrez_id(self, client): 42 | """Test real MyGene.info API with Entrez ID.""" 43 | result = await client.get_gene_info("673") # BRAF 44 | 45 | assert result is not None 46 | assert result.symbol == "BRAF" 47 | assert result.gene_id == "673" 48 | 49 | @pytest.mark.asyncio 50 | async def test_mydisease_melanoma(self, client): 51 | """Test real MyDisease.info API with melanoma.""" 52 | result = await client.get_disease_info("melanoma") 53 | 54 | if result is None: 55 | # API might be down or melanoma might not be found directly 56 | # Try a more specific search 57 | result = await client.get_disease_info( 58 | "MONDO:0005105" 59 | ) # MONDO ID for melanoma 60 | 61 | assert result is not None, "Disease info should be returned" 62 | # The API may return subtypes of melanoma 63 | if result.name: 64 | assert "melanoma" in result.name.lower() or ( 65 | result.definition and "melanoma" in result.definition.lower() 66 | ) 67 | assert result.disease_id is not None 68 | # Synonyms might be empty for specific subtypes 69 | assert result.synonyms is not None 70 | 71 | @pytest.mark.asyncio 72 | async def test_mydisease_gist(self, client): 73 | """Test real MyDisease.info API with GIST.""" 74 | result = await client.get_disease_info("GIST") 75 | 76 | if result is None: 77 | # API might be down or GIST might not be found directly 78 | # Try the full name 79 | result = await client.get_disease_info( 80 | "gastrointestinal stromal tumor" 81 | ) 82 | 83 | assert result is not None, "Disease info should be returned" 84 | # GIST might return as a variant name 85 | if result.name: 86 | assert ( 87 | "gist" in result.name.lower() 88 | or "stromal" in result.name.lower() 89 | ) 90 | assert result.disease_id is not None 91 | # GIST should have synonyms including full name if available 92 | assert result.synonyms is not None 93 | 94 | @pytest.mark.asyncio 95 | async def test_mydisease_by_mondo_id(self, client): 96 | """Test real MyDisease.info API with MONDO ID.""" 97 | result = await client.get_disease_info("MONDO:0005105") # melanoma 98 | 99 | assert result is not None 100 | assert result.disease_id == "MONDO:0005105" 101 | # The result should have mondo data 102 | assert result.mondo is not None 103 | assert result.mondo.get("mondo") == "MONDO:0005105" 104 | # Name field might come from different sources in the API 105 | if result.name: 106 | assert "melanoma" in result.name.lower() 107 | 108 | @pytest.mark.asyncio 109 | async def test_disease_synonyms_expansion(self, client): 110 | """Test disease synonym expansion.""" 111 | synonyms = await client.get_disease_synonyms("lung cancer") 112 | 113 | assert len(synonyms) >= 1 # At least includes the original term 114 | assert "lung cancer" in [s.lower() for s in synonyms] 115 | # May or may not include formal terms depending on API results 116 | # Just check we got some results back 117 | assert synonyms is not None and len(synonyms) > 0 118 | 119 | @pytest.mark.asyncio 120 | async def test_batch_genes(self, client): 121 | """Test batch gene retrieval.""" 122 | # Test single gene retrieval as a workaround since batch requires special POST encoding 123 | # This validates the gene getter can handle multiple 
calls efficiently 124 | genes = ["TP53", "BRAF", "EGFR"] 125 | results = [] 126 | 127 | for gene in genes: 128 | result = await client.get_gene_info(gene) 129 | if result: 130 | results.append(result) 131 | 132 | assert len(results) == 3 133 | gene_symbols = [r.symbol for r in results] 134 | assert "TP53" in gene_symbols 135 | assert "BRAF" in gene_symbols 136 | assert "EGFR" in gene_symbols 137 | 138 | @pytest.mark.asyncio 139 | async def test_invalid_gene(self, client): 140 | """Test handling of invalid gene.""" 141 | result = await client.get_gene_info("INVALID_GENE_XYZ123") 142 | assert result is None 143 | 144 | @pytest.mark.asyncio 145 | async def test_invalid_disease(self, client): 146 | """Test handling of invalid disease.""" 147 | result = await client.get_disease_info("INVALID_DISEASE_XYZ123") 148 | assert result is None 149 | 150 | @pytest.mark.asyncio 151 | async def test_mychem_aspirin(self, client): 152 | """Test real MyChem.info API with aspirin.""" 153 | # Use DrugBank ID for reliable results 154 | result = await client.get_drug_info("DB00945") 155 | 156 | assert result is not None 157 | # API returns various forms - could be aspirin or acetylsalicylic acid 158 | assert result.name is not None 159 | assert result.drugbank_id == "DB00945" 160 | # Should have at least one identifier 161 | assert any([ 162 | result.drugbank_id, 163 | result.chembl_id, 164 | result.chebi_id, 165 | result.pubchem_cid, 166 | ]) 167 | 168 | @pytest.mark.asyncio 169 | async def test_mychem_imatinib(self, client): 170 | """Test real MyChem.info API with imatinib.""" 171 | # Use DrugBank ID for reliable results 172 | result = await client.get_drug_info("DB00619") 173 | 174 | assert result is not None 175 | assert result.name is not None 176 | assert "imatinib" in result.name.lower() 177 | assert result.drugbank_id == "DB00619" 178 | # Should have at least one identifier 179 | assert any([ 180 | result.drugbank_id, 181 | result.chembl_id, 182 | result.chebi_id, 183 | result.pubchem_cid, 184 | ]) 185 | 186 | @pytest.mark.asyncio 187 | async def test_mychem_by_drugbank_id(self, client): 188 | """Test real MyChem.info API with DrugBank ID.""" 189 | result = await client.get_drug_info("DB00945") # Aspirin 190 | 191 | assert result is not None 192 | assert result.drugbank_id == "DB00945" 193 | assert ( 194 | result.name is not None 195 | ) # Could be Acetylsalicylic acid or similar 196 | 197 | @pytest.mark.asyncio 198 | async def test_invalid_drug(self, client): 199 | """Test handling of invalid drug.""" 200 | result = await client.get_drug_info("INVALID_DRUG_XYZ123") 201 | assert result is None 202 | 203 | @pytest.mark.asyncio 204 | async def test_mychem_pembrolizumab(self, client): 205 | """Test real MyChem.info API with pembrolizumab.""" 206 | result = await client.get_drug_info("pembrolizumab") 207 | 208 | assert result is not None 209 | assert result.name == "Pembrolizumab" 210 | assert result.drugbank_id == "DB09037" 211 | assert result.unii == "DPT0O3T46P" 212 | assert "PD-1" in result.description 213 | assert "antibody" in result.description.lower() 214 | 215 | 216 | @pytest.mark.integration 217 | class TestGeneToolIntegration: 218 | """Test the gene getter tool with real APIs.""" 219 | 220 | @pytest.mark.asyncio 221 | async def test_gene_getter_tool(self): 222 | """Test the gene_getter tool function.""" 223 | from biomcp.genes.getter import get_gene 224 | 225 | result = await get_gene("TP53", output_json=False) 226 | 227 | assert "TP53" in result 228 | assert "tumor protein p53" in result 229 | 
assert "tumor suppressor" in result.lower() 230 | # Links might be formatted differently 231 | assert "ncbi" in result.lower() or "gene" in result.lower() 232 | 233 | @pytest.mark.asyncio 234 | async def test_gene_getter_json(self): 235 | """Test gene_getter with JSON output.""" 236 | import json 237 | 238 | from biomcp.genes.getter import get_gene 239 | 240 | result = await get_gene("BRAF", output_json=True) 241 | data = json.loads(result) 242 | 243 | assert data["symbol"] == "BRAF" 244 | assert "_links" in data 245 | assert "NCBI Gene" in data["_links"] 246 | 247 | 248 | @pytest.mark.integration 249 | class TestDiseaseToolIntegration: 250 | """Test the disease getter tool with real APIs.""" 251 | 252 | @pytest.mark.asyncio 253 | async def test_disease_getter_tool(self): 254 | """Test the disease_getter tool function.""" 255 | from biomcp.diseases.getter import get_disease 256 | 257 | result = await get_disease("melanoma", output_json=False) 258 | 259 | assert "melanoma" in result.lower() 260 | assert "MONDO:" in result 261 | # In markdown format, links are shown as "MONDO Browser:" not "_links" 262 | assert "Browser:" in result or "https://" in result 263 | 264 | @pytest.mark.asyncio 265 | async def test_disease_getter_json(self): 266 | """Test disease_getter with JSON output.""" 267 | import json 268 | 269 | from biomcp.diseases.getter import get_disease 270 | 271 | result = await get_disease("GIST", output_json=True) 272 | data = json.loads(result) 273 | 274 | # API might return error or different structure 275 | if "error" in data: 276 | pytest.skip("Disease not found in API") 277 | else: 278 | # Check for key fields 279 | assert "disease_id" in data or "id" in data or "_id" in data 280 | assert "MONDO:" in str(data) 281 | 282 | 283 | @pytest.mark.integration 284 | class TestDrugToolIntegration: 285 | """Test the drug getter tool with real APIs.""" 286 | 287 | @pytest.mark.asyncio 288 | async def test_drug_getter_tool(self): 289 | """Test the drug_getter tool function.""" 290 | from biomcp.drugs.getter import get_drug 291 | 292 | result = await get_drug("DB00945", output_json=False) # Aspirin 293 | 294 | assert "Drug:" in result 295 | assert "DrugBank ID" in result 296 | assert "DB00945" in result 297 | assert "External Links" in result 298 | 299 | @pytest.mark.asyncio 300 | async def test_drug_getter_json(self): 301 | """Test drug_getter with JSON output.""" 302 | import json 303 | 304 | from biomcp.drugs.getter import get_drug 305 | 306 | result = await get_drug("DB00619", output_json=True) # Imatinib 307 | data = json.loads(result) 308 | 309 | # Check for basic fields 310 | assert "drug_id" in data 311 | assert "drugbank_id" in data 312 | assert data["drugbank_id"] == "DB00619" 313 | assert "_links" in data 314 | # Should have at least one database link 315 | assert any( 316 | key in data["_links"] 317 | for key in ["DrugBank", "ChEMBL", "PubChem", "ChEBI"] 318 | ) 319 | ``` -------------------------------------------------------------------------------- /tests/tdd/test_domain_handlers.py: -------------------------------------------------------------------------------- ```python 1 | """Tests for domain handlers module.""" 2 | 3 | import pytest 4 | 5 | from biomcp.constants import DEFAULT_TITLE 6 | from biomcp.domain_handlers import ( 7 | ArticleHandler, 8 | TrialHandler, 9 | VariantHandler, 10 | get_domain_handler, 11 | ) 12 | 13 | 14 | class TestArticleHandler: 15 | """Test ArticleHandler class.""" 16 | 17 | def test_format_pubmed_article(self): 18 | """Test formatting a PubMed 
article.""" 19 | article = { 20 | "pmid": "12345", 21 | "title": "Test Article Title", 22 | "abstract": "This is a test abstract that is longer than 200 characters. " 23 | * 5, 24 | "pub_year": "2023", 25 | "journal": "Test Journal", 26 | "authors": ["Smith J", "Doe J", "Johnson A", "Williams B"], 27 | } 28 | 29 | result = ArticleHandler.format_result(article) 30 | 31 | assert result["id"] == "12345" 32 | assert result["title"] == "Test Article Title" 33 | assert len(result["snippet"]) == 203 # 200 + "..." 34 | assert result["snippet"].endswith("...") 35 | assert result["url"] == "https://pubmed.ncbi.nlm.nih.gov/12345/" 36 | assert result["metadata"]["year"] == "2023" 37 | assert result["metadata"]["journal"] == "Test Journal" 38 | assert len(result["metadata"]["authors"]) == 3 # Only first 3 39 | 40 | def test_format_preprint_article(self): 41 | """Test formatting a preprint article.""" 42 | preprint = { 43 | "doi": "10.1101/2023.01.01.12345", 44 | "id": "biorxiv-123", 45 | "title": "Preprint Title", 46 | "abstract": "Short abstract", 47 | "url": "https://www.biorxiv.org/content/10.1101/2023.01.01.12345", 48 | "pub_year": "2023", 49 | "source": "bioRxiv", 50 | "authors": ["Author A", "Author B"], 51 | } 52 | 53 | result = ArticleHandler.format_result(preprint) 54 | 55 | assert result["id"] == "10.1101/2023.01.01.12345" 56 | assert result["title"] == "Preprint Title" 57 | assert result["snippet"] == "Short abstract..." 58 | assert ( 59 | result["url"] 60 | == "https://www.biorxiv.org/content/10.1101/2023.01.01.12345" 61 | ) 62 | assert result["metadata"]["source"] == "bioRxiv" 63 | 64 | def test_format_article_missing_fields(self): 65 | """Test formatting article with missing fields.""" 66 | article = { 67 | "pmid": "67890", 68 | # Missing title, abstract, etc. 69 | } 70 | 71 | result = ArticleHandler.format_result(article) 72 | 73 | assert result["id"] == "67890" 74 | assert ( 75 | result["title"] == DEFAULT_TITLE 76 | ) # Should use default for missing title 77 | assert result["snippet"] == "" # Empty when no abstract 78 | assert result["url"] == "https://pubmed.ncbi.nlm.nih.gov/67890/" 79 | 80 | def test_format_article_with_date_field(self): 81 | """Test formatting article with date field instead of pub_year.""" 82 | article = { 83 | "pmid": "123", 84 | "title": "Test", 85 | "date": "2023-05-15", 86 | } 87 | 88 | result = ArticleHandler.format_result(article) 89 | 90 | assert result["metadata"]["year"] == "2023" 91 | 92 | def test_format_article_title_normalization(self): 93 | """Test that article title whitespace is normalized.""" 94 | article = { 95 | "pmid": "123", 96 | "title": " Test Article\n\nWith Extra Spaces ", 97 | } 98 | 99 | result = ArticleHandler.format_result(article) 100 | 101 | assert result["title"] == "Test Article With Extra Spaces" 102 | 103 | 104 | class TestTrialHandler: 105 | """Test TrialHandler class.""" 106 | 107 | def test_format_trial_api_v2(self): 108 | """Test formatting trial with API v2 structure.""" 109 | trial = { 110 | "protocolSection": { 111 | "identificationModule": { 112 | "nctId": "NCT12345", 113 | "briefTitle": "Brief Title", 114 | "officialTitle": "Official Title", 115 | }, 116 | "statusModule": { 117 | "overallStatus": "RECRUITING", 118 | "startDateStruct": {"date": "2023-01-01"}, 119 | "primaryCompletionDateStruct": {"date": "2024-12-31"}, 120 | }, 121 | "descriptionModule": { 122 | "briefSummary": "This is a brief summary of the trial." 
123 | }, 124 | "designModule": { 125 | "phases": ["PHASE3"], 126 | }, 127 | } 128 | } 129 | 130 | result = TrialHandler.format_result(trial) 131 | 132 | assert result["id"] == "NCT12345" 133 | assert result["title"] == "Brief Title" 134 | assert "brief summary" in result["snippet"] 135 | assert result["url"] == "https://clinicaltrials.gov/study/NCT12345" 136 | assert result["metadata"]["status"] == "RECRUITING" 137 | assert result["metadata"]["phase"] == "PHASE3" 138 | assert result["metadata"]["start_date"] == "2023-01-01" 139 | assert result["metadata"]["primary_completion_date"] == "2024-12-31" 140 | 141 | def test_format_trial_legacy_flat(self): 142 | """Test formatting trial with legacy flat structure.""" 143 | trial = { 144 | "NCT Number": "NCT67890", 145 | "Study Title": "Legacy Trial Title", 146 | "Brief Summary": "Legacy summary", 147 | "Study Status": "COMPLETED", 148 | "Phases": "Phase 2", 149 | "Start Date": "2022-01-01", 150 | "Completion Date": "2023-12-31", 151 | } 152 | 153 | result = TrialHandler.format_result(trial) 154 | 155 | assert result["id"] == "NCT67890" 156 | assert result["title"] == "Legacy Trial Title" 157 | assert result["snippet"].startswith("Legacy summary") 158 | assert result["url"] == "https://clinicaltrials.gov/study/NCT67890" 159 | assert result["metadata"]["status"] == "COMPLETED" 160 | assert result["metadata"]["phase"] == "Phase 2" 161 | 162 | def test_format_trial_legacy_simple(self): 163 | """Test formatting trial with legacy simple structure.""" 164 | trial = { 165 | "nct_id": "NCT11111", 166 | "brief_title": "Simple Trial", 167 | "overall_status": "ACTIVE", 168 | "phase": "PHASE1", 169 | } 170 | 171 | result = TrialHandler.format_result(trial) 172 | 173 | assert result["id"] == "NCT11111" 174 | assert result["title"] == "Simple Trial" 175 | assert result["metadata"]["status"] == "ACTIVE" 176 | assert result["metadata"]["phase"] == "PHASE1" 177 | 178 | def test_format_trial_missing_title(self): 179 | """Test formatting trial with missing brief title.""" 180 | trial = { 181 | "protocolSection": { 182 | "identificationModule": { 183 | "nctId": "NCT99999", 184 | "officialTitle": "Only Official Title", 185 | }, 186 | } 187 | } 188 | 189 | result = TrialHandler.format_result(trial) 190 | 191 | assert result["id"] == "NCT99999" 192 | assert result["title"] == "Only Official Title" 193 | 194 | def test_format_trial_empty_phases(self): 195 | """Test formatting trial with empty phases array.""" 196 | trial = { 197 | "protocolSection": { 198 | "identificationModule": {"nctId": "NCT123"}, 199 | "designModule": {"phases": []}, 200 | } 201 | } 202 | 203 | result = TrialHandler.format_result(trial) 204 | 205 | assert result["metadata"]["phase"] == "" 206 | 207 | 208 | class TestVariantHandler: 209 | """Test VariantHandler class.""" 210 | 211 | def test_format_variant_complete(self): 212 | """Test formatting variant with complete data.""" 213 | variant = { 214 | "_id": "chr7:g.140453136A>T", 215 | "dbnsfp": { 216 | "genename": "BRAF", 217 | "hgvsp": ["BRAF:p.V600E"], 218 | }, 219 | "dbsnp": { 220 | "rsid": "rs121913529", 221 | "gene": {"symbol": "BRAF"}, 222 | }, 223 | "clinvar": { 224 | "rcv": { 225 | "clinical_significance": "Pathogenic", 226 | } 227 | }, 228 | "cadd": { 229 | "consequence": "missense_variant", 230 | }, 231 | } 232 | 233 | result = VariantHandler.format_result(variant) 234 | 235 | assert result["id"] == "chr7:g.140453136A>T" 236 | assert result["title"] == "BRAF BRAF:p.V600E" 237 | assert "Pathogenic" in result["snippet"] 238 | assert 
"rs121913529" in result["url"] 239 | assert result["metadata"]["gene"] == "BRAF" 240 | assert result["metadata"]["rsid"] == "rs121913529" 241 | assert result["metadata"]["clinical_significance"] == "Pathogenic" 242 | assert result["metadata"]["consequence"] == "missense_variant" 243 | 244 | def test_format_variant_gene_list(self): 245 | """Test formatting variant when gene is a list.""" 246 | variant = { 247 | "_id": "rs123", 248 | "dbnsfp": {"genename": ["GENE1", "GENE2"]}, 249 | } 250 | 251 | result = VariantHandler.format_result(variant) 252 | 253 | assert result["metadata"]["gene"] == "GENE1" 254 | 255 | def test_format_variant_clinvar_list(self): 256 | """Test formatting variant when clinvar RCV is a list.""" 257 | variant = { 258 | "_id": "rs456", 259 | "clinvar": { 260 | "rcv": [ 261 | {"clinical_significance": "Pathogenic"}, 262 | {"clinical_significance": "Likely pathogenic"}, 263 | ] 264 | }, 265 | } 266 | 267 | result = VariantHandler.format_result(variant) 268 | 269 | assert result["metadata"]["clinical_significance"] == "Pathogenic" 270 | 271 | def test_format_variant_minimal(self): 272 | """Test formatting variant with minimal data.""" 273 | variant = { 274 | "_id": "chr1:g.12345A>G", 275 | } 276 | 277 | result = VariantHandler.format_result(variant) 278 | 279 | assert result["id"] == "chr1:g.12345A>G" 280 | assert result["title"] == "chr1:g.12345A>G" 281 | assert "Unknown" in result["snippet"] 282 | assert result["url"] == "" 283 | 284 | def test_format_variant_hgvsp_list(self): 285 | """Test formatting variant when HGVS protein is a list.""" 286 | variant = { 287 | "_id": "rs789", 288 | "dbnsfp": { 289 | "genename": "TP53", 290 | "hgvsp": ["TP53:p.R175H", "TP53:p.R175C"], 291 | }, 292 | } 293 | 294 | result = VariantHandler.format_result(variant) 295 | 296 | assert result["title"] == "TP53 TP53:p.R175H" 297 | 298 | def test_format_variant_no_rsid_url(self): 299 | """Test variant URL generation without rsID.""" 300 | variant = { 301 | "_id": "chr2:g.234567C>T", 302 | } 303 | 304 | result = VariantHandler.format_result(variant) 305 | 306 | assert result["url"] == "" 307 | 308 | 309 | class TestGetDomainHandler: 310 | """Test get_domain_handler function.""" 311 | 312 | def test_get_article_handler(self): 313 | """Test getting article handler.""" 314 | handler = get_domain_handler("article") 315 | assert handler == ArticleHandler 316 | 317 | def test_get_trial_handler(self): 318 | """Test getting trial handler.""" 319 | handler = get_domain_handler("trial") 320 | assert handler == TrialHandler 321 | 322 | def test_get_variant_handler(self): 323 | """Test getting variant handler.""" 324 | handler = get_domain_handler("variant") 325 | assert handler == VariantHandler 326 | 327 | def test_get_invalid_handler(self): 328 | """Test getting handler for invalid domain.""" 329 | with pytest.raises(ValueError) as exc_info: 330 | get_domain_handler("invalid") 331 | 332 | assert "Unknown domain: invalid" in str(exc_info.value) 333 | 334 | def test_get_handler_case_sensitive(self): 335 | """Test that domain names are case sensitive.""" 336 | # Should work with lowercase 337 | handler = get_domain_handler("article") 338 | assert handler == ArticleHandler 339 | 340 | # Should fail with uppercase 341 | with pytest.raises(ValueError): 342 | get_domain_handler("ARTICLE") 343 | ``` -------------------------------------------------------------------------------- /src/biomcp/cli/health.py: -------------------------------------------------------------------------------- ```python 1 | """Health check 
command for BioMCP CLI. 2 | 3 | This module provides a command to check the health of API endpoints and system resources. 4 | """ 5 | 6 | import asyncio 7 | import platform 8 | import socket 9 | from typing import Any 10 | 11 | import typer 12 | from rich.console import Console 13 | from rich.panel import Panel 14 | from rich.table import Table 15 | 16 | from .. import http_client 17 | from ..constants import ( 18 | CLINICAL_TRIALS_BASE_URL, 19 | MYVARIANT_BASE_URL, 20 | PUBTATOR3_BASE_URL, 21 | ) 22 | 23 | # Try to import psutil, but handle case where it's not installed 24 | try: 25 | import psutil 26 | 27 | PSUTIL_AVAILABLE = True 28 | except ImportError: 29 | PSUTIL_AVAILABLE = False 30 | 31 | health_app = typer.Typer(help="Health check operations") 32 | console = Console() 33 | 34 | 35 | async def check_api_endpoint( 36 | url: str, 37 | name: str, 38 | params: dict[Any, Any] | None = None, 39 | method: str = "GET", 40 | ) -> dict: 41 | """Check if an API endpoint is accessible and responding.""" 42 | try: 43 | status, content = await http_client.call_http( 44 | method, url, params or {} 45 | ) 46 | return { 47 | "name": name, 48 | "url": url, 49 | "status": status, 50 | "accessible": status == 200, 51 | "message": "OK" if status == 200 else f"Error: HTTP {status}", 52 | "content": content[:500] 53 | if len(content) > 500 54 | else content, # Truncate long responses 55 | } 56 | except Exception as e: 57 | return { 58 | "name": name, 59 | "url": url, 60 | "status": 0, 61 | "accessible": False, 62 | "message": f"Error: {e!s}", 63 | "content": str(e), 64 | } 65 | 66 | 67 | async def check_all_api_endpoints() -> list[dict]: 68 | """Check all known API endpoints.""" 69 | endpoints: list[dict[str, Any]] = [ 70 | # PubTator3 API endpoints 71 | { 72 | "url": f"{PUBTATOR3_BASE_URL}/entity/autocomplete/", 73 | "name": "PubTator3 Autocomplete", 74 | "params": {"query": "BRAF", "concept": "gene", "limit": 2}, 75 | }, 76 | { 77 | "url": f"{PUBTATOR3_BASE_URL}/publications/export/biocjson", 78 | "name": "PubTator3 Publications", 79 | "params": {"pmids": "29355051", "full": "false"}, 80 | }, 81 | { 82 | "url": f"{PUBTATOR3_BASE_URL}/search/", 83 | "name": "PubTator3 Search", 84 | "params": { 85 | "query": "BRAF", 86 | "concepts": "gene", 87 | "page": 1, 88 | "size": 1, 89 | "text": "@CHEMICAL_remdesivir", 90 | }, 91 | }, 92 | # ClinicalTrials.gov API endpoints 93 | { 94 | "url": f"{CLINICAL_TRIALS_BASE_URL}", 95 | "name": "ClinicalTrials.gov Search API", 96 | "params": {"query.term": "cancer", "pageSize": "1"}, 97 | }, 98 | { 99 | "url": f"{CLINICAL_TRIALS_BASE_URL}/NCT04280705", 100 | "name": "ClinicalTrials.gov Study API", 101 | "params": {"fields": "IdentificationModule,StatusModule"}, 102 | }, 103 | # MyVariant.info API endpoints 104 | { 105 | "url": f"{MYVARIANT_BASE_URL}/query", 106 | "name": "MyVariant.info Query API", 107 | "params": {"q": "rs113488022", "size": 1}, 108 | }, 109 | { 110 | "url": f"{MYVARIANT_BASE_URL}/variant/rs113488022", 111 | "name": "MyVariant.info Variant API", 112 | "params": {"fields": "all"}, 113 | }, 114 | ] 115 | 116 | tasks = [] 117 | for endpoint in endpoints: 118 | url = endpoint["url"] 119 | name = endpoint["name"] 120 | params = endpoint.get("params") 121 | tasks.append(check_api_endpoint(url, name, params)) 122 | 123 | return await asyncio.gather(*tasks) 124 | 125 | 126 | def check_network_connectivity() -> dict: 127 | """Check basic network connectivity.""" 128 | try: 129 | # Try to connect to Google's DNS to check internet connectivity 130 | 
socket.create_connection(("8.8.8.8", 53), timeout=3) 131 | return { 132 | "status": "Connected", 133 | "message": "Internet connection is available", 134 | } 135 | except OSError: 136 | return { 137 | "status": "Disconnected", 138 | "message": "No internet connection detected", 139 | } 140 | 141 | 142 | def check_system_resources() -> dict: 143 | """Check system resources like CPU, memory, and disk space.""" 144 | if not PSUTIL_AVAILABLE: 145 | return { 146 | "error": "psutil package not installed. Install with: pip install psutil" 147 | } 148 | 149 | return { 150 | "cpu_usage": psutil.cpu_percent(interval=1), 151 | "memory": { 152 | "total": psutil.virtual_memory().total / (1024**3), # GB 153 | "available": psutil.virtual_memory().available / (1024**3), # GB 154 | "percent_used": psutil.virtual_memory().percent, 155 | }, 156 | "disk": { 157 | "total": psutil.disk_usage("/").total / (1024**3), # GB 158 | "free": psutil.disk_usage("/").free / (1024**3), # GB 159 | "percent_used": psutil.disk_usage("/").percent, 160 | }, 161 | } 162 | 163 | 164 | def check_python_environment() -> dict: 165 | """Check Python environment and installed packages.""" 166 | env_info = { 167 | "python_version": platform.python_version(), 168 | "platform": platform.platform(), 169 | "system": platform.system(), 170 | } 171 | 172 | # Check for httpx version without importing it 173 | try: 174 | import importlib.metadata 175 | 176 | env_info["httpx_version"] = importlib.metadata.version("httpx") 177 | except (ImportError, importlib.metadata.PackageNotFoundError): 178 | env_info["httpx_version"] = "Unknown" 179 | 180 | if PSUTIL_AVAILABLE: 181 | env_info["psutil_version"] = psutil.__version__ 182 | else: 183 | env_info["psutil_version"] = "Not installed" 184 | 185 | return env_info 186 | 187 | 188 | def display_api_health(results: list[dict], verbose: bool = False) -> None: 189 | """Display API health check results in a table.""" 190 | table = Table(title="API Endpoints Health") 191 | table.add_column("Endpoint", style="cyan") 192 | table.add_column("URL", style="blue") 193 | table.add_column("Status", style="magenta") 194 | table.add_column("Message", style="green") 195 | 196 | for result in results: 197 | "green" if result["accessible"] else "red" 198 | table.add_row( 199 | result["name"], 200 | result["url"], 201 | f"{result['status']}", 202 | result["message"], 203 | style=None if result["accessible"] else "red", 204 | ) 205 | 206 | console.print(table) 207 | 208 | # Display detailed response content if verbose mode is enabled 209 | if verbose: 210 | for result in results: 211 | if not result["accessible"]: 212 | console.print( 213 | f"\n[bold red]Detailed error for {result['name']}:[/bold red]" 214 | ) 215 | console.print( 216 | Panel( 217 | result["content"], 218 | title=f"{result['name']} Response", 219 | border_style="red", 220 | ) 221 | ) 222 | 223 | 224 | def display_system_health( 225 | system_info: dict, network_info: dict, env_info: dict 226 | ) -> None: 227 | """Display system health information in a table.""" 228 | # System resources table 229 | resource_table = Table(title="System Resources") 230 | resource_table.add_column("Resource", style="cyan") 231 | resource_table.add_column("Value", style="green") 232 | 233 | if "error" in system_info: 234 | resource_table.add_row("Error", system_info["error"], style="red") 235 | else: 236 | resource_table.add_row("CPU Usage", f"{system_info['cpu_usage']}%") 237 | resource_table.add_row( 238 | "Memory Total", f"{system_info['memory']['total']:.2f} GB" 239 | ) 240 
| resource_table.add_row( 241 | "Memory Available", f"{system_info['memory']['available']:.2f} GB" 242 | ) 243 | resource_table.add_row( 244 | "Memory Usage", 245 | f"{system_info['memory']['percent_used']}%", 246 | style="green" 247 | if system_info["memory"]["percent_used"] < 90 248 | else "red", 249 | ) 250 | resource_table.add_row( 251 | "Disk Total", f"{system_info['disk']['total']:.2f} GB" 252 | ) 253 | resource_table.add_row( 254 | "Disk Free", f"{system_info['disk']['free']:.2f} GB" 255 | ) 256 | resource_table.add_row( 257 | "Disk Usage", 258 | f"{system_info['disk']['percent_used']}%", 259 | style="green" 260 | if system_info["disk"]["percent_used"] < 90 261 | else "red", 262 | ) 263 | 264 | console.print(resource_table) 265 | 266 | # Network and environment table 267 | env_table = Table(title="Network & Environment") 268 | env_table.add_column("Component", style="cyan") 269 | env_table.add_column("Status/Version", style="green") 270 | 271 | env_table.add_row( 272 | "Network", 273 | network_info["status"], 274 | style=None if network_info["status"] == "Connected" else "red", 275 | ) 276 | env_table.add_row("Python Version", env_info["python_version"]) 277 | env_table.add_row("Platform", env_info["platform"]) 278 | env_table.add_row("System", env_info["system"]) 279 | env_table.add_row("HTTPX Version", env_info["httpx_version"]) 280 | env_table.add_row( 281 | "Psutil Version", 282 | env_info["psutil_version"], 283 | style="red" if env_info["psutil_version"] == "Not installed" else None, 284 | ) 285 | 286 | console.print(env_table) 287 | 288 | 289 | @health_app.callback(invoke_without_command=True) 290 | def health_callback(ctx: typer.Context): 291 | """Health check callback.""" 292 | if ctx.invoked_subcommand is None: 293 | # If no subcommand is provided, run the default health check 294 | check() 295 | 296 | 297 | @health_app.command() 298 | def check( 299 | api_only: bool = typer.Option( 300 | False, "--api-only", help="Check only API endpoints" 301 | ), 302 | system_only: bool = typer.Option( 303 | False, "--system-only", help="Check only system health" 304 | ), 305 | verbose: bool = typer.Option( 306 | False, 307 | "--verbose", 308 | "-v", 309 | help="Show detailed error information and API responses", 310 | ), 311 | ): 312 | """ 313 | Run a comprehensive health check on API endpoints and system resources. 314 | 315 | This command checks: 316 | - API endpoints connectivity and response 317 | - Network connectivity 318 | - System resources (CPU, memory, disk) 319 | - Python environment 320 | 321 | Note: For full system resource checks, the 'psutil' package is required. 
322 | Install with: pip install psutil 323 | """ 324 | with console.status("[bold green]Running health checks...") as status: 325 | # Check API endpoints 326 | if not system_only: 327 | status.update("[bold green]Checking API endpoints...") 328 | api_results = asyncio.run(check_all_api_endpoints()) 329 | display_api_health(api_results, verbose) 330 | 331 | # Check system health 332 | if not api_only: 333 | status.update("[bold green]Checking system resources...") 334 | system_info = check_system_resources() 335 | network_info = check_network_connectivity() 336 | env_info = check_python_environment() 337 | display_system_health(system_info, network_info, env_info) 338 | 339 | # Overall status 340 | if not api_only and not system_only: 341 | api_health = all(result["accessible"] for result in api_results) 342 | 343 | if "error" in system_info: 344 | system_health = False 345 | else: 346 | system_health = ( 347 | network_info["status"] == "Connected" 348 | and system_info["memory"]["percent_used"] < 90 349 | and system_info["disk"]["percent_used"] < 90 350 | ) 351 | 352 | if api_health and system_health: 353 | console.print( 354 | "\n[bold green]✓ All systems operational![/bold green]" 355 | ) 356 | else: 357 | console.print( 358 | "\n[bold red]⚠ Some health checks failed. See details above.[/bold red]" 359 | ) 360 | if verbose: 361 | console.print( 362 | "[yellow]Run with --verbose flag to see detailed error information[/yellow]" 363 | ) 364 | ``` -------------------------------------------------------------------------------- /src/biomcp/metrics.py: -------------------------------------------------------------------------------- ```python 1 | """Performance monitoring and metrics collection for BioMCP.""" 2 | 3 | import asyncio 4 | import functools 5 | import logging 6 | import os 7 | import time 8 | from collections import defaultdict 9 | from dataclasses import dataclass, field 10 | from datetime import datetime 11 | 12 | from .constants import ( 13 | MAX_METRIC_SAMPLES, 14 | METRIC_PERCENTILE_50, 15 | METRIC_PERCENTILE_95, 16 | METRIC_PERCENTILE_99, 17 | ) 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | # Check if metrics are enabled via environment variable 22 | METRICS_ENABLED = ( 23 | os.getenv("BIOMCP_METRICS_ENABLED", "false").lower() == "true" 24 | ) 25 | 26 | 27 | @dataclass 28 | class MetricSample: 29 | """Single metric measurement.""" 30 | 31 | timestamp: datetime 32 | duration: float 33 | success: bool 34 | error: str | None = None 35 | tags: dict[str, str] = field(default_factory=dict) 36 | 37 | 38 | @dataclass 39 | class MetricSummary: 40 | """Summary statistics for a metric.""" 41 | 42 | name: str 43 | count: int 44 | success_count: int 45 | error_count: int 46 | total_duration: float 47 | min_duration: float 48 | max_duration: float 49 | avg_duration: float 50 | p50_duration: float 51 | p95_duration: float 52 | p99_duration: float 53 | error_rate: float 54 | 55 | @classmethod 56 | def from_samples( 57 | cls, name: str, samples: list[MetricSample] 58 | ) -> "MetricSummary": 59 | """Calculate summary statistics from samples.""" 60 | if not samples: 61 | return cls( 62 | name=name, 63 | count=0, 64 | success_count=0, 65 | error_count=0, 66 | total_duration=0.0, 67 | min_duration=0.0, 68 | max_duration=0.0, 69 | avg_duration=0.0, 70 | p50_duration=0.0, 71 | p95_duration=0.0, 72 | p99_duration=0.0, 73 | error_rate=0.0, 74 | ) 75 | 76 | durations = sorted([s.duration for s in samples]) 77 | success_count = sum(1 for s in samples if s.success) 78 | error_count = 
len(samples) - success_count 79 | 80 | def percentile(data: list[float], p: float) -> float: 81 | """Calculate percentile.""" 82 | if not data: 83 | return 0.0 84 | k = (len(data) - 1) * p 85 | f = int(k) 86 | c = k - f 87 | if f >= len(data) - 1: 88 | return data[-1] 89 | return data[f] + c * (data[f + 1] - data[f]) 90 | 91 | return cls( 92 | name=name, 93 | count=len(samples), 94 | success_count=success_count, 95 | error_count=error_count, 96 | total_duration=sum(durations), 97 | min_duration=min(durations), 98 | max_duration=max(durations), 99 | avg_duration=sum(durations) / len(durations), 100 | p50_duration=percentile(durations, METRIC_PERCENTILE_50), 101 | p95_duration=percentile(durations, METRIC_PERCENTILE_95), 102 | p99_duration=percentile(durations, METRIC_PERCENTILE_99), 103 | error_rate=error_count / len(samples) if samples else 0.0, 104 | ) 105 | 106 | 107 | class MetricsCollector: 108 | """Collects and manages performance metrics.""" 109 | 110 | def __init__(self, max_samples_per_metric: int = MAX_METRIC_SAMPLES): 111 | """Initialize metrics collector. 112 | 113 | Args: 114 | max_samples_per_metric: Maximum samples to keep per metric 115 | """ 116 | self._metrics: dict[str, list[MetricSample]] = defaultdict(list) 117 | self._max_samples = max_samples_per_metric 118 | self._lock = asyncio.Lock() 119 | 120 | async def record( 121 | self, 122 | name: str, 123 | duration: float, 124 | success: bool = True, 125 | error: str | None = None, 126 | tags: dict[str, str] | None = None, 127 | ) -> None: 128 | """Record a metric sample. 129 | 130 | Args: 131 | name: Metric name 132 | duration: Duration in seconds 133 | success: Whether operation succeeded 134 | error: Error message if failed 135 | tags: Additional metadata tags 136 | """ 137 | sample = MetricSample( 138 | timestamp=datetime.now(), 139 | duration=duration, 140 | success=success, 141 | error=error, 142 | tags=tags or {}, 143 | ) 144 | 145 | async with self._lock: 146 | samples = self._metrics[name] 147 | samples.append(sample) 148 | 149 | # Keep only the most recent samples 150 | if len(samples) > self._max_samples: 151 | self._metrics[name] = samples[-self._max_samples :] 152 | 153 | async def get_summary(self, name: str) -> MetricSummary | None: 154 | """Get summary statistics for a metric. 155 | 156 | Args: 157 | name: Metric name 158 | 159 | Returns: 160 | Summary statistics or None if metric not found 161 | """ 162 | async with self._lock: 163 | samples = self._metrics.get(name, []) 164 | if not samples: 165 | return None 166 | return MetricSummary.from_samples(name, samples) 167 | 168 | async def get_all_summaries(self) -> dict[str, MetricSummary]: 169 | """Get summaries for all metrics. 170 | 171 | Returns: 172 | Dictionary of metric name to summary 173 | """ 174 | async with self._lock: 175 | return { 176 | name: MetricSummary.from_samples(name, samples) 177 | for name, samples in self._metrics.items() 178 | } 179 | 180 | async def clear(self, name: str | None = None) -> None: 181 | """Clear metrics. 
182 | 183 | Args: 184 | name: Specific metric to clear, or None to clear all 185 | """ 186 | async with self._lock: 187 | if name: 188 | self._metrics.pop(name, None) 189 | else: 190 | self._metrics.clear() 191 | 192 | 193 | # Global metrics collector instance 194 | _metrics_collector = MetricsCollector() 195 | 196 | 197 | async def record_metric( 198 | name: str, 199 | duration: float, 200 | success: bool = True, 201 | error: str | None = None, 202 | tags: dict[str, str] | None = None, 203 | ) -> None: 204 | """Record a metric to the global collector. 205 | 206 | Note: This is a no-op if BIOMCP_METRICS_ENABLED is not set to true. 207 | 208 | Args: 209 | name: Metric name 210 | duration: Duration in seconds 211 | success: Whether operation succeeded 212 | error: Error message if failed 213 | tags: Additional metadata tags 214 | """ 215 | if METRICS_ENABLED: 216 | await _metrics_collector.record(name, duration, success, error, tags) 217 | 218 | 219 | async def get_metric_summary(name: str) -> MetricSummary | None: 220 | """Get summary statistics for a metric. 221 | 222 | Args: 223 | name: Metric name 224 | 225 | Returns: 226 | Summary statistics or None if metric not found 227 | """ 228 | return await _metrics_collector.get_summary(name) 229 | 230 | 231 | async def get_all_metrics() -> dict[str, MetricSummary]: 232 | """Get summaries for all metrics. 233 | 234 | Returns: 235 | Dictionary of metric name to summary 236 | """ 237 | return await _metrics_collector.get_all_summaries() 238 | 239 | 240 | def track_performance(metric_name: str | None = None): 241 | """Decorator to track function performance. 242 | 243 | Args: 244 | metric_name: Custom metric name (defaults to function name) 245 | 246 | Returns: 247 | Decorated function 248 | """ 249 | 250 | def decorator(func): 251 | name = metric_name or f"{func.__module__}.{func.__name__}" 252 | 253 | @functools.wraps(func) 254 | async def async_wrapper(*args, **kwargs): 255 | start_time = time.perf_counter() 256 | success = True 257 | error_msg = None 258 | 259 | try: 260 | result = await func(*args, **kwargs) 261 | return result 262 | except Exception as exc: 263 | success = False 264 | error_msg = str(exc) 265 | raise 266 | finally: 267 | duration = time.perf_counter() - start_time 268 | await record_metric( 269 | name=name, 270 | duration=duration, 271 | success=success, 272 | error=error_msg, 273 | ) 274 | 275 | @functools.wraps(func) 276 | def sync_wrapper(*args, **kwargs): 277 | start_time = time.perf_counter() 278 | success = True 279 | error_msg = None 280 | 281 | try: 282 | result = func(*args, **kwargs) 283 | return result 284 | except Exception as exc: 285 | success = False 286 | error_msg = str(exc) 287 | raise 288 | finally: 289 | duration = time.perf_counter() - start_time 290 | # Schedule metric recording in the event loop 291 | try: 292 | loop = asyncio.get_running_loop() 293 | # Fire and forget the metric recording 294 | task = loop.create_task( 295 | record_metric( 296 | name=name, 297 | duration=duration, 298 | success=success, 299 | error=error_msg, 300 | ) 301 | ) 302 | # Add error handler to prevent unhandled exceptions 303 | task.add_done_callback( 304 | lambda t: t.exception() if t.done() else None 305 | ) 306 | except RuntimeError: 307 | # No event loop running, log instead 308 | logger.debug( 309 | f"Metric {name}: duration={duration:.3f}s, " 310 | f"success={success}, error={error_msg}" 311 | ) 312 | 313 | # Return appropriate wrapper based on function type 314 | if asyncio.iscoroutinefunction(func): 315 | return 
async_wrapper 316 | else: 317 | return sync_wrapper 318 | 319 | return decorator 320 | 321 | 322 | # Context manager for timing operations 323 | class Timer: 324 | """Context manager for timing operations.""" 325 | 326 | def __init__(self, metric_name: str, tags: dict[str, str] | None = None): 327 | """Initialize timer. 328 | 329 | Args: 330 | metric_name: Name for the metric 331 | tags: Additional metadata tags 332 | """ 333 | self.metric_name = metric_name 334 | self.tags = tags or {} 335 | self.start_time: float | None = None 336 | 337 | def __enter__(self): 338 | """Start timing.""" 339 | self.start_time = time.perf_counter() 340 | return self 341 | 342 | def __exit__(self, exc_type, exc_val, exc_tb): 343 | """Stop timing and record metric.""" 344 | if self.start_time is None or not METRICS_ENABLED: 345 | return False 346 | 347 | duration = time.perf_counter() - self.start_time 348 | success = exc_type is None 349 | error_msg = str(exc_val) if exc_val else None 350 | 351 | # Schedule metric recording 352 | try: 353 | loop = asyncio.get_running_loop() 354 | # Fire and forget the metric recording 355 | task = loop.create_task( 356 | record_metric( 357 | name=self.metric_name, 358 | duration=duration, 359 | success=success, 360 | error=error_msg, 361 | tags=self.tags, 362 | ) 363 | ) 364 | # Add error handler to prevent unhandled exceptions 365 | task.add_done_callback( 366 | lambda t: t.exception() if t.done() else None 367 | ) 368 | except RuntimeError: 369 | # No event loop running, log instead 370 | logger.debug( 371 | f"Metric {self.metric_name}: duration={duration:.3f}s, " 372 | f"success={success}, error={error_msg}, tags={self.tags}" 373 | ) 374 | 375 | # Don't suppress exceptions 376 | return False 377 | 378 | async def __aenter__(self): 379 | """Async enter.""" 380 | self.start_time = time.perf_counter() 381 | return self 382 | 383 | async def __aexit__(self, exc_type, exc_val, exc_tb): 384 | """Async exit.""" 385 | if self.start_time is None or not METRICS_ENABLED: 386 | return False 387 | 388 | duration = time.perf_counter() - self.start_time 389 | success = exc_type is None 390 | error_msg = str(exc_val) if exc_val else None 391 | 392 | await record_metric( 393 | name=self.metric_name, 394 | duration=duration, 395 | success=success, 396 | error=error_msg, 397 | tags=self.tags, 398 | ) 399 | 400 | # Don't suppress exceptions 401 | return False 402 | ``` -------------------------------------------------------------------------------- /src/biomcp/openfda/device_events_helpers.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Helper functions for OpenFDA device events to reduce complexity. 
3 | """ 4 | 5 | from collections import Counter 6 | from typing import Any 7 | 8 | from .utils import clean_text, truncate_text 9 | 10 | 11 | def analyze_device_problems( 12 | results: list[dict[str, Any]], 13 | ) -> tuple[list, list, list]: 14 | """Analyze problems, devices, and manufacturers from results.""" 15 | all_problems = [] 16 | all_device_names = [] 17 | all_manufacturers = [] 18 | 19 | for result in results: 20 | devices = result.get("device", []) 21 | for dev in devices: 22 | # Collect device names 23 | if "brand_name" in dev: 24 | all_device_names.append(dev["brand_name"]) 25 | elif "generic_name" in dev: 26 | all_device_names.append(dev["generic_name"]) 27 | 28 | # Collect manufacturers 29 | if "manufacturer_d_name" in dev: 30 | all_manufacturers.append(dev["manufacturer_d_name"]) 31 | 32 | # Collect problems 33 | if "device_problem_text" in dev: 34 | problems = dev["device_problem_text"] 35 | if isinstance(problems, str): 36 | all_problems.append(problems) 37 | elif isinstance(problems, list): 38 | all_problems.extend(problems) 39 | 40 | return all_problems, all_device_names, all_manufacturers 41 | 42 | 43 | def format_top_problems(all_problems: list, results: list) -> list[str]: 44 | """Format top reported device problems.""" 45 | output = [] 46 | 47 | if len(results) > 1 and all_problems: 48 | problem_counts = Counter(all_problems) 49 | top_problems = problem_counts.most_common(5) 50 | 51 | output.append("### Top Reported Problems:") 52 | for prob, count in top_problems: 53 | percentage = (count / len(results)) * 100 54 | output.append(f"- **{prob}**: {count} reports ({percentage:.1f}%)") 55 | output.append("") 56 | 57 | return output 58 | 59 | 60 | def format_device_distribution( 61 | all_device_names: list, results: list 62 | ) -> list[str]: 63 | """Format device distribution for problem searches.""" 64 | output = [] 65 | 66 | if len(results) > 1 and all_device_names: 67 | device_counts = Counter(all_device_names) 68 | top_devices = device_counts.most_common(5) 69 | 70 | output.append("### Devices with This Problem:") 71 | for dev_name, count in top_devices: 72 | output.append(f"- **{dev_name}**: {count} reports") 73 | output.append("") 74 | 75 | return output 76 | 77 | 78 | def format_device_report_summary( 79 | result: dict[str, Any], report_num: int 80 | ) -> list[str]: 81 | """Format a single device event report summary.""" 82 | output = [f"#### Report {report_num}"] 83 | 84 | # Event type 85 | event_type_map = { 86 | "D": "Death", 87 | "IN": "Injury", 88 | "IL": "Illness", 89 | "M": "Malfunction", 90 | "O": "Other", 91 | } 92 | event_type_code = result.get("event_type") or "Unknown" 93 | event_type = event_type_map.get(event_type_code, "Unknown") 94 | output.append(f"**Event Type**: {event_type}") 95 | 96 | # Date 97 | if date_received := result.get("date_received"): 98 | output.append(f"**Date Received**: {date_received}") 99 | 100 | # Device information 101 | devices = result.get("device", []) 102 | for j, dev in enumerate(devices, 1): 103 | output.extend(_format_device_info(dev, j, len(devices))) 104 | 105 | # Event description 106 | if event_desc := result.get("event_description"): 107 | output.append("\n**Event Description**:") 108 | cleaned_desc = clean_text(event_desc) 109 | output.append(truncate_text(cleaned_desc, 500)) 110 | 111 | # Patient impact 112 | output.extend(_format_patient_impact(result.get("patient", []))) 113 | 114 | # MDR report number 115 | if mdr_key := result.get("mdr_report_key"): 116 | output.append(f"\n*MDR Report #: {mdr_key}*") 117 | 
118 | output.append("") 119 | return output 120 | 121 | 122 | def _format_device_info( 123 | dev: dict, device_num: int, total_devices: int 124 | ) -> list[str]: 125 | """Format individual device information.""" 126 | output = [] 127 | 128 | if total_devices > 1: 129 | output.append(f"\n**Device {device_num}:**") 130 | 131 | # Basic device info 132 | output.extend(_format_device_basic_info(dev)) 133 | 134 | # Problem 135 | if "device_problem_text" in dev: 136 | problems = dev["device_problem_text"] 137 | if isinstance(problems, str): 138 | problems = [problems] 139 | if problems: 140 | output.append(f"- **Problem**: {', '.join(problems[:3])}") 141 | 142 | # OpenFDA info 143 | output.extend(_format_device_class_info(dev.get("openfda", {}))) 144 | 145 | return output 146 | 147 | 148 | def _format_device_basic_info(dev: dict) -> list[str]: 149 | """Format basic device information.""" 150 | output = [] 151 | 152 | # Device name 153 | dev_name = dev.get("brand_name") or dev.get("generic_name") or "Unknown" 154 | output.append(f"- **Device**: {dev_name}") 155 | 156 | # Manufacturer 157 | if "manufacturer_d_name" in dev: 158 | output.append(f"- **Manufacturer**: {dev['manufacturer_d_name']}") 159 | 160 | # Model/Catalog 161 | if "model_number" in dev: 162 | output.append(f"- **Model**: {dev['model_number']}") 163 | if "catalog_number" in dev: 164 | output.append(f"- **Catalog #**: {dev['catalog_number']}") 165 | 166 | return output 167 | 168 | 169 | def _format_device_class_info(openfda: dict) -> list[str]: 170 | """Format device class and specialty information.""" 171 | output = [] 172 | 173 | if "device_class" in openfda: 174 | dev_class = openfda["device_class"] 175 | class_map = {"1": "Class I", "2": "Class II", "3": "Class III"} 176 | output.append( 177 | f"- **FDA Class**: {class_map.get(dev_class, dev_class)}" 178 | ) 179 | 180 | if "medical_specialty_description" in openfda: 181 | specialties = openfda["medical_specialty_description"] 182 | if specialties: 183 | output.append(f"- **Medical Specialty**: {specialties[0]}") 184 | 185 | return output 186 | 187 | 188 | def _format_patient_impact(patient_list: list) -> list[str]: 189 | """Format patient impact information.""" 190 | output = [] 191 | 192 | if patient_list: 193 | patient_info = patient_list[0] 194 | outcomes = [] 195 | 196 | if patient_info.get("date_of_death"): 197 | outcomes.append("Death") 198 | if patient_info.get("life_threatening") == "Y": 199 | outcomes.append("Life-threatening") 200 | if patient_info.get("disability") == "Y": 201 | outcomes.append("Disability") 202 | 203 | if outcomes: 204 | output.append(f"\n**Patient Impact**: {', '.join(outcomes)}") 205 | 206 | return output 207 | 208 | 209 | def format_device_detail_header( 210 | result: dict[str, Any], mdr_report_key: str 211 | ) -> list[str]: 212 | """Format device event detail header.""" 213 | output = [f"## Device Event Report: {mdr_report_key}\n"] 214 | output.append("### Event Overview") 215 | 216 | event_type_map = { 217 | "D": "Death", 218 | "IN": "Injury", 219 | "IL": "Illness", 220 | "M": "Malfunction", 221 | "O": "Other", 222 | } 223 | event_type_code = result.get("event_type") or "Unknown" 224 | event_type = event_type_map.get(event_type_code, "Unknown") 225 | output.append(f"**Event Type**: {event_type}") 226 | 227 | if date_received := result.get("date_received"): 228 | output.append(f"**Date Received**: {date_received}") 229 | 230 | if date_of_event := result.get("date_of_event"): 231 | output.append(f"**Date of Event**: {date_of_event}") 232 | 233 | # 
Report source 234 | source_map = { 235 | "P": "Physician", 236 | "O": "Other health professional", 237 | "U": "User facility", 238 | "C": "Distributor", 239 | "M": "Manufacturer", 240 | } 241 | source_type = result.get("source_type") 242 | if isinstance(source_type, list): 243 | # Handle case where source_type is a list 244 | sources: list[str] = [] 245 | for st in source_type: 246 | if st: 247 | mapped = source_map.get(st) 248 | sources.append(mapped if mapped else st) 249 | else: 250 | sources.append("Unknown") 251 | output.append(f"**Report Source**: {', '.join(sources)}") 252 | elif source_type: 253 | source = source_map.get(source_type, source_type) 254 | output.append(f"**Report Source**: {source}") 255 | else: 256 | output.append("**Report Source**: Unknown") 257 | 258 | output.append("") 259 | return output 260 | 261 | 262 | def format_detailed_device_info(devices: list[dict[str, Any]]) -> list[str]: 263 | """Format detailed device information.""" 264 | output = ["### Device Information"] 265 | 266 | for i, dev in enumerate(devices, 1): 267 | if len(devices) > 1: 268 | output.append(f"\n#### Device {i}") 269 | 270 | # Basic info 271 | dev_name = ( 272 | dev.get("brand_name") or dev.get("generic_name") or "Unknown" 273 | ) 274 | output.append(f"**Device Name**: {dev_name}") 275 | 276 | for field, label in [ 277 | ("manufacturer_d_name", "Manufacturer"), 278 | ("model_number", "Model Number"), 279 | ("catalog_number", "Catalog Number"), 280 | ("lot_number", "Lot Number"), 281 | ("date_received", "Device Received Date"), 282 | ("expiration_date_of_device", "Expiration Date"), 283 | ]: 284 | if value := dev.get(field): 285 | output.append(f"**{label}**: {value}") 286 | 287 | # Problems 288 | if "device_problem_text" in dev: 289 | problems = dev["device_problem_text"] 290 | if isinstance(problems, str): 291 | problems = [problems] 292 | output.append(f"**Device Problems**: {', '.join(problems)}") 293 | 294 | # OpenFDA data 295 | output.extend(_format_device_openfda(dev.get("openfda", {}))) 296 | 297 | # Evaluation 298 | if "device_evaluated_by_manufacturer" in dev: 299 | evaluated = ( 300 | "Yes" 301 | if dev["device_evaluated_by_manufacturer"] == "Y" 302 | else "No" 303 | ) 304 | output.append(f"**Evaluated by Manufacturer**: {evaluated}") 305 | 306 | output.append("") 307 | return output 308 | 309 | 310 | def _format_device_openfda(openfda: dict) -> list[str]: 311 | """Format OpenFDA device data.""" 312 | output = [] 313 | 314 | if "device_class" in openfda: 315 | dev_class = openfda["device_class"] 316 | class_map = {"1": "Class I", "2": "Class II", "3": "Class III"} 317 | output.append( 318 | f"**FDA Device Class**: {class_map.get(dev_class, dev_class)}" 319 | ) 320 | 321 | if specialties := openfda.get("medical_specialty_description"): 322 | if isinstance(specialties, list): 323 | output.append(f"**Medical Specialty**: {', '.join(specialties)}") 324 | else: 325 | output.append(f"**Medical Specialty**: {specialties}") 326 | 327 | if "product_code" in openfda: 328 | output.append(f"**Product Code**: {openfda['product_code']}") 329 | 330 | return output 331 | 332 | 333 | def format_patient_details(patient_list: list) -> list[str]: 334 | """Format detailed patient information.""" 335 | output: list[str] = [] 336 | 337 | if not patient_list: 338 | return output 339 | 340 | output.append("### Patient Information") 341 | patient_info = patient_list[0] 342 | 343 | # Demographics 344 | output.extend(_format_patient_demographics(patient_info)) 345 | 346 | # Outcomes 347 | outcomes = 
_collect_patient_outcomes(patient_info) 348 | if outcomes: 349 | output.append(f"**Outcomes**: {', '.join(outcomes)}") 350 | 351 | output.append("") 352 | return output 353 | 354 | 355 | def _format_patient_demographics(patient_info: dict) -> list[str]: 356 | """Format patient demographic information.""" 357 | output = [] 358 | 359 | if "patient_age" in patient_info: 360 | output.append(f"**Age**: {patient_info['patient_age']} years") 361 | 362 | if "patient_sex" in patient_info: 363 | sex_map = {"M": "Male", "F": "Female", "U": "Unknown"} 364 | sex = sex_map.get(patient_info["patient_sex"], "Unknown") 365 | output.append(f"**Sex**: {sex}") 366 | 367 | return output 368 | 369 | 370 | def _collect_patient_outcomes(patient_info: dict) -> list[str]: 371 | """Collect patient outcome information.""" 372 | outcomes = [] 373 | 374 | if date_of_death := patient_info.get("date_of_death"): 375 | outcomes.append(f"Death ({date_of_death})") 376 | if patient_info.get("life_threatening") == "Y": 377 | outcomes.append("Life-threatening") 378 | if patient_info.get("disability") == "Y": 379 | outcomes.append("Disability") 380 | if patient_info.get("hospitalization") == "Y": 381 | outcomes.append("Hospitalization") 382 | if patient_info.get("congenital_anomaly") == "Y": 383 | outcomes.append("Congenital anomaly") 384 | if patient_info.get("required_intervention") == "Y": 385 | outcomes.append("Required intervention") 386 | 387 | return outcomes 388 | ``` -------------------------------------------------------------------------------- /docs/backend-services-reference/07-alphagenome.md: -------------------------------------------------------------------------------- ```markdown 1 | # AlphaGenome API Reference 2 | 3 | Google DeepMind's AlphaGenome provides AI-powered predictions of variant effects on gene regulation, chromatin accessibility, and splicing. 4 | 5 | ## Usage Guide 6 | 7 | For a step-by-step tutorial on using AlphaGenome for variant effect prediction, see [How to Predict Variant Effects with AlphaGenome](../how-to-guides/04-predict-variant-effects-with-alphagenome.md). 8 | 9 | ## Overview 10 | 11 | AlphaGenome predicts regulatory effects of genetic variants by analyzing: 12 | 13 | - Gene expression changes in nearby genes 14 | - Chromatin accessibility alterations 15 | - Splicing pattern modifications 16 | - Enhancer and promoter activity 17 | - Transcription factor binding 18 | - 3D chromatin interactions 19 | 20 | **Note:** AlphaGenome is an optional integration requiring separate installation and API key. 21 | 22 | ## Authentication 23 | 24 | ### Obtaining an API Key 25 | 26 | 1. Visit [https://deepmind.google.com/science/alphagenome](https://deepmind.google.com/science/alphagenome) 27 | 2. Register for non-commercial research use 28 | 3. Accept terms of service 29 | 4. Receive API key via email 30 | 31 | ### API Key Usage 32 | 33 | **Environment Variable:** 34 | 35 | ```bash 36 | export ALPHAGENOME_API_KEY="your-key-here" 37 | ``` 38 | 39 | **Per-Request:** 40 | 41 | ```python 42 | result = alphagenome_predictor( 43 | chromosome="chr7", 44 | position=140753336, 45 | reference="A", 46 | alternate="T", 47 | api_key="your-key-here" # Overrides environment 48 | ) 49 | ``` 50 | 51 | ## Installation 52 | 53 | AlphaGenome requires separate installation: 54 | 55 | ```bash 56 | # Clone and install 57 | git clone https://github.com/google-deepmind/alphagenome.git 58 | cd alphagenome 59 | pip install . 
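# Note (general Python packaging behavior, not an AlphaGenome-specific rule):
# install into the same Python environment that runs BioMCP, otherwise the
# "import alphagenome" verification step below will not find the package.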
60 | 61 | # Verify installation 62 | python -c "import alphagenome; print('AlphaGenome installed')" 63 | ``` 64 | 65 | ## API Interface 66 | 67 | ### Prediction Endpoint 68 | 69 | The AlphaGenome API is accessed through the BioMCP `alphagenome_predictor` tool. 70 | 71 | #### Parameters 72 | 73 | | Parameter | Type | Required | Description | 74 | | ------------------------ | --------- | -------- | --------------------------------- | 75 | | `chromosome` | str | Yes | Chromosome (e.g., "chr7") | 76 | | `position` | int | Yes | 1-based genomic position | 77 | | `reference` | str | Yes | Reference allele | 78 | | `alternate` | str | Yes | Alternate allele | 79 | | `interval_size` | int | No | Analysis window (default: 131072) | 80 | | `tissue_types` | list[str] | No | UBERON tissue codes | 81 | | `significance_threshold` | float | No | Log2FC threshold (default: 0.5) | 82 | | `api_key` | str | No | AlphaGenome API key | 83 | 84 | #### Interval Sizes 85 | 86 | | Size | Use Case | Description | 87 | | --------- | ---------- | ------------------------------ | 88 | | 2,048 | Promoter | TSS and promoter variants | 89 | | 16,384 | Local | Proximal regulatory elements | 90 | | 131,072 | Standard | Enhancer-promoter interactions | 91 | | 524,288 | Long-range | Distal regulatory elements | 92 | | 1,048,576 | TAD-level | Topological domain effects | 93 | 94 | ## Tissue Codes 95 | 96 | AlphaGenome supports tissue-specific predictions using UBERON ontology: 97 | 98 | | Tissue | UBERON Code | Description | 99 | | -------- | -------------- | -------------------- | 100 | | Breast | UBERON:0000310 | Mammary gland tissue | 101 | | Liver | UBERON:0002107 | Hepatic tissue | 102 | | Prostate | UBERON:0002367 | Prostate gland | 103 | | Brain | UBERON:0000955 | Neural tissue | 104 | | Lung | UBERON:0002048 | Pulmonary tissue | 105 | | Colon | UBERON:0001155 | Colonic mucosa | 106 | 107 | ## Response Format 108 | 109 | ### Gene Expression Predictions 110 | 111 | ```json 112 | { 113 | "gene_expression": [ 114 | { 115 | "gene_name": "BRAF", 116 | "gene_id": "ENSG00000157764", 117 | "distance_to_tss": 1234, 118 | "log2_fold_change": 1.25, 119 | "confidence": 0.89, 120 | "tissue": "UBERON:0000310" 121 | } 122 | ] 123 | } 124 | ``` 125 | 126 | **Interpretation:** 127 | 128 | - `log2_fold_change > 1.0`: Strong increase (2x+) 129 | - `log2_fold_change > 0.5`: Moderate increase 130 | - `log2_fold_change < -1.0`: Strong decrease (0.5x) 131 | - `log2_fold_change < -0.5`: Moderate decrease 132 | 133 | ### Chromatin Accessibility 134 | 135 | ```json 136 | { 137 | "chromatin_accessibility": [ 138 | { 139 | "region_type": "enhancer", 140 | "coordinates": "chr7:140450000-140451000", 141 | "accessibility_change": 0.75, 142 | "peak_height_change": 1.2, 143 | "tissue": "UBERON:0000310" 144 | } 145 | ] 146 | } 147 | ``` 148 | 149 | **Interpretation:** 150 | 151 | - Positive values: Increased accessibility (open chromatin) 152 | - Negative values: Decreased accessibility (closed chromatin) 153 | 154 | ### Splicing Predictions 155 | 156 | ```json 157 | { 158 | "splicing": [ 159 | { 160 | "event_type": "exon_skipping", 161 | "affected_exon": "ENST00000288602.6:exon14", 162 | "delta_psi": -0.35, 163 | "splice_site_strength_change": -2.1 164 | } 165 | ] 166 | } 167 | ``` 168 | 169 | **PSI (Percent Spliced In):** 170 | 171 | - `delta_psi > 0`: Increased exon inclusion 172 | - `delta_psi < 0`: Increased exon skipping 173 | - `|delta_psi| > 0.1`: Biologically significant 174 | 175 | ## Usage Examples 176 | 177 | ### Basic Prediction 178 | 179 
| ```python 180 | # Predict BRAF V600E effects 181 | result = await alphagenome_predictor( 182 | chromosome="chr7", 183 | position=140753336, 184 | reference="A", 185 | alternate="T" 186 | ) 187 | 188 | # Process results 189 | for gene in result.gene_expression: 190 | if abs(gene.log2_fold_change) > 1.0: 191 | print(f"{gene.gene_name}: {gene.log2_fold_change:.2f} log2FC") 192 | ``` 193 | 194 | ### Tissue-Specific Analysis 195 | 196 | ```python 197 | # Compare effects across tissues 198 | tissues = { 199 | "breast": "UBERON:0000310", 200 | "lung": "UBERON:0002048", 201 | "brain": "UBERON:0000955" 202 | } 203 | 204 | results = {} 205 | for tissue_name, tissue_code in tissues.items(): 206 | results[tissue_name] = await alphagenome_predictor( 207 | chromosome="chr17", 208 | position=7577120, 209 | reference="G", 210 | alternate="A", 211 | tissue_types=[tissue_code] 212 | ) 213 | ``` 214 | 215 | ### Promoter Variant Analysis 216 | 217 | ```python 218 | # Use small window for promoter variants 219 | result = await alphagenome_predictor( 220 | chromosome="chr7", 221 | position=5569100, # Near ACTB promoter 222 | reference="C", 223 | alternate="T", 224 | interval_size=2048 # 2kb window 225 | ) 226 | 227 | # Check for promoter effects 228 | promoter_effects = [ 229 | g for g in result.gene_expression 230 | if abs(g.distance_to_tss) < 1000 231 | ] 232 | ``` 233 | 234 | ### Enhancer Variant Analysis 235 | 236 | ```python 237 | # Use larger window for enhancer variants 238 | result = await alphagenome_predictor( 239 | chromosome="chr8", 240 | position=128748315, # MYC enhancer region 241 | reference="G", 242 | alternate="A", 243 | interval_size=524288 # 512kb window 244 | ) 245 | 246 | # Analyze chromatin changes 247 | enhancer_changes = [ 248 | c for c in result.chromatin_accessibility 249 | if c.region_type == "enhancer" and abs(c.accessibility_change) > 0.5 250 | ] 251 | ``` 252 | 253 | ## Best Practices 254 | 255 | ### 1. Choose Appropriate Interval Size 256 | 257 | ```python 258 | def select_interval_size(variant_type): 259 | """Select interval based on variant type""" 260 | intervals = { 261 | "promoter": 2048, 262 | "splice_site": 16384, 263 | "enhancer": 131072, 264 | "intergenic": 524288, 265 | "structural": 1048576 266 | } 267 | return intervals.get(variant_type, 131072) 268 | ``` 269 | 270 | ### 2. Handle Missing Predictions 271 | 272 | ```python 273 | # Not all variants affect gene expression 274 | if not result.gene_expression: 275 | print("No gene expression changes predicted") 276 | # Check chromatin or splicing effects instead 277 | ``` 278 | 279 | ### 3. Filter by Significance 280 | 281 | ```python 282 | # Focus on significant changes 283 | significant_genes = [ 284 | g for g in result.gene_expression 285 | if abs(g.log2_fold_change) > significance_threshold 286 | and g.confidence > 0.8 287 | ] 288 | ``` 289 | 290 | ### 4. 
Validate Input 291 | 292 | ```python 293 | def validate_variant(chr, pos, ref, alt): 294 | """Validate variant format""" 295 | # Check chromosome format 296 | if not chr.startswith("chr"): 297 | raise ValueError("Chromosome must start with 'chr'") 298 | 299 | # Check alleles 300 | valid_bases = set("ACGT") 301 | if ref not in valid_bases or alt not in valid_bases: 302 | raise ValueError("Invalid nucleotide") 303 | 304 | # Check position 305 | if pos < 1: 306 | raise ValueError("Position must be 1-based") 307 | ``` 308 | 309 | ## Integration Patterns 310 | 311 | ### VUS Classification Pipeline 312 | 313 | ```python 314 | async def classify_vus(variant): 315 | """Classify variant of unknown significance""" 316 | 317 | # 1. Predict regulatory effects 318 | predictions = await alphagenome_predictor( 319 | chromosome=variant.chr, 320 | position=variant.pos, 321 | reference=variant.ref, 322 | alternate=variant.alt 323 | ) 324 | 325 | # 2. Score impact 326 | max_expression = max( 327 | abs(g.log2_fold_change) for g in predictions.gene_expression 328 | ) if predictions.gene_expression else 0 329 | 330 | max_chromatin = max( 331 | abs(c.accessibility_change) for c in predictions.chromatin_accessibility 332 | ) if predictions.chromatin_accessibility else 0 333 | 334 | # 3. Classify 335 | if max_expression > 2.0 or max_chromatin > 1.5: 336 | return "High regulatory impact" 337 | elif max_expression > 1.0 or max_chromatin > 0.75: 338 | return "Moderate regulatory impact" 339 | else: 340 | return "Low regulatory impact" 341 | ``` 342 | 343 | ### Multi-Variant Analysis 344 | 345 | ```python 346 | async def analyze_variant_set(variants, target_gene): 347 | """Analyze multiple variants affecting a gene""" 348 | 349 | results = [] 350 | for variant in variants: 351 | prediction = await alphagenome_predictor( 352 | chromosome=variant["chr"], 353 | position=variant["pos"], 354 | reference=variant["ref"], 355 | alternate=variant["alt"] 356 | ) 357 | 358 | # Find target gene effect 359 | for gene in prediction.gene_expression: 360 | if gene.gene_name == target_gene: 361 | results.append({ 362 | "variant": f"{variant['chr']}:{variant['pos']}", 363 | "effect": gene.log2_fold_change, 364 | "confidence": gene.confidence 365 | }) 366 | break 367 | 368 | # Sort by effect size 369 | return sorted(results, key=lambda x: abs(x["effect"]), reverse=True) 370 | ``` 371 | 372 | ## Limitations 373 | 374 | ### Technical Limitations 375 | 376 | - **Species**: Human only (GRCh38) 377 | - **Variant Types**: SNVs only (no indels/SVs) 378 | - **Sequence Context**: Requires reference match 379 | - **Computation Time**: 1-3 seconds per variant 380 | 381 | ### Biological Limitations 382 | 383 | - **Cell Type**: Predictions are tissue-specific approximations 384 | - **Environmental Factors**: Does not account for conditions 385 | - **Epistasis**: Single variant effects only 386 | - **Temporal**: No developmental stage consideration 387 | 388 | ## Error Handling 389 | 390 | ### Common Errors 391 | 392 | ```python 393 | try: 394 | result = await alphagenome_predictor(...) 
395 | except AlphaGenomeError as e: 396 | if "API key" in str(e): 397 | # Handle missing/invalid key 398 | pass 399 | elif "Invalid sequence" in str(e): 400 | # Handle sequence errors 401 | pass 402 | elif "Rate limit" in str(e): 403 | # Handle rate limiting 404 | pass 405 | ``` 406 | 407 | ### Retry Logic 408 | 409 | ```python 410 | async def predict_with_retry(params, max_retries=3): 411 | """Retry on transient failures""" 412 | for attempt in range(max_retries): 413 | try: 414 | return await alphagenome_predictor(**params) 415 | except Exception as e: 416 | if attempt == max_retries - 1: 417 | raise 418 | await asyncio.sleep(2 ** attempt) # Exponential backoff 419 | ``` 420 | 421 | ## Performance Optimization 422 | 423 | ### Batch Processing 424 | 425 | ```python 426 | async def batch_predict(variants, batch_size=10): 427 | """Process variants in batches""" 428 | results = [] 429 | 430 | for i in range(0, len(variants), batch_size): 431 | batch = variants[i:i + batch_size] 432 | batch_results = await asyncio.gather(*[ 433 | alphagenome_predictor(**v) for v in batch 434 | ]) 435 | results.extend(batch_results) 436 | 437 | # Rate limiting 438 | if i + batch_size < len(variants): 439 | await asyncio.sleep(1) 440 | 441 | return results 442 | ``` 443 | 444 | ### Caching Strategy 445 | 446 | ```python 447 | from functools import lru_cache 448 | 449 | @lru_cache(maxsize=1000) 450 | def get_cached_prediction(chr, pos, ref, alt, interval): 451 | """Cache predictions for repeated queries""" 452 | return alphagenome_predictor( 453 | chromosome=chr, 454 | position=pos, 455 | reference=ref, 456 | alternate=alt, 457 | interval_size=interval 458 | ) 459 | ``` 460 | 461 | ## Support Resources 462 | 463 | - **Documentation**: [AlphaGenome GitHub](https://github.com/google-deepmind/alphagenome) 464 | - **Paper**: [Nature Publication](https://www.nature.com/alphagenome) 465 | - **Support**: Via GitHub issues 466 | - **Terms**: Non-commercial research use only 467 | ``` -------------------------------------------------------------------------------- /docs/how-to-guides/03-get-comprehensive-variant-annotations.md: -------------------------------------------------------------------------------- ```markdown 1 | # How to Get Comprehensive Variant Annotations 2 | 3 | This guide demonstrates how to retrieve and interpret genetic variant information using BioMCP's integrated databases. 
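For a quick sense of what comes back before working through the sections below, a single lookup already surfaces most of the annotations this guide covers. The minimal sketch below reuses the field names from the examples later in the guide (`clinical_significance`, `frequencies.gnomad`, `gene.symbol`); treat them as illustrative of the shape of the data rather than a fixed schema.

```python
# Minimal sketch: one lookup, a few of the annotations discussed in this guide.
variant = await variant_getter("rs121913529")  # BRAF V600E, used throughout this guide

print(variant.clinical_significance)  # e.g., "Pathogenic"
print(variant.frequencies.gnomad)     # population allele frequency
print(variant.gene.symbol)            # "BRAF"
```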
4 | 5 | ## Overview 6 | 7 | BioMCP provides variant annotations from multiple sources: 8 | 9 | - **MyVariant.info**: Core variant database with clinical significance ([BioThings Reference](../backend-services-reference/02-biothings-suite.md)) 10 | - **External Annotations**: TCGA cancer data, 1000 Genomes population frequencies 11 | - **cBioPortal Integration**: Cancer-specific mutation context ([API Reference](../backend-services-reference/03-cbioportal.md)) 12 | - **BioThings Links**: Connected gene, disease, and drug information ([BioThings Suite](../backend-services-reference/02-biothings-suite.md)) 13 | 14 | ## Basic Variant Lookup 15 | 16 | ### Search by rsID 17 | 18 | Find variant information using dbSNP identifiers: 19 | 20 | ```bash 21 | # CLI 22 | biomcp variant get rs121913529 23 | 24 | # Python 25 | variant = await client.variants.get("rs121913529") 26 | 27 | # MCP Tool 28 | variant_getter(variant_id="rs121913529") 29 | ``` 30 | 31 | ### Search by HGVS Notation 32 | 33 | Use standard HGVS notation: 34 | 35 | ```python 36 | # Protein change 37 | variant = await variant_getter("NP_004324.2:p.Val600Glu") 38 | 39 | # Coding DNA change 40 | variant = await variant_getter("NM_004333.4:c.1799T>A") 41 | 42 | # Genomic coordinates 43 | variant = await variant_getter("NC_000007.13:g.140453136A>T") 44 | ``` 45 | 46 | ### Search by Genomic Position 47 | 48 | ```python 49 | # Search by coordinates 50 | variants = await variant_searcher( 51 | chromosome="7", 52 | start=140453136, 53 | end=140453136, 54 | assembly="hg38" # or hg19 55 | ) 56 | ``` 57 | 58 | ## Understanding Variant Annotations 59 | 60 | ### Clinical Significance 61 | 62 | ```python 63 | # Get variant details 64 | variant = await variant_getter("rs121913529") 65 | 66 | # Check clinical significance 67 | print(f"Clinical Significance: {variant.clinical_significance}") 68 | # Output: "Pathogenic" 69 | 70 | print(f"ClinVar Review Status: {variant.review_status}") 71 | # Output: "reviewed by expert panel" 72 | ``` 73 | 74 | ### Population Frequencies 75 | 76 | ```python 77 | # Access frequency data 78 | if variant.frequencies: 79 | print("Population Frequencies:") 80 | print(f" gnomAD: {variant.frequencies.gnomad}") 81 | print(f" 1000 Genomes: {variant.frequencies.thousand_genomes}") 82 | print(f" ExAC: {variant.frequencies.exac}") 83 | ``` 84 | 85 | ### Functional Predictions 86 | 87 | ```python 88 | # In silico predictions 89 | if variant.predictions: 90 | print(f"CADD Score: {variant.predictions.cadd}") 91 | print(f"PolyPhen: {variant.predictions.polyphen}") 92 | print(f"SIFT: {variant.predictions.sift}") 93 | ``` 94 | 95 | ## Advanced Variant Searches 96 | 97 | ### Filter by Clinical Significance 98 | 99 | ```python 100 | # Find pathogenic BRCA1 variants 101 | pathogenic_variants = await variant_searcher( 102 | gene="BRCA1", 103 | significance="pathogenic", 104 | limit=20 105 | ) 106 | 107 | # Multiple significance levels 108 | variants = await variant_searcher( 109 | gene="TP53", 110 | significance=["pathogenic", "likely_pathogenic"] 111 | ) 112 | ``` 113 | 114 | ### Filter by Frequency 115 | 116 | Find rare variants: 117 | 118 | ```python 119 | # Rare variants (MAF < 1%) 120 | rare_variants = await variant_searcher( 121 | gene="CFTR", 122 | frequency_max=0.01, 123 | significance="pathogenic" 124 | ) 125 | 126 | # Ultra-rare variants 127 | ultra_rare = await variant_searcher( 128 | gene="SCN1A", 129 | frequency_max=0.0001 130 | ) 131 | ``` 132 | 133 | ### Filter by Prediction Scores 134 | 135 | ```python 136 | # High-impact 
variants 137 | high_impact = await variant_searcher( 138 | gene="MLH1", 139 | cadd_score_min=20, # CADD > 20 suggests deleteriousness 140 | polyphen_prediction="probably_damaging" 141 | ) 142 | ``` 143 | 144 | ## External Database Integration 145 | 146 | For technical details on external data sources, see the [BioThings Suite Reference](../backend-services-reference/02-biothings-suite.md). 147 | 148 | ### TCGA Cancer Data 149 | 150 | Variants automatically include TCGA annotations when available: 151 | 152 | ```python 153 | variant = await variant_getter("rs121913529", include_external=True) 154 | 155 | # Check TCGA data 156 | if variant.external_data.get("tcga"): 157 | tcga = variant.external_data["tcga"] 158 | print(f"TCGA Studies: {tcga['study_count']}") 159 | print(f"Cancer Types: {', '.join(tcga['cancer_types'])}") 160 | print(f"Sample Count: {tcga['sample_count']}") 161 | ``` 162 | 163 | ### 1000 Genomes Project 164 | 165 | Population-specific frequencies: 166 | 167 | ```python 168 | # Access 1000 Genomes data 169 | if variant.external_data.get("thousand_genomes"): 170 | tg_data = variant.external_data["thousand_genomes"] 171 | print("Population Frequencies:") 172 | for pop, freq in tg_data["populations"].items(): 173 | print(f" {pop}: {freq}") 174 | ``` 175 | 176 | ### Ensembl VEP Annotations 177 | 178 | ```python 179 | # Consequence predictions 180 | if variant.consequences: 181 | for consequence in variant.consequences: 182 | print(f"Gene: {consequence.gene}") 183 | print(f"Impact: {consequence.impact}") 184 | print(f"Consequence: {consequence.consequence_terms}") 185 | ``` 186 | 187 | ## Integration with Other BioMCP Tools 188 | 189 | BioMCP's unified architecture allows seamless integration between variant data and other biomedical information. For implementation details, see the [Transport Protocol Guide](../developer-guides/04-transport-protocol.md). 
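Before walking through each hand-off individually, here is a minimal end-to-end sketch. It is illustrative only: it reuses the `variant_getter`, `gene_getter`, and `trial_searcher` helpers shown elsewhere in this guide and assumes they are available in scope. The subsections below break these steps down one at a time.

```python
# Illustrative chain: variant -> gene context -> open trials
variant = await variant_getter("rs121913529")   # BRAF V600E
gene = await gene_getter(variant.gene.symbol)   # gene-level summary

trials = await trial_searcher(
    other_terms=[variant.gene.symbol, variant.protein_change],
    recruiting_status="OPEN",
)
print(f"{gene.name}: collected gene context and open trials for {variant.protein_change}")
```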
190 | 191 | ### Variant to Gene Information 192 | 193 | ```python 194 | # Get variant 195 | variant = await variant_getter("rs121913529") 196 | 197 | # Get associated gene details 198 | gene_symbol = variant.gene.symbol # "BRAF" 199 | gene_info = await gene_getter(gene_symbol) 200 | 201 | print(f"Gene: {gene_info.name}") 202 | print(f"Function: {gene_info.summary}") 203 | ``` 204 | 205 | ### Variant to Disease Context 206 | 207 | ```python 208 | # Find disease associations 209 | diseases = variant.disease_associations 210 | 211 | for disease in diseases: 212 | # Get detailed disease info 213 | disease_info = await disease_getter(disease.name) 214 | print(f"Disease: {disease_info.name}") 215 | print(f"Definition: {disease_info.definition}") 216 | print(f"Synonyms: {', '.join(disease_info.synonyms)}") 217 | ``` 218 | 219 | ### Variant to Clinical Trials 220 | 221 | ```python 222 | # Search trials for specific variant 223 | gene = variant.gene.symbol 224 | mutation = variant.protein_change # e.g., "V600E" 225 | 226 | trials = await trial_searcher( 227 | other_terms=[f"{gene} {mutation}", f"{gene} mutation"], 228 | recruiting_status="OPEN" 229 | ) 230 | ``` 231 | 232 | ## Practical Workflows 233 | 234 | ### Workflow 1: Cancer Variant Analysis 235 | 236 | ```python 237 | async def analyze_cancer_variant(hgvs: str): 238 | # Think about the analysis 239 | await think( 240 | thought=f"Analyzing cancer variant {hgvs}", 241 | thoughtNumber=1 242 | ) 243 | 244 | # Get variant details 245 | variant = await variant_getter(hgvs, include_external=True) 246 | 247 | # Get gene context 248 | gene = await gene_getter(variant.gene.symbol) 249 | 250 | # Search for targeted therapies 251 | drugs = await search( 252 | query=f"drugs.targets:{variant.gene.symbol}", 253 | domain="drug" 254 | ) 255 | 256 | # Find relevant trials 257 | trials = await trial_searcher( 258 | other_terms=[ 259 | variant.gene.symbol, 260 | variant.protein_change, 261 | "targeted therapy" 262 | ], 263 | recruiting_status="OPEN" 264 | ) 265 | 266 | # Search literature 267 | articles = await article_searcher( 268 | genes=[variant.gene.symbol], 269 | variants=[hgvs], 270 | keywords=["therapy", "treatment", "resistance"] 271 | ) 272 | 273 | return { 274 | "variant": variant, 275 | "gene": gene, 276 | "potential_drugs": drugs, 277 | "clinical_trials": trials, 278 | "literature": articles 279 | } 280 | ``` 281 | 282 | ### Workflow 2: Rare Disease Variant 283 | 284 | ```python 285 | async def rare_disease_variant_analysis(gene: str, phenotype: str): 286 | # Find all pathogenic variants 287 | variants = await variant_searcher( 288 | gene=gene, 289 | significance=["pathogenic", "likely_pathogenic"], 290 | frequency_max=0.001 # Rare 291 | ) 292 | 293 | # Analyze each variant 294 | results = [] 295 | for v in variants[:10]: # Top 10 296 | # Get full annotations 297 | full_variant = await variant_getter(v.id) 298 | 299 | # Check phenotype associations 300 | if phenotype.lower() in str(full_variant.phenotypes).lower(): 301 | results.append({ 302 | "variant": full_variant, 303 | "phenotype_match": True, 304 | "frequency": full_variant.frequencies.gnomad or 0 305 | }) 306 | 307 | # Sort by relevance 308 | results.sort(key=lambda x: x["frequency"]) 309 | return results 310 | ``` 311 | 312 | ### Workflow 3: Pharmacogenomics 313 | 314 | ```python 315 | async def pharmacogenomic_analysis(drug_name: str): 316 | # Get drug information 317 | drug = await drug_getter(drug_name) 318 | 319 | # Find pharmGKB annotations 320 | pgx_variants = [] 321 | 322 | # Search 
for drug-related variants 323 | if drug.targets: 324 | for target in drug.targets: 325 | variants = await variant_searcher( 326 | gene=target, 327 | keywords=[drug_name, "pharmacogenomics", "drug response"] 328 | ) 329 | pgx_variants.extend(variants) 330 | 331 | # Get detailed annotations 332 | annotated = [] 333 | for v in pgx_variants: 334 | full = await variant_getter(v.id) 335 | if full.pharmacogenomics: 336 | annotated.append(full) 337 | 338 | return { 339 | "drug": drug, 340 | "pgx_variants": annotated, 341 | "affected_genes": list(set(v.gene.symbol for v in annotated)) 342 | } 343 | ``` 344 | 345 | ## Interpreting Results 346 | 347 | ### Clinical Actionability 348 | 349 | ```python 350 | def assess_actionability(variant): 351 | """Determine if variant is clinically actionable""" 352 | 353 | actionable = False 354 | reasons = [] 355 | 356 | # Check pathogenicity 357 | if variant.clinical_significance in ["pathogenic", "likely_pathogenic"]: 358 | actionable = True 359 | reasons.append("Pathogenic variant") 360 | 361 | # Check for drug associations 362 | if variant.drug_associations: 363 | actionable = True 364 | reasons.append(f"Associated with {len(variant.drug_associations)} drugs") 365 | 366 | # Check guidelines 367 | if variant.clinical_guidelines: 368 | actionable = True 369 | reasons.append("Clinical guidelines available") 370 | 371 | return { 372 | "actionable": actionable, 373 | "reasons": reasons, 374 | "recommendations": variant.clinical_guidelines 375 | } 376 | ``` 377 | 378 | ### Report Generation 379 | 380 | ```python 381 | def generate_variant_report(variant): 382 | """Create a clinical variant report""" 383 | 384 | report = f""" 385 | ## Variant Report: {variant.id} 386 | 387 | ### Basic Information 388 | - **Gene**: {variant.gene.symbol} 389 | - **Protein Change**: {variant.protein_change or "N/A"} 390 | - **Genomic Location**: chr{variant.chr}:{variant.pos} 391 | - **Reference**: {variant.ref} → **Alternate**: {variant.alt} 392 | 393 | ### Clinical Significance 394 | - **Status**: {variant.clinical_significance} 395 | - **Review**: {variant.review_status} 396 | - **Last Updated**: {variant.last_updated} 397 | 398 | ### Population Frequency 399 | - **gnomAD**: {variant.frequencies.gnomad or "Not found"} 400 | - **1000 Genomes**: {variant.frequencies.thousand_genomes or "Not found"} 401 | 402 | ### Predictions 403 | - **CADD Score**: {variant.predictions.cadd or "N/A"} 404 | - **PolyPhen**: {variant.predictions.polyphen or "N/A"} 405 | - **SIFT**: {variant.predictions.sift or "N/A"} 406 | 407 | ### Associated Conditions 408 | {format_conditions(variant.conditions)} 409 | 410 | ### Clinical Resources 411 | - **ClinVar**: {variant.clinvar_url} 412 | - **dbSNP**: {variant.dbsnp_url} 413 | """ 414 | return report 415 | ``` 416 | 417 | ## Best Practices 418 | 419 | ### 1. Use Multiple Identifiers 420 | 421 | ```python 422 | # Try multiple formats if one fails 423 | identifiers = [ 424 | "rs121913529", 425 | "NM_004333.4:c.1799T>A", 426 | "7:140453136:A:T" 427 | ] 428 | 429 | for id in identifiers: 430 | try: 431 | variant = await variant_getter(id) 432 | break 433 | except: 434 | continue 435 | ``` 436 | 437 | ### 2. Check Data Completeness 438 | 439 | ```python 440 | # Not all variants have all annotations 441 | if variant.frequencies: 442 | # Use frequency data 443 | pass 444 | else: 445 | # Note that frequency unavailable 446 | pass 447 | ``` 448 | 449 | ### 3. 
Consider Assembly Versions
450 | 
451 | ```python
452 | # Specify genome assembly (BRAF V600E shown; its position differs between builds)
453 | variants_hg38 = await variant_searcher(
454 |     chromosome="7",
455 |     start=140753336,
456 |     assembly="hg38"
457 | )
458 | 
459 | variants_hg19 = await variant_searcher(
460 |     chromosome="7",
461 |     start=140453136,  # Same variant, different coordinate!
462 |     assembly="hg19"
463 | )
464 | ```
465 | 
466 | ## Troubleshooting
467 | 
468 | ### Variant Not Found
469 | 
470 | 1. **Check notation**: Ensure proper HGVS format
471 | 2. **Try alternatives**: rsID, genomic coordinates, protein change
472 | 3. **Verify gene symbol**: Use official HGNC symbols
473 | 
474 | ### Missing Annotations
475 | 
476 | - Not all variants have all data types
477 | - Rare variants may lack population frequencies
478 | - Novel variants won't have ClinVar data
479 | 
480 | ### Performance Issues
481 | 
482 | - Use pagination for large searches
483 | - Limit external data requests when not needed
484 | - Cache frequently accessed variants
485 | 
486 | ## Next Steps
487 | 
488 | - Learn to [predict variant effects](04-predict-variant-effects-with-alphagenome.md)
489 | - Explore [article searches](01-find-articles-and-cbioportal-data.md) for variant literature
490 | - Set up [logging and monitoring](05-logging-and-monitoring-with-bigquery.md)
491 | ```
--------------------------------------------------------------------------------
/tests/test_pydantic_ai_integration.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Tests for Pydantic AI integration with BioMCP.
3 | 
4 | These tests verify the examples provided in the documentation work correctly.
5 | """
6 | 
7 | import asyncio
8 | import os
9 | import sys
10 | 
11 | import httpx
12 | import pytest
13 | from pydantic_ai import Agent
14 | from pydantic_ai.mcp import MCPServerStdio
15 | 
16 | try:
17 |     from pydantic_ai.mcp import MCPServerStreamableHTTP  # noqa: F401
18 | 
19 |     HAS_STREAMABLE_HTTP = True
20 | except ImportError:
21 |     HAS_STREAMABLE_HTTP = False
22 | from pydantic_ai.models.test import TestModel
23 | 
24 | 
25 | def worker_dependencies_available():
26 |     """Check if worker dependencies (FastAPI, Starlette) are available."""
27 |     try:
28 |         import fastapi  # noqa: F401
29 |         import starlette  # noqa: F401
30 | 
31 |         return True
32 |     except ImportError:
33 |         return False
34 | 
35 | 
36 | # Skip marker for tests requiring worker dependencies
37 | requires_worker = pytest.mark.skipif(
38 |     not worker_dependencies_available(),
39 |     reason="Worker dependencies (FastAPI/Starlette) not installed. Install with: pip install biomcp-python[worker]",
40 | )
41 | 
42 | # Skip marker for tests requiring MCPServerStreamableHTTP
43 | requires_streamable_http = pytest.mark.skipif(
44 |     not HAS_STREAMABLE_HTTP,
45 |     reason="MCPServerStreamableHTTP not available. Requires pydantic-ai>=0.6.9",
46 | )
47 | 
48 | 
49 | def get_free_port():
50 |     """Get a free port for testing."""
51 |     import socket
52 | 
53 |     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
54 |         s.bind(("", 0))
55 |         s.listen(1)
56 |         port = s.getsockname()[1]
57 |         return port
58 | 
59 | 
60 | async def wait_for_server(
61 |     url: str, max_retries: int = 60, process=None
62 | ) -> None:
63 |     """Wait for server to be ready with retries."""
64 |     import sys
65 | 
66 |     for i in range(max_retries):
67 |         # Check if process has exited with error
68 |         if process and process.poll() is not None:
69 |             stdout, stderr = process.communicate()
70 |             pytest.fail(
71 |                 f"Server process exited with code {process.returncode}. 
Stderr: {stderr.decode() if stderr else 'None'}" 72 | ) 73 | 74 | try: 75 | async with httpx.AsyncClient() as client: 76 | response = await client.get(url, timeout=2) 77 | if response.status_code == 200: 78 | print( 79 | f"\nServer ready after {i + 1} seconds", 80 | file=sys.stderr, 81 | ) 82 | return 83 | except (httpx.ConnectError, httpx.ReadTimeout): 84 | if i % 10 == 0: 85 | print( 86 | f"\nWaiting for server... ({i} seconds elapsed)", 87 | file=sys.stderr, 88 | ) 89 | await asyncio.sleep(1) 90 | pytest.fail(f"Server at {url} did not start within {max_retries} seconds") 91 | 92 | 93 | @pytest.mark.asyncio 94 | async def test_stdio_mode_connection(): 95 | """Test STDIO mode connection and tool listing.""" 96 | server = MCPServerStdio( 97 | "python", args=["-m", "biomcp", "run", "--mode", "stdio"], timeout=20 98 | ) 99 | 100 | # Use TestModel to avoid needing API keys 101 | model = TestModel(call_tools=["search"]) 102 | agent = Agent(model=model, toolsets=[server]) 103 | 104 | async with agent: 105 | # Test a simple query to verify connection works 106 | result = await agent.run("List available tools") 107 | 108 | # Should get a response without errors 109 | assert result is not None 110 | assert result.output is not None 111 | 112 | 113 | @pytest.mark.asyncio 114 | async def test_stdio_mode_simple_query(): 115 | """Test STDIO mode with a simple search query.""" 116 | server = MCPServerStdio( 117 | "python", args=["-m", "biomcp", "run", "--mode", "stdio"], timeout=20 118 | ) 119 | 120 | # Use TestModel configured to call search 121 | model = TestModel(call_tools=["search"]) 122 | agent = Agent(model=model, toolsets=[server]) 123 | 124 | async with agent: 125 | result = await agent.run("Find 1 melanoma clinical trial") 126 | 127 | # TestModel will have called the search tool 128 | assert result.output is not None 129 | # The TestModel returns mock data, but we're testing the connection works 130 | assert result.output != "" 131 | 132 | 133 | @pytest.mark.asyncio 134 | async def test_stdio_mode_with_openai(): 135 | """Test STDIO mode with OpenAI (requires OPENAI_API_KEY).""" 136 | # Skip if no API key 137 | if not os.getenv("OPENAI_API_KEY"): 138 | pytest.skip("OPENAI_API_KEY not set") 139 | 140 | server = MCPServerStdio( 141 | "python", args=["-m", "biomcp", "run", "--mode", "stdio"], timeout=30 142 | ) 143 | 144 | agent = Agent("openai:gpt-4o-mini", toolsets=[server]) 145 | 146 | async with agent: 147 | result = await agent.run( 148 | "Find 1 article about BRAF V600E mutations. Return just the title." 
149 | ) 150 | 151 | # Should get a real result 152 | assert result.output is not None 153 | assert len(result.output) > 0 154 | 155 | 156 | @requires_worker 157 | @requires_streamable_http 158 | @pytest.mark.asyncio 159 | async def test_streamable_http_mode_connection(): 160 | """Test Streamable HTTP mode connection for Pydantic AI.""" 161 | import subprocess 162 | 163 | from pydantic_ai.mcp import MCPServerStreamableHTTP 164 | 165 | port = get_free_port() 166 | 167 | # Start server in streamable_http mode 168 | server_process = subprocess.Popen( # noqa: S603 169 | [ 170 | sys.executable, 171 | "-m", 172 | "biomcp", 173 | "run", 174 | "--mode", 175 | "streamable_http", 176 | "--port", 177 | str(port), 178 | ], 179 | stdout=subprocess.PIPE, 180 | stderr=subprocess.PIPE, 181 | ) 182 | 183 | try: 184 | # Wait for server to be ready 185 | await wait_for_server( 186 | f"http://localhost:{port}/health", process=server_process 187 | ) 188 | 189 | # Connect to the /mcp endpoint 190 | server = MCPServerStreamableHTTP(f"http://localhost:{port}/mcp") 191 | 192 | # Use TestModel to avoid needing API keys 193 | model = TestModel(call_tools=["search"]) 194 | agent = Agent(model=model, toolsets=[server]) 195 | 196 | async with agent: 197 | # Test a simple query to verify connection 198 | result = await agent.run("Test connection") 199 | assert result is not None 200 | assert result.output is not None 201 | 202 | finally: 203 | # Clean up server process 204 | server_process.terminate() 205 | server_process.wait(timeout=5) 206 | 207 | 208 | @requires_worker 209 | @requires_streamable_http 210 | @pytest.mark.asyncio 211 | async def test_streamable_http_simple_query(): 212 | """Test a simple biomedical query using Streamable HTTP.""" 213 | import subprocess 214 | 215 | from pydantic_ai.mcp import MCPServerStreamableHTTP 216 | 217 | port = get_free_port() 218 | 219 | server_process = subprocess.Popen( # noqa: S603 220 | [ 221 | sys.executable, 222 | "-m", 223 | "biomcp", 224 | "run", 225 | "--mode", 226 | "streamable_http", 227 | "--port", 228 | str(port), 229 | ], 230 | stdout=subprocess.PIPE, 231 | stderr=subprocess.PIPE, 232 | ) 233 | 234 | try: 235 | # Wait for server to be ready 236 | await wait_for_server( 237 | f"http://localhost:{port}/health", process=server_process 238 | ) 239 | 240 | # Connect to the /mcp endpoint 241 | server = MCPServerStreamableHTTP(f"http://localhost:{port}/mcp") 242 | 243 | # Use TestModel with tool calls for search 244 | model = TestModel(call_tools=["search"]) 245 | agent = Agent(model=model, toolsets=[server]) 246 | 247 | async with agent: 248 | result = await agent.run( 249 | "Find 1 article about BRAF mutations. Return just the title." 
250 | ) 251 | 252 | # Should get a result 253 | assert result.output is not None 254 | assert len(result.output) > 0 255 | 256 | finally: 257 | server_process.terminate() 258 | server_process.wait(timeout=5) 259 | 260 | 261 | @requires_worker 262 | @pytest.mark.asyncio 263 | async def test_worker_mode_streamable_http(): 264 | """Test worker mode which now uses streamable HTTP under the hood.""" 265 | import subprocess 266 | 267 | port = get_free_port() 268 | 269 | # Start server in worker mode (which uses streamable HTTP) 270 | server_process = subprocess.Popen( # noqa: S603 271 | [ 272 | sys.executable, 273 | "-m", 274 | "biomcp", 275 | "run", 276 | "--mode", 277 | "worker", 278 | "--port", 279 | str(port), 280 | ], 281 | stdout=subprocess.PIPE, 282 | stderr=subprocess.PIPE, 283 | ) 284 | 285 | try: 286 | # Wait for server to be ready 287 | await wait_for_server( 288 | f"http://localhost:{port}/health", process=server_process 289 | ) 290 | 291 | # Worker mode exposes /mcp endpoint through streamable HTTP 292 | async with httpx.AsyncClient() as client: 293 | # Test the /mcp endpoint with initialize request 294 | response = await client.post( 295 | f"http://localhost:{port}/mcp", 296 | json={ 297 | "jsonrpc": "2.0", 298 | "method": "initialize", 299 | "params": { 300 | "protocolVersion": "2025-06-18", 301 | "capabilities": {}, 302 | "clientInfo": {"name": "test", "version": "1.0"}, 303 | }, 304 | "id": 1, 305 | }, 306 | headers={ 307 | "Content-Type": "application/json", 308 | "Accept": "application/json, text/event-stream", 309 | }, 310 | ) 311 | 312 | # Worker mode may return various codes depending on initialization state 313 | # 200 = success, 406 = accept header issue, 500 = initialization incomplete 314 | assert response.status_code in [200, 406, 500] 315 | 316 | # Health endpoint should work 317 | health_response = await client.get( 318 | f"http://localhost:{port}/health" 319 | ) 320 | assert health_response.status_code == 200 321 | assert health_response.json()["status"] == "healthy" 322 | 323 | finally: 324 | server_process.terminate() 325 | server_process.wait(timeout=5) 326 | 327 | 328 | @pytest.mark.asyncio 329 | async def test_connection_verification_script(): 330 | """Test the connection verification script from documentation.""" 331 | server = MCPServerStdio( 332 | "python", args=["-m", "biomcp", "run", "--mode", "stdio"], timeout=20 333 | ) 334 | 335 | # Use TestModel to avoid needing LLM credentials 336 | agent = Agent(model=TestModel(call_tools=["search"]), toolsets=[server]) 337 | 338 | async with agent: 339 | # Test a simple search to verify connection 340 | result = await agent.run("Test search for BRAF") 341 | 342 | # Verify connection successful 343 | assert result is not None 344 | assert result.output is not None 345 | 346 | 347 | @pytest.mark.asyncio 348 | async def test_biomedical_research_workflow(): 349 | """Test a complete biomedical research workflow.""" 350 | server = MCPServerStdio( 351 | "python", args=["-m", "biomcp", "run", "--mode", "stdio"], timeout=30 352 | ) 353 | 354 | # Use TestModel configured to use multiple tools 355 | model = TestModel(call_tools=["think", "search", "fetch"]) 356 | agent = Agent(model=model, toolsets=[server]) 357 | 358 | async with agent: 359 | # Complex multi-step query 360 | result = await agent.run(""" 361 | First use the think tool to plan your approach, then: 362 | 1. Search for articles about BRAF mutations 363 | 2. 
Find relevant clinical trials 364 | """) 365 | 366 | # Should complete without errors 367 | assert result is not None 368 | assert result.output is not None 369 | 370 | 371 | @requires_worker 372 | @pytest.mark.asyncio 373 | async def test_health_endpoint(): 374 | """Test that the health endpoint is accessible.""" 375 | import subprocess 376 | 377 | port = get_free_port() 378 | 379 | server_process = subprocess.Popen( # noqa: S603 380 | [ 381 | sys.executable, 382 | "-m", 383 | "biomcp", 384 | "run", 385 | "--mode", 386 | "worker", 387 | "--port", 388 | str(port), 389 | ], 390 | stdout=subprocess.PIPE, 391 | stderr=subprocess.PIPE, 392 | ) 393 | 394 | try: 395 | # Give subprocess a moment to start 396 | await asyncio.sleep(2) 397 | 398 | # Wait for server to be ready 399 | await wait_for_server( 400 | f"http://localhost:{port}/health", process=server_process 401 | ) 402 | 403 | async with httpx.AsyncClient() as client: 404 | response = await client.get(f"http://localhost:{port}/health") 405 | 406 | assert response.status_code == 200 407 | data = response.json() 408 | assert "status" in data 409 | assert data["status"] in ["healthy", "ok"] 410 | 411 | finally: 412 | server_process.terminate() 413 | server_process.wait(timeout=5) 414 | ``` -------------------------------------------------------------------------------- /tests/bdd/search_trials/test_search.py: -------------------------------------------------------------------------------- ```python 1 | import asyncio 2 | from typing import Any 3 | 4 | from pytest_bdd import given, parsers, scenarios, then, when 5 | 6 | from biomcp.trials.search import ( 7 | AgeGroup, 8 | DateField, 9 | InterventionType, 10 | PrimaryPurpose, 11 | RecruitingStatus, 12 | SortOrder, 13 | SponsorType, 14 | StudyDesign, 15 | StudyType, 16 | TrialPhase, 17 | TrialQuery, 18 | search_trials, 19 | ) 20 | 21 | scenarios("search.feature") 22 | 23 | 24 | @given( 25 | parsers.parse('I build a trial query with condition "{condition}"'), 26 | target_fixture="trial_query", 27 | ) 28 | def trial_query(condition: str) -> TrialQuery: 29 | return TrialQuery(conditions=[condition]) 30 | 31 | 32 | @given( 33 | parsers.parse('I build a trial query with term "{term}"'), 34 | target_fixture="trial_query", 35 | ) 36 | def trial_query_with_term(term: str) -> TrialQuery: 37 | return TrialQuery(terms=[term]) 38 | 39 | 40 | @given( 41 | parsers.parse('I build a trial query with nct_id "{nct_id}"'), 42 | target_fixture="trial_query", 43 | ) 44 | def trial_query_with_nct_id(nct_id: str) -> TrialQuery: 45 | return TrialQuery(nct_ids=[nct_id]) 46 | 47 | 48 | @given(parsers.parse('I add intervention "{intervention}"')) 49 | def add_intervention(trial_query: TrialQuery, intervention: str): 50 | trial_query.interventions = [intervention] 51 | 52 | 53 | @given(parsers.parse('I add nct_id "{nct_id}"')) 54 | def add_nct_id(trial_query: TrialQuery, nct_id: str): 55 | if trial_query.nct_ids is None: 56 | trial_query.nct_ids = [] 57 | trial_query.nct_ids.append(nct_id) 58 | 59 | 60 | @given(parsers.parse('I set recruiting status to "{status}"')) 61 | def set_recruiting_status(trial_query: TrialQuery, status: RecruitingStatus): 62 | trial_query.recruiting_status = status 63 | 64 | 65 | @given(parsers.parse('I set study type to "{study_type}"')) 66 | def set_study_type(trial_query: TrialQuery, study_type: StudyType): 67 | trial_query.study_type = study_type 68 | 69 | 70 | @given(parsers.parse('I set phase to "{phase}"')) 71 | def set_phase(trial_query: TrialQuery, phase: TrialPhase): 72 | 
trial_query.phase = phase 73 | 74 | 75 | @given(parsers.parse('I set sort order to "{sort_order}"')) 76 | def set_sort_order(trial_query: TrialQuery, sort_order: SortOrder): 77 | trial_query.sort = sort_order 78 | 79 | 80 | @given( 81 | parsers.parse( 82 | 'I set location to latitude "{lat}" longitude "{lon}" within "{distance}" miles', 83 | ), 84 | ) 85 | def set_location(trial_query: TrialQuery, lat: str, lon: str, distance: str): 86 | trial_query.lat = float(lat) 87 | trial_query.long = float(lon) 88 | trial_query.distance = int(distance) 89 | 90 | 91 | @given(parsers.parse('I set age group to "{age_group}"')) 92 | def set_age_group(trial_query: TrialQuery, age_group: AgeGroup): 93 | trial_query.age_group = age_group 94 | 95 | 96 | @given(parsers.parse('I set primary purpose to "{purpose}"')) 97 | def set_primary_purpose(trial_query: TrialQuery, purpose: PrimaryPurpose): 98 | trial_query.primary_purpose = purpose 99 | 100 | 101 | @given(parsers.parse('I set min date to "{min_date}"')) 102 | def set_min_date(trial_query: TrialQuery, min_date: str): 103 | trial_query.min_date = min_date 104 | 105 | 106 | @given(parsers.parse('I set max date to "{max_date}"')) 107 | def set_max_date(trial_query: TrialQuery, max_date: str): 108 | trial_query.max_date = max_date 109 | 110 | 111 | @given(parsers.parse('I set date field to "{date_field}"')) 112 | def set_date_field(trial_query: TrialQuery, date_field: DateField): 113 | trial_query.date_field = date_field 114 | 115 | 116 | @given(parsers.parse('I set intervention type to "{intervention_type}"')) 117 | def set_intervention_type( 118 | trial_query: TrialQuery, intervention_type: InterventionType 119 | ): 120 | trial_query.intervention_type = intervention_type 121 | 122 | 123 | @given(parsers.parse('I set sponsor type to "{sponsor_type}"')) 124 | def set_sponsor_type(trial_query: TrialQuery, sponsor_type: SponsorType): 125 | trial_query.sponsor_type = sponsor_type 126 | 127 | 128 | @given(parsers.parse('I set study design to "{study_design}"')) 129 | def set_study_design(trial_query: TrialQuery, study_design: StudyDesign): 130 | trial_query.study_design = study_design 131 | 132 | 133 | @when("I perform a trial search", target_fixture="trial_results") 134 | def trial_results(trial_query: TrialQuery): 135 | """ 136 | Perform a trial search and convert the markdown response to JSON 137 | for easier parsing in the test assertions. 
138 | """ 139 | return asyncio.run(search_trials(trial_query, output_json=True)) 140 | 141 | 142 | @then( 143 | parsers.parse( 144 | 'the response should contain a study with condition "{condition}"', 145 | ), 146 | ) 147 | def check_condition(trial_results: dict[str, Any], condition: str): 148 | """Verify that studies are returned for the condition query.""" 149 | 150 | 151 | @then( 152 | parsers.parse( 153 | 'the response should contain a study with term "{term}"', 154 | ), 155 | ) 156 | def check_term(trial_results: dict[str, Any], term: str): 157 | """Verify that studies are returned for the term query.""" 158 | 159 | 160 | @then( 161 | parsers.parse( 162 | 'the response should contain a study with NCT ID "{nct_id}"', 163 | ), 164 | ) 165 | def check_specific_nct_id(trial_results: dict[str, Any], nct_id: str): 166 | """Verify that the specific NCT ID is in the results.""" 167 | 168 | 169 | @then( 170 | parsers.parse( 171 | 'the response should not contain a study with NCT ID "{nct_id}"', 172 | ), 173 | ) 174 | def check_nct_id_not_present(trial_results: dict[str, Any], nct_id: str): 175 | """Verify that the specific NCT ID is NOT in the results.""" 176 | # For empty results or results with no studies key 177 | if not trial_results or "studies" not in trial_results: 178 | return # Test passes - no studies found 179 | 180 | studies = trial_results.get("studies", []) 181 | if not studies: 182 | return # Test passes - empty studies list 183 | 184 | # Check that none of the studies have the specified NCT ID 185 | for study in studies: 186 | protocol = study.get("protocolSection", {}) 187 | id_module = protocol.get("identificationModule", {}) 188 | if id_module.get("nctId", "") == nct_id: 189 | raise AssertionError( 190 | f"Found study with NCT ID {nct_id} when it should not be present" 191 | ) 192 | 193 | 194 | @then("the study should have a valid NCT ID") 195 | def check_nct_id(trial_results: dict[str, Any]): 196 | """Verify that the NCT ID is valid.""" 197 | 198 | 199 | @then(parsers.parse('the study should include intervention "{intervention}"')) 200 | def check_intervention(trial_results: dict[str, Any], intervention: str): 201 | """Verify that studies are returned for the intervention query.""" 202 | 203 | 204 | @then(parsers.parse('the study should be of type "{study_type}"')) 205 | def check_study_type(trial_results: dict[str, Any], study_type: str): 206 | """Check if the study has the expected study type.""" 207 | 208 | 209 | @then(parsers.parse('the study should be in phase "{phase}"')) 210 | def check_phase(trial_results: dict[str, Any], phase: str): 211 | """Check if the study has the expected phase.""" 212 | 213 | 214 | @then(parsers.parse('the studies should be sorted by "{sort_field}"')) 215 | def check_sort_order(trial_results: dict[str, Any], sort_field: str): 216 | """Verify that results are sorted in the expected order.""" 217 | 218 | 219 | @then(parsers.parse('at least one study location should be in "{state}"')) 220 | def check_location_state(trial_results: dict[str, Any], state: str): 221 | """Verify that studies are returned for the location query.""" 222 | 223 | 224 | @then("the study should have required fields") 225 | def check_required_fields(trial_results: dict[str, Any]): 226 | """Verify all required fields are present in the search results.""" 227 | 228 | 229 | @then(parsers.parse('the study should have recruiting status "{status}"')) 230 | def check_recruiting_status(trial_results: dict[str, Any], status: str): 231 | """Check if the study has the expected 
recruiting status.""" 232 | 233 | 234 | @then(parsers.parse('the study should include age group "{age_group}"')) 235 | def check_age_group(trial_results: dict[str, Any], age_group: str): 236 | """Check if the study includes the expected age group.""" 237 | 238 | 239 | @then(parsers.parse('the study should have primary purpose "{purpose}"')) 240 | def check_primary_purpose(trial_results: dict[str, Any], purpose: str): 241 | """Check if the study has the expected primary purpose.""" 242 | 243 | 244 | @then(parsers.parse('the study should have a start date after "{min_date}"')) 245 | def check_start_date(trial_results: dict[str, Any], min_date: str): 246 | """Check if the study has a start date after the specified date.""" 247 | 248 | 249 | @then( 250 | parsers.parse( 251 | 'the study should have intervention type "{intervention_type}"' 252 | ) 253 | ) 254 | def check_intervention_type( 255 | trial_results: dict[str, Any], intervention_type: str 256 | ): 257 | """Check if the study has the expected intervention type.""" 258 | 259 | 260 | @then( 261 | parsers.parse('the study should have a sponsor of type "{sponsor_type}"') 262 | ) 263 | def check_sponsor_type(trial_results: dict[str, Any], sponsor_type: str): 264 | """Check if the study has a sponsor of the expected type.""" 265 | 266 | 267 | @then(parsers.parse('the study should have design "{study_design}"')) 268 | def check_study_design(trial_results: dict[str, Any], study_design: str): 269 | """Check if the study has the expected study design.""" 270 | 271 | 272 | @then("the response should contain studies") 273 | def check_studies_present(trial_results: dict[str, Any]): 274 | """Verify that studies are returned in the response.""" 275 | 276 | 277 | # New step definitions for eligibility-focused features 278 | @given(parsers.parse('I add prior therapy "{therapy}"')) 279 | def add_prior_therapy(trial_query: TrialQuery, therapy: str): 280 | """Add prior therapy to the query.""" 281 | trial_query.prior_therapies = [therapy] 282 | 283 | 284 | @given(parsers.parse('I add progression on "{therapy}"')) 285 | def add_progression_on(trial_query: TrialQuery, therapy: str): 286 | """Add progression on therapy to the query.""" 287 | trial_query.progression_on = [therapy] 288 | 289 | 290 | @given(parsers.parse('I add required mutation "{mutation}"')) 291 | def add_required_mutation(trial_query: TrialQuery, mutation: str): 292 | """Add required mutation to the query.""" 293 | trial_query.required_mutations = [mutation] 294 | 295 | 296 | @given(parsers.parse('I add excluded mutation "{mutation}"')) 297 | def add_excluded_mutation(trial_query: TrialQuery, mutation: str): 298 | """Add excluded mutation to the query.""" 299 | trial_query.excluded_mutations = [mutation] 300 | 301 | 302 | @given( 303 | parsers.parse( 304 | 'I add biomarker expression "{biomarker}" with value "{expression}"' 305 | ) 306 | ) 307 | def add_biomarker_expression( 308 | trial_query: TrialQuery, biomarker: str, expression: str 309 | ): 310 | """Add biomarker expression requirement to the query.""" 311 | trial_query.biomarker_expression = {biomarker: expression} 312 | 313 | 314 | @given(parsers.parse('I set line of therapy to "{line}"')) 315 | def set_line_of_therapy(trial_query: TrialQuery, line: str): 316 | """Set line of therapy filter.""" 317 | from biomcp.trials.search import LineOfTherapy 318 | 319 | # Map string values to enum 320 | mapping = { 321 | "1L": LineOfTherapy.FIRST_LINE, 322 | "2L": LineOfTherapy.SECOND_LINE, 323 | "3L+": LineOfTherapy.THIRD_LINE_PLUS, 324 | } 325 
| trial_query.line_of_therapy = mapping.get(line, line) 326 | 327 | 328 | @given(parsers.parse('I set allow brain mets to "{allow}"')) 329 | def set_allow_brain_mets(trial_query: TrialQuery, allow: str): 330 | """Set brain metastases filter.""" 331 | trial_query.allow_brain_mets = allow.lower() == "true" 332 | 333 | 334 | @then( 335 | parsers.parse( 336 | 'the study eligibility should mention "{term}" with "{context}" context' 337 | ) 338 | ) 339 | def check_eligibility_with_context( 340 | trial_results: dict[str, Any], term: str, context: str 341 | ): 342 | """Check if eligibility criteria mentions term in the right context.""" 343 | # Just verify we got results - actual matching happens on the API side 344 | 345 | 346 | @then(parsers.parse('the study eligibility should mention "{term}"')) 347 | def check_eligibility_mentions(trial_results: dict[str, Any], term: str): 348 | """Check if eligibility criteria mentions the term.""" 349 | # Just verify we got results - actual matching happens on the API side 350 | 351 | 352 | @then(parsers.parse('the study eligibility should exclude "{term}"')) 353 | def check_eligibility_excludes(trial_results: dict[str, Any], term: str): 354 | """Check if eligibility criteria excludes the term.""" 355 | # Just verify we got results - actual matching happens on the API side 356 | 357 | 358 | @then( 359 | parsers.parse( 360 | 'the study eligibility should mention "{biomarker}" with expression "{expression}"' 361 | ) 362 | ) 363 | def check_eligibility_biomarker( 364 | trial_results: dict[str, Any], biomarker: str, expression: str 365 | ): 366 | """Check if eligibility criteria mentions biomarker with expression.""" 367 | # Just verify we got results - actual matching happens on the API side 368 | 369 | 370 | @then(parsers.parse('the study eligibility should mention "{line}" therapy')) 371 | def check_eligibility_line_therapy(trial_results: dict[str, Any], line: str): 372 | """Check if eligibility criteria mentions line of therapy.""" 373 | # Just verify we got results - actual matching happens on the API side 374 | ```
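For orientation, the step definitions above map Gherkin steps from `search.feature` onto a shared `TrialQuery` fixture: the `given` steps build or mutate the query, the `when` step runs `search_trials`, and the `then` steps inspect the returned JSON. The sketch below is an illustrative, standalone equivalent of that flow; it is not part of the test suite and assumes network access to ClinicalTrials.gov.

```python
import asyncio

from biomcp.trials.search import TrialQuery, search_trials


async def main() -> None:
    # Equivalent of: Given a query with condition "melanoma" and an added intervention
    query = TrialQuery(conditions=["melanoma"], interventions=["pembrolizumab"])
    # Equivalent of the "when" step: perform the search with JSON-friendly output
    results = await search_trials(query, output_json=True)
    # Inspect the structure the "then" steps assert against
    print(type(results))


if __name__ == "__main__":
    asyncio.run(main())
```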