This is page 13 of 19. Use http://codebase.md/genomoncology/biomcp?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .github │ ├── actions │ │ └── setup-python-env │ │ └── action.yml │ ├── dependabot.yml │ └── workflows │ ├── ci.yml │ ├── deploy-docs.yml │ ├── main.yml.disabled │ ├── on-release-main.yml │ └── validate-codecov-config.yml ├── .gitignore ├── .pre-commit-config.yaml ├── BIOMCP_DATA_FLOW.md ├── CHANGELOG.md ├── CNAME ├── codecov.yaml ├── docker-compose.yml ├── Dockerfile ├── docs │ ├── apis │ │ ├── error-codes.md │ │ ├── overview.md │ │ └── python-sdk.md │ ├── assets │ │ ├── biomcp-cursor-locations.png │ │ ├── favicon.ico │ │ ├── icon.png │ │ ├── logo.png │ │ ├── mcp_architecture.txt │ │ └── remote-connection │ │ ├── 00_connectors.png │ │ ├── 01_add_custom_connector.png │ │ ├── 02_connector_enabled.png │ │ ├── 03_connect_to_biomcp.png │ │ ├── 04_select_google_oauth.png │ │ └── 05_success_connect.png │ ├── backend-services-reference │ │ ├── 01-overview.md │ │ ├── 02-biothings-suite.md │ │ ├── 03-cbioportal.md │ │ ├── 04-clinicaltrials-gov.md │ │ ├── 05-nci-cts-api.md │ │ ├── 06-pubtator3.md │ │ └── 07-alphagenome.md │ ├── blog │ │ ├── ai-assisted-clinical-trial-search-analysis.md │ │ ├── images │ │ │ ├── deep-researcher-video.png │ │ │ ├── researcher-announce.png │ │ │ ├── researcher-drop-down.png │ │ │ ├── researcher-prompt.png │ │ │ ├── trial-search-assistant.png │ │ │ └── what_is_biomcp_thumbnail.png │ │ └── researcher-persona-resource.md │ ├── changelog.md │ ├── CNAME │ ├── concepts │ │ ├── 01-what-is-biomcp.md │ │ ├── 02-the-deep-researcher-persona.md │ │ └── 03-sequential-thinking-with-the-think-tool.md │ ├── developer-guides │ │ ├── 01-server-deployment.md │ │ ├── 02-contributing-and-testing.md │ │ ├── 03-third-party-endpoints.md │ │ ├── 04-transport-protocol.md │ │ ├── 05-error-handling.md │ │ ├── 06-http-client-and-caching.md │ │ ├── 07-performance-optimizations.md │ │ └── generate_endpoints.py │ ├── faq-condensed.md │ 
├── FDA_SECURITY.md │ ├── genomoncology.md │ ├── getting-started │ │ ├── 01-quickstart-cli.md │ │ ├── 02-claude-desktop-integration.md │ │ └── 03-authentication-and-api-keys.md │ ├── how-to-guides │ │ ├── 01-find-articles-and-cbioportal-data.md │ │ ├── 02-find-trials-with-nci-and-biothings.md │ │ ├── 03-get-comprehensive-variant-annotations.md │ │ ├── 04-predict-variant-effects-with-alphagenome.md │ │ ├── 05-logging-and-monitoring-with-bigquery.md │ │ └── 06-search-nci-organizations-and-interventions.md │ ├── index.md │ ├── policies.md │ ├── reference │ │ ├── architecture-diagrams.md │ │ ├── quick-architecture.md │ │ ├── quick-reference.md │ │ └── visual-architecture.md │ ├── robots.txt │ ├── stylesheets │ │ ├── announcement.css │ │ └── extra.css │ ├── troubleshooting.md │ ├── tutorials │ │ ├── biothings-prompts.md │ │ ├── claude-code-biomcp-alphagenome.md │ │ ├── nci-prompts.md │ │ ├── openfda-integration.md │ │ ├── openfda-prompts.md │ │ ├── pydantic-ai-integration.md │ │ └── remote-connection.md │ ├── user-guides │ │ ├── 01-command-line-interface.md │ │ ├── 02-mcp-tools-reference.md │ │ └── 03-integrating-with-ides-and-clients.md │ └── workflows │ └── all-workflows.md ├── example_scripts │ ├── mcp_integration.py │ └── python_sdk.py ├── glama.json ├── LICENSE ├── lzyank.toml ├── Makefile ├── mkdocs.yml ├── package-lock.json ├── package.json ├── pyproject.toml ├── README.md ├── scripts │ ├── check_docs_in_mkdocs.py │ ├── check_http_imports.py │ └── generate_endpoints_doc.py ├── smithery.yaml ├── src │ └── biomcp │ ├── __init__.py │ ├── __main__.py │ ├── articles │ │ ├── __init__.py │ │ ├── autocomplete.py │ │ ├── fetch.py │ │ ├── preprints.py │ │ ├── search_optimized.py │ │ ├── search.py │ │ └── unified.py │ ├── biomarkers │ │ ├── __init__.py │ │ └── search.py │ ├── cbioportal_helper.py │ ├── circuit_breaker.py │ ├── cli │ │ ├── __init__.py │ │ ├── articles.py │ │ ├── biomarkers.py │ │ ├── diseases.py │ │ ├── health.py │ │ ├── interventions.py │ │ ├── main.py │ │ 
├── openfda.py │ │ ├── organizations.py │ │ ├── server.py │ │ ├── trials.py │ │ └── variants.py │ ├── connection_pool.py │ ├── constants.py │ ├── core.py │ ├── diseases │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── domain_handlers.py │ ├── drugs │ │ ├── __init__.py │ │ └── getter.py │ ├── exceptions.py │ ├── genes │ │ ├── __init__.py │ │ └── getter.py │ ├── http_client_simple.py │ ├── http_client.py │ ├── individual_tools.py │ ├── integrations │ │ ├── __init__.py │ │ ├── biothings_client.py │ │ └── cts_api.py │ ├── interventions │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── logging_filter.py │ ├── metrics_handler.py │ ├── metrics.py │ ├── openfda │ │ ├── __init__.py │ │ ├── adverse_events_helpers.py │ │ ├── adverse_events.py │ │ ├── cache.py │ │ ├── constants.py │ │ ├── device_events_helpers.py │ │ ├── device_events.py │ │ ├── drug_approvals.py │ │ ├── drug_labels_helpers.py │ │ ├── drug_labels.py │ │ ├── drug_recalls_helpers.py │ │ ├── drug_recalls.py │ │ ├── drug_shortages_detail_helpers.py │ │ ├── drug_shortages_helpers.py │ │ ├── drug_shortages.py │ │ ├── exceptions.py │ │ ├── input_validation.py │ │ ├── rate_limiter.py │ │ ├── utils.py │ │ └── validation.py │ ├── organizations │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── parameter_parser.py │ ├── prefetch.py │ ├── query_parser.py │ ├── query_router.py │ ├── rate_limiter.py │ ├── render.py │ ├── request_batcher.py │ ├── resources │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── instructions.md │ │ └── researcher.md │ ├── retry.py │ ├── router_handlers.py │ ├── router.py │ ├── shared_context.py │ ├── thinking │ │ ├── __init__.py │ │ ├── sequential.py │ │ └── session.py │ ├── thinking_tool.py │ ├── thinking_tracker.py │ ├── trials │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── nci_getter.py │ │ ├── nci_search.py │ │ └── search.py │ ├── utils │ │ ├── __init__.py │ │ ├── cancer_types_api.py │ │ ├── cbio_http_adapter.py │ │ ├── endpoint_registry.py │ │ ├── 
gene_validator.py │ │ ├── metrics.py │ │ ├── mutation_filter.py │ │ ├── query_utils.py │ │ ├── rate_limiter.py │ │ └── request_cache.py │ ├── variants │ │ ├── __init__.py │ │ ├── alphagenome.py │ │ ├── cancer_types.py │ │ ├── cbio_external_client.py │ │ ├── cbioportal_mutations.py │ │ ├── cbioportal_search_helpers.py │ │ ├── cbioportal_search.py │ │ ├── constants.py │ │ ├── external.py │ │ ├── filters.py │ │ ├── getter.py │ │ ├── links.py │ │ └── search.py │ └── workers │ ├── __init__.py │ ├── worker_entry_stytch.js │ ├── worker_entry.js │ └── worker.py ├── tests │ ├── bdd │ │ ├── cli_help │ │ │ ├── help.feature │ │ │ └── test_help.py │ │ ├── conftest.py │ │ ├── features │ │ │ └── alphagenome_integration.feature │ │ ├── fetch_articles │ │ │ ├── fetch.feature │ │ │ └── test_fetch.py │ │ ├── get_trials │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── get_variants │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── search_articles │ │ │ ├── autocomplete.feature │ │ │ ├── search.feature │ │ │ ├── test_autocomplete.py │ │ │ └── test_search.py │ │ ├── search_trials │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ ├── search_variants │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ └── steps │ │ └── test_alphagenome_steps.py │ ├── config │ │ └── test_smithery_config.py │ ├── conftest.py │ ├── data │ │ ├── ct_gov │ │ │ ├── clinical_trials_api_v2.yaml │ │ │ ├── trials_NCT04280705.json │ │ │ └── trials_NCT04280705.txt │ │ ├── myvariant │ │ │ ├── myvariant_api.yaml │ │ │ ├── myvariant_field_descriptions.csv │ │ │ ├── variants_full_braf_v600e.json │ │ │ ├── variants_full_braf_v600e.txt │ │ │ └── variants_part_braf_v600_multiple.json │ │ ├── openfda │ │ │ ├── drugsfda_detail.json │ │ │ ├── drugsfda_search.json │ │ │ ├── enforcement_detail.json │ │ │ └── enforcement_search.json │ │ └── pubtator │ │ ├── pubtator_autocomplete.json │ │ └── pubtator3_paper.txt │ ├── integration │ │ ├── test_openfda_integration.py │ │ ├── test_preprints_integration.py │ │ ├── 
test_simple.py │ │ └── test_variants_integration.py │ ├── tdd │ │ ├── articles │ │ │ ├── test_autocomplete.py │ │ │ ├── test_cbioportal_integration.py │ │ │ ├── test_fetch.py │ │ │ ├── test_preprints.py │ │ │ ├── test_search.py │ │ │ └── test_unified.py │ │ ├── conftest.py │ │ ├── drugs │ │ │ ├── __init__.py │ │ │ └── test_drug_getter.py │ │ ├── openfda │ │ │ ├── __init__.py │ │ │ ├── test_adverse_events.py │ │ │ ├── test_device_events.py │ │ │ ├── test_drug_approvals.py │ │ │ ├── test_drug_labels.py │ │ │ ├── test_drug_recalls.py │ │ │ ├── test_drug_shortages.py │ │ │ └── test_security.py │ │ ├── test_biothings_integration_real.py │ │ ├── test_biothings_integration.py │ │ ├── test_circuit_breaker.py │ │ ├── test_concurrent_requests.py │ │ ├── test_connection_pool.py │ │ ├── test_domain_handlers.py │ │ ├── test_drug_approvals.py │ │ ├── test_drug_recalls.py │ │ ├── test_drug_shortages.py │ │ ├── test_endpoint_documentation.py │ │ ├── test_error_scenarios.py │ │ ├── test_europe_pmc_fetch.py │ │ ├── test_mcp_integration.py │ │ ├── test_mcp_tools.py │ │ ├── test_metrics.py │ │ ├── test_nci_integration.py │ │ ├── test_nci_mcp_tools.py │ │ ├── test_network_policies.py │ │ ├── test_offline_mode.py │ │ ├── test_openfda_unified.py │ │ ├── test_pten_r173_search.py │ │ ├── test_render.py │ │ ├── test_request_batcher.py.disabled │ │ ├── test_retry.py │ │ ├── test_router.py │ │ ├── test_shared_context.py.disabled │ │ ├── test_unified_biothings.py │ │ ├── thinking │ │ │ ├── __init__.py │ │ │ └── test_sequential.py │ │ ├── trials │ │ │ ├── test_backward_compatibility.py │ │ │ ├── test_getter.py │ │ │ └── test_search.py │ │ ├── utils │ │ │ ├── test_gene_validator.py │ │ │ ├── test_mutation_filter.py │ │ │ ├── test_rate_limiter.py │ │ │ └── test_request_cache.py │ │ ├── variants │ │ │ ├── constants.py │ │ │ ├── test_alphagenome_api_key.py │ │ │ ├── test_alphagenome_comprehensive.py │ │ │ ├── test_alphagenome.py │ │ │ ├── test_cbioportal_mutations.py │ │ │ ├── 
test_cbioportal_search.py │ │ │ ├── test_external_integration.py │ │ │ ├── test_external.py │ │ │ ├── test_extract_gene_aa_change.py │ │ │ ├── test_filters.py │ │ │ ├── test_getter.py │ │ │ ├── test_links.py │ │ │ └── test_search.py │ │ └── workers │ │ └── test_worker_sanitization.js │ └── test_pydantic_ai_integration.py ├── THIRD_PARTY_ENDPOINTS.md ├── tox.ini ├── uv.lock └── wrangler.toml ``` # Files -------------------------------------------------------------------------------- /docs/tutorials/openfda-prompts.md: -------------------------------------------------------------------------------- ```markdown 1 | # OpenFDA Example Prompts for AI Agents 2 | 3 | This document provides example prompts that demonstrate effective use of BioMCP's OpenFDA integration for various precision oncology use cases. 4 | 5 | ## Drug Safety Assessment 6 | 7 | ### Basic Safety Profile 8 | 9 | ``` 10 | What are the most common adverse events reported for pembrolizumab? 11 | Include both serious and non-serious events. 12 | ``` 13 | 14 | **Expected BioMCP Usage:** 15 | 16 | 1. `think` - Plan safety assessment approach 17 | 2. `openfda_adverse_searcher(drug="pembrolizumab", limit=50)` 18 | 3. Analyze and summarize top reactions 19 | 20 | ### Comparative Safety Analysis 21 | 22 | ``` 23 | Compare the adverse event profiles of imatinib and dasatinib for CML treatment. 24 | Focus on serious events and their frequencies. 25 | ``` 26 | 27 | **Expected BioMCP Usage:** 28 | 29 | 1. `think` - Plan comparative analysis 30 | 2. `openfda_adverse_searcher(drug="imatinib", serious=True)` 31 | 3. `openfda_adverse_searcher(drug="dasatinib", serious=True)` 32 | 4. Compare and contrast findings 33 | 34 | ### Drug Interaction Investigation 35 | 36 | ``` 37 | A patient on warfarin needs to start erlotinib for NSCLC. What drug interactions 38 | and adverse events should we monitor based on FDA data? 39 | ``` 40 | 41 | **Expected BioMCP Usage:** 42 | 43 | 1. `think` - Consider interaction risks 44 | 2. 
`openfda_label_searcher(name="erlotinib")` - Check drug interactions section 45 | 3. `openfda_adverse_searcher(drug="erlotinib", reaction="bleeding")` 46 | 4. `openfda_adverse_searcher(drug="erlotinib", reaction="INR")` 47 | 48 | ## Treatment Planning 49 | 50 | ### Indication Verification 51 | 52 | ``` 53 | Is trastuzumab deruxtecan FDA-approved for HER2-low breast cancer? 54 | What are the specific approved indications? 55 | ``` 56 | 57 | **Expected BioMCP Usage:** 58 | 59 | 1. `think` - Plan indication search 60 | 2. `openfda_label_searcher(name="trastuzumab deruxtecan")` 61 | 3. `openfda_label_getter(set_id="...")` - Get full indications section 62 | 4. Extract and summarize approved uses 63 | 64 | ### Contraindication Screening 65 | 66 | ``` 67 | Patient has severe hepatic impairment. Which targeted therapy drugs for 68 | melanoma have contraindications or warnings for liver dysfunction? 69 | ``` 70 | 71 | **Expected BioMCP Usage:** 72 | 73 | 1. `think` - Identify melanoma drugs to check 74 | 2. `openfda_label_searcher(indication="melanoma")` 75 | 3. For each drug: `openfda_label_getter(set_id="...", sections=["contraindications", "warnings_and_precautions"])` 76 | 4. Summarize liver-related contraindications 77 | 78 | ### Dosing Guidelines 79 | 80 | ``` 81 | What is the FDA-recommended dosing for osimertinib in EGFR-mutated NSCLC, 82 | including dose modifications for adverse events? 83 | ``` 84 | 85 | **Expected BioMCP Usage:** 86 | 87 | 1. `think` - Plan dosing information retrieval 88 | 2. `openfda_label_searcher(name="osimertinib")` 89 | 3. `openfda_label_getter(set_id="...", sections=["dosage_and_administration", "dose_modifications"])` 90 | 4. Extract dosing guidelines 91 | 92 | ## Device Reliability Assessment 93 | 94 | ### Genomic Test Reliability 95 | 96 | ``` 97 | What adverse events have been reported for NGS-based cancer diagnostic devices? 98 | Show me any false positive or accuracy issues. 
99 | ``` 100 | 101 | **Expected BioMCP Usage:** 102 | 103 | 1. `think` - Consider test reliability factors 104 | 2. `openfda_device_searcher(genomics_only=True, limit=25)` - Get all genomic device events 105 | 3. `openfda_device_searcher(problem="false positive", genomics_only=True)` 106 | 4. `openfda_device_searcher(problem="accuracy", genomics_only=True)` 107 | 5. For significant events: `openfda_device_getter(mdr_report_key="...")` 108 | 109 | **Note:** The FDA database uses abbreviated names (e.g., "F1CDX" instead of "FoundationOne CDx"). 110 | For specific devices, try: `openfda_device_searcher(device="F1CDX")` or search by key terms. 111 | 112 | ### Laboratory Equipment Issues 113 | 114 | ``` 115 | Our lab uses Illumina sequencers. What device malfunctions have been 116 | reported that could impact our genomic testing workflow? 117 | ``` 118 | 119 | **Expected BioMCP Usage:** 120 | 121 | 1. `think` - Assess potential workflow impacts 122 | 2. `openfda_device_searcher(manufacturer="Illumina", genomics_only=True)` 123 | 3. Analyze problem patterns 124 | 4. `openfda_device_getter(mdr_report_key="...")` for critical issues 125 | 126 | ## Comprehensive Drug Evaluation 127 | 128 | ### New Drug Assessment 129 | 130 | ``` 131 | Provide a comprehensive safety and efficacy profile for sotorasib (Lumakras) 132 | including FDA approval, indications, major warnings, and post-market adverse events. 133 | ``` 134 | 135 | **Expected BioMCP Usage:** 136 | 137 | 1. `think` - Plan comprehensive assessment 138 | 2. `drug_getter("sotorasib")` - Basic drug info 139 | 3. `openfda_label_searcher(name="sotorasib")` 140 | 4. `openfda_label_getter(set_id="...")` - Full label 141 | 5. `openfda_adverse_searcher(drug="sotorasib", serious=True)` 142 | 6. 
`trial_searcher(interventions=["sotorasib"])` - Ongoing trials 143 | 144 | ### Risk-Benefit Analysis 145 | 146 | ``` 147 | For a 75-year-old patient with metastatic melanoma, analyze the risk-benefit 148 | profile of nivolumab plus ipilimumab combination therapy based on FDA data. 149 | ``` 150 | 151 | **Expected BioMCP Usage:** 152 | 153 | 1. `think` - Structure risk-benefit analysis 154 | 2. `openfda_label_searcher(name="nivolumab")` 155 | 3. `openfda_label_searcher(name="ipilimumab")` 156 | 4. `openfda_label_getter(set_id="...", sections=["geriatric_use", "warnings_and_precautions"])` 157 | 5. `openfda_adverse_searcher(drug="nivolumab", serious=True)` 158 | 6. `openfda_adverse_searcher(drug="ipilimumab", serious=True)` 159 | 160 | ## Special Populations 161 | 162 | ### Pregnancy Considerations 163 | 164 | ``` 165 | Which FDA-approved lung cancer treatments have pregnancy category data 166 | or specific warnings for pregnant patients? 167 | ``` 168 | 169 | **Expected BioMCP Usage:** 170 | 171 | 1. `think` - Plan pregnancy safety search 172 | 2. `openfda_label_searcher(indication="lung cancer")` 173 | 3. For each drug: `openfda_label_getter(set_id="...", sections=["pregnancy", "use_in_specific_populations"])` 174 | 4. Compile pregnancy categories and warnings 175 | 176 | ### Pediatric Oncology 177 | 178 | ``` 179 | What FDA-approved indications and safety data exist for using 180 | checkpoint inhibitors in pediatric cancer patients? 181 | ``` 182 | 183 | **Expected BioMCP Usage:** 184 | 185 | 1. `think` - Identify checkpoint inhibitors 186 | 2. `openfda_label_searcher(name="pembrolizumab")` 187 | 3. `openfda_label_getter(set_id="...", sections=["pediatric_use"])` 188 | 4. `openfda_adverse_searcher(drug="pembrolizumab")` - Filter for pediatric if possible 189 | 5. 
Repeat for other checkpoint inhibitors 190 | 191 | ## Complex Queries 192 | 193 | ### Multi-Drug Regimen Safety 194 | 195 | ``` 196 | Analyze potential safety concerns for the FOLFOX chemotherapy regimen 197 | (5-FU, leucovorin, oxaliplatin) based on FDA adverse event data. 198 | ``` 199 | 200 | **Expected BioMCP Usage:** 201 | 202 | 1. `think` - Plan multi-drug analysis 203 | 2. `openfda_adverse_searcher(drug="fluorouracil")` 204 | 3. `openfda_adverse_searcher(drug="leucovorin")` 205 | 4. `openfda_adverse_searcher(drug="oxaliplatin")` 206 | 5. Identify overlapping toxicities 207 | 6. `openfda_label_searcher(name="oxaliplatin")` - Check for combination warnings 208 | 209 | ### Biomarker-Driven Treatment Selection 210 | 211 | ``` 212 | For a patient with BRAF V600E mutant melanoma with brain metastases, 213 | what FDA-approved treatments are available and what are their CNS-specific 214 | efficacy and safety considerations? 215 | ``` 216 | 217 | **Expected BioMCP Usage:** 218 | 219 | 1. `think` - Structure biomarker-driven search 220 | 2. `article_searcher(genes=["BRAF"], variants=["V600E"], diseases=["melanoma"])` 221 | 3. `openfda_label_searcher(indication="melanoma")` 222 | 4. For BRAF inhibitors: `openfda_label_getter(set_id="...", sections=["clinical_studies", "warnings_and_precautions"])` 223 | 5. `openfda_adverse_searcher(drug="dabrafenib", reaction="seizure")` 224 | 6. `openfda_adverse_searcher(drug="vemurafenib", reaction="brain")` 225 | 226 | ### Treatment Failure Analysis 227 | 228 | ``` 229 | A patient's lung adenocarcinoma progressed on osimertinib. Based on FDA data, 230 | what are the documented resistance mechanisms and alternative approved treatments? 231 | ``` 232 | 233 | **Expected BioMCP Usage:** 234 | 235 | 1. `think` - Analyze resistance and alternatives 236 | 2. `openfda_label_getter(set_id="...", sections=["clinical_studies"])` for osimertinib 237 | 3. `article_searcher(genes=["EGFR"], keywords=["resistance", "osimertinib"])` 238 | 4. 
`openfda_label_searcher(indication="non-small cell lung cancer")` 239 | 5. `trial_searcher(conditions=["NSCLC"], keywords=["osimertinib resistant"])` 240 | 241 | ## Safety Monitoring 242 | 243 | ### Post-Market Surveillance 244 | 245 | ``` 246 | Have there been any new safety signals for CDK4/6 inhibitors 247 | (palbociclib, ribociclib, abemaciclib) in the past year? 248 | ``` 249 | 250 | **Expected BioMCP Usage:** 251 | 252 | 1. `think` - Plan safety signal detection 253 | 2. `openfda_adverse_searcher(drug="palbociclib", limit=100)` 254 | 3. `openfda_adverse_searcher(drug="ribociclib", limit=100)` 255 | 4. `openfda_adverse_searcher(drug="abemaciclib", limit=100)` 256 | 5. Analyze for unusual patterns or frequencies 257 | 258 | ### Rare Adverse Event Investigation 259 | 260 | ``` 261 | Investigate reports of pneumonitis associated with immune checkpoint inhibitors. 262 | Which drugs have the highest frequency and what are the typical outcomes? 263 | ``` 264 | 265 | **Expected BioMCP Usage:** 266 | 267 | 1. `think` - Structure pneumonitis investigation 268 | 2. `openfda_adverse_searcher(drug="pembrolizumab", reaction="pneumonitis")` 269 | 3. `openfda_adverse_searcher(drug="nivolumab", reaction="pneumonitis")` 270 | 4. `openfda_adverse_searcher(drug="atezolizumab", reaction="pneumonitis")` 271 | 5. Compare frequencies and outcomes 272 | 6. `openfda_adverse_getter(report_id="...")` for severe cases 273 | 274 | ## Quality Assurance 275 | 276 | ### Diagnostic Test Validation 277 | 278 | ``` 279 | What quality issues have been reported for liquid biopsy ctDNA tests 280 | that could affect treatment decisions? 281 | ``` 282 | 283 | **Expected BioMCP Usage:** 284 | 285 | 1. `think` - Identify quality factors 286 | 2. `openfda_device_searcher(device="liquid biopsy", genomics_only=True)` 287 | 3. `openfda_device_searcher(device="ctDNA", genomics_only=True)` 288 | 4. `openfda_device_searcher(device="circulating tumor", genomics_only=True)` 289 | 5. 
Analyze failure modes 290 | 291 | ## Tips for Effective Prompts 292 | 293 | 1. **Be specific about the data needed**: Specify if you want adverse events, labels, or device data 294 | 2. **Include relevant filters**: Mention if focusing on serious events, specific populations, or genomic devices 295 | 3. **Request appropriate analysis**: Ask for comparisons, trends, or specific data points 296 | 4. **Consider multiple data sources**: Combine OpenFDA with literature and trial data for comprehensive answers 297 | 5. **Include time frames when relevant**: Though OpenFDA doesn't support date filtering in queries, you can ask for analysis of recent reports 298 | 299 | ## Integration Examples 300 | 301 | ### Combining with Literature Search 302 | 303 | ``` 304 | Find FDA adverse events for venetoclax in CLL, then search for published 305 | case reports that provide more clinical context for the most serious events. 306 | ``` 307 | 308 | ### Combining with Clinical Trials 309 | 310 | ``` 311 | What adverse events are reported for FDA-approved CAR-T therapies, and how 312 | do these compare to adverse events being monitored in current clinical trials? 313 | ``` 314 | 315 | ### Combining with Variant Data 316 | 317 | ``` 318 | For patients with RET fusion-positive cancers, what FDA-approved targeted 319 | therapies are available and what are their mutation-specific response rates? 320 | ``` 321 | 322 | ## Using Your OpenFDA API Key 323 | 324 | The OpenFDA API has rate limits: 40 requests/minute without a key, or 240 requests/minute with a key. You can get a free API key at https://open.fda.gov/apis/authentication/ 325 | 326 | ### Method 1: Include API Key in Your Prompt 327 | 328 | You can provide your API key directly in your conversation with the AI: 329 | 330 | ``` 331 | My OpenFDA API key is: YOUR_API_KEY_HERE 332 | 333 | Search for all serious adverse events reported for pembrolizumab in the last year. 334 | Include both death and hospitalization events. 
335 | ``` 336 | 337 | ``` 338 | Using my OpenFDA API key YOUR_API_KEY_HERE, compare the safety profiles of 339 | all FDA-approved BRAF inhibitors for melanoma treatment. 340 | ``` 341 | 342 | ``` 343 | I have an OpenFDA API key: YOUR_API_KEY_HERE 344 | 345 | Analyze device malfunction reports for all NGS-based cancer diagnostic tests. 346 | Focus on false positive and false negative results that could impact treatment decisions. 347 | ``` 348 | 349 | ### Method 2: Set Environment Variable (Recommended for Regular Use) 350 | 351 | If you're using BioMCP regularly, set the environment variable once: 352 | 353 | ```bash 354 | export OPENFDA_API_KEY="YOUR_API_KEY_HERE" 355 | ``` 356 | 357 | Then your prompts don't need to include the key: 358 | 359 | ``` 360 | Retrieve the complete FDA label for trastuzumab deruxtecan, including all 361 | warnings and contraindications for HER2-low breast cancer patients. 362 | ``` 363 | 364 | ### Method 3: CLI Usage with API Key 365 | 366 | When using the BioMCP CLI directly: 367 | 368 | ```bash 369 | # Pass API key as parameter 370 | biomcp openfda adverse search --drug pembrolizumab --serious --api-key YOUR_API_KEY_HERE 371 | 372 | # Or set environment variable first 373 | export OPENFDA_API_KEY="YOUR_API_KEY_HERE" 374 | biomcp openfda device search --manufacturer Illumina --genomics-only 375 | ``` 376 | 377 | ### Example Prompts with API Key 378 | 379 | #### Comprehensive Drug Safety Analysis 380 | 381 | ``` 382 | My OpenFDA API key is: YOUR_API_KEY_HERE 383 | 384 | Perform a comprehensive safety analysis of sotorasib (Lumakras) including: 385 | 1. All serious adverse events from post-market surveillance 386 | 2. Complete FDA label with all sections 387 | 3. Any device issues reported for its companion diagnostic tests, if it has any 388 | 4. Compare its safety profile to other KRAS G12C inhibitors if available 389 | 390 | This is for a clinical review, so I need detailed data from all available FDA sources. 
391 | ``` 392 | 393 | #### Large-Scale Adverse Event Analysis 394 | 395 | ``` 396 | Using my OpenFDA API key YOUR_API_KEY_HERE, analyze adverse events for all 397 | FDA-approved checkpoint inhibitors (pembrolizumab, nivolumab, atezolizumab, 398 | durvalumab, avelumab, cemiplimab). 399 | 400 | For each drug: 401 | - Get the top 20 most frequent adverse events 402 | - Identify immune-related adverse events 403 | - Check for any black box warnings in their labels 404 | - Note any fatal events 405 | 406 | This requires many API calls, so please use my API key for higher rate limits. 407 | ``` 408 | 409 | #### Multi-Device Comparison 410 | 411 | ``` 412 | I have an OpenFDA API key: YOUR_API_KEY_HERE 413 | 414 | Compare all FDA adverse event reports for NGS-based companion diagnostic devices 415 | from major manufacturers (Foundation Medicine, Guardant Health, Tempus, etc.). 416 | Focus on: 417 | - Test failure rates 418 | - Sample quality issues 419 | - False positive/negative reports 420 | - Software-related problems 421 | 422 | This analysis requires querying multiple device records, so the API key will help 423 | avoid rate limiting. 424 | ``` 425 | 426 | #### Batch Label Retrieval 427 | 428 | ``` 429 | My OpenFDA API key is YOUR_API_KEY_HERE. 430 | 431 | Retrieve the complete FDA labels for all CDK4/6 inhibitors (palbociclib, 432 | ribociclib, abemaciclib) and extract: 433 | - Approved indications 434 | - Dose modifications for adverse events 435 | - Drug-drug interactions 436 | - Special population considerations 437 | 438 | Then create a comparison table of their safety profiles and dosing guidelines. 439 | ``` 440 | 441 | ### When to Provide an API Key 442 | 443 | You should provide your API key when: 444 | 445 | 1. **Performing large-scale analyses** requiring many API calls 446 | 2. **Conducting comprehensive safety reviews** across multiple drugs/devices 447 | 3. **Running batch operations** like comparing multiple products 448 | 4. 
**Doing rapid iterative searches** that might hit rate limits 449 | 5. **Performing systematic reviews** requiring extensive data retrieval 450 | 451 | ### API Key Security Notes 452 | 453 | - Never share your actual API key in public forums or repositories 454 | - The AI will use your key only for the current session 455 | - Keys passed as parameters override environment variables 456 | - The FDA API key is free and can be regenerated if compromised 457 | 458 | ## Important Notes 459 | 460 | - Always expect the AI to use the `think` tool first for complex queries 461 | - The AI should include appropriate disclaimers about adverse events not proving causation 462 | - Results are limited by FDA's data availability and reporting patterns 463 | - The AI should suggest when additional data sources might provide complementary information 464 | - With an API key, you can make 240 requests/minute vs 40 without 465 | 466 | ## Known Limitations 467 | 468 | ### Drug Shortage Data 469 | 470 | **Important:** The FDA does not currently provide a machine-readable API for drug shortage data. The shortage search tools will return an informative message directing users to the FDA's web-based shortage database. This is a limitation of FDA's current data infrastructure, not a bug in BioMCP. 
471 | 472 | Alternative resources for drug shortage information: 473 | 474 | - FDA Drug Shortages Database: https://www.accessdata.fda.gov/scripts/drugshortages/ 475 | - ASHP Drug Shortages: https://www.ashp.org/drug-shortages/current-shortages 476 | 477 | ### Other Limitations 478 | 479 | - Device adverse event reports use abbreviated device names (e.g., "F1CDX" instead of "FoundationOne CDx") 480 | - Adverse event reports represent voluntary submissions and may not reflect true incidence rates 481 | - Recall information may have a delay of 24-48 hours from initial FDA announcement 482 | ``` -------------------------------------------------------------------------------- /docs/tutorials/pydantic-ai-integration.md: -------------------------------------------------------------------------------- ```markdown 1 | # Pydantic AI Integration Guide 2 | 3 | This guide explains how to integrate BioMCP with Pydantic AI for building biomedical AI agents. 4 | 5 | ## Server Modes and Endpoints 6 | 7 | BioMCP supports two primary transports for Pydantic AI integration (stdio and streamable HTTP), exposed through the three server modes listed below: 8 | 9 | ### Available Transport Modes 10 | 11 | | Mode | Endpoints | Pydantic AI Client | Use Case | 12 | | ----------------- | -------------------------- | ------------------------- | ------------------------------- | 13 | | `stdio` | N/A (subprocess) | `MCPServerStdio` | Local development, testing | 14 | | `streamable_http` | `POST /mcp`, `GET /health` | `MCPServerStreamableHTTP` | Production HTTP deployments | 15 | | `worker` | `POST /mcp`, `GET /health` | `MCPServerStreamableHTTP` | HTTP mode using streamable HTTP | 16 | 17 | Both `streamable_http` and `worker` modes now use FastMCP's native streamable HTTP implementation for full MCP protocol compliance. The SSE-based transport has been deprecated. 18 | 19 | ## Working Examples for Pydantic AI 20 | 21 | Here are the recommended configurations for connecting Pydantic AI to BioMCP: 22 | 23 | ### 1. 
STDIO Mode (Recommended for Local Development) 24 | 25 | This mode runs BioMCP as a subprocess without needing an HTTP server: 26 | 27 | ```python 28 | import asyncio 29 | import os 30 | from pydantic_ai import Agent 31 | from pydantic_ai.mcp import MCPServerStdio 32 | 33 | async def main(): 34 | # Run BioMCP as a subprocess 35 | server = MCPServerStdio( 36 | "python", 37 | args=["-m", "biomcp", "run", "--mode", "stdio"] 38 | ) 39 | 40 | # Use a real LLM model (requires API key) 41 | model = "openai:gpt-4o-mini" # Set OPENAI_API_KEY environment variable 42 | 43 | agent = Agent(model, toolsets=[server]) 44 | 45 | async with agent: 46 | # Example query that returns real results 47 | result = await agent.run( 48 | "Find articles about BRAF V600E mutations in melanoma" 49 | ) 50 | print(result.output) 51 | 52 | if __name__ == "__main__": 53 | asyncio.run(main()) 54 | ``` 55 | 56 | ### 2. Streamable HTTP Mode (Recommended for Production) 57 | 58 | For production deployments with proper MCP compliance (requires pydantic-ai>=0.6.9): 59 | 60 | ```python 61 | import asyncio 62 | import os 63 | from pydantic_ai import Agent 64 | from pydantic_ai.mcp import MCPServerStreamableHTTP 65 | 66 | async def main(): 67 | # Connect to the /mcp endpoint 68 | server = MCPServerStreamableHTTP("http://localhost:8000/mcp") 69 | 70 | # Use a real LLM model (requires API key) 71 | # Options: openai:gpt-4o-mini, anthropic:claude-3-haiku-20240307, groq:llama-3.1-70b-versatile 72 | model = "openai:gpt-4o-mini" # Set OPENAI_API_KEY environment variable 73 | 74 | agent = Agent(model, toolsets=[server]) 75 | 76 | async with agent: 77 | # Example queries that return real results 78 | result = await agent.run( 79 | "Find recent articles about BRAF V600E in melanoma" 80 | ) 81 | print(result.output) 82 | 83 | if __name__ == "__main__": 84 | asyncio.run(main()) 85 | ``` 86 | 87 | To run the server for this mode: 88 | 89 | ```bash 90 | # Using streamable_http mode (recommended) 91 | biomcp run --mode 
streamable_http --host 0.0.0.0 --port 8000 92 | 93 | # Or using worker mode (also uses streamable HTTP) 94 | biomcp run --mode worker --host 0.0.0.0 --port 8000 95 | 96 | # Or using Docker 97 | docker run -p 8000:8000 genomoncology/biomcp:latest biomcp run --mode streamable_http 98 | ``` 99 | 100 | ### 3. Direct JSON-RPC Mode (Alternative HTTP) 101 | 102 | You can also use the JSON-RPC endpoint at the root path: 103 | 104 | ```python 105 | import httpx 106 | import json 107 | 108 | async def call_biomcp_jsonrpc(method, params=None): 109 | """Direct JSON-RPC calls to BioMCP""" 110 | async with httpx.AsyncClient() as client: 111 | response = await client.post( 112 | "http://localhost:8000/", 113 | json={ 114 | "jsonrpc": "2.0", 115 | "id": 1, 116 | "method": method, 117 | "params": params or {} 118 | } 119 | ) 120 | return response.json() 121 | 122 | # Example usage 123 | result = await call_biomcp_jsonrpc("tools/list") 124 | print("Available tools:", result) 125 | ``` 126 | 127 | ## Troubleshooting Common Issues 128 | 129 | ### Issue: TestModel returns empty results 130 | 131 | **Cause**: TestModel is a mock model for testing - it doesn't execute real searches. 132 | 133 | **Solution**: This is expected behavior. TestModel returns `{"search":{"results":[]}}` by design. To get real results: 134 | 135 | - Use a real LLM model with API key: `Agent("openai:gpt-4o-mini", toolsets=[server])` 136 | - Use Groq for free tier: Sign up at console.groq.com, get API key, use `Agent("groq:llama-3.1-70b-versatile", toolsets=[server])` 137 | - Or use BioMCP CLI directly (no API key needed): `biomcp article search --gene BRAF` 138 | 139 | ### Issue: Connection refused 140 | 141 | **Solution**: Ensure the server is running with the correct host binding: 142 | 143 | ```bash 144 | biomcp run --mode worker --host 0.0.0.0 --port 8000 145 | ``` 146 | 147 | ### Issue: CORS errors in browser 148 | 149 | **Solution**: The server includes CORS headers by default. 
If you still have issues, check if a proxy or firewall is blocking the headers. 150 | 151 | ### Issue: Health endpoint returns 404 152 | 153 | **Solution**: The health endpoint is available at `GET /health` in both worker and streamable_http modes. Ensure you're using the latest version: 154 | 155 | ```bash 156 | pip install --upgrade biomcp-python 157 | ``` 158 | 159 | ### Issue: SSE endpoint not found 160 | 161 | **Solution**: The SSE transport has been deprecated. Use streamable HTTP mode instead: 162 | 163 | ```python 164 | # Old (deprecated) 165 | # from pydantic_ai.mcp import MCPServerSSE 166 | # server = MCPServerSSE("http://localhost:8000/sse") 167 | 168 | # New (recommended) 169 | from pydantic_ai.mcp import MCPServerStreamableHTTP 170 | server = MCPServerStreamableHTTP("http://localhost:8000/mcp") 171 | ``` 172 | 173 | ## Testing Your Connection 174 | 175 | Here are test scripts to verify your setup for different modes: 176 | 177 | ### Testing STDIO Mode (Local Development) 178 | 179 | ```python 180 | import asyncio 181 | from pydantic_ai import Agent 182 | from pydantic_ai.models.test import TestModel 183 | from pydantic_ai.mcp import MCPServerStdio 184 | 185 | async def test_stdio_connection(): 186 | # Use TestModel to verify connection (won't return real data) 187 | server = MCPServerStdio( 188 | "python", 189 | args=["-m", "biomcp", "run", "--mode", "stdio"] 190 | ) 191 | 192 | agent = Agent( 193 | model=TestModel(call_tools=["search"]), 194 | toolsets=[server] 195 | ) 196 | 197 | async with agent: 198 | print(f"✅ STDIO Connection successful!") 199 | 200 | # Test a simple search (returns mock data) 201 | result = await agent.run("Test search for BRAF") 202 | print(f"✅ Tool execution successful!") 203 | print(f"Note: TestModel returns mock data: {result.output}") 204 | 205 | if __name__ == "__main__": 206 | asyncio.run(test_stdio_connection()) 207 | ``` 208 | 209 | ### Testing Streamable HTTP Mode (Production) 210 | 211 | First, ensure the server is 
running: 212 | 213 | ```bash 214 | # Start the server in a separate terminal 215 | biomcp run --mode streamable_http --port 8000 216 | ``` 217 | 218 | Then test the connection: 219 | 220 | ```python 221 | import asyncio 222 | from pydantic_ai import Agent 223 | from pydantic_ai.models.test import TestModel 224 | from pydantic_ai.mcp import MCPServerStreamableHTTP 225 | 226 | async def test_streamable_http_connection(): 227 | # Connect to the running server's /mcp endpoint 228 | server = MCPServerStreamableHTTP("http://localhost:8000/mcp") 229 | 230 | # Create agent with TestModel (no API keys needed) 231 | agent = Agent( 232 | model=TestModel(call_tools=["search"]), 233 | toolsets=[server] 234 | ) 235 | 236 | async with agent: 237 | print("✅ Streamable HTTP Connection successful!") 238 | 239 | # Test a query 240 | result = await agent.run("Find articles about BRAF") 241 | print("✅ Tool execution successful!") 242 | if result.output: 243 | print(f"📄 Received {len(result.output)} characters of output") 244 | 245 | if __name__ == "__main__": 246 | asyncio.run(test_streamable_http_connection()) 247 | ``` 248 | 249 | ### Important: Understanding TestModel vs Real Results 250 | 251 | **TestModel is a MOCK model** - it doesn't execute real searches: 252 | 253 | - TestModel simulates tool calls but returns empty results: `{"search":{"results":[]}}` 254 | - This is by design - TestModel is for testing the connection flow, not getting real data 255 | - To get actual search results, you need to use a real LLM model 256 | 257 | **To get real results:** 258 | 259 | 1. **Use a real LLM model** (requires API key): 260 | 261 | ```python 262 | # Replace TestModel with a real model 263 | agent = Agent( 264 | "openai:gpt-4o-mini", # or "anthropic:claude-3-haiku" 265 | toolsets=[server] 266 | ) 267 | ``` 268 | 269 | 2. 
**Use BioMCP CLI directly** (no API key needed): 270 | 271 | ```bash 272 | # Get real search results via CLI 273 | biomcp article search --gene BRAF --disease melanoma --json 274 | ``` 275 | 276 | 3. **For integration testing** without API keys: 277 | 278 | ```python 279 | import subprocess 280 | import json 281 | 282 | # Use CLI to get real results 283 | result = subprocess.run( 284 | ["biomcp", "article", "search", "--gene", "BRAF", "--json"], 285 | capture_output=True, 286 | text=True 287 | ) 288 | data = json.loads(result.stdout) 289 | print(f"Found {len(data['articles'])} real articles") 290 | ``` 291 | 292 | **Note**: The Streamable HTTP tests in our test suite verify this functionality works correctly. If you encounter connection issues, ensure: 293 | 294 | 1. The server is fully started before connecting 295 | 2. You're using pydantic-ai >= 0.6.9 296 | 3. The port is not blocked by a firewall 297 | 298 | ### Complete Working Example with Real Results 299 | 300 | Here's a complete example that connects to BioMCP via Streamable HTTP and retrieves real biomedical data: 301 | 302 | ```python 303 | #!/usr/bin/env python3 304 | """ 305 | Working example of Pydantic AI + BioMCP with Streamable HTTP. 306 | This will get real search results from your BioMCP server. 
307 | 308 | Requires one of: 309 | - export OPENAI_API_KEY='your-key' 310 | - export ANTHROPIC_API_KEY='your-key' 311 | - export GROQ_API_KEY='your-key' (free tier at console.groq.com) 312 | """ 313 | 314 | import asyncio 315 | import os 316 | from pydantic_ai import Agent 317 | from pydantic_ai.mcp import MCPServerStreamableHTTP 318 | 319 | 320 | async def main(): 321 | # Server configuration 322 | SERVER_URL = "http://localhost:8000/mcp" # Adjust port as needed 323 | 324 | # Detect which API key is available 325 | if os.getenv("OPENAI_API_KEY"): 326 | model = "openai:gpt-4o-mini" 327 | print("Using OpenAI GPT-4o-mini") 328 | elif os.getenv("ANTHROPIC_API_KEY"): 329 | model = "anthropic:claude-3-haiku-20240307" 330 | print("Using Claude 3 Haiku") 331 | elif os.getenv("GROQ_API_KEY"): 332 | model = "groq:llama-3.1-70b-versatile" # Free tier available 333 | print("Using Groq Llama 3.1") 334 | else: 335 | print("No API key found! Please set OPENAI_API_KEY, ANTHROPIC_API_KEY, or GROQ_API_KEY") 336 | return 337 | 338 | # Connect to BioMCP server 339 | server = MCPServerStreamableHTTP(SERVER_URL) 340 | agent = Agent(model, toolsets=[server]) 341 | 342 | async with agent: 343 | print("Connected to BioMCP!\n") 344 | 345 | # Search for articles (includes cBioPortal data for genes) 346 | result = await agent.run( 347 | "Search for 2 recent articles about BRAF V600E mutations in melanoma. " 348 | "List the title and first author for each." 349 | ) 350 | print("Article Search Results:") 351 | print(result.output) 352 | print("\n" + "="*60 + "\n") 353 | 354 | # Search for clinical trials 355 | result2 = await agent.run( 356 | "Find 2 clinical trials for melanoma with BRAF mutations " 357 | "that are currently recruiting. Show NCT ID and title." 358 | ) 359 | print("Clinical Trial Results:") 360 | print(result2.output) 361 | print("\n" + "="*60 + "\n") 362 | 363 | # Search for variant information 364 | result3 = await agent.run( 365 | "Search for pathogenic TP53 variants. 
Show 2 examples." 366 | ) 367 | print("Variant Search Results:") 368 | print(result3.output) 369 | 370 | 371 | if __name__ == "__main__": 372 | # Start your BioMCP server first: 373 | # biomcp run --mode streamable_http --port 8000 374 | 375 | asyncio.run(main()) 376 | ``` 377 | 378 | **Running this example:** 379 | 380 | 1. Start the BioMCP server: 381 | 382 | ```bash 383 | biomcp run --mode streamable_http --port 8000 384 | ``` 385 | 386 | 2. Set your API key (choose one): 387 | 388 | ```bash 389 | export OPENAI_API_KEY='your-key' # OpenAI 390 | export ANTHROPIC_API_KEY='your-key' # Anthropic 391 | export GROQ_API_KEY='your-key' # Groq (free tier available) 392 | ``` 393 | 394 | 3. Run the script: 395 | 396 | ```bash 397 | python biomcp_example.py 398 | ``` 399 | 400 | This will return actual biomedical data from PubMed, ClinicalTrials.gov, and variant databases! 401 | 402 | ## Using BioMCP Tools with Pydantic AI 403 | 404 | Once connected, you can use BioMCP's biomedical research tools: 405 | 406 | ```python 407 | import os 408 | from pydantic_ai import Agent 409 | from pydantic_ai.mcp import MCPServerStdio 410 | 411 | async def biomedical_research_example(): 412 | server = MCPServerStdio( 413 | "python", 414 | args=["-m", "biomcp", "run", "--mode", "stdio"] 415 | ) 416 | 417 | # Choose model based on available API key 418 | if os.getenv("OPENAI_API_KEY"): 419 | model = "openai:gpt-4o-mini" 420 | elif os.getenv("GROQ_API_KEY"): 421 | model = "groq:llama-3.1-70b-versatile" # Free tier available 422 | else: 423 | raise ValueError("Please set OPENAI_API_KEY or GROQ_API_KEY") 424 | 425 | agent = Agent(model, toolsets=[server]) 426 | 427 | async with agent: 428 | # Important: Always use the think tool first for complex queries 429 | result = await agent.run(""" 430 | First use the think tool to plan your approach, then: 431 | 1. Search for articles about immunotherapy resistance in melanoma 432 | 2. Find clinical trials testing combination therapies 433 | 3. 
Look up genetic markers associated with treatment response 434 | """) 435 | 436 | print(result.output) 437 | ``` 438 | 439 | ## Production Deployment Considerations 440 | 441 | For production deployments: 442 | 443 | 1. **Use STDIO mode** for local development or when running in containerized environments where the agent and BioMCP can run in the same container 444 | 2. **Use Streamable HTTP mode** when you need HTTP-based communication between separate services (recommended for production) 445 | 3. **Both `worker` and `streamable_http` modes** now use the same underlying streamable HTTP transport 446 | 4. **Require a real LLM model** - TestModel won't work for production as it only returns mock data 447 | 5. **Consider API costs** - Use cheaper models like `gpt-4o-mini` or Groq's free tier for testing 448 | 6. **Implement proper error handling** and retry logic for network failures 449 | 7. **Set appropriate timeouts** for long-running biomedical searches 450 | 8. **Cache frequently accessed data** to reduce API calls to backend services 451 | 452 | ### Important Notes 453 | 454 | - **Real LLM required for results**: TestModel is only for testing connections - use a real LLM (OpenAI, Anthropic, Groq) to get actual biomedical data 455 | - **SSE transport is deprecated**: The old SSE-based transport (`/sse` endpoint) has been removed in favor of streamable HTTP 456 | - **Worker mode now uses streamable HTTP**: The `worker` mode has been updated to use streamable HTTP transport internally 457 | - **Health endpoint**: The `/health` endpoint is available in both HTTP modes for monitoring 458 | - **Free tier option**: Groq offers a free API tier at console.groq.com for testing without costs 459 | 460 | ## Migration Guide from SSE to Streamable HTTP 461 | 462 | If you're upgrading from an older version that used SSE transport: 463 | 464 | ### Code Changes 465 | 466 | ```python 467 | # Old code (deprecated) 468 | from pydantic_ai.mcp import MCPServerSSE 469 | server = 
MCPServerSSE("http://localhost:8000/sse") 470 | 471 | # New code (recommended) 472 | from pydantic_ai.mcp import MCPServerStreamableHTTP 473 | server = MCPServerStreamableHTTP("http://localhost:8000/mcp") 474 | ``` 475 | 476 | ### Server Command Changes 477 | 478 | ```bash 479 | # Old: SSE endpoints were at /sse 480 | # biomcp run --mode worker # Used to expose /sse endpoint 481 | 482 | # New: Both modes now use /mcp endpoint with streamable HTTP 483 | biomcp run --mode worker # Now uses /mcp with streamable HTTP 484 | biomcp run --mode streamable_http # Also uses /mcp with streamable HTTP 485 | ``` 486 | 487 | ### Key Differences 488 | 489 | 1. **Endpoint Change**: `/sse` → `/mcp` 490 | 2. **Protocol**: Server-Sent Events → Streamable HTTP (supports both JSON and SSE) 491 | 3. **Client Library**: `MCPServerSSE` → `MCPServerStreamableHTTP` 492 | 4. **Compatibility**: Requires pydantic-ai >= 0.6.9 for `MCPServerStreamableHTTP` 493 | 494 | ## Next Steps 495 | 496 | - Review the [MCP Tools Reference](../user-guides/02-mcp-tools-reference.md) for available biomedical research tools 497 | - See [CLI Guide](../user-guides/01-command-line-interface.md) for more server configuration options 498 | - Check [Transport Protocol Guide](../developer-guides/04-transport-protocol.md) for detailed protocol information 499 | 500 | ## Support 501 | 502 | If you continue to experience issues: 503 | 504 | 1. Verify your BioMCP version: `biomcp --version` 505 | 2. Check server logs for error messages 506 | 3. 
Open an issue on [GitHub](https://github.com/genomoncology/biomcp/issues) with: 507 | - Your BioMCP version 508 | - Server startup command 509 | - Complete error messages 510 | - Minimal reproduction code 511 | ``` -------------------------------------------------------------------------------- /docs/how-to-guides/04-predict-variant-effects-with-alphagenome.md: -------------------------------------------------------------------------------- ```markdown 1 | # How to Predict Variant Effects with AlphaGenome 2 | 3 | This guide demonstrates how to use Google DeepMind's AlphaGenome to predict regulatory effects of genetic variants on gene expression, chromatin accessibility, and splicing. 4 | 5 | ## Overview 6 | 7 | AlphaGenome predicts how DNA variants affect: 8 | 9 | - **Gene Expression**: Log-fold changes in nearby genes 10 | - **Chromatin Accessibility**: ATAC-seq/DNase-seq signal changes 11 | - **Splicing**: Effects on splice sites and exon inclusion 12 | - **Regulatory Elements**: Impact on enhancers, promoters, and TFBS 13 | - **3D Chromatin**: Changes in chromatin interactions 14 | 15 | For technical details on the AlphaGenome integration, see the [AlphaGenome API Reference](../backend-services-reference/07-alphagenome.md). 16 | 17 | ## Setup and API Key 18 | 19 | ### Get Your API Key 20 | 21 | 1. Visit [AlphaGenome Portal](https://deepmind.google.com/science/alphagenome) 22 | 2. Register for non-commercial use 23 | 3. Receive API key via email 24 | 25 | For detailed setup instructions, see [Authentication and API Keys](../getting-started/03-authentication-and-api-keys.md#alphagenome). 26 | 27 | ### Configure API Key 28 | 29 | **Option 1: Environment Variable (Personal Use)** 30 | 31 | ```bash 32 | export ALPHAGENOME_API_KEY="your-key-here" 33 | ``` 34 | 35 | **Option 2: Per-Request (AI Assistants)** 36 | 37 | ``` 38 | "Predict effects of BRAF V600E. 
My AlphaGenome API key is YOUR_KEY_HERE" 39 | ``` 40 | 41 | **Option 3: Configuration File** 42 | 43 | ```bash 44 | # .env file 45 | ALPHAGENOME_API_KEY=your-key-here 46 | ``` 47 | 48 | ### Install AlphaGenome (Optional) 49 | 50 | For local predictions: 51 | 52 | ```bash 53 | git clone https://github.com/google-deepmind/alphagenome.git 54 | cd alphagenome && pip install . 55 | ``` 56 | 57 | ## Basic Variant Prediction 58 | 59 | ### Simple Prediction 60 | 61 | Predict effects of BRAF V600E mutation: 62 | 63 | ```bash 64 | # CLI 65 | biomcp variant predict chr7 140753336 A T 66 | 67 | # Python 68 | result = await client.variants.predict( 69 | chromosome="chr7", 70 | position=140753336, 71 | reference="A", 72 | alternate="T" 73 | ) 74 | 75 | # MCP Tool 76 | result = await alphagenome_predictor( 77 | chromosome="chr7", 78 | position=140753336, 79 | reference="A", 80 | alternate="T" 81 | ) 82 | ``` 83 | 84 | ### Understanding Results 85 | 86 | ```python 87 | # Gene expression changes 88 | for gene in result.gene_expression: 89 | print(f"{gene.name}: {gene.log2_fold_change}") 90 | # Positive = increased expression 91 | # Negative = decreased expression 92 | # |value| > 1.0 = strong effect 93 | 94 | # Chromatin accessibility 95 | for region in result.chromatin: 96 | print(f"{region.type}: {region.change}") 97 | # Positive = more open chromatin 98 | # Negative = more closed chromatin 99 | 100 | # Splicing effects 101 | for splice in result.splicing: 102 | print(f"{splice.event}: {splice.delta_psi}") 103 | # PSI = Percent Spliced In 104 | # Positive = increased inclusion 105 | ``` 106 | 107 | ## Tissue-Specific Predictions 108 | 109 | ### Single Tissue Analysis 110 | 111 | Predict effects in specific tissues using UBERON terms: 112 | 113 | ```python 114 | # Breast tissue analysis 115 | result = await alphagenome_predictor( 116 | chromosome="chr17", 117 | position=41246481, 118 | reference="G", 119 | alternate="A", 120 | tissue_types=["UBERON:0000310"] # breast 121 | ) 122
| 123 | # Common tissue codes: 124 | # UBERON:0000310 - breast 125 | # UBERON:0002107 - liver 126 | # UBERON:0002367 - prostate 127 | # UBERON:0000955 - brain 128 | # UBERON:0002048 - lung 129 | # UBERON:0001155 - colon 130 | ``` 131 | 132 | ### Multi-Tissue Comparison 133 | 134 | Compare effects across tissues: 135 | 136 | ```python 137 | tissues = [ 138 | "UBERON:0000310", # breast 139 | "UBERON:0002107", # liver 140 | "UBERON:0002048" # lung 141 | ] 142 | 143 | results = {} 144 | for tissue in tissues: 145 | results[tissue] = await alphagenome_predictor( 146 | chromosome="chr17", 147 | position=41246481, 148 | reference="G", 149 | alternate="A", 150 | tissue_types=[tissue] 151 | ) 152 | 153 | # Compare gene expression across tissues 154 | for tissue, result in results.items(): 155 | print(f"\n{tissue}:") 156 | for gene in result.gene_expression[:3]: 157 | print(f" {gene.name}: {gene.log2_fold_change}") 158 | ``` 159 | 160 | ## Analysis Window Sizes 161 | 162 | ### Choosing the Right Interval 163 | 164 | Different interval sizes capture different regulatory effects: 165 | 166 | ```python 167 | # Short-range (promoter effects) 168 | result_2kb = await alphagenome_predictor( 169 | chromosome="chr7", 170 | position=140753336, 171 | reference="A", 172 | alternate="T", 173 | interval_size=2048 # 2kb 174 | ) 175 | 176 | # Medium-range (enhancer-promoter) 177 | result_128kb = await alphagenome_predictor( 178 | chromosome="chr7", 179 | position=140753336, 180 | reference="A", 181 | alternate="T", 182 | interval_size=131072 # 128kb (default) 183 | ) 184 | 185 | # Long-range (TAD-level effects) 186 | result_1mb = await alphagenome_predictor( 187 | chromosome="chr7", 188 | position=140753336, 189 | reference="A", 190 | alternate="T", 191 | interval_size=1048576 # 1Mb 192 | ) 193 | ``` 194 | 195 | **Interval Size Guide:** 196 | 197 | - **2kb**: Promoter variants, TSS mutations 198 | - **16kb**: Local regulatory elements 199 | - **128kb**: Enhancer-promoter interactions 
(default) 200 | - **512kb**: Long-range regulatory 201 | - **1Mb**: TAD boundaries, super-enhancers 202 | 203 | ## Clinical Workflows 204 | 205 | ### Workflow 1: VUS (Variant of Uncertain Significance) Analysis 206 | 207 | ```python 208 | async def analyze_vus(chromosome: str, position: int, ref: str, alt: str): 209 | # Step 1: Think about the analysis 210 | await think( 211 | thought=f"Analyzing VUS at {chromosome}:{position} {ref}>{alt}", 212 | thoughtNumber=1 213 | ) 214 | 215 | # Step 2: Get variant annotations 216 | variant_id = f"{chromosome}:g.{position}{ref}>{alt}" 217 | try: 218 | known_variant = await variant_getter(variant_id) 219 | if known_variant.clinical_significance: 220 | return f"Already classified: {known_variant.clinical_significance}" 221 | except: 222 | pass # Variant not in databases 223 | 224 | # Step 3: Predict regulatory effects 225 | prediction = await alphagenome_predictor( 226 | chromosome=chromosome, 227 | position=position, 228 | reference=ref, 229 | alternate=alt, 230 | interval_size=131072 231 | ) 232 | 233 | # Step 4: Analyze impact 234 | high_impact_genes = [ 235 | g for g in prediction.gene_expression 236 | if abs(g.log2_fold_change) > 1.0 237 | ] 238 | 239 | # Step 5: Search literature 240 | if high_impact_genes: 241 | gene_symbols = [g.name for g in high_impact_genes[:3]] 242 | articles = await article_searcher( 243 | genes=gene_symbols, 244 | keywords=["pathogenic", "disease", "mutation"] 245 | ) 246 | 247 | return { 248 | "variant": f"{chromosome}:{position} {ref}>{alt}", 249 | "high_impact_genes": high_impact_genes, 250 | "regulatory_assessment": assess_regulatory_impact(prediction), 251 | "literature_support": len(articles) if high_impact_genes else 0 252 | } 253 | 254 | def assess_regulatory_impact(prediction): 255 | """Classify regulatory impact severity""" 256 | max_expression_change = max( 257 | abs(g.log2_fold_change) for g in prediction.gene_expression 258 | ) if prediction.gene_expression else 0 259 | 260 | if
max_expression_change > 2.0: 261 | return "HIGH - Strong regulatory effect" 262 | elif max_expression_change > 1.0: 263 | return "MODERATE - Significant regulatory effect" 264 | elif max_expression_change > 0.5: 265 | return "LOW - Mild regulatory effect" 266 | else: 267 | return "MINIMAL - No significant regulatory effect" 268 | ``` 269 | 270 | ### Workflow 2: Non-coding Variant Prioritization 271 | 272 | ```python 273 | async def prioritize_noncoding_variants(variants: list[dict], disease_genes: list[str]): 274 | """Rank non-coding variants by predicted impact on disease genes""" 275 | 276 | results = [] 277 | 278 | for variant in variants: 279 | # Predict effects 280 | prediction = await alphagenome_predictor( 281 | chromosome=variant["chr"], 282 | position=variant["pos"], 283 | reference=variant["ref"], 284 | alternate=variant["alt"] 285 | ) 286 | 287 | # Check impact on disease genes 288 | disease_impact = {} 289 | for gene in prediction.gene_expression: 290 | if gene.name in disease_genes: 291 | disease_impact[gene.name] = gene.log2_fold_change 292 | 293 | # Calculate priority score 294 | if disease_impact: 295 | max_impact = max(abs(v) for v in disease_impact.values()) 296 | results.append({ 297 | "variant": variant, 298 | "disease_genes_affected": disease_impact, 299 | "priority_score": max_impact, 300 | "chromatin_changes": len([c for c in prediction.chromatin if c.change > 0.5]) 301 | }) 302 | 303 | # Sort by priority 304 | results.sort(key=lambda x: x["priority_score"], reverse=True) 305 | return results 306 | 307 | # Example usage 308 | variants_to_test = [ 309 | {"chr": "chr17", "pos": 41246000, "ref": "A", "alt": "G"}, 310 | {"chr": "chr17", "pos": 41246500, "ref": "C", "alt": "T"}, 311 | {"chr": "chr17", "pos": 41247000, "ref": "G", "alt": "A"} 312 | ] 313 | 314 | breast_cancer_genes = ["BRCA1", "BRCA2", "TP53", "PTEN"] 315 | prioritized = await prioritize_noncoding_variants(variants_to_test, breast_cancer_genes) 316 | ``` 317 | 318 | ### Workflow 3: 
Splicing Analysis 319 | 320 | ```python 321 | async def analyze_splicing_variant(gene: str, exon: int, variant_pos: int, ref: str, alt: str): 322 | """Analyze potential splicing effects of a variant""" 323 | 324 | # Get gene information 325 | gene_info = await gene_getter(gene) 326 | chromosome = f"chr{gene_info.genomic_location.chr}" 327 | 328 | # Predict splicing effects 329 | prediction = await alphagenome_predictor( 330 | chromosome=chromosome, 331 | position=variant_pos, 332 | reference=ref, 333 | alternate=alt, 334 | interval_size=16384 # Smaller window for splicing 335 | ) 336 | 337 | # Analyze splicing predictions 338 | splicing_effects = [] 339 | for event in prediction.splicing: 340 | if abs(event.delta_psi) > 0.1: # 10% change in splicing 341 | splicing_effects.append({ 342 | "type": event.event_type, 343 | "change": event.delta_psi, 344 | "affected_exon": event.exon, 345 | "interpretation": interpret_splicing(event) 346 | }) 347 | 348 | # Search for similar splicing variants 349 | articles = await article_searcher( 350 | genes=[gene], 351 | keywords=[f"exon {exon}", "splicing", "splice site"] 352 | ) 353 | 354 | return { 355 | "variant": f"{gene} exon {exon} {ref}>{alt}", 356 | "splicing_effects": splicing_effects, 357 | "likely_consequence": predict_consequence(splicing_effects), 358 | "literature_precedent": len(articles) 359 | } 360 | 361 | def interpret_splicing(event): 362 | """Interpret splicing changes""" 363 | if event.delta_psi > 0.5: 364 | return "Strong increase in exon inclusion" 365 | elif event.delta_psi > 0.1: 366 | return "Moderate increase in exon inclusion" 367 | elif event.delta_psi < -0.5: 368 | return "Strong exon skipping" 369 | elif event.delta_psi < -0.1: 370 | return "Moderate exon skipping" 371 | else: 372 | return "Minimal splicing change" 373 | ``` 374 | 375 | ## Research Applications 376 | 377 | ### Enhancer Variant Analysis 378 | 379 | ```python 380 | async def analyze_enhancer_variant(chr: str, pos: int, ref: str, alt: 
str, target_gene: str): 381 | """Analyze variant in potential enhancer region""" 382 | 383 | # Use larger window to capture enhancer-promoter interactions 384 | prediction = await alphagenome_predictor( 385 | chromosome=chr, 386 | position=pos, 387 | reference=ref, 388 | alternate=alt, 389 | interval_size=524288 # 512kb 390 | ) 391 | 392 | # Find target gene effect 393 | target_effect = None 394 | for gene in prediction.gene_expression: 395 | if gene.name == target_gene: 396 | target_effect = gene.log2_fold_change 397 | break 398 | 399 | # Analyze chromatin changes 400 | chromatin_opening = sum( 401 | 1 for c in prediction.chromatin 402 | if c.change > 0 and c.type == "enhancer" 403 | ) 404 | 405 | return { 406 | "variant_location": f"{chr}:{pos}", 407 | "target_gene": target_gene, 408 | "expression_change": target_effect, 409 | "enhancer_activity": "increased" if chromatin_opening > 0 else "decreased", 410 | "likely_enhancer": abs(target_effect or 0) > 0.5 and chromatin_opening > 0 411 | } 412 | ``` 413 | 414 | ### Pharmacogenomic Predictions 415 | 416 | ```python 417 | async def predict_drug_response_variant(drug_target: str, variant: dict): 418 | """Predict how variant affects drug target expression""" 419 | 420 | # Get drug information 421 | drug_info = await drug_getter(drug_target) 422 | target_genes = drug_info.targets 423 | 424 | # Predict variant effects 425 | prediction = await alphagenome_predictor( 426 | chromosome=variant["chr"], 427 | position=variant["pos"], 428 | reference=variant["ref"], 429 | alternate=variant["alt"], 430 | tissue_types=["UBERON:0002107"] # liver for drug metabolism 431 | ) 432 | 433 | # Check effects on drug targets 434 | target_effects = {} 435 | for gene in prediction.gene_expression: 436 | if gene.name in target_genes: 437 | target_effects[gene.name] = gene.log2_fold_change 438 | 439 | # Interpret results 440 | if any(abs(effect) > 1.0 for effect in target_effects.values()): 441 | response = "Likely altered drug response" 442 
| elif any(abs(effect) > 0.5 for effect in target_effects.values()): 443 | response = "Possible altered drug response" 444 | else: 445 | response = "Unlikely to affect drug response" 446 | 447 | return { 448 | "drug": drug_target, 449 | "variant": variant, 450 | "target_effects": target_effects, 451 | "prediction": response, 452 | "recommendation": "Consider dose adjustment" if "altered" in response else "Standard dosing" 453 | } 454 | ``` 455 | 456 | ## Best Practices 457 | 458 | ### 1. Validate Input Coordinates 459 | 460 | ```python 461 | # Always use "chr" prefix 462 | chromosome = "chr7" # ✅ Correct 463 | # chromosome = "7" # ❌ Wrong 464 | 465 | # Use 1-based positions (not 0-based) 466 | position = 140753336 # ✅ 1-based 467 | ``` 468 | 469 | ### 2. Handle API Errors Gracefully 470 | 471 | ```python 472 | try: 473 | result = await alphagenome_predictor(...) 474 | except Exception as e: 475 | if "API key" in str(e): 476 | print("Please provide AlphaGenome API key") 477 | elif "Invalid sequence" in str(e): 478 | print("Check chromosome and position") 479 | else: 480 | print(f"Prediction failed: {e}") 481 | ``` 482 | 483 | ### 3. Combine with Other Tools 484 | 485 | ```python 486 | # Complete variant analysis pipeline 487 | async def comprehensive_variant_analysis(variant_id: str): 488 | # 1. Get known annotations 489 | known = await variant_getter(variant_id) 490 | 491 | # 2. Predict regulatory effects 492 | prediction = await alphagenome_predictor( 493 | chromosome=f"chr{known.chr}", 494 | position=known.pos, 495 | reference=known.ref, 496 | alternate=known.alt 497 | ) 498 | 499 | # 3. Search literature 500 | articles = await article_searcher( 501 | variants=[variant_id], 502 | genes=[known.gene.symbol] 503 | ) 504 | 505 | # 4. 
Find relevant trials 506 | trials = await trial_searcher( 507 | other_terms=[known.gene.symbol, "mutation"] 508 | ) 509 | 510 | return { 511 | "annotations": known, 512 | "predictions": prediction, 513 | "literature": articles, 514 | "trials": trials 515 | } 516 | ``` 517 | 518 | ### 4. Interpret Results Appropriately 519 | 520 | ```python 521 | def interpret_expression_change(log2_fc): 522 | """Convert log2 fold change to interpretation""" 523 | if log2_fc > 2.0: 524 | return "Very strong increase (>4x)" 525 | elif log2_fc > 1.0: 526 | return "Strong increase (2-4x)" 527 | elif log2_fc > 0.5: 528 | return "Moderate increase (1.4-2x)" 529 | elif log2_fc < -2.0: 530 | return "Very strong decrease (<0.25x)" 531 | elif log2_fc < -1.0: 532 | return "Strong decrease (0.25-0.5x)" 533 | elif log2_fc < -0.5: 534 | return "Moderate decrease (0.5-0.7x)" 535 | else: 536 | return "Minimal change" 537 | ``` 538 | 539 | ## Limitations and Considerations 540 | 541 | ### Technical Limitations 542 | 543 | - **Human only**: GRCh38 reference genome 544 | - **SNVs only**: No indels or structural variants 545 | - **Exact coordinates**: Must have precise genomic position 546 | - **Sequence context**: Requires reference sequence match 547 | 548 | ### Interpretation Caveats 549 | 550 | - **Predictions not certainties**: Validate with functional studies 551 | - **Context matters**: Cell type, developmental stage affect outcomes 552 | - **Indirect effects**: May miss complex regulatory cascades 553 | - **Population variation**: Individual genetic background influences 554 | 555 | ## Troubleshooting 556 | 557 | ### Common Issues 558 | 559 | **"API key required"** 560 | 561 | - Set environment variable or provide per-request 562 | - Check key validity at AlphaGenome portal 563 | 564 | **"Invalid sequence length"** 565 | 566 | - Verify chromosome format (use "chr" prefix) 567 | - Check position is within chromosome bounds 568 | - Ensure ref/alt are single nucleotides 569 | 570 | **"No results 
returned"** 571 | 572 | - May be no genes in analysis window 573 | - Try larger interval size 574 | - Check if variant is in gene desert 575 | 576 | **Installation issues** 577 | 578 | - Ensure Python 3.10+ 579 | - Try `pip install --upgrade pip` first 580 | - Check for conflicting protobuf versions 581 | 582 | ## Next Steps 583 | 584 | - Explore [comprehensive variant annotations](03-get-comprehensive-variant-annotations.md) 585 | - Learn about [article searches](01-find-articles-and-cbioportal-data.md) for variants 586 | - Set up [logging and monitoring](05-logging-and-monitoring-with-bigquery.md) 587 | ``` -------------------------------------------------------------------------------- /docs/how-to-guides/06-search-nci-organizations-and-interventions.md: -------------------------------------------------------------------------------- ```markdown 1 | # How to Search NCI Organizations and Interventions 2 | 3 | This guide demonstrates how to use BioMCP's NCI-specific tools to search for cancer research organizations, interventions (drugs, devices, procedures), and biomarkers. 
4 | 5 | ## Prerequisites 6 | 7 | All NCI tools require an API key from [api.cancer.gov](https://api.cancer.gov): 8 | 9 | ```bash 10 | # Set as environment variable 11 | export NCI_API_KEY="your-key-here" 12 | 13 | # Or provide per-request in your prompts 14 | "Find cancer centers in Boston, my NCI API key is YOUR_KEY" 15 | ``` 16 | 17 | ## Organization Search and Lookup 18 | 19 | ### Understanding Organization Search 20 | 21 | The NCI Organization database contains: 22 | 23 | - Cancer research centers and hospitals 24 | - Clinical trial sponsors 25 | - Academic institutions 26 | - Pharmaceutical companies 27 | - Government facilities 28 | 29 | ### Basic Organization Search 30 | 31 | Find organizations by name: 32 | 33 | ```bash 34 | # CLI 35 | biomcp organization search --name "MD Anderson" --api-key YOUR_KEY 36 | 37 | # Python 38 | orgs = await nci_organization_searcher( 39 | name="MD Anderson", 40 | api_key="your-key" 41 | ) 42 | 43 | # MCP/AI Assistant 44 | "Search for MD Anderson Cancer Center, my NCI API key is YOUR_KEY" 45 | ``` 46 | 47 | ### Location-Based Search 48 | 49 | **CRITICAL**: Always use city AND state together to avoid Elasticsearch errors! 50 | 51 | ```python 52 | # ✅ CORRECT - City and state together 53 | orgs = await nci_organization_searcher( 54 | city="Houston", 55 | state="TX", 56 | api_key="your-key" 57 | ) 58 | 59 | # ❌ WRONG - Will cause API error 60 | orgs = await nci_organization_searcher( 61 | city="Houston", # Missing state! 62 | api_key="your-key" 63 | ) 64 | 65 | # ❌ WRONG - Will cause API error 66 | orgs = await nci_organization_searcher( 67 | state="TX", # Missing city! 
68 | api_key="your-key" 69 | ) 70 | ``` 71 | 72 | ### Organization Types 73 | 74 | Search by organization type: 75 | 76 | ```python 77 | # Find academic cancer centers 78 | academic_centers = await nci_organization_searcher( 79 | organization_type="Academic", 80 | api_key="your-key" 81 | ) 82 | 83 | # Find pharmaceutical companies 84 | pharma_companies = await nci_organization_searcher( 85 | organization_type="Industry", 86 | api_key="your-key" 87 | ) 88 | 89 | # Find government research facilities 90 | gov_facilities = await nci_organization_searcher( 91 | organization_type="Government", 92 | api_key="your-key" 93 | ) 94 | ``` 95 | 96 | Valid organization types: 97 | 98 | - `Academic` - Universities and medical schools 99 | - `Industry` - Pharmaceutical and biotech companies 100 | - `Government` - NIH, FDA, VA hospitals 101 | - `Community` - Community hospitals and clinics 102 | - `Network` - Research networks and consortiums 103 | - `Other` - Other organization types 104 | 105 | ### Getting Organization Details 106 | 107 | Retrieve complete information about a specific organization: 108 | 109 | ```python 110 | # Get organization by ID 111 | org_details = await nci_organization_getter( 112 | organization_id="NCI-2011-03337", 113 | api_key="your-key" 114 | ) 115 | 116 | # Returns: 117 | # - Full name and aliases 118 | # - Contact information 119 | # - Address and location 120 | # - Associated clinical trials 121 | # - Organization type and status 122 | ``` 123 | 124 | ### Practical Organization Workflows 125 | 126 | #### Find Regional Cancer Centers 127 | 128 | ```python 129 | async def find_cancer_centers_by_region(state: str, cities: list[str]): 130 | """Find all cancer centers in specific cities within a state""" 131 | 132 | all_centers = [] 133 | 134 | for city in cities: 135 | # ALWAYS use city + state together 136 | centers = await nci_organization_searcher( 137 | city=city, 138 | state=state, 139 | organization_type="Academic", 140 | 
api_key=os.getenv("NCI_API_KEY") 141 | ) 142 | all_centers.extend(centers) 143 | 144 | # Remove duplicates 145 | unique_centers = {org['id']: org for org in all_centers} 146 | 147 | return list(unique_centers.values()) 148 | 149 | # Example: Find cancer centers in major Texas cities 150 | texas_centers = await find_cancer_centers_by_region( 151 | state="TX", 152 | cities=["Houston", "Dallas", "San Antonio", "Austin"] 153 | ) 154 | ``` 155 | 156 | #### Find Trial Sponsors 157 | 158 | ```python 159 | async def find_trial_sponsors_by_type(org_type: str, name_filter: str = None): 160 | """Find organizations sponsoring trials""" 161 | 162 | # Search organizations 163 | orgs = await nci_organization_searcher( 164 | name=name_filter, 165 | organization_type=org_type, 166 | api_key=os.getenv("NCI_API_KEY") 167 | ) 168 | 169 | # For each org, get details including trial count 170 | sponsors = [] 171 | for org in orgs[:10]: # Limit to avoid rate limits 172 | details = await nci_organization_getter( 173 | organization_id=org['id'], 174 | api_key=os.getenv("NCI_API_KEY") 175 | ) 176 | if details.get('trial_count', 0) > 0: 177 | sponsors.append(details) 178 | 179 | return sorted(sponsors, key=lambda x: x.get('trial_count', 0), reverse=True) 180 | 181 | # Find pharmaceutical companies with active trials 182 | pharma_sponsors = await find_trial_sponsors_by_type("Industry") 183 | ``` 184 | 185 | ## Intervention Search and Lookup 186 | 187 | ### Understanding Interventions 188 | 189 | Interventions in clinical trials include: 190 | 191 | - **Drugs**: Chemotherapy, targeted therapy, immunotherapy 192 | - **Devices**: Medical devices, diagnostic tools 193 | - **Procedures**: Surgical techniques, radiation protocols 194 | - **Biologicals**: Cell therapies, vaccines, antibodies 195 | - **Behavioral**: Lifestyle interventions, counseling 196 | - **Other**: Dietary supplements, alternative therapies 197 | 198 | ### Drug Search 199 | 200 | Find specific drugs or drug classes: 201 | 202 | 
```bash 203 | # CLI - Find a specific drug 204 | biomcp intervention search --name pembrolizumab --type Drug --api-key YOUR_KEY 205 | 206 | # CLI - Find drug class 207 | biomcp intervention search --name "PD-1 inhibitor" --type Drug --api-key YOUR_KEY 208 | ``` 209 | 210 | ```python 211 | # Python - Search with synonyms 212 | drugs = await nci_intervention_searcher( 213 | name="pembrolizumab", 214 | intervention_type="Drug", 215 | synonyms=True, # Include Keytruda, MK-3475, etc. 216 | api_key="your-key" 217 | ) 218 | 219 | # Search for drug combinations 220 | combos = await nci_intervention_searcher( 221 | name="nivolumab AND ipilimumab", 222 | intervention_type="Drug", 223 | api_key="your-key" 224 | ) 225 | ``` 226 | 227 | ### Device and Procedure Search 228 | 229 | ```python 230 | # Find medical devices 231 | devices = await nci_intervention_searcher( 232 | intervention_type="Device", 233 | name="robot", # Surgical robots 234 | api_key="your-key" 235 | ) 236 | 237 | # Find procedures 238 | procedures = await nci_intervention_searcher( 239 | intervention_type="Procedure", 240 | name="minimally invasive", 241 | api_key="your-key" 242 | ) 243 | 244 | # Find radiation protocols 245 | radiation = await nci_intervention_searcher( 246 | intervention_type="Radiation", 247 | name="proton beam", 248 | api_key="your-key" 249 | ) 250 | ``` 251 | 252 | ### Getting Intervention Details 253 | 254 | ```python 255 | # Get complete intervention information 256 | intervention = await nci_intervention_getter( 257 | intervention_id="INT123456", 258 | api_key="your-key" 259 | ) 260 | 261 | # Returns: 262 | # - Official name and synonyms 263 | # - Intervention type and subtype 264 | # - Mechanism of action (for drugs) 265 | # - FDA approval status 266 | # - Associated clinical trials 267 | # - Manufacturer information 268 | ``` 269 | 270 | ### Practical Intervention Workflows 271 | 272 | #### Drug Development Pipeline 273 | 274 | ```python 275 | async def 
analyze_drug_pipeline(drug_target: str): 276 | """Analyze drugs in development for a specific target""" 277 | 278 | # Search for drugs targeting specific pathway 279 | drugs = await nci_intervention_searcher( 280 | name=drug_target, 281 | intervention_type="Drug", 282 | api_key=os.getenv("NCI_API_KEY") 283 | ) 284 | 285 | pipeline = { 286 | "preclinical": [], 287 | "phase1": [], 288 | "phase2": [], 289 | "phase3": [], 290 | "approved": [] 291 | } 292 | 293 | for drug in drugs: 294 | # Get detailed information 295 | details = await nci_intervention_getter( 296 | intervention_id=drug['id'], 297 | api_key=os.getenv("NCI_API_KEY") 298 | ) 299 | 300 | # Categorize by development stage 301 | if details.get('fda_approved'): 302 | pipeline['approved'].append(details) 303 | else: 304 | # Check associated trials for phase 305 | trial_phases = details.get('trial_phases', []) 306 | if 'PHASE3' in trial_phases: 307 | pipeline['phase3'].append(details) 308 | elif 'PHASE2' in trial_phases: 309 | pipeline['phase2'].append(details) 310 | elif 'PHASE1' in trial_phases: 311 | pipeline['phase1'].append(details) 312 | else: 313 | pipeline['preclinical'].append(details) 314 | 315 | return pipeline 316 | 317 | # Analyze PD-1/PD-L1 inhibitor pipeline 318 | pd1_pipeline = await analyze_drug_pipeline("PD-1 inhibitor") 319 | ``` 320 | 321 | #### Compare Similar Interventions 322 | 323 | ```python 324 | async def compare_interventions(intervention_names: list[str]): 325 | """Compare multiple interventions side by side""" 326 | 327 | comparisons = [] 328 | 329 | for name in intervention_names: 330 | # Search for intervention 331 | results = await nci_intervention_searcher( 332 | name=name, 333 | synonyms=True, 334 | api_key=os.getenv("NCI_API_KEY") 335 | ) 336 | 337 | if results: 338 | # Get detailed info for first match 339 | details = await nci_intervention_getter( 340 | intervention_id=results[0]['id'], 341 | api_key=os.getenv("NCI_API_KEY") 342 | ) 343 | 344 | comparisons.append({ 345 | 
"name": details['name'], 346 | "type": details['type'], 347 | "synonyms": details.get('synonyms', []), 348 | "fda_approved": details.get('fda_approved', False), 349 | "trial_count": len(details.get('trials', [])), 350 | "mechanism": details.get('mechanism_of_action', 'Not specified') 351 | }) 352 | 353 | return comparisons 354 | 355 | # Compare checkpoint inhibitors 356 | comparison = await compare_interventions([ 357 | "pembrolizumab", 358 | "nivolumab", 359 | "atezolizumab", 360 | "durvalumab" 361 | ]) 362 | ``` 363 | 364 | ## Biomarker Search 365 | 366 | ### Understanding Biomarker Types 367 | 368 | The NCI API supports two biomarker types: 369 | 370 | - `reference_gene` - Gene-based biomarkers (e.g., EGFR, BRAF) 371 | - `branch` - Pathway/branch biomarkers 372 | 373 | **Note**: You cannot search by gene symbol directly; use the name parameter. 374 | 375 | ### Basic Biomarker Search 376 | 377 | ```python 378 | # Search for PD-L1 biomarkers 379 | pdl1_biomarkers = await nci_biomarker_searcher( 380 | name="PD-L1", 381 | api_key="your-key" 382 | ) 383 | 384 | # Search for specific biomarker type 385 | gene_biomarkers = await nci_biomarker_searcher( 386 | biomarker_type="reference_gene", 387 | api_key="your-key" 388 | ) 389 | ``` 390 | 391 | ### Biomarker Analysis Workflow 392 | 393 | ```python 394 | async def analyze_trial_biomarkers(disease: str): 395 | """Find biomarkers used in trials for a disease""" 396 | 397 | # Get all biomarkers 398 | all_biomarkers = await nci_biomarker_searcher( 399 | biomarker_type="reference_gene", 400 | api_key=os.getenv("NCI_API_KEY") 401 | ) 402 | 403 | # Filter by disease association 404 | disease_biomarkers = [] 405 | for biomarker in all_biomarkers: 406 | if disease.lower() in str(biomarker).lower(): 407 | disease_biomarkers.append(biomarker) 408 | 409 | # Group by frequency 410 | biomarker_counts = {} 411 | for bio in disease_biomarkers: 412 | name = bio.get('name', 'Unknown') 413 | biomarker_counts[name] = 
biomarker_counts.get(name, 0) + 1 414 | 415 | # Sort by frequency 416 | return sorted( 417 | biomarker_counts.items(), 418 | key=lambda x: x[1], 419 | reverse=True 420 | ) 421 | 422 | # Find most common biomarkers in lung cancer trials 423 | lung_biomarkers = await analyze_trial_biomarkers("lung cancer") 424 | ``` 425 | 426 | ## Combined Workflows 427 | 428 | ### Regional Drug Development Analysis 429 | 430 | ```python 431 | async def analyze_regional_drug_development( 432 | state: str, 433 | cities: list[str], 434 | drug_class: str 435 | ): 436 | """Analyze drug development in a specific region""" 437 | 438 | # Step 1: Find organizations in the region 439 | organizations = [] 440 | for city in cities: 441 | orgs = await nci_organization_searcher( 442 | city=city, 443 | state=state, 444 | organization_type="Industry", 445 | api_key=os.getenv("NCI_API_KEY") 446 | ) 447 | organizations.extend(orgs) 448 | 449 | # Step 2: Find drugs of interest 450 | drugs = await nci_intervention_searcher( 451 | name=drug_class, 452 | intervention_type="Drug", 453 | api_key=os.getenv("NCI_API_KEY") 454 | ) 455 | 456 | # Step 3: Cross-reference trials 457 | regional_development = [] 458 | for drug in drugs[:10]: # Limit for performance 459 | drug_details = await nci_intervention_getter( 460 | intervention_id=drug['id'], 461 | api_key=os.getenv("NCI_API_KEY") 462 | ) 463 | 464 | # Check if any trials are sponsored by regional orgs 465 | for trial in drug_details.get('trials', []): 466 | for org in organizations: 467 | if org['id'] in str(trial): 468 | regional_development.append({ 469 | 'drug': drug_details['name'], 470 | 'organization': org['name'], 471 | 'location': f"{org.get('city', '')}, {org.get('state', '')}", 472 | 'trial': trial 473 | }) 474 | 475 | return regional_development 476 | 477 | # Analyze immunotherapy development in California 478 | ca_immuno = await analyze_regional_drug_development( 479 | state="CA", 480 | cities=["San Francisco", "San Diego", "Los Angeles"], 481 | 
drug_class="immunotherapy" 482 | ) 483 | ``` 484 | 485 | ### Organization to Intervention Pipeline 486 | 487 | ```python 488 | async def org_to_intervention_pipeline(org_name: str): 489 | """Trace from organization to their interventions""" 490 | 491 | # Find organization 492 | orgs = await nci_organization_searcher( 493 | name=org_name, 494 | api_key=os.getenv("NCI_API_KEY") 495 | ) 496 | 497 | if not orgs: 498 | return None 499 | 500 | # Get organization details 501 | org_details = await nci_organization_getter( 502 | organization_id=orgs[0]['id'], 503 | api_key=os.getenv("NCI_API_KEY") 504 | ) 505 | 506 | # Get their trials 507 | org_trials = org_details.get('trials', []) 508 | 509 | # Extract unique interventions 510 | interventions = set() 511 | for trial_id in org_trials[:20]: # Sample trials 512 | trial = await trial_getter( 513 | nct_id=trial_id, 514 | source="nci", 515 | api_key=os.getenv("NCI_API_KEY") 516 | ) 517 | 518 | if trial.get('interventions'): 519 | interventions.update(trial['interventions']) 520 | 521 | # Get details for each intervention 522 | intervention_details = [] 523 | for intervention_name in interventions: 524 | results = await nci_intervention_searcher( 525 | name=intervention_name, 526 | api_key=os.getenv("NCI_API_KEY") 527 | ) 528 | if results: 529 | intervention_details.append(results[0]) 530 | 531 | return { 532 | 'organization': org_details, 533 | 'trial_count': len(org_trials), 534 | 'interventions': intervention_details 535 | } 536 | 537 | # Analyze Genentech's intervention portfolio 538 | genentech_portfolio = await org_to_intervention_pipeline("Genentech") 539 | ``` 540 | 541 | ## Best Practices 542 | 543 | ### 1. Always Use City + State Together 544 | 545 | ```python 546 | # ✅ GOOD - Prevents API errors 547 | await nci_organization_searcher(city="Boston", state="MA") 548 | 549 | # ❌ BAD - Will cause Elasticsearch error 550 | await nci_organization_searcher(city="Boston") 551 | ``` 552 | 553 | ### 2. 
Handle Rate Limits 554 | 555 | ```python 556 | import asyncio 557 | 558 | async def search_with_rate_limit(searches: list): 559 | """Execute searches with rate limiting""" 560 | results = [] 561 | 562 | for search in searches: 563 | result = await search() 564 | results.append(result) 565 | 566 | # Add delay to respect rate limits 567 | await asyncio.sleep(0.1) # 10 requests per second 568 | 569 | return results 570 | ``` 571 | 572 | ### 3. Use Pagination for Large Results 573 | 574 | ```python 575 | async def get_all_organizations(org_type: str): 576 | """Get all organizations of a type using pagination""" 577 | 578 | all_orgs = [] 579 | page = 1 580 | 581 | while True: 582 | orgs = await nci_organization_searcher( 583 | organization_type=org_type, 584 | page=page, 585 | page_size=100, # Maximum allowed 586 | api_key=os.getenv("NCI_API_KEY") 587 | ) 588 | 589 | if not orgs: 590 | break 591 | 592 | all_orgs.extend(orgs) 593 | page += 1 594 | 595 | # Note: Total count may not be available 596 | if len(orgs) < 100: 597 | break 598 | 599 | return all_orgs 600 | ``` 601 | 602 | ### 4. Cache Results 603 | 604 | ```python 605 | import os 606 | 607 | _org_cache: dict[tuple[str, str, str], list] = {} 608 | 609 | async def cached_org_search(city: str, state: str, org_type: str): 610 | """Cache organization searches to reduce API calls""" 611 | key = (city, state, org_type) 612 | if key not in _org_cache: # lru_cache would store the one-shot coroutine, not its result 613 | _org_cache[key] = await nci_organization_searcher( 614 | city=city, state=state, organization_type=org_type, 615 | api_key=os.getenv("NCI_API_KEY") 616 | ) 617 | return _org_cache[key] 618 | ``` 619 | 620 | ## Troubleshooting 621 | 622 | ### Common Errors and Solutions 623 | 624 | 1. **"Search Too Broad" Error** 625 | 626 | - Always use city + state together for location searches 627 | - Add more specific filters (name, type) 628 | - Reduce page_size parameter 629 | 630 | 2. 
**"NCI API key required"** 631 | 632 | - Set NCI_API_KEY environment variable 633 | - Or provide api_key parameter in function calls 634 | - Or include in prompt: "my NCI API key is YOUR_KEY" 635 | 636 | 3. **No Results Found** 637 | 638 | - Check spelling of organization/drug names 639 | - Try partial name matches 640 | - Remove filters and broaden search 641 | - Enable synonyms for intervention searches 642 | 643 | 4. **Rate Limit Exceeded** 644 | - Add delays between requests 645 | - Reduce concurrent requests 646 | - Cache frequently accessed data 647 | - Consider upgrading API key tier 648 | 649 | ### Debugging Tips 650 | 651 | ```python 652 | # Enable debug logging 653 | import logging 654 | logging.basicConfig(level=logging.DEBUG) 655 | 656 | # Test API key 657 | async def test_nci_connection(): 658 | try: 659 | result = await nci_organization_searcher( 660 | name="Mayo", 661 | api_key=os.getenv("NCI_API_KEY") 662 | ) 663 | print(f"✅ API key valid, found {len(result)} results") 664 | except Exception as e: 665 | print(f"❌ API key error: {e}") 666 | 667 | # Check specific organization exists 668 | async def verify_org_id(org_id: str): 669 | try: 670 | org = await nci_organization_getter( 671 | organization_id=org_id, 672 | api_key=os.getenv("NCI_API_KEY") 673 | ) 674 | print(f"✅ Organization found: {org['name']}") 675 | except Exception as e: 676 | print(f"❌ Organization lookup failed for {org_id}: {e}") 677 | ``` 678 | 679 | ## Next Steps 680 | 681 | - Review [NCI prompt examples](../tutorials/nci-prompts.md) for AI assistant usage 682 | - Explore [trial search with biomarkers](02-find-trials-with-nci-and-biothings.md) 683 | - Learn about [variant effect prediction](04-predict-variant-effects-with-alphagenome.md) 684 | - Set up [API authentication](../getting-started/03-authentication-and-api-keys.md) 685 | ``` -------------------------------------------------------------------------------- /tests/tdd/test_router.py: 
-------------------------------------------------------------------------------- ```python 1 | """Comprehensive tests for the unified router module.""" 2 | 3 | import json 4 | from unittest.mock import patch 5 | 6 | import pytest 7 | 8 | from biomcp.exceptions import ( 9 | InvalidDomainError, 10 | InvalidParameterError, 11 | QueryParsingError, 12 | SearchExecutionError, 13 | ) 14 | from biomcp.router import fetch, format_results, search 15 | 16 | 17 | class TestFormatResults: 18 | """Test the format_results function.""" 19 | 20 | def test_format_article_results(self): 21 | """Test formatting article results.""" 22 | results = [ 23 | { 24 | "pmid": "12345", 25 | "title": "Test Article", 26 | "abstract": "This is a test abstract", 27 | # Note: url in input is ignored, always generates PubMed URL 28 | } 29 | ] 30 | 31 | # Mock thinking tracker to prevent reminder 32 | with patch("biomcp.router.get_thinking_reminder", return_value=""): 33 | formatted = format_results(results, "article", 1, 10, 1) 34 | 35 | assert "results" in formatted 36 | assert len(formatted["results"]) == 1 37 | result = formatted["results"][0] 38 | assert result["id"] == "12345" 39 | assert result["title"] == "Test Article" 40 | assert "test abstract" in result["text"] 41 | assert result["url"] == "https://pubmed.ncbi.nlm.nih.gov/12345/" 42 | 43 | def test_format_trial_results_api_v2(self): 44 | """Test formatting trial results with API v2 structure.""" 45 | results = [ 46 | { 47 | "protocolSection": { 48 | "identificationModule": { 49 | "nctId": "NCT12345", 50 | "briefTitle": "Test Trial", 51 | }, 52 | "descriptionModule": { 53 | "briefSummary": "This is a test trial summary" 54 | }, 55 | "statusModule": {"overallStatus": "RECRUITING"}, 56 | "designModule": {"phases": ["PHASE3"]}, 57 | } 58 | } 59 | ] 60 | 61 | # Mock thinking tracker to prevent reminder 62 | with patch("biomcp.router.get_thinking_reminder", return_value=""): 63 | formatted = format_results(results, "trial", 1, 10, 1) 64 | 65 | 
assert "results" in formatted 66 | assert len(formatted["results"]) == 1 67 | result = formatted["results"][0] 68 | assert result["id"] == "NCT12345" 69 | assert result["title"] == "Test Trial" 70 | assert "test trial summary" in result["text"] 71 | assert "NCT12345" in result["url"] 72 | 73 | def test_format_trial_results_legacy(self): 74 | """Test formatting trial results with legacy structure.""" 75 | results = [ 76 | { 77 | "NCT Number": "NCT67890", 78 | "Study Title": "Legacy Trial", 79 | "Brief Summary": "Legacy trial summary", 80 | "Study Status": "COMPLETED", 81 | "Phases": "Phase 2", 82 | } 83 | ] 84 | 85 | # Mock thinking tracker to prevent reminder 86 | with patch("biomcp.router.get_thinking_reminder", return_value=""): 87 | formatted = format_results(results, "trial", 1, 10, 1) 88 | 89 | assert "results" in formatted 90 | assert len(formatted["results"]) == 1 91 | result = formatted["results"][0] 92 | assert result["id"] == "NCT67890" 93 | assert result["title"] == "Legacy Trial" 94 | assert "Legacy trial summary" in result["text"] 95 | 96 | def test_format_variant_results(self): 97 | """Test formatting variant results.""" 98 | results = [ 99 | { 100 | "_id": "chr7:g.140453136A>T", 101 | "dbsnp": {"rsid": "rs121913529"}, 102 | "dbnsfp": {"genename": "BRAF"}, 103 | "clinvar": {"rcv": {"clinical_significance": "Pathogenic"}}, 104 | } 105 | ] 106 | 107 | # Mock thinking tracker to prevent reminder 108 | with patch("biomcp.router.get_thinking_reminder", return_value=""): 109 | formatted = format_results(results, "variant", 1, 10, 1) 110 | 111 | assert "results" in formatted 112 | assert len(formatted["results"]) == 1 113 | result = formatted["results"][0] 114 | assert result["id"] == "chr7:g.140453136A>T" 115 | assert "BRAF" in result["title"] 116 | assert "Pathogenic" in result["text"] 117 | assert "rs121913529" in result["url"] 118 | 119 | def test_format_results_invalid_domain(self): 120 | """Test format_results with invalid domain.""" 121 | with 
pytest.raises(InvalidDomainError) as exc_info: 122 | format_results([], "invalid_domain", 1, 10, 0) 123 | 124 | assert "Unknown domain: invalid_domain" in str(exc_info.value) 125 | 126 | def test_format_results_malformed_data(self): 127 | """Test format_results handles malformed data gracefully.""" 128 | results = [ 129 | {"title": "Good Article", "pmid": "123"}, 130 | None, # Malformed - will be skipped 131 | { 132 | "invalid": "data" 133 | }, # Missing required fields but won't fail (treated as preprint) 134 | ] 135 | 136 | # Mock thinking tracker to prevent reminder 137 | with patch("biomcp.router.get_thinking_reminder", return_value=""): 138 | formatted = format_results(results, "article", 1, 10, 3) 139 | 140 | # Should skip None but include the third (treated as preprint with empty fields) 141 | assert len(formatted["results"]) == 2 142 | assert formatted["results"][0]["id"] == "123" 143 | assert formatted["results"][1]["id"] == "" # Empty ID for invalid data 144 | 145 | 146 | @pytest.mark.asyncio 147 | class TestSearchFunction: 148 | """Test the unified search function.""" 149 | 150 | async def test_search_article_domain(self): 151 | """Test search with article domain.""" 152 | mock_result = json.dumps([ 153 | {"pmid": "123", "title": "Test", "abstract": "Abstract"} 154 | ]) 155 | 156 | with patch( 157 | "biomcp.articles.unified.search_articles_unified" 158 | ) as mock_search: 159 | mock_search.return_value = mock_result 160 | 161 | # Mock thinking tracker to prevent reminder 162 | with patch("biomcp.router.get_thinking_reminder", return_value=""): 163 | result = await search( 164 | query="", 165 | domain="article", 166 | genes="BRAF", 167 | diseases=["cancer"], 168 | page_size=10, 169 | ) 170 | 171 | assert "results" in result 172 | assert len(result["results"]) == 1 173 | assert result["results"][0]["id"] == "123" 174 | 175 | async def test_search_trial_domain(self): 176 | """Test search with trial domain.""" 177 | mock_result = json.dumps({ 178 | 
"studies": [ 179 | { 180 | "protocolSection": { 181 | "identificationModule": {"nctId": "NCT123"}, 182 | } 183 | } 184 | ] 185 | }) 186 | 187 | with patch("biomcp.trials.search.search_trials") as mock_search: 188 | mock_search.return_value = mock_result 189 | 190 | # Mock thinking tracker to prevent reminder 191 | with patch("biomcp.router.get_thinking_reminder", return_value=""): 192 | result = await search( 193 | query="", 194 | domain="trial", 195 | conditions=["cancer"], 196 | phase="Phase 3", 197 | page_size=20, 198 | ) 199 | 200 | assert "results" in result 201 | mock_search.assert_called_once() 202 | 203 | async def test_search_variant_domain(self): 204 | """Test search with variant domain.""" 205 | mock_result = json.dumps([ 206 | {"_id": "rs123", "gene": {"symbol": "BRAF"}} 207 | ]) 208 | 209 | with patch("biomcp.variants.search.search_variants") as mock_search: 210 | mock_search.return_value = mock_result 211 | 212 | # Mock thinking tracker to prevent reminder 213 | with patch("biomcp.router.get_thinking_reminder", return_value=""): 214 | result = await search( 215 | query="", 216 | domain="variant", 217 | genes="BRAF", 218 | significance="pathogenic", 219 | page_size=10, 220 | ) 221 | 222 | assert "results" in result 223 | assert len(result["results"]) == 1 224 | 225 | async def test_search_unified_query(self): 226 | """Test search with unified query language.""" 227 | with patch("biomcp.router._unified_search") as mock_unified: 228 | mock_unified.return_value = { 229 | "results": [{"id": "1", "title": "Test"}] 230 | } 231 | 232 | result = await search( 233 | query="gene:BRAF AND disease:cancer", 234 | max_results_per_domain=20, 235 | ) 236 | 237 | assert "results" in result 238 | mock_unified.assert_called_once_with( 239 | query="gene:BRAF AND disease:cancer", 240 | max_results_per_domain=20, 241 | domains=None, 242 | explain_query=False, 243 | ) 244 | 245 | async def test_search_no_domain_or_query(self): 246 | """Test search without domain or query 
raises error.""" 247 | with pytest.raises(InvalidParameterError) as exc_info: 248 | await search(query="") 249 | 250 | assert "query or domain" in str(exc_info.value) 251 | 252 | async def test_search_invalid_domain(self): 253 | """Test search with invalid domain.""" 254 | with pytest.raises(InvalidDomainError): 255 | await search(query="", domain="invalid_domain") 256 | 257 | async def test_search_get_schema(self): 258 | """Test search with get_schema flag.""" 259 | result = await search(query="", get_schema=True) 260 | 261 | assert "domains" in result 262 | assert "cross_domain_fields" in result 263 | assert "domain_fields" in result 264 | assert isinstance(result["cross_domain_fields"], dict) 265 | 266 | async def test_search_pagination_validation(self): 267 | """Test search with invalid pagination parameters.""" 268 | with pytest.raises(InvalidParameterError) as exc_info: 269 | await search( 270 | query="", 271 | domain="article", 272 | page=0, # Invalid - must be >= 1 273 | page_size=10, 274 | ) 275 | 276 | assert "page" in str(exc_info.value) 277 | 278 | async def test_search_parameter_parsing(self): 279 | """Test parameter parsing for list inputs.""" 280 | mock_result = json.dumps([]) 281 | 282 | with patch( 283 | "biomcp.articles.unified.search_articles_unified" 284 | ) as mock_search: 285 | mock_search.return_value = mock_result 286 | 287 | # Test with JSON array string 288 | await search( 289 | query="", 290 | domain="article", 291 | genes='["BRAF", "KRAS"]', 292 | diseases="cancer,melanoma", # Comma-separated 293 | ) 294 | 295 | # Check the request was parsed correctly 296 | call_args = mock_search.call_args[0][0] 297 | assert call_args.genes == ["BRAF", "KRAS"] 298 | assert call_args.diseases == ["cancer", "melanoma"] 299 | 300 | 301 | @pytest.mark.asyncio 302 | class TestFetchFunction: 303 | """Test the unified fetch function.""" 304 | 305 | async def test_fetch_article(self): 306 | """Test fetching article details.""" 307 | mock_result = json.dumps([ 
308 | { 309 | "pmid": 12345, 310 | "title": "Test Article", 311 | "abstract": "Full abstract", 312 | "full_text": "Full text content", 313 | } 314 | ]) 315 | 316 | with patch("biomcp.articles.fetch.fetch_articles") as mock_fetch: 317 | mock_fetch.return_value = mock_result 318 | 319 | result = await fetch( 320 | domain="article", 321 | id="12345", 322 | ) 323 | 324 | assert result["id"] == "12345" 325 | assert result["title"] == "Test Article" 326 | assert result["text"] == "Full text content" 327 | assert "metadata" in result 328 | 329 | async def test_fetch_article_invalid_pmid(self): 330 | """Test fetching article with invalid identifier.""" 331 | result = await fetch(domain="article", id="not_a_number") 332 | 333 | # Should return an error since "not_a_number" is neither a valid PMID nor DOI 334 | assert "error" in result 335 | assert "Invalid identifier format" in result["error"] 336 | assert "not_a_number" in result["error"] 337 | 338 | async def test_fetch_trial_all_sections(self): 339 | """Test fetching trial with all sections.""" 340 | mock_protocol = json.dumps({ 341 | "title": "Test Trial", 342 | "nct_id": "NCT123", 343 | "brief_summary": "Summary", 344 | }) 345 | mock_locations = json.dumps({"locations": [{"city": "Boston"}]}) 346 | mock_outcomes = json.dumps({ 347 | "outcomes": {"primary_outcomes": ["Outcome1"]} 348 | }) 349 | mock_references = json.dumps({"references": [{"pmid": "456"}]}) 350 | 351 | with ( 352 | patch("biomcp.trials.getter._trial_protocol") as mock_p, 353 | patch("biomcp.trials.getter._trial_locations") as mock_l, 354 | patch("biomcp.trials.getter._trial_outcomes") as mock_o, 355 | patch("biomcp.trials.getter._trial_references") as mock_r, 356 | ): 357 | mock_p.return_value = mock_protocol 358 | mock_l.return_value = mock_locations 359 | mock_o.return_value = mock_outcomes 360 | mock_r.return_value = mock_references 361 | 362 | result = await fetch(domain="trial", id="NCT123", detail="all") 363 | 364 | assert result["id"] == "NCT123" 
async def test_fetch_trial_invalid_detail(self):
    """An unknown trial detail section raises InvalidParameterError."""
    with pytest.raises(InvalidParameterError) as raised:
        await fetch(domain="trial", id="NCT123", detail="invalid_section")

    # The error text is expected to enumerate the accepted values.
    assert "one of:" in str(raised.value)


async def test_fetch_variant(self):
    """Variant fetch exposes TCGA availability and external links."""
    variant_record = {
        "_id": "rs123",
        "gene": {"symbol": "BRAF"},
        "clinvar": {"clinical_significance": "Pathogenic"},
        "tcga": {"cancer_types": {}},
        "external_links": {"dbSNP": "https://example.com"},
    }

    with patch(
        "biomcp.variants.getter.get_variant",
        return_value=json.dumps([variant_record]),
    ):
        fetched = await fetch(domain="variant", id="rs123")

        assert fetched["id"] == "rs123"
        assert "TCGA Data: Available" in fetched["text"]
        assert "external_links" in fetched["metadata"]


async def test_fetch_variant_list_response(self):
    """Variant fetch copes with the API answering with a JSON list."""
    payload = json.dumps([{"_id": "rs123", "gene": {"symbol": "BRAF"}}])

    with patch(
        "biomcp.variants.getter.get_variant", return_value=payload
    ):
        fetched = await fetch(domain="variant", id="rs123")

        assert fetched["id"] == "rs123"


async def test_fetch_invalid_domain(self):
    """An unsupported domain raises InvalidDomainError."""
    with pytest.raises(InvalidDomainError):
        await fetch(domain="invalid", id="123")


async def test_fetch_error_handling(self):
    """A failure in the article backend surfaces as SearchExecutionError."""
    with patch(
        "biomcp.articles.fetch.fetch_articles",
        side_effect=Exception("API Error"),
    ):
        with pytest.raises(SearchExecutionError) as raised:
            await fetch(domain="article", id="123")

        assert "Failed to execute search" in str(raised.value)
async def test_fetch_domain_auto_detection_pmid(self):
    """A purely numeric ID is routed to the article domain."""
    article_json = json.dumps([{"pmid": "12345", "title": "Test"}])

    with patch(
        "biomcp.articles.fetch._article_details", return_value=article_json
    ) as details_mock:
        # No domain given: the numeric ID alone must select "article".
        fetched = await fetch(id="12345")

        assert fetched["id"] == "12345"
        details_mock.assert_called_once()


async def test_fetch_domain_auto_detection_nct(self):
    """An NCT identifier is routed to the trial domain."""
    trial_json = json.dumps({
        "protocolSection": {
            "identificationModule": {"briefTitle": "Test Trial"}
        }
    })

    with patch(
        "biomcp.trials.getter.get_trial", return_value=trial_json
    ) as trial_mock:
        # No domain given: the NCT prefix alone must select "trial".
        fetched = await fetch(id="NCT12345")

        assert "NCT12345" in fetched["url"]
        trial_mock.assert_called()


async def test_fetch_domain_auto_detection_doi(self):
    """A DOI is routed to the article domain."""
    article_json = json.dumps([
        {"doi": "10.1038/nature12345", "title": "Test"}
    ])

    with patch(
        "biomcp.articles.fetch._article_details", return_value=article_json
    ) as details_mock:
        # No domain given: the DOI shape alone must select "article".
        await fetch(id="10.1038/nature12345")

        details_mock.assert_called_once()


async def test_fetch_domain_auto_detection_variant(self):
    """rsIDs and HGVS strings are routed to the variant domain."""
    # rsID form
    with patch(
        "biomcp.variants.getter.get_variant",
        return_value=json.dumps([{"_id": "rs12345"}]),
    ) as variant_mock:
        await fetch(id="rs12345")

        variant_mock.assert_called_once()

    # HGVS notation
    with patch(
        "biomcp.variants.getter.get_variant",
        return_value=json.dumps([{"_id": "chr7:g.140453136A>T"}]),
    ) as variant_mock:
        await fetch(id="chr7:g.140453136A>T")

        variant_mock.assert_called_once()
@pytest.mark.asyncio
class TestUnifiedSearch:
    """Exercise the router's internal _unified_search coroutine."""

    async def test_unified_search_explain_query(self):
        """explain_query=True returns the query plan instead of results."""
        from biomcp.router import _unified_search

        explanation = await _unified_search(
            query="gene:BRAF AND disease:cancer", explain_query=True
        )

        assert "original_query" in explanation
        assert "parsed_structure" in explanation
        assert "routing_plan" in explanation
        assert "schema" in explanation

    async def test_unified_search_execution(self):
        """A normal run returns a list under the "results" key."""
        from biomcp.router import _unified_search

        routed = {
            "articles": json.dumps([{"pmid": "123", "title": "Article 1"}])
        }

        with patch(
            "biomcp.query_router.execute_routing_plan", return_value=routed
        ):
            outcome = await _unified_search(
                query="gene:BRAF", max_results_per_domain=10
            )

            assert "results" in outcome
            assert isinstance(outcome["results"], list)

    async def test_unified_search_parse_error(self):
        """A parser failure is reported as QueryParsingError."""
        from biomcp.router import _unified_search

        with patch(
            "biomcp.query_parser.QueryParser.parse",
            side_effect=Exception("Parse error"),
        ):
            with pytest.raises(QueryParsingError):
                await _unified_search(
                    query="invalid::query", max_results_per_domain=10
                )
-------------------------------------------------------------------------------- ```python 1 | """BioThings API client for unified access to the BioThings suite. 2 | 3 | The BioThings suite (https://biothings.io) provides high-performance biomedical 4 | data APIs including: 5 | - MyGene.info - Gene annotations and information 6 | - MyVariant.info - Genetic variant annotations (existing integration enhanced) 7 | - MyDisease.info - Disease ontology and synonyms 8 | - MyChem.info - Drug/chemical annotations and information 9 | 10 | This module provides a centralized client for interacting with all BioThings APIs, 11 | handling common concerns like error handling, rate limiting, and response parsing. 12 | While MyVariant.info has specialized modules for complex variant operations, this 13 | client provides the base layer for all BioThings API interactions. 14 | """ 15 | 16 | import logging 17 | from typing import Any 18 | from urllib.parse import quote 19 | 20 | from pydantic import BaseModel, Field 21 | 22 | from .. 
import http_client
from ..constants import (
    MYVARIANT_GET_URL,
)

logger = logging.getLogger(__name__)

# BioThings API endpoints
# For each service: the versioned base URL, the search endpoint (/query) and
# the annotation endpoint that is addressed by record ID.
MYGENE_BASE_URL = "https://mygene.info/v3"
MYGENE_QUERY_URL = f"{MYGENE_BASE_URL}/query"
MYGENE_GET_URL = f"{MYGENE_BASE_URL}/gene"

MYDISEASE_BASE_URL = "https://mydisease.info/v1"
MYDISEASE_QUERY_URL = f"{MYDISEASE_BASE_URL}/query"
MYDISEASE_GET_URL = f"{MYDISEASE_BASE_URL}/disease"

MYCHEM_BASE_URL = "https://mychem.info/v1"
MYCHEM_QUERY_URL = f"{MYCHEM_BASE_URL}/query"
MYCHEM_GET_URL = f"{MYCHEM_BASE_URL}/chem"


class GeneInfo(BaseModel):
    """Gene information from MyGene.info.

    The record identifier is read from the ``_id`` key of the response
    (see the alias on ``gene_id``); every other field is optional.
    """

    # Filled from the "_id" key of the API response.
    gene_id: str = Field(alias="_id")
    symbol: str | None = None
    name: str | None = None
    summary: str | None = None
    # Defaults to an empty list when the response carries no aliases.
    # NOTE(review): presumably the API may send a single alias as a bare
    # string rather than a list -- confirm the model accepts that shape.
    alias: list[str] | None = Field(default_factory=list)
    entrezgene: int | str | None = None
    # NOTE(review): declared as one mapping; verify the API never returns
    # a list of mappings here.
    ensembl: dict[str, Any] | None = None
    refseq: dict[str, Any] | None = None
    type_of_gene: str | None = None
    taxid: int | None = None


class DiseaseInfo(BaseModel):
    """Disease information from MyDisease.info.

    The record identifier is read from the ``_id`` key of the response
    (see the alias on ``disease_id``); every other field is optional.
    """

    # Filled from the "_id" key of the API response.
    disease_id: str = Field(alias="_id")
    name: str | None = None
    mondo: dict[str, Any] | None = None
    definition: str | None = None
    # Defaults to an empty list when no synonyms are supplied.
    synonyms: list[str] | None = Field(default_factory=list)
    xrefs: dict[str, Any] | None = None
    phenotypes: list[dict[str, Any]] | None = None


class DrugInfo(BaseModel):
    """Drug/chemical information from MyChem.info.

    The record identifier is read from the ``_id`` key of the response
    (see the alias on ``drug_id``); every other field is optional.
    """

    # Filled from the "_id" key of the API response.
    drug_id: str = Field(alias="_id")
    name: str | None = None
    # Defaults to an empty list when no trade names are supplied.
    tradename: list[str] | None = Field(default_factory=list)
    drugbank_id: str | None = None
    chebi_id: str | None = None
    chembl_id: str | None = None
    pubchem_cid: str | None = None
    # Accepts either the UNII code itself or the raw nested UNII record.
    unii: str | dict[str, Any] | None = None
    inchikey: str | None = None
    formula: str | None = None
    description: str | None = None
    indication: str | None = None
    pharmacology: dict[str, Any] | None = None
    mechanism_of_action: str | None = None
class BioThingsClient:
    """Unified client for BioThings APIs (MyGene, MyVariant, MyDisease, MyChem)."""

    def __init__(self):
        """Initialize the BioThings client."""
        # Reuse the module-level logger for all messages from this client.
        self.logger = logger

    async def get_gene_info(
        self, gene_id_or_symbol: str, fields: list[str] | None = None
    ) -> GeneInfo | None:
        """Get gene information from MyGene.info.

        Entrez IDs (all digits) and Ensembl gene IDs ("ENSG" followed by
        digits) are fetched directly.  Any other value is treated as a gene
        symbol: it is searched for first, and the best hit is then fetched
        by its ``_id``.

        Args:
            gene_id_or_symbol: Gene ID (Entrez, Ensembl) or symbol (e.g., "TP53")
            fields: Optional list of fields to return

        Returns:
            GeneInfo object, or None if the gene is not found or the lookup
            fails (failures are logged as warnings, not raised)
        """
        try:
            is_entrez_id = gene_id_or_symbol.isdigit()
            # Ensembl IDs used to fall through to the ``symbol:`` search
            # below, which only matches gene symbols, so the documented
            # Ensembl support never took the direct-lookup path.
            is_ensembl_id = (
                gene_id_or_symbol.startswith("ENSG")
                and gene_id_or_symbol[4:].isdigit()
            )
            if is_entrez_id or is_ensembl_id:
                return await self._get_gene_by_id(gene_id_or_symbol, fields)

            # For symbols, we need to query first
            query_result = await self._query_gene(gene_id_or_symbol)
            if not query_result:
                return None

            # Get the best match
            gene_id = query_result[0].get("_id")
            if not gene_id:
                return None

            # Now get full details
            return await self._get_gene_by_id(gene_id, fields)

        except Exception as e:
            # Best-effort lookup: report the problem and signal "not found".
            self.logger.warning(
                f"Failed to get gene info for {gene_id_or_symbol}: {e}"
            )
            return None

    async def _query_gene(self, symbol: str) -> list[dict[str, Any]] | None:
        """Query MyGene.info for a gene symbol.

        Args:
            symbol: Gene symbol to search for

        Returns:
            The hits whose taxid is 9606 (human) when there are any,
            otherwise all hits; None when the request reports an error or
            returns nothing
        """
        params = {
            # NOTE(review): the symbol is percent-encoded here; if
            # request_api also encodes query parameters, reserved
            # characters would be encoded twice -- confirm.
            "q": f"symbol:{quote(symbol)}",
            "species": "human",
            "fields": "_id,symbol,name,taxid",
            "size": 5,
        }

        response, error = await http_client.request_api(
            url=MYGENE_QUERY_URL,
            request=params,
            method="GET",
            domain="mygene",
        )

        if error or not response:
            return None

        hits = response.get("hits", [])
        # Filter for human genes (taxid 9606)
        human_hits = [h for h in hits if h.get("taxid") == 9606]
        return human_hits if human_hits else hits
async def _get_gene_by_id(
    self, gene_id: str, fields: list[str] | None = None
) -> GeneInfo | None:
    """Get gene details by ID from MyGene.info.

    Args:
        gene_id: Record ID to fetch from the gene annotation endpoint
        fields: Optional list of fields to return; a default set is
            requested when omitted

    Returns:
        GeneInfo object, or None when the request reports an error,
        returns nothing, or the response cannot be parsed
    """
    if fields is None:
        fields = [
            "symbol",
            "name",
            "summary",
            "alias",
            "type_of_gene",
            "ensembl",
            "refseq",
            "entrezgene",
        ]

    params = {"fields": ",".join(fields)}

    response, error = await http_client.request_api(
        # Escape the ID so it cannot change the URL path, matching the
        # disease and drug lookups in this client.
        url=f"{MYGENE_GET_URL}/{quote(str(gene_id), safe='')}",
        request=params,
        method="GET",
        domain="mygene",
    )

    if error or not response:
        return None

    try:
        return GeneInfo(**response)
    except Exception as e:
        self.logger.warning(f"Failed to parse gene response: {e}")
        return None


async def batch_get_genes(
    self, gene_ids: list[str], fields: list[str] | None = None
) -> list[GeneInfo]:
    """Get multiple genes in a single request.

    Args:
        gene_ids: List of gene IDs or symbols
        fields: Optional list of fields to return

    Returns:
        List of GeneInfo objects; entries flagged "notfound" and entries
        that fail to parse are left out.  Empty when no IDs are given or
        the request fails.
    """
    if not gene_ids:
        return []

    if fields is None:
        fields = ["symbol", "name", "summary", "alias", "type_of_gene"]

    # MyGene supports POST for batch queries
    data = {
        "ids": ",".join(gene_ids),
        "fields": ",".join(fields),
        "species": "human",
    }

    response, error = await http_client.request_api(
        url=MYGENE_GET_URL,
        request=data,
        method="POST",
        domain="mygene",
    )

    if error or not response:
        return []

    # The loop below expects one entry per requested ID; any other shape
    # cannot be interpreted as a batch result.
    if not isinstance(response, list):
        self.logger.warning(
            f"Unexpected batch gene response type: {type(response).__name__}"
        )
        return []

    results = []
    for item in response:
        try:
            if "notfound" not in item:
                results.append(GeneInfo(**item))
        except Exception as e:
            self.logger.warning(f"Failed to parse gene in batch: {e}")
            continue

    return results
async def get_disease_info(
    self, disease_id_or_name: str, fields: list[str] | None = None
) -> DiseaseInfo | None:
    """Look up a disease on MyDisease.info by ontology ID or by name.

    Values starting with a known ontology prefix are fetched as-is; any
    other value is searched as a name and the first hit is fetched.

    Args:
        disease_id_or_name: Disease ID (MONDO, DOID) or name
        fields: Optional list of fields to return

    Returns:
        DiseaseInfo object, or None if nothing is found or the lookup
        fails (failures are logged as warnings)
    """
    ontology_prefixes = ("MONDO:", "DOID:", "OMIM:", "MESH:")
    try:
        if disease_id_or_name.upper().startswith(ontology_prefixes):
            # Already an identifier: no search step required.
            record_id = disease_id_or_name
        else:
            hits = await self._query_disease(disease_id_or_name)
            if not hits:
                return None

            # The first hit is taken as the best match.
            record_id = hits[0].get("_id")
            if not record_id:
                return None

        return await self._get_disease_by_id(record_id, fields)

    except Exception as e:
        self.logger.warning(
            f"Failed to get disease info for {disease_id_or_name}: {e}"
        )
        return None
async def _query_disease(self, name: str) -> list[dict[str, Any]] | None:
    """Query MyDisease.info for a disease name.

    Args:
        name: Free-text disease name to search for

    Returns:
        The "hits" list of the response (possibly empty), or None when
        the request reports an error or returns nothing
    """
    params = {
        # NOTE(review): the name is percent-encoded here; if request_api
        # also encodes query parameters, spaces and other reserved
        # characters would be encoded twice -- confirm.
        "q": quote(name),
        "fields": "_id,name,mondo",
        "size": 10,
    }

    response, error = await http_client.request_api(
        url=MYDISEASE_QUERY_URL,
        request=params,
        method="GET",
        domain="mydisease",
    )

    if error or not response:
        return None

    return response.get("hits", [])


async def _get_disease_by_id(
    self, disease_id: str, fields: list[str] | None = None
) -> DiseaseInfo | None:
    """Get disease details by ID from MyDisease.info.

    The nested ``mondo`` record, when present, supplies the definition
    (if the response has none at top level) and the synonyms.

    Args:
        disease_id: Record ID to fetch from the disease endpoint
        fields: Optional list of fields to return; a default set is
            requested when omitted

    Returns:
        DiseaseInfo object, or None when the request reports an error,
        returns nothing, or the response cannot be parsed
    """
    if fields is None:
        fields = [
            "name",
            "mondo",
            "definition",
            "synonyms",
            "xrefs",
            "phenotypes",
        ]

    params = {"fields": ",".join(fields)}

    response, error = await http_client.request_api(
        url=f"{MYDISEASE_GET_URL}/{quote(disease_id, safe='')}",
        request=params,
        method="GET",
        domain="mydisease",
    )

    if error or not response:
        return None

    try:
        mondo = response.get("mondo")
        if isinstance(mondo, dict):
            # Extract definition from mondo if available
            if "definition" in mondo and "definition" not in response:
                response["definition"] = mondo["definition"]

            # Extract synonyms from mondo
            if "synonym" in mondo:
                mondo_synonyms = mondo["synonym"]
                if isinstance(mondo_synonyms, dict):
                    # Only the exact synonyms are used
                    mondo_synonyms = mondo_synonyms.get("exact", [])
                # A lone synonym delivered as a bare string used to be
                # dropped; wrap it so it is kept.
                if isinstance(mondo_synonyms, str):
                    mondo_synonyms = [mondo_synonyms]
                if isinstance(mondo_synonyms, list):
                    response["synonyms"] = mondo_synonyms

        return DiseaseInfo(**response)
    except Exception as e:
        self.logger.warning(f"Failed to parse disease response: {e}")
        return None
async def get_disease_synonyms(self, disease_id_or_name: str) -> list[str]:
    """Collect alternative names of a disease for query expansion.

    Args:
        disease_id_or_name: Disease ID or name

    Returns:
        Up to five terms, starting with the original input, followed by
        the disease name and its synonyms, without case-insensitive
        duplicates.  Only the input itself when the disease is unknown.
    """
    info = await self.get_disease_info(disease_id_or_name)
    if not info:
        return [disease_id_or_name]

    candidates = [disease_id_or_name]
    if info.name and info.name != disease_id_or_name:
        candidates.append(info.name)
    if info.synonyms:
        candidates.extend(info.synonyms)

    # A dict keeps insertion order, so the first spelling of each
    # lower-cased term is the one that survives.
    first_spelling: dict[str, str] = {}
    for term in candidates:
        first_spelling.setdefault(term.lower(), term)

    # Limit to top 5 to avoid overly broad searches
    return list(first_spelling.values())[:5]
async def get_drug_info(
    self, drug_id_or_name: str, fields: list[str] | None = None
) -> DrugInfo | None:
    """Get drug/chemical information from MyChem.info.

    Values that start with a known identifier prefix are first fetched
    directly.  If that finds nothing, or the value has no such prefix,
    it is searched as a name and the best hit is fetched by its ``_id``.

    Args:
        drug_id_or_name: Drug ID (DrugBank, ChEMBL, etc.) or name
        fields: Optional list of fields to return

    Returns:
        DrugInfo object, or None if not found or the lookup fails
        (failures are logged as warnings, not raised)
    """
    id_prefixes = ("DRUGBANK:", "DB", "CHEMBL", "CHEBI:", "CID")
    try:
        # Check if it's an ID (starts with known prefixes)
        if drug_id_or_name.upper().startswith(id_prefixes):
            drug = await self._get_drug_by_id(drug_id_or_name, fields)
            if drug is not None:
                return drug
            # The prefixes are short enough to match ordinary names too
            # (e.g. "cidofovir" starts with "CID"), so a failed ID lookup
            # falls through to the name search instead of giving up.

        # Otherwise, query by name
        query_result = await self._query_drug(drug_id_or_name)
        if not query_result:
            return None

        # Get the best match
        drug_id = query_result[0].get("_id")
        if not drug_id:
            return None

        # Now get full details
        return await self._get_drug_by_id(drug_id, fields)

    except Exception as e:
        self.logger.warning(
            f"Failed to get drug info for {drug_id_or_name}: {e}"
        )
        return None
async def _query_drug(self, name: str) -> list[dict[str, Any]] | None:
    """Query MyChem.info for a drug name.

    Args:
        name: Free-text drug name to search for

    Returns:
        The hits, best first after boosting hits that carry a drug name,
        or None when the request reports an error or returns nothing
    """
    params = {
        # NOTE(review): the name is percent-encoded here; if request_api
        # also encodes query parameters, reserved characters would be
        # encoded twice -- confirm.
        "q": quote(name),
        "fields": "_id,name,drugbank.name,chebi.name,chembl.pref_name,unii.display_name",
        "size": 10,
    }

    response, error = await http_client.request_api(
        url=MYCHEM_QUERY_URL,
        request=params,
        method="GET",
        domain="mychem",
    )

    if error or not response:
        return None

    hits = response.get("hits", [])

    def nested_value(hit, source, key):
        # Read hit[source][key] only when the nested entry is a mapping,
        # mirroring the isinstance checks of the _extract_* helpers, so
        # an unexpected shape cannot abort the whole lookup.
        entry = hit.get(source)
        return entry.get(key) if isinstance(entry, dict) else None

    # Sort hits to prioritize those with actual drug names
    def score_hit(hit):
        # A missing or null relevance score counts as zero.
        score = hit.get("_score") or 0
        # Boost score if hit has drug name fields
        if nested_value(hit, "drugbank", "name"):
            score += 10
        if nested_value(hit, "chembl", "pref_name"):
            score += 5
        if nested_value(hit, "unii", "display_name"):
            score += 3
        return score

    hits.sort(key=score_hit, reverse=True)
    return hits


async def _get_drug_by_id(
    self, drug_id: str, fields: list[str] | None = None
) -> DrugInfo | None:
    """Get drug details by ID from MyChem.info.

    Args:
        drug_id: Record ID to fetch from the chemical endpoint
        fields: Optional list of fields to return; a default set is
            requested when omitted

    Returns:
        DrugInfo object, or None when the request reports an error,
        returns nothing, or the response cannot be parsed
    """
    if fields is None:
        fields = [
            "name",
            "drugbank",
            "chebi",
            "chembl",
            "pubchem",
            "unii",
            "inchikey",
            "formula",
            "description",
            "indication",
            "pharmacology",
            "mechanism_of_action",
        ]

    params = {"fields": ",".join(fields)}

    response, error = await http_client.request_api(
        url=f"{MYCHEM_GET_URL}/{quote(drug_id, safe='')}",
        request=params,
        method="GET",
        domain="mychem",
    )

    if error or not response:
        return None

    try:
        # Handle array response (multiple results)
        if isinstance(response, list):
            if not response:
                return None
            # Take the first result
            response = response[0]

        # Extract fields from nested structures.  Order matters: each
        # helper only fills "name" when an earlier one left it empty.
        self._extract_drugbank_fields(response)
        self._extract_chebi_fields(response)
        self._extract_chembl_fields(response)
        self._extract_pubchem_fields(response)
        self._extract_unii_fields(response)

        return DrugInfo(**response)
    except Exception as e:
        self.logger.warning(f"Failed to parse drug response: {e}")
        return None
def _extract_drugbank_fields(self, response: dict[str, Any]) -> None:
    """Copy DrugBank details from the nested record to top-level keys.

    Sets ``drugbank_id``, ``name`` (only when not already set),
    ``tradename``, ``indication``, ``mechanism_of_action`` and
    ``description`` on ``response``.  Does nothing when ``response`` has
    no ``drugbank`` mapping.

    Args:
        response: Drug record to update in place
    """
    db = response.get("drugbank")
    if not isinstance(db, dict):
        return

    response["drugbank_id"] = db.get("id")
    response["name"] = response.get("name") or db.get("name")

    # "products" may be one mapping, a list of mappings, or absent/null.
    # Calling .get() on a list or None raised here before, which made the
    # caller discard the entire drug record.
    products = db.get("products")
    if isinstance(products, dict):
        products = [products]
    elif not isinstance(products, list):
        products = []

    tradenames: list[str] = []
    for product in products:
        names = product.get("name") if isinstance(product, dict) else None
        if isinstance(names, str):
            names = [names]
        if not isinstance(names, list):
            continue
        for product_name in names:
            # Keep first-seen order; the same brand is often listed
            # once per product entry.
            if isinstance(product_name, str) and product_name not in tradenames:
                tradenames.append(product_name)
    response["tradename"] = tradenames

    # Prefer the DrugBank text, but keep a value that is already present
    # at top level instead of overwriting it with None.
    for key in ("indication", "mechanism_of_action", "description"):
        response[key] = db.get(key) or response.get(key)
def _extract_chebi_fields(self, response: dict[str, Any]) -> None: 519 | """Extract ChEBI fields from response.""" 520 | if "chebi" in response and isinstance(response["chebi"], dict): 521 | response["chebi_id"] = response["chebi"].get("id") 522 | if not response.get("name"): 523 | response["name"] = response["chebi"].get("name") 524 | 525 | def _extract_chembl_fields(self, response: dict[str, Any]) -> None: 526 | """Extract ChEMBL fields from response.""" 527 | if "chembl" in response and isinstance(response["chembl"], dict): 528 | response["chembl_id"] = response["chembl"].get( 529 | "molecule_chembl_id" 530 | ) 531 | if not response.get("name"): 532 | response["name"] = response["chembl"].get("pref_name") 533 | 534 | def _extract_pubchem_fields(self, response: dict[str, Any]) -> None: 535 | """Extract PubChem fields from response.""" 536 | if "pubchem" in response and isinstance(response["pubchem"], dict): 537 | response["pubchem_cid"] = str(response["pubchem"].get("cid", "")) 538 | 539 | def _extract_unii_fields(self, response: dict[str, Any]) -> None: 540 | """Extract UNII fields from response.""" 541 | if "unii" in response and isinstance(response["unii"], dict): 542 | unii_data = response["unii"] 543 | # Set UNII code 544 | response["unii"] = unii_data.get("unii", "") 545 | # Use display name as drug name if not already set 546 | if not response.get("name") and unii_data.get("display_name"): 547 | response["name"] = unii_data["display_name"] 548 | # Use NCIT description if no description 549 | if not response.get("description") and unii_data.get( 550 | "ncit_description" 551 | ): 552 | response["description"] = unii_data["ncit_description"] 553 | 554 | async def get_variant_info( 555 | self, variant_id: str, fields: list[str] | None = None 556 | ) -> dict[str, Any] | None: 557 | """Get variant information from MyVariant.info. 558 | 559 | This is a wrapper around the existing MyVariant integration. 
560 | 561 | Args: 562 | variant_id: Variant ID (rsID, HGVS) 563 | fields: Optional list of fields to return 564 | 565 | Returns: 566 | Variant data dictionary or None if not found 567 | """ 568 | params = {"fields": "all" if fields is None else ",".join(fields)} 569 | 570 | response, error = await http_client.request_api( 571 | url=f"{MYVARIANT_GET_URL}/{variant_id}", 572 | request=params, 573 | method="GET", 574 | domain="myvariant", 575 | ) 576 | 577 | if error or not response: 578 | return None 579 | 580 | return response 581 | ``` -------------------------------------------------------------------------------- /docs/user-guides/02-mcp-tools-reference.md: -------------------------------------------------------------------------------- ```markdown 1 | # MCP Tools Reference 2 | 3 | BioMCP provides 35 specialized tools for biomedical research through the Model Context Protocol (MCP). This reference covers all available tools, their parameters, and usage patterns. 4 | 5 | ## Related Guides 6 | 7 | - **Conceptual Overview**: [Sequential Thinking with the Think Tool](../concepts/03-sequential-thinking-with-the-think-tool.md) 8 | - **Practical Examples**: See the [How-to Guides](../how-to-guides/01-find-articles-and-cbioportal-data.md) for real-world usage patterns 9 | - **Integration Setup**: [Claude Desktop Integration](../getting-started/02-claude-desktop-integration.md) 10 | 11 | ## Tool Categories 12 | 13 | | Category | Count | Tools | 14 | | ------------------- | ----- | -------------------------------------------------------------- | 15 | | **Core Tools** | 3 | `search`, `fetch`, `think` | 16 | | **Article Tools** | 2 | `article_searcher`, `article_getter` | 17 | | **Trial Tools** | 6 | `trial_searcher`, `trial_getter`, + 4 detail getters | 18 | | **Variant Tools** | 3 | `variant_searcher`, `variant_getter`, `alphagenome_predictor` | 19 | | **BioThings Tools** | 3 | `gene_getter`, `disease_getter`, `drug_getter` | 20 | | **NCI Tools** | 6 | Organization, 
intervention, biomarker, and disease tools | 21 | | **OpenFDA Tools** | 12 | Adverse events, labels, devices, approvals, recalls, shortages | 22 | 23 | ## Core Unified Tools 24 | 25 | ### 1. search 26 | 27 | **Universal search across all biomedical domains with unified query language.** 28 | 29 | ```python 30 | search( 31 | query: str = None, # Unified query syntax 32 | domain: str = None, # Target domain 33 | genes: list[str] = None, # Gene symbols 34 | diseases: list[str] = None, # Disease/condition terms 35 | variants: list[str] = None, # Variant notations 36 | chemicals: list[str] = None, # Drug/chemical names 37 | keywords: list[str] = None, # Additional keywords 38 | conditions: list[str] = None, # Trial conditions 39 | interventions: list[str] = None,# Trial interventions 40 | lat: float = None, # Latitude for trials 41 | long: float = None, # Longitude for trials 42 | page: int = 1, # Page number 43 | page_size: int = 10, # Results per page 44 | api_key: str = None # For NCI domains 45 | ) -> dict 46 | ``` 47 | 48 | **Domains:** `article`, `trial`, `variant`, `gene`, `drug`, `disease`, `nci_organization`, `nci_intervention`, `nci_biomarker`, `nci_disease`, `fda_adverse`, `fda_label`, `fda_device`, `fda_approval`, `fda_recall`, `fda_shortage` 49 | 50 | **Query Language Examples:** 51 | 52 | - `"gene:BRAF AND disease:melanoma"` 53 | - `"drugs.tradename:gleevec"` 54 | - `"gene:TP53 AND (mutation OR variant)"` 55 | 56 | **Usage Examples:** 57 | 58 | ```python 59 | # Domain-specific search 60 | search(domain="article", genes=["BRAF"], diseases=["melanoma"]) 61 | 62 | # Unified query language 63 | search(query="gene:EGFR AND mutation:T790M") 64 | 65 | # Clinical trials by location 66 | search(domain="trial", conditions=["lung cancer"], lat=40.7128, long=-74.0060) 67 | 68 | # FDA adverse events 69 | search(domain="fda_adverse", chemicals=["aspirin"]) 70 | 71 | # FDA drug approvals 72 | search(domain="fda_approval", chemicals=["keytruda"]) 73 | ``` 74 | 75 | ### 2. 
fetch 76 | 77 | **Retrieve detailed information for any biomedical record.** 78 | 79 | ```python 80 | fetch( 81 | id: str, # Record identifier 82 | domain: str = None, # Domain (auto-detected if not provided) 83 | detail: str = None, # Specific section for trials 84 | api_key: str = None # For NCI records 85 | ) -> dict 86 | ``` 87 | 88 | **Supported IDs:** 89 | 90 | - Articles: PMID (e.g., "38768446"), DOI (e.g., "10.1101/2024.01.20.23288905") 91 | - Trials: NCT ID (e.g., "NCT03006926") 92 | - Variants: HGVS, rsID, genomic coordinates 93 | - Genes/Drugs/Diseases: Names or database IDs 94 | - FDA Records: Report IDs, Application Numbers (e.g., "BLA125514"), Recall Numbers, etc. 95 | 96 | **Detail Options for Trials:** `protocol`, `locations`, `outcomes`, `references`, `all` 97 | 98 | **Usage Examples:** 99 | 100 | ```python 101 | # Fetch article by PMID 102 | fetch(id="38768446", domain="article") 103 | 104 | # Fetch trial with specific details 105 | fetch(id="NCT03006926", domain="trial", detail="locations") 106 | 107 | # Auto-detect domain 108 | fetch(id="rs121913529") # Variant 109 | fetch(id="BRAF") # Gene 110 | 111 | # Fetch FDA records 112 | fetch(id="BLA125514", domain="fda_approval") # Drug approval 113 | fetch(id="D-0001-2023", domain="fda_recall") # Drug recall 114 | ``` 115 | 116 | ### 3. think 117 | 118 | **Sequential thinking tool for structured problem-solving.** 119 | 120 | ```python 121 | think( 122 | thought: str, # Current reasoning step 123 | thoughtNumber: int, # Sequential number (1, 2, 3...) 124 | totalThoughts: int = None, # Estimated total thoughts 125 | nextThoughtNeeded: bool = True # Continue thinking? 126 | ) -> str 127 | ``` 128 | 129 | **CRITICAL:** Always use `think` BEFORE any other BioMCP operation! 
130 | 131 | **Usage Pattern:** 132 | 133 | ```python 134 | # Step 1: Problem decomposition 135 | think( 136 | thought="Breaking down query: need to find BRAF inhibitor trials...", 137 | thoughtNumber=1, 138 | nextThoughtNeeded=True 139 | ) 140 | 141 | # Step 2: Search strategy 142 | think( 143 | thought="Will search trials for BRAF V600E melanoma, then articles...", 144 | thoughtNumber=2, 145 | nextThoughtNeeded=True 146 | ) 147 | 148 | # Final step: Synthesis 149 | think( 150 | thought="Ready to synthesize findings from 5 trials and 12 articles...", 151 | thoughtNumber=3, 152 | nextThoughtNeeded=False # Analysis complete 153 | ) 154 | ``` 155 | 156 | ## Article Tools 157 | 158 | ### 4. article_searcher 159 | 160 | **Search PubMed/PubTator3 for biomedical literature.** 161 | 162 | ```python 163 | article_searcher( 164 | chemicals: list[str] = None, 165 | diseases: list[str] = None, 166 | genes: list[str] = None, 167 | keywords: list[str] = None, # Supports OR with "|" 168 | variants: list[str] = None, 169 | include_preprints: bool = True, 170 | include_cbioportal: bool = True, 171 | page: int = 1, 172 | page_size: int = 10 173 | ) -> str 174 | ``` 175 | 176 | **Features:** 177 | 178 | - Automatic cBioPortal integration for gene searches 179 | - Preprint inclusion from bioRxiv/medRxiv 180 | - OR logic in keywords: `"V600E|p.V600E|c.1799T>A"` 181 | 182 | **Example:** 183 | 184 | ```python 185 | # Search with multiple filters 186 | article_searcher( 187 | genes=["BRAF"], 188 | diseases=["melanoma"], 189 | keywords=["resistance|resistant"], 190 | include_cbioportal=True 191 | ) 192 | ``` 193 | 194 | ### 5. article_getter 195 | 196 | **Fetch detailed article information.** 197 | 198 | ```python 199 | article_getter( 200 | pmid: str # PubMed ID, PMC ID, or DOI 201 | ) -> str 202 | ``` 203 | 204 | **Supports:** 205 | 206 | - PubMed IDs: "38768446" 207 | - PMC IDs: "PMC7498215" 208 | - DOIs: "10.1101/2024.01.20.23288905" 209 | 210 | ## Trial Tools 211 | 212 | ### 6. 
trial_searcher 213 | 214 | **Search ClinicalTrials.gov with comprehensive filters.** 215 | 216 | ```python 217 | trial_searcher( 218 | conditions: list[str] = None, 219 | interventions: list[str] = None, 220 | other_terms: list[str] = None, 221 | recruiting_status: str = "ANY", # "OPEN", "CLOSED", "ANY" 222 | phase: str = None, # "PHASE1", "PHASE2", etc. 223 | lat: float = None, # Location-based search 224 | long: float = None, 225 | distance: int = None, # Miles from coordinates 226 | age_group: str = None, # "CHILD", "ADULT", "OLDER_ADULT" 227 | sex: str = None, # "MALE", "FEMALE", "ALL" 228 | study_type: str = None, # "INTERVENTIONAL", "OBSERVATIONAL" 229 | funder_type: str = None, # "NIH", "INDUSTRY", etc. 230 | page: int = 1, 231 | page_size: int = 10 232 | ) -> str 233 | ``` 234 | 235 | **Location Search Example:** 236 | 237 | ```python 238 | # Trials near Boston 239 | trial_searcher( 240 | conditions=["breast cancer"], 241 | lat=42.3601, 242 | long=-71.0589, 243 | distance=50, 244 | recruiting_status="OPEN" 245 | ) 246 | ``` 247 | 248 | ### 7-11. Trial Detail Getters 249 | 250 | ```python 251 | # Get complete trial information 252 | trial_getter(nct_id: str) -> str 253 | 254 | # Get specific sections 255 | trial_protocol_getter(nct_id: str) -> str # Core protocol info 256 | trial_locations_getter(nct_id: str) -> str # Sites and contacts 257 | trial_outcomes_getter(nct_id: str) -> str # Outcome measures 258 | trial_references_getter(nct_id: str) -> str # Publications 259 | ``` 260 | 261 | ## Variant Tools 262 | 263 | ### 12. 
variant_searcher 264 | 265 | **Search MyVariant.info for genetic variants.** 266 | 267 | ```python 268 | variant_searcher( 269 | gene: str = None, 270 | hgvs: str = None, 271 | hgvsp: str = None, # Protein HGVS 272 | hgvsc: str = None, # Coding DNA HGVS 273 | rsid: str = None, 274 | region: str = None, # "chr7:140753336-140753337" 275 | significance: str = None, # Clinical significance 276 | frequency_min: float = None, 277 | frequency_max: float = None, 278 | cadd_score_min: float = None, 279 | sift_prediction: str = None, 280 | polyphen_prediction: str = None, 281 | sources: list[str] = None, 282 | include_cbioportal: bool = True, 283 | page: int = 1, 284 | page_size: int = 10 285 | ) -> str 286 | ``` 287 | 288 | **Significance Options:** `pathogenic`, `likely_pathogenic`, `uncertain_significance`, `likely_benign`, `benign` 289 | 290 | **Example:** 291 | 292 | ```python 293 | # Find rare pathogenic BRCA1 variants 294 | variant_searcher( 295 | gene="BRCA1", 296 | significance="pathogenic", 297 | frequency_max=0.001, 298 | cadd_score_min=20 299 | ) 300 | ``` 301 | 302 | ### 13. variant_getter 303 | 304 | **Fetch comprehensive variant details.** 305 | 306 | ```python 307 | variant_getter( 308 | variant_id: str, # HGVS, rsID, or MyVariant ID 309 | include_external: bool = True # Include TCGA, 1000 Genomes 310 | ) -> str 311 | ``` 312 | 313 | ### 14. 
alphagenome_predictor 314 | 315 | **Predict variant effects using Google DeepMind's AlphaGenome.** 316 | 317 | ```python 318 | alphagenome_predictor( 319 | chromosome: str, # e.g., "chr7" 320 | position: int, # 1-based position 321 | reference: str, # Reference allele 322 | alternate: str, # Alternate allele 323 | interval_size: int = 131072, # Analysis window 324 | tissue_types: list[str] = None, # UBERON terms 325 | significance_threshold: float = 0.5, 326 | api_key: str = None # AlphaGenome API key 327 | ) -> str 328 | ``` 329 | 330 | **Requires:** AlphaGenome API key (environment variable or per-request) 331 | 332 | **Tissue Examples:** 333 | 334 | - `UBERON:0002367` - prostate gland 335 | - `UBERON:0001155` - colon 336 | - `UBERON:0002048` - lung 337 | 338 | **Example:** 339 | 340 | ```python 341 | # Predict BRAF V600E effects 342 | alphagenome_predictor( 343 | chromosome="chr7", 344 | position=140753336, 345 | reference="A", 346 | alternate="T", 347 | tissue_types=["UBERON:0002367"], # prostate 348 | api_key="your-key" 349 | ) 350 | ``` 351 | 352 | ## BioThings Tools 353 | 354 | ### 15. gene_getter 355 | 356 | **Get gene information from MyGene.info.** 357 | 358 | ```python 359 | gene_getter( 360 | gene_id_or_symbol: str # Gene symbol or Entrez ID 361 | ) -> str 362 | ``` 363 | 364 | **Returns:** Official name, aliases, summary, genomic location, database links 365 | 366 | ### 16. disease_getter 367 | 368 | **Get disease information from MyDisease.info.** 369 | 370 | ```python 371 | disease_getter( 372 | disease_id_or_name: str # Disease name or ontology ID 373 | ) -> str 374 | ``` 375 | 376 | **Returns:** Definition, synonyms, MONDO/DOID IDs, associated phenotypes 377 | 378 | ### 17. 
drug_getter 379 | 380 | **Get drug/chemical information from MyChem.info.** 381 | 382 | ```python 383 | drug_getter( 384 | drug_id_or_name: str # Drug name or database ID 385 | ) -> str 386 | ``` 387 | 388 | **Returns:** Chemical structure, mechanism, indications, trade names, identifiers 389 | 390 | ## NCI-Specific Tools 391 | 392 | All NCI tools require an API key from [api.cancer.gov](https://api.cancer.gov). 393 | 394 | ### 18-19. Organization Tools 395 | 396 | ```python 397 | # Search organizations 398 | nci_organization_searcher( 399 | name: str = None, 400 | organization_type: str = None, 401 | city: str = None, # Must use with state 402 | state: str = None, # Must use with city 403 | api_key: str = None 404 | ) -> str 405 | 406 | # Get organization details 407 | nci_organization_getter( 408 | organization_id: str, 409 | api_key: str = None 410 | ) -> str 411 | ``` 412 | 413 | ### 20-21. Intervention Tools 414 | 415 | ```python 416 | # Search interventions 417 | nci_intervention_searcher( 418 | name: str = None, 419 | intervention_type: str = None, # "Drug", "Device", etc. 420 | synonyms: bool = True, 421 | api_key: str = None 422 | ) -> str 423 | 424 | # Get intervention details 425 | nci_intervention_getter( 426 | intervention_id: str, 427 | api_key: str = None 428 | ) -> str 429 | ``` 430 | 431 | ### 22. Biomarker Search 432 | 433 | ```python 434 | nci_biomarker_searcher( 435 | name: str = None, 436 | biomarker_type: str = None, 437 | api_key: str = None 438 | ) -> str 439 | ``` 440 | 441 | ### 23. Disease Search (NCI) 442 | 443 | ```python 444 | nci_disease_searcher( 445 | name: str = None, 446 | include_synonyms: bool = True, 447 | category: str = None, 448 | api_key: str = None 449 | ) -> str 450 | ``` 451 | 452 | ## OpenFDA Tools 453 | 454 | All OpenFDA tools support optional API keys for higher rate limits (240/min vs 40/min). Get a free key at [open.fda.gov/apis/authentication](https://open.fda.gov/apis/authentication/). 455 | 456 | ### 24. 
openfda_adverse_searcher 457 | 458 | **Search FDA Adverse Event Reporting System (FAERS).** 459 | 460 | ```python 461 | openfda_adverse_searcher( 462 | drug: str = None, 463 | reaction: str = None, 464 | serious: bool = None, # Filter serious events only 465 | limit: int = 25, 466 | skip: int = 0, 467 | api_key: str = None # Optional OpenFDA API key 468 | ) -> str 469 | ``` 470 | 471 | **Example:** 472 | 473 | ```python 474 | # Find serious bleeding events for warfarin 475 | openfda_adverse_searcher( 476 | drug="warfarin", 477 | reaction="bleeding", 478 | serious=True, 479 | api_key="your-key" # Optional 480 | ) 481 | ``` 482 | 483 | ### 25. openfda_adverse_getter 484 | 485 | **Get detailed adverse event report.** 486 | 487 | ```python 488 | openfda_adverse_getter( 489 | report_id: str, # Safety report ID 490 | api_key: str = None 491 | ) -> str 492 | ``` 493 | 494 | ### 26. openfda_label_searcher 495 | 496 | **Search FDA drug product labels.** 497 | 498 | ```python 499 | openfda_label_searcher( 500 | name: str = None, 501 | indication: str = None, # Search by indication 502 | boxed_warning: bool = False, # Filter for boxed warnings 503 | section: str = None, # Specific label section 504 | limit: int = 25, 505 | skip: int = 0, 506 | api_key: str = None 507 | ) -> str 508 | ``` 509 | 510 | ### 27. openfda_label_getter 511 | 512 | **Get complete drug label information.** 513 | 514 | ```python 515 | openfda_label_getter( 516 | set_id: str, # Label set ID 517 | sections: list[str] = None, # Specific sections to retrieve 518 | api_key: str = None 519 | ) -> str 520 | ``` 521 | 522 | **Label Sections:** `indications_and_usage`, `contraindications`, `warnings_and_precautions`, `dosage_and_administration`, `adverse_reactions`, `drug_interactions`, `pregnancy`, `pediatric_use`, `geriatric_use` 523 | 524 | ### 28. 
openfda_device_searcher 525 | 526 | **Search FDA device adverse event reports (MAUDE).** 527 | 528 | ```python 529 | openfda_device_searcher( 530 | device: str = None, 531 | manufacturer: str = None, 532 | problem: str = None, 533 | product_code: str = None, # FDA product code 534 | genomics_only: bool = True, # Filter genomic/diagnostic devices 535 | limit: int = 25, 536 | skip: int = 0, 537 | api_key: str = None 538 | ) -> str 539 | ``` 540 | 541 | **Note:** FDA uses abbreviated device names (e.g., "F1CDX" for "FoundationOne CDx"). 542 | 543 | ### 29. openfda_device_getter 544 | 545 | **Get detailed device event report.** 546 | 547 | ```python 548 | openfda_device_getter( 549 | mdr_report_key: str, # MDR report key 550 | api_key: str = None 551 | ) -> str 552 | ``` 553 | 554 | ### 30. openfda_approval_searcher 555 | 556 | **Search FDA drug approval records (Drugs@FDA).** 557 | 558 | ```python 559 | openfda_approval_searcher( 560 | drug: str = None, 561 | application_number: str = None, # NDA/BLA number 562 | approval_year: str = None, # YYYY format 563 | limit: int = 25, 564 | skip: int = 0, 565 | api_key: str = None 566 | ) -> str 567 | ``` 568 | 569 | ### 31. openfda_approval_getter 570 | 571 | **Get drug approval details.** 572 | 573 | ```python 574 | openfda_approval_getter( 575 | application_number: str, # NDA/BLA number 576 | api_key: str = None 577 | ) -> str 578 | ``` 579 | 580 | ### 32. 
openfda_recall_searcher 581 | 582 | **Search FDA drug recall records.** 583 | 584 | ```python 585 | openfda_recall_searcher( 586 | drug: str = None, 587 | recall_class: str = None, # "1", "2", or "3" 588 | status: str = None, # "ongoing" or "completed" 589 | reason: str = None, 590 | since_date: str = None, # YYYYMMDD format 591 | limit: int = 25, 592 | skip: int = 0, 593 | api_key: str = None 594 | ) -> str 595 | ``` 596 | 597 | **Recall Classes:** 598 | 599 | - Class 1: Dangerous or defective products that could cause serious health problems or death 600 | - Class 2: Products that might cause temporary health problems or pose a slight threat 601 | - Class 3: Products unlikely to cause adverse health consequences 602 | 603 | ### 33. openfda_recall_getter 604 | 605 | **Get drug recall details.** 606 | 607 | ```python 608 | openfda_recall_getter( 609 | recall_number: str, # FDA recall number 610 | api_key: str = None 611 | ) -> str 612 | ``` 613 | 614 | ### 34. openfda_shortage_searcher 615 | 616 | **Search FDA drug shortage database.** 617 | 618 | ```python 619 | openfda_shortage_searcher( 620 | drug: str = None, 621 | status: str = None, # "current" or "resolved" 622 | therapeutic_category: str = None, 623 | limit: int = 25, 624 | skip: int = 0, 625 | api_key: str = None 626 | ) -> str 627 | ``` 628 | 629 | ### 35. openfda_shortage_getter 630 | 631 | **Get drug shortage details.** 632 | 633 | ```python 634 | openfda_shortage_getter( 635 | drug_name: str, 636 | api_key: str = None 637 | ) -> str 638 | ``` 639 | 640 | ## Best Practices 641 | 642 | ### 1. Always Think First 643 | 644 | ```python 645 | # ✅ CORRECT - Think before searching 646 | think(thought="Planning BRAF melanoma research...", thoughtNumber=1) 647 | results = article_searcher(genes=["BRAF"], diseases=["melanoma"]) 648 | 649 | # ❌ INCORRECT - Skipping think tool 650 | results = article_searcher(genes=["BRAF"]) # Poor results! 651 | ``` 652 | 653 | ### 2. 
Use Unified Tools for Flexibility 654 | 655 | ```python 656 | # Unified search supports complex queries 657 | results = search(query="gene:EGFR AND (mutation:T790M OR mutation:C797S)") 658 | 659 | # Unified fetch auto-detects domain 660 | details = fetch(id="NCT03006926") # Knows it's a trial 661 | ``` 662 | 663 | ### 3. Leverage Domain-Specific Features 664 | 665 | ```python 666 | # Article search with cBioPortal 667 | articles = article_searcher( 668 | genes=["KRAS"], 669 | include_cbioportal=True # Adds cancer genomics context 670 | ) 671 | 672 | # Variant search with multiple filters 673 | variants = variant_searcher( 674 | gene="TP53", 675 | significance="pathogenic", 676 | frequency_max=0.01, 677 | cadd_score_min=25 678 | ) 679 | ``` 680 | 681 | ### 4. Handle API Keys Properly 682 | 683 | ```python 684 | # For personal use - environment variable 685 | # export NCI_API_KEY="your-key" 686 | nci_results = search(domain="nci_organization", name="Mayo Clinic") 687 | 688 | # For shared environments - per-request 689 | nci_results = search( 690 | domain="nci_organization", 691 | name="Mayo Clinic", 692 | api_key="user-provided-key" 693 | ) 694 | ``` 695 | 696 | ### 5. 
Use Appropriate Page Sizes 697 | 698 | ```python 699 | # Large result sets - increase page_size 700 | results = article_searcher( 701 | genes=["TP53"], 702 | page_size=50 # Get more results at once 703 | ) 704 | 705 | # Iterative exploration - use pagination 706 | page1 = trial_searcher(conditions=["cancer"], page=1, page_size=10) 707 | page2 = trial_searcher(conditions=["cancer"], page=2, page_size=10) 708 | ``` 709 | 710 | ## Error Handling 711 | 712 | All tools include comprehensive error handling: 713 | 714 | - **Invalid parameters**: Clear error messages with valid options 715 | - **API failures**: Graceful degradation with informative messages 716 | - **Rate limits**: Automatic retry with exponential backoff 717 | - **Missing API keys**: Helpful instructions for obtaining keys 718 | 719 | ## Tool Selection Guide 720 | 721 | | If you need to... | Use this tool | 722 | | ------------------------------ | ------------------------------------------------- | 723 | | Search across multiple domains | `search` with query language | 724 | | Get any record by ID | `fetch` with auto-detection | 725 | | Plan your research approach | `think` (always first!) 
| 726 | | Find recent papers | `article_searcher` | 727 | | Locate clinical trials | `trial_searcher` | 728 | | Analyze genetic variants | `variant_searcher` + `variant_getter` | 729 | | Predict variant effects | `alphagenome_predictor` | 730 | | Get gene/drug/disease info | `gene_getter`, `drug_getter`, `disease_getter` | 731 | | Access NCI databases | `nci_*` tools with API key | 732 | | Check drug adverse events | `openfda_adverse_searcher` | 733 | | Review FDA drug labels | `openfda_label_searcher` + `openfda_label_getter` | 734 | | Investigate device issues | `openfda_device_searcher` | 735 | | Find drug approvals | `openfda_approval_searcher` | 736 | | Check drug recalls | `openfda_recall_searcher` | 737 | | Monitor drug shortages | `openfda_shortage_searcher` | 738 | 739 | ## Next Steps 740 | 741 | - Review [Sequential Thinking](../concepts/03-sequential-thinking-with-the-think-tool.md) methodology 742 | - Explore [How-to Guides](../how-to-guides/01-find-articles-and-cbioportal-data.md) for complex workflows 743 | - Set up [API Keys](../getting-started/03-authentication-and-api-keys.md) for enhanced features 744 | ``` -------------------------------------------------------------------------------- /src/biomcp/domain_handlers.py: -------------------------------------------------------------------------------- ```python 1 | """Domain-specific result handlers for BioMCP. 2 | 3 | This module contains formatting functions for converting raw API responses 4 | from different biomedical data sources into a standardized format. 
5 | """ 6 | 7 | import logging 8 | from typing import Any 9 | 10 | from biomcp.constants import ( 11 | DEFAULT_SIGNIFICANCE, 12 | DEFAULT_TITLE, 13 | METADATA_AUTHORS, 14 | METADATA_COMPLETION_DATE, 15 | METADATA_CONSEQUENCE, 16 | METADATA_GENE, 17 | METADATA_JOURNAL, 18 | METADATA_PHASE, 19 | METADATA_RSID, 20 | METADATA_SIGNIFICANCE, 21 | METADATA_SOURCE, 22 | METADATA_START_DATE, 23 | METADATA_STATUS, 24 | METADATA_YEAR, 25 | RESULT_ID, 26 | RESULT_METADATA, 27 | RESULT_SNIPPET, 28 | RESULT_TITLE, 29 | RESULT_URL, 30 | SNIPPET_LENGTH, 31 | ) 32 | 33 | logger = logging.getLogger(__name__) 34 | 35 | 36 | class ArticleHandler: 37 | """Handles formatting for article/publication results.""" 38 | 39 | @staticmethod 40 | def format_result(result: dict[str, Any]) -> dict[str, Any]: 41 | """Format a single article result. 42 | 43 | Args: 44 | result: Raw article data from PubTator3 or preprint APIs 45 | 46 | Returns: 47 | Standardized article result with id, title, snippet, url, and metadata 48 | """ 49 | if "pmid" in result: 50 | # PubMed article 51 | # Clean up title - remove extra spaces 52 | title = result.get("title", "").strip() 53 | title = " ".join(title.split()) # Normalize whitespace 54 | 55 | # Use default if empty 56 | if not title: 57 | title = DEFAULT_TITLE 58 | 59 | return { 60 | RESULT_ID: result["pmid"], 61 | RESULT_TITLE: title, 62 | RESULT_SNIPPET: result.get("abstract", "")[:SNIPPET_LENGTH] 63 | + "..." 
64 | if result.get("abstract") 65 | else "", 66 | RESULT_URL: f"https://pubmed.ncbi.nlm.nih.gov/{result['pmid']}/", 67 | RESULT_METADATA: { 68 | METADATA_YEAR: result.get("pub_year") 69 | or ( 70 | result.get("date", "")[:4] 71 | if result.get("date") 72 | else None 73 | ), 74 | METADATA_JOURNAL: result.get("journal", ""), 75 | METADATA_AUTHORS: result.get("authors", [])[:3], 76 | }, 77 | } 78 | else: 79 | # Preprint result 80 | return { 81 | RESULT_ID: result.get("doi", result.get("id", "")), 82 | RESULT_TITLE: result.get("title", ""), 83 | RESULT_SNIPPET: result.get("abstract", "")[:SNIPPET_LENGTH] 84 | + "..." 85 | if result.get("abstract") 86 | else "", 87 | RESULT_URL: result.get("url", ""), 88 | RESULT_METADATA: { 89 | METADATA_YEAR: result.get("pub_year"), 90 | METADATA_SOURCE: result.get("source", ""), 91 | METADATA_AUTHORS: result.get("authors", [])[:3], 92 | }, 93 | } 94 | 95 | 96 | class TrialHandler: 97 | """Handles formatting for clinical trial results.""" 98 | 99 | @staticmethod 100 | def format_result(result: dict[str, Any]) -> dict[str, Any]: 101 | """Format a single trial result. 102 | 103 | Handles both ClinicalTrials.gov API v2 nested structure and legacy formats. 
104 | 105 | Args: 106 | result: Raw trial data from ClinicalTrials.gov API 107 | 108 | Returns: 109 | Standardized trial result with id, title, snippet, url, and metadata 110 | """ 111 | # Handle ClinicalTrials.gov API v2 nested structure 112 | if "protocolSection" in result: 113 | # API v2 format - extract from nested modules 114 | protocol = result.get("protocolSection", {}) 115 | identification = protocol.get("identificationModule", {}) 116 | status = protocol.get("statusModule", {}) 117 | description = protocol.get("descriptionModule", {}) 118 | 119 | nct_id = identification.get("nctId", "") 120 | brief_title = identification.get("briefTitle", "") 121 | official_title = identification.get("officialTitle", "") 122 | brief_summary = description.get("briefSummary", "") 123 | overall_status = status.get("overallStatus", "") 124 | start_date = status.get("startDateStruct", {}).get("date", "") 125 | completion_date = status.get( 126 | "primaryCompletionDateStruct", {} 127 | ).get("date", "") 128 | 129 | # Extract phase from designModule 130 | design = protocol.get("designModule", {}) 131 | phases = design.get("phases", []) 132 | phase = phases[0] if phases else "" 133 | elif "NCT Number" in result: 134 | # Legacy flat format from search results 135 | nct_id = result.get("NCT Number", "") 136 | brief_title = result.get("Study Title", "") 137 | official_title = "" # Not available in this format 138 | brief_summary = result.get("Brief Summary", "") 139 | overall_status = result.get("Study Status", "") 140 | phase = result.get("Phases", "") 141 | start_date = result.get("Start Date", "") 142 | completion_date = result.get("Completion Date", "") 143 | else: 144 | # Original legacy format or simplified structure 145 | nct_id = result.get("nct_id", "") 146 | brief_title = result.get("brief_title", "") 147 | official_title = result.get("official_title", "") 148 | brief_summary = result.get("brief_summary", "") 149 | overall_status = result.get("overall_status", "") 150 | 
phase = result.get("phase", "") 151 | start_date = result.get("start_date", "") 152 | completion_date = result.get("primary_completion_date", "") 153 | 154 | return { 155 | RESULT_ID: nct_id, 156 | RESULT_TITLE: brief_title or official_title or DEFAULT_TITLE, 157 | RESULT_SNIPPET: brief_summary[:SNIPPET_LENGTH] + "..." 158 | if brief_summary 159 | else "", 160 | RESULT_URL: f"https://clinicaltrials.gov/study/{nct_id}", 161 | RESULT_METADATA: { 162 | METADATA_STATUS: overall_status, 163 | METADATA_PHASE: phase, 164 | METADATA_START_DATE: start_date, 165 | METADATA_COMPLETION_DATE: completion_date, 166 | }, 167 | } 168 | 169 | 170 | class VariantHandler: 171 | """Handles formatting for genetic variant results.""" 172 | 173 | @staticmethod 174 | def format_result(result: dict[str, Any]) -> dict[str, Any]: 175 | """Format a single variant result. 176 | 177 | Args: 178 | result: Raw variant data from MyVariant.info API 179 | 180 | Returns: 181 | Standardized variant result with id, title, snippet, url, and metadata 182 | """ 183 | # Extract gene symbol - MyVariant.info stores this in multiple locations 184 | gene = ( 185 | result.get("dbnsfp", {}).get("genename", "") 186 | or result.get("dbsnp", {}).get("gene", {}).get("symbol", "") 187 | or "" 188 | ) 189 | # Handle case where gene is a list 190 | if isinstance(gene, list): 191 | gene = gene[0] if gene else "" 192 | 193 | # Extract rsid 194 | rsid = result.get("dbsnp", {}).get("rsid", "") or "" 195 | 196 | # Extract clinical significance 197 | clinvar = result.get("clinvar", {}) 198 | significance = "" 199 | if isinstance(clinvar.get("rcv"), dict): 200 | significance = clinvar["rcv"].get("clinical_significance", "") 201 | elif isinstance(clinvar.get("rcv"), list) and clinvar["rcv"]: 202 | significance = clinvar["rcv"][0].get("clinical_significance", "") 203 | 204 | # Build a meaningful title 205 | hgvs = "" 206 | if "dbnsfp" in result and "hgvsp" in result["dbnsfp"]: 207 | hgvs = result["dbnsfp"]["hgvsp"] 208 | if 
isinstance(hgvs, list): 209 | hgvs = hgvs[0] if hgvs else "" 210 | 211 | title = f"{gene} {hgvs}".strip() or result.get("_id", DEFAULT_TITLE) 212 | 213 | return { 214 | RESULT_ID: result.get("_id", ""), 215 | RESULT_TITLE: title, 216 | RESULT_SNIPPET: f"Clinical significance: {significance or DEFAULT_SIGNIFICANCE}", 217 | RESULT_URL: f"https://www.ncbi.nlm.nih.gov/snp/{rsid}" 218 | if rsid 219 | else "", 220 | RESULT_METADATA: { 221 | METADATA_GENE: gene, 222 | METADATA_RSID: rsid, 223 | METADATA_SIGNIFICANCE: significance, 224 | METADATA_CONSEQUENCE: result.get("cadd", {}).get( 225 | "consequence", "" 226 | ), 227 | }, 228 | } 229 | 230 | 231 | class GeneHandler: 232 | """Handles formatting for gene information results from MyGene.info.""" 233 | 234 | @staticmethod 235 | def format_result(result: dict[str, Any]) -> dict[str, Any]: 236 | """Format a single gene result. 237 | 238 | Args: 239 | result: Raw gene data from MyGene.info API 240 | 241 | Returns: 242 | Standardized gene result with id, title, snippet, url, and metadata 243 | """ 244 | # Extract gene information 245 | gene_id = result.get("_id", result.get("entrezgene", "")) 246 | symbol = result.get("symbol", "") 247 | name = result.get("name", "") 248 | summary = result.get("summary", "") 249 | 250 | # Build title 251 | title = ( 252 | f"{symbol}: {name}" 253 | if symbol and name 254 | else symbol or name or DEFAULT_TITLE 255 | ) 256 | 257 | # Create snippet from summary 258 | snippet = ( 259 | summary[:SNIPPET_LENGTH] + "..." 
260 | if summary and len(summary) > SNIPPET_LENGTH 261 | else summary 262 | ) 263 | 264 | return { 265 | RESULT_ID: str(gene_id), 266 | RESULT_TITLE: title, 267 | RESULT_SNIPPET: snippet or "No summary available", 268 | RESULT_URL: f"https://www.genenames.org/data/gene-symbol-report/#!/symbol/{symbol}" 269 | if symbol 270 | else "", 271 | RESULT_METADATA: { 272 | "entrezgene": result.get("entrezgene"), 273 | "symbol": symbol, 274 | "name": name, 275 | "type_of_gene": result.get("type_of_gene", ""), 276 | "ensembl": result.get("ensembl", {}).get("gene") 277 | if isinstance(result.get("ensembl"), dict) 278 | else None, 279 | "refseq": result.get("refseq", {}), 280 | }, 281 | } 282 | 283 | 284 | class DrugHandler: 285 | """Handles formatting for drug/chemical information results from MyChem.info.""" 286 | 287 | @staticmethod 288 | def format_result(result: dict[str, Any]) -> dict[str, Any]: 289 | """Format a single drug result. 290 | 291 | Args: 292 | result: Raw drug data from MyChem.info API 293 | 294 | Returns: 295 | Standardized drug result with id, title, snippet, url, and metadata 296 | """ 297 | # Extract drug information 298 | drug_id = result.get("_id", "") 299 | name = result.get("name", "") 300 | drugbank_id = result.get("drugbank_id", "") 301 | description = result.get("description", "") 302 | indication = result.get("indication", "") 303 | 304 | # Build title 305 | title = name or drug_id or DEFAULT_TITLE 306 | 307 | # Create snippet from description or indication 308 | snippet_text = indication or description 309 | snippet = ( 310 | snippet_text[:SNIPPET_LENGTH] + "..." 
311 | if snippet_text and len(snippet_text) > SNIPPET_LENGTH 312 | else snippet_text 313 | ) 314 | 315 | # Determine URL based on available IDs 316 | url = "" 317 | if drugbank_id: 318 | url = f"https://www.drugbank.ca/drugs/{drugbank_id}" 319 | elif result.get("pubchem_cid"): 320 | url = f"https://pubchem.ncbi.nlm.nih.gov/compound/{result['pubchem_cid']}" 321 | 322 | return { 323 | RESULT_ID: drug_id, 324 | RESULT_TITLE: title, 325 | RESULT_SNIPPET: snippet or "No description available", 326 | RESULT_URL: url, 327 | RESULT_METADATA: { 328 | "drugbank_id": drugbank_id, 329 | "chembl_id": result.get("chembl_id", ""), 330 | "pubchem_cid": result.get("pubchem_cid", ""), 331 | "chebi_id": result.get("chebi_id", ""), 332 | "formula": result.get("formula", ""), 333 | "tradename": result.get("tradename", []), 334 | }, 335 | } 336 | 337 | 338 | class DiseaseHandler: 339 | """Handles formatting for disease information results from MyDisease.info.""" 340 | 341 | @staticmethod 342 | def format_result(result: dict[str, Any]) -> dict[str, Any]: 343 | """Format a single disease result. 344 | 345 | Args: 346 | result: Raw disease data from MyDisease.info API 347 | 348 | Returns: 349 | Standardized disease result with id, title, snippet, url, and metadata 350 | """ 351 | # Extract disease information 352 | disease_id = result.get("_id", "") 353 | name = result.get("name", "") 354 | definition = result.get("definition", "") 355 | mondo_info = result.get("mondo", {}) 356 | 357 | # Build title 358 | title = name or disease_id or DEFAULT_TITLE 359 | 360 | # Create snippet from definition 361 | snippet = ( 362 | definition[:SNIPPET_LENGTH] + "..." 
363 | if definition and len(definition) > SNIPPET_LENGTH 364 | else definition 365 | ) 366 | 367 | # Extract MONDO ID for URL 368 | mondo_id = mondo_info.get("id") if isinstance(mondo_info, dict) else "" 369 | url = ( 370 | f"https://monarchinitiative.org/disease/{mondo_id}" 371 | if mondo_id 372 | else "" 373 | ) 374 | 375 | return { 376 | RESULT_ID: disease_id, 377 | RESULT_TITLE: title, 378 | RESULT_SNIPPET: snippet or "No definition available", 379 | RESULT_URL: url, 380 | RESULT_METADATA: { 381 | "mondo_id": mondo_id, 382 | "definition": definition, 383 | "synonyms": result.get("synonyms", []), 384 | "xrefs": result.get("xrefs", {}), 385 | "phenotypes": len(result.get("phenotypes", [])), 386 | }, 387 | } 388 | 389 | 390 | class NCIOrganizationHandler: 391 | """Handles formatting for NCI organization results.""" 392 | 393 | @staticmethod 394 | def format_result(result: dict[str, Any]) -> dict[str, Any]: 395 | """Format a single NCI organization result. 396 | 397 | Args: 398 | result: Raw organization data from NCI CTS API 399 | 400 | Returns: 401 | Standardized organization result with id, title, snippet, url, and metadata 402 | """ 403 | org_id = result.get("id", result.get("org_id", "")) 404 | name = result.get("name", "Unknown Organization") 405 | org_type = result.get("type", result.get("category", "")) 406 | city = result.get("city", "") 407 | state = result.get("state", "") 408 | 409 | # Build location string 410 | location_parts = [p for p in [city, state] if p] 411 | location = ", ".join(location_parts) if location_parts else "" 412 | 413 | # Create snippet 414 | snippet_parts = [] 415 | if org_type: 416 | snippet_parts.append(f"Type: {org_type}") 417 | if location: 418 | snippet_parts.append(f"Location: {location}") 419 | snippet = " | ".join(snippet_parts) or "No details available" 420 | 421 | return { 422 | RESULT_ID: org_id, 423 | RESULT_TITLE: name, 424 | RESULT_SNIPPET: snippet, 425 | RESULT_URL: "", # NCI doesn't provide direct URLs to 
organizations 426 | RESULT_METADATA: { 427 | "type": org_type, 428 | "city": city, 429 | "state": state, 430 | "country": result.get("country", ""), 431 | }, 432 | } 433 | 434 | 435 | class NCIInterventionHandler: 436 | """Handles formatting for NCI intervention results.""" 437 | 438 | @staticmethod 439 | def format_result(result: dict[str, Any]) -> dict[str, Any]: 440 | """Format a single NCI intervention result. 441 | 442 | Args: 443 | result: Raw intervention data from NCI CTS API 444 | 445 | Returns: 446 | Standardized intervention result with id, title, snippet, url, and metadata 447 | """ 448 | int_id = result.get("id", result.get("intervention_id", "")) 449 | name = result.get("name", "Unknown Intervention") 450 | int_type = result.get("type", result.get("category", "")) 451 | synonyms = result.get("synonyms", []) 452 | 453 | # Create snippet 454 | snippet_parts = [] 455 | if int_type: 456 | snippet_parts.append(f"Type: {int_type}") 457 | if synonyms: 458 | if isinstance(synonyms, list) and synonyms: 459 | snippet_parts.append( 460 | f"Also known as: {', '.join(synonyms[:3])}" 461 | ) 462 | elif isinstance(synonyms, str): 463 | snippet_parts.append(f"Also known as: {synonyms}") 464 | snippet = " | ".join(snippet_parts) or "No details available" 465 | 466 | return { 467 | RESULT_ID: int_id, 468 | RESULT_TITLE: name, 469 | RESULT_SNIPPET: snippet, 470 | RESULT_URL: "", # NCI doesn't provide direct URLs to interventions 471 | RESULT_METADATA: { 472 | "type": int_type, 473 | "synonyms": synonyms, 474 | "description": result.get("description", ""), 475 | }, 476 | } 477 | 478 | 479 | class NCIBiomarkerHandler: 480 | """Handles formatting for NCI biomarker results.""" 481 | 482 | @staticmethod 483 | def format_result(result: dict[str, Any]) -> dict[str, Any]: 484 | """Format a single NCI biomarker result. 
485 | 486 | Args: 487 | result: Raw biomarker data from NCI CTS API 488 | 489 | Returns: 490 | Standardized biomarker result with id, title, snippet, url, and metadata 491 | """ 492 | bio_id = result.get("id", result.get("biomarker_id", "")) 493 | name = result.get("name", "Unknown Biomarker") 494 | gene = result.get("gene", result.get("gene_symbol", "")) 495 | bio_type = result.get("type", result.get("category", "")) 496 | assay_type = result.get("assay_type", "") 497 | 498 | # Build title 499 | title = name 500 | if gene and gene not in name: 501 | title = f"{gene} - {name}" 502 | 503 | # Create snippet 504 | snippet_parts = [] 505 | if bio_type: 506 | snippet_parts.append(f"Type: {bio_type}") 507 | if assay_type: 508 | snippet_parts.append(f"Assay: {assay_type}") 509 | snippet = ( 510 | " | ".join(snippet_parts) or "Biomarker for trial eligibility" 511 | ) 512 | 513 | return { 514 | RESULT_ID: bio_id, 515 | RESULT_TITLE: title, 516 | RESULT_SNIPPET: snippet, 517 | RESULT_URL: "", # NCI doesn't provide direct URLs to biomarkers 518 | RESULT_METADATA: { 519 | "gene": gene, 520 | "type": bio_type, 521 | "assay_type": assay_type, 522 | "trial_count": result.get("trial_count", 0), 523 | }, 524 | } 525 | 526 | 527 | class NCIDiseaseHandler: 528 | """Handles formatting for NCI disease vocabulary results.""" 529 | 530 | @staticmethod 531 | def format_result(result: dict[str, Any]) -> dict[str, Any]: 532 | """Format a single NCI disease result. 
533 | 534 | Args: 535 | result: Raw disease data from NCI CTS API 536 | 537 | Returns: 538 | Standardized disease result with id, title, snippet, url, and metadata 539 | """ 540 | disease_id = result.get("id", result.get("disease_id", "")) 541 | name = result.get( 542 | "name", result.get("preferred_name", "Unknown Disease") 543 | ) 544 | category = result.get("category", result.get("type", "")) 545 | synonyms = result.get("synonyms", []) 546 | 547 | # Create snippet 548 | snippet_parts = [] 549 | if category: 550 | snippet_parts.append(f"Category: {category}") 551 | if synonyms: 552 | if isinstance(synonyms, list) and synonyms: 553 | snippet_parts.append( 554 | f"Also known as: {', '.join(synonyms[:3])}" 555 | ) 556 | if len(synonyms) > 3: 557 | snippet_parts.append(f"and {len(synonyms) - 3} more") 558 | elif isinstance(synonyms, str): 559 | snippet_parts.append(f"Also known as: {synonyms}") 560 | snippet = " | ".join(snippet_parts) or "NCI cancer vocabulary term" 561 | 562 | return { 563 | RESULT_ID: disease_id, 564 | RESULT_TITLE: name, 565 | RESULT_SNIPPET: snippet, 566 | RESULT_URL: "", # NCI doesn't provide direct URLs to disease terms 567 | RESULT_METADATA: { 568 | "category": category, 569 | "synonyms": synonyms, 570 | "codes": result.get("codes", {}), 571 | }, 572 | } 573 | 574 | 575 | def get_domain_handler( 576 | domain: str, 577 | ) -> ( 578 | type[ArticleHandler] 579 | | type[TrialHandler] 580 | | type[VariantHandler] 581 | | type[GeneHandler] 582 | | type[DrugHandler] 583 | | type[DiseaseHandler] 584 | | type[NCIOrganizationHandler] 585 | | type[NCIInterventionHandler] 586 | | type[NCIBiomarkerHandler] 587 | | type[NCIDiseaseHandler] 588 | ): 589 | """Get the appropriate handler class for a domain. 
590 | 591 | Args: 592 | domain: The domain name ('article', 'trial', 'variant', 'gene', 'drug', 'disease', 593 | 'nci_organization', 'nci_intervention', 'nci_biomarker', 'nci_disease') 594 | 595 | Returns: 596 | The handler class for the domain 597 | 598 | Raises: 599 | ValueError: If domain is not recognized 600 | """ 601 | handlers: dict[ 602 | str, 603 | type[ArticleHandler] 604 | | type[TrialHandler] 605 | | type[VariantHandler] 606 | | type[GeneHandler] 607 | | type[DrugHandler] 608 | | type[DiseaseHandler] 609 | | type[NCIOrganizationHandler] 610 | | type[NCIInterventionHandler] 611 | | type[NCIBiomarkerHandler] 612 | | type[NCIDiseaseHandler], 613 | ] = { 614 | "article": ArticleHandler, 615 | "trial": TrialHandler, 616 | "variant": VariantHandler, 617 | "gene": GeneHandler, 618 | "drug": DrugHandler, 619 | "disease": DiseaseHandler, 620 | "nci_organization": NCIOrganizationHandler, 621 | "nci_intervention": NCIInterventionHandler, 622 | "nci_biomarker": NCIBiomarkerHandler, 623 | "nci_disease": NCIDiseaseHandler, 624 | } 625 | 626 | handler = handlers.get(domain) 627 | if handler is None: 628 | raise ValueError(f"Unknown domain: {domain}") 629 | 630 | return handler 631 | ```