genomoncology/biomcp # codebase.md

This is page 13 of 19. Use http://codebase.md/genomoncology/biomcp?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .github
│   ├── actions
│   │   └── setup-python-env
│   │       └── action.yml
│   ├── dependabot.yml
│   └── workflows
│       ├── ci.yml
│       ├── deploy-docs.yml
│       ├── main.yml.disabled
│       ├── on-release-main.yml
│       └── validate-codecov-config.yml
├── .gitignore
├── .pre-commit-config.yaml
├── BIOMCP_DATA_FLOW.md
├── CHANGELOG.md
├── CNAME
├── codecov.yaml
├── docker-compose.yml
├── Dockerfile
├── docs
│   ├── apis
│   │   ├── error-codes.md
│   │   ├── overview.md
│   │   └── python-sdk.md
│   ├── assets
│   │   ├── biomcp-cursor-locations.png
│   │   ├── favicon.ico
│   │   ├── icon.png
│   │   ├── logo.png
│   │   ├── mcp_architecture.txt
│   │   └── remote-connection
│   │       ├── 00_connectors.png
│   │       ├── 01_add_custom_connector.png
│   │       ├── 02_connector_enabled.png
│   │       ├── 03_connect_to_biomcp.png
│   │       ├── 04_select_google_oauth.png
│   │       └── 05_success_connect.png
│   ├── backend-services-reference
│   │   ├── 01-overview.md
│   │   ├── 02-biothings-suite.md
│   │   ├── 03-cbioportal.md
│   │   ├── 04-clinicaltrials-gov.md
│   │   ├── 05-nci-cts-api.md
│   │   ├── 06-pubtator3.md
│   │   └── 07-alphagenome.md
│   ├── blog
│   │   ├── ai-assisted-clinical-trial-search-analysis.md
│   │   ├── images
│   │   │   ├── deep-researcher-video.png
│   │   │   ├── researcher-announce.png
│   │   │   ├── researcher-drop-down.png
│   │   │   ├── researcher-prompt.png
│   │   │   ├── trial-search-assistant.png
│   │   │   └── what_is_biomcp_thumbnail.png
│   │   └── researcher-persona-resource.md
│   ├── changelog.md
│   ├── CNAME
│   ├── concepts
│   │   ├── 01-what-is-biomcp.md
│   │   ├── 02-the-deep-researcher-persona.md
│   │   └── 03-sequential-thinking-with-the-think-tool.md
│   ├── developer-guides
│   │   ├── 01-server-deployment.md
│   │   ├── 02-contributing-and-testing.md
│   │   ├── 03-third-party-endpoints.md
│   │   ├── 04-transport-protocol.md
│   │   ├── 05-error-handling.md
│   │   ├── 06-http-client-and-caching.md
│   │   ├── 07-performance-optimizations.md
│   │   └── generate_endpoints.py
│   ├── faq-condensed.md
│   ├── FDA_SECURITY.md
│   ├── genomoncology.md
│   ├── getting-started
│   │   ├── 01-quickstart-cli.md
│   │   ├── 02-claude-desktop-integration.md
│   │   └── 03-authentication-and-api-keys.md
│   ├── how-to-guides
│   │   ├── 01-find-articles-and-cbioportal-data.md
│   │   ├── 02-find-trials-with-nci-and-biothings.md
│   │   ├── 03-get-comprehensive-variant-annotations.md
│   │   ├── 04-predict-variant-effects-with-alphagenome.md
│   │   ├── 05-logging-and-monitoring-with-bigquery.md
│   │   └── 06-search-nci-organizations-and-interventions.md
│   ├── index.md
│   ├── policies.md
│   ├── reference
│   │   ├── architecture-diagrams.md
│   │   ├── quick-architecture.md
│   │   ├── quick-reference.md
│   │   └── visual-architecture.md
│   ├── robots.txt
│   ├── stylesheets
│   │   ├── announcement.css
│   │   └── extra.css
│   ├── troubleshooting.md
│   ├── tutorials
│   │   ├── biothings-prompts.md
│   │   ├── claude-code-biomcp-alphagenome.md
│   │   ├── nci-prompts.md
│   │   ├── openfda-integration.md
│   │   ├── openfda-prompts.md
│   │   ├── pydantic-ai-integration.md
│   │   └── remote-connection.md
│   ├── user-guides
│   │   ├── 01-command-line-interface.md
│   │   ├── 02-mcp-tools-reference.md
│   │   └── 03-integrating-with-ides-and-clients.md
│   └── workflows
│       └── all-workflows.md
├── example_scripts
│   ├── mcp_integration.py
│   └── python_sdk.py
├── glama.json
├── LICENSE
├── lzyank.toml
├── Makefile
├── mkdocs.yml
├── package-lock.json
├── package.json
├── pyproject.toml
├── README.md
├── scripts
│   ├── check_docs_in_mkdocs.py
│   ├── check_http_imports.py
│   └── generate_endpoints_doc.py
├── smithery.yaml
├── src
│   └── biomcp
│       ├── __init__.py
│       ├── __main__.py
│       ├── articles
│       │   ├── __init__.py
│       │   ├── autocomplete.py
│       │   ├── fetch.py
│       │   ├── preprints.py
│       │   ├── search_optimized.py
│       │   ├── search.py
│       │   └── unified.py
│       ├── biomarkers
│       │   ├── __init__.py
│       │   └── search.py
│       ├── cbioportal_helper.py
│       ├── circuit_breaker.py
│       ├── cli
│       │   ├── __init__.py
│       │   ├── articles.py
│       │   ├── biomarkers.py
│       │   ├── diseases.py
│       │   ├── health.py
│       │   ├── interventions.py
│       │   ├── main.py
│       │   ├── openfda.py
│       │   ├── organizations.py
│       │   ├── server.py
│       │   ├── trials.py
│       │   └── variants.py
│       ├── connection_pool.py
│       ├── constants.py
│       ├── core.py
│       ├── diseases
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── domain_handlers.py
│       ├── drugs
│       │   ├── __init__.py
│       │   └── getter.py
│       ├── exceptions.py
│       ├── genes
│       │   ├── __init__.py
│       │   └── getter.py
│       ├── http_client_simple.py
│       ├── http_client.py
│       ├── individual_tools.py
│       ├── integrations
│       │   ├── __init__.py
│       │   ├── biothings_client.py
│       │   └── cts_api.py
│       ├── interventions
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── logging_filter.py
│       ├── metrics_handler.py
│       ├── metrics.py
│       ├── openfda
│       │   ├── __init__.py
│       │   ├── adverse_events_helpers.py
│       │   ├── adverse_events.py
│       │   ├── cache.py
│       │   ├── constants.py
│       │   ├── device_events_helpers.py
│       │   ├── device_events.py
│       │   ├── drug_approvals.py
│       │   ├── drug_labels_helpers.py
│       │   ├── drug_labels.py
│       │   ├── drug_recalls_helpers.py
│       │   ├── drug_recalls.py
│       │   ├── drug_shortages_detail_helpers.py
│       │   ├── drug_shortages_helpers.py
│       │   ├── drug_shortages.py
│       │   ├── exceptions.py
│       │   ├── input_validation.py
│       │   ├── rate_limiter.py
│       │   ├── utils.py
│       │   └── validation.py
│       ├── organizations
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── parameter_parser.py
│       ├── prefetch.py
│       ├── query_parser.py
│       ├── query_router.py
│       ├── rate_limiter.py
│       ├── render.py
│       ├── request_batcher.py
│       ├── resources
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   ├── instructions.md
│       │   └── researcher.md
│       ├── retry.py
│       ├── router_handlers.py
│       ├── router.py
│       ├── shared_context.py
│       ├── thinking
│       │   ├── __init__.py
│       │   ├── sequential.py
│       │   └── session.py
│       ├── thinking_tool.py
│       ├── thinking_tracker.py
│       ├── trials
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   ├── nci_getter.py
│       │   ├── nci_search.py
│       │   └── search.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── cancer_types_api.py
│       │   ├── cbio_http_adapter.py
│       │   ├── endpoint_registry.py
│       │   ├── gene_validator.py
│       │   ├── metrics.py
│       │   ├── mutation_filter.py
│       │   ├── query_utils.py
│       │   ├── rate_limiter.py
│       │   └── request_cache.py
│       ├── variants
│       │   ├── __init__.py
│       │   ├── alphagenome.py
│       │   ├── cancer_types.py
│       │   ├── cbio_external_client.py
│       │   ├── cbioportal_mutations.py
│       │   ├── cbioportal_search_helpers.py
│       │   ├── cbioportal_search.py
│       │   ├── constants.py
│       │   ├── external.py
│       │   ├── filters.py
│       │   ├── getter.py
│       │   ├── links.py
│       │   └── search.py
│       └── workers
│           ├── __init__.py
│           ├── worker_entry_stytch.js
│           ├── worker_entry.js
│           └── worker.py
├── tests
│   ├── bdd
│   │   ├── cli_help
│   │   │   ├── help.feature
│   │   │   └── test_help.py
│   │   ├── conftest.py
│   │   ├── features
│   │   │   └── alphagenome_integration.feature
│   │   ├── fetch_articles
│   │   │   ├── fetch.feature
│   │   │   └── test_fetch.py
│   │   ├── get_trials
│   │   │   ├── get.feature
│   │   │   └── test_get.py
│   │   ├── get_variants
│   │   │   ├── get.feature
│   │   │   └── test_get.py
│   │   ├── search_articles
│   │   │   ├── autocomplete.feature
│   │   │   ├── search.feature
│   │   │   ├── test_autocomplete.py
│   │   │   └── test_search.py
│   │   ├── search_trials
│   │   │   ├── search.feature
│   │   │   └── test_search.py
│   │   ├── search_variants
│   │   │   ├── search.feature
│   │   │   └── test_search.py
│   │   └── steps
│   │       └── test_alphagenome_steps.py
│   ├── config
│   │   └── test_smithery_config.py
│   ├── conftest.py
│   ├── data
│   │   ├── ct_gov
│   │   │   ├── clinical_trials_api_v2.yaml
│   │   │   ├── trials_NCT04280705.json
│   │   │   └── trials_NCT04280705.txt
│   │   ├── myvariant
│   │   │   ├── myvariant_api.yaml
│   │   │   ├── myvariant_field_descriptions.csv
│   │   │   ├── variants_full_braf_v600e.json
│   │   │   ├── variants_full_braf_v600e.txt
│   │   │   └── variants_part_braf_v600_multiple.json
│   │   ├── openfda
│   │   │   ├── drugsfda_detail.json
│   │   │   ├── drugsfda_search.json
│   │   │   ├── enforcement_detail.json
│   │   │   └── enforcement_search.json
│   │   └── pubtator
│   │       ├── pubtator_autocomplete.json
│   │       └── pubtator3_paper.txt
│   ├── integration
│   │   ├── test_openfda_integration.py
│   │   ├── test_preprints_integration.py
│   │   ├── test_simple.py
│   │   └── test_variants_integration.py
│   ├── tdd
│   │   ├── articles
│   │   │   ├── test_autocomplete.py
│   │   │   ├── test_cbioportal_integration.py
│   │   │   ├── test_fetch.py
│   │   │   ├── test_preprints.py
│   │   │   ├── test_search.py
│   │   │   └── test_unified.py
│   │   ├── conftest.py
│   │   ├── drugs
│   │   │   ├── __init__.py
│   │   │   └── test_drug_getter.py
│   │   ├── openfda
│   │   │   ├── __init__.py
│   │   │   ├── test_adverse_events.py
│   │   │   ├── test_device_events.py
│   │   │   ├── test_drug_approvals.py
│   │   │   ├── test_drug_labels.py
│   │   │   ├── test_drug_recalls.py
│   │   │   ├── test_drug_shortages.py
│   │   │   └── test_security.py
│   │   ├── test_biothings_integration_real.py
│   │   ├── test_biothings_integration.py
│   │   ├── test_circuit_breaker.py
│   │   ├── test_concurrent_requests.py
│   │   ├── test_connection_pool.py
│   │   ├── test_domain_handlers.py
│   │   ├── test_drug_approvals.py
│   │   ├── test_drug_recalls.py
│   │   ├── test_drug_shortages.py
│   │   ├── test_endpoint_documentation.py
│   │   ├── test_error_scenarios.py
│   │   ├── test_europe_pmc_fetch.py
│   │   ├── test_mcp_integration.py
│   │   ├── test_mcp_tools.py
│   │   ├── test_metrics.py
│   │   ├── test_nci_integration.py
│   │   ├── test_nci_mcp_tools.py
│   │   ├── test_network_policies.py
│   │   ├── test_offline_mode.py
│   │   ├── test_openfda_unified.py
│   │   ├── test_pten_r173_search.py
│   │   ├── test_render.py
│   │   ├── test_request_batcher.py.disabled
│   │   ├── test_retry.py
│   │   ├── test_router.py
│   │   ├── test_shared_context.py.disabled
│   │   ├── test_unified_biothings.py
│   │   ├── thinking
│   │   │   ├── __init__.py
│   │   │   └── test_sequential.py
│   │   ├── trials
│   │   │   ├── test_backward_compatibility.py
│   │   │   ├── test_getter.py
│   │   │   └── test_search.py
│   │   ├── utils
│   │   │   ├── test_gene_validator.py
│   │   │   ├── test_mutation_filter.py
│   │   │   ├── test_rate_limiter.py
│   │   │   └── test_request_cache.py
│   │   ├── variants
│   │   │   ├── constants.py
│   │   │   ├── test_alphagenome_api_key.py
│   │   │   ├── test_alphagenome_comprehensive.py
│   │   │   ├── test_alphagenome.py
│   │   │   ├── test_cbioportal_mutations.py
│   │   │   ├── test_cbioportal_search.py
│   │   │   ├── test_external_integration.py
│   │   │   ├── test_external.py
│   │   │   ├── test_extract_gene_aa_change.py
│   │   │   ├── test_filters.py
│   │   │   ├── test_getter.py
│   │   │   ├── test_links.py
│   │   │   └── test_search.py
│   │   └── workers
│   │       └── test_worker_sanitization.js
│   └── test_pydantic_ai_integration.py
├── THIRD_PARTY_ENDPOINTS.md
├── tox.ini
├── uv.lock
└── wrangler.toml
```

# Files

--------------------------------------------------------------------------------
/docs/tutorials/openfda-prompts.md:
--------------------------------------------------------------------------------

```markdown
  1 | # OpenFDA Example Prompts for AI Agents
  2 | 
  3 | This document provides example prompts that demonstrate effective use of BioMCP's OpenFDA integration for various precision oncology use cases.
  4 | 
  5 | ## Drug Safety Assessment
  6 | 
  7 | ### Basic Safety Profile
  8 | 
  9 | ```
 10 | What are the most common adverse events reported for pembrolizumab?
 11 | Include both serious and non-serious events.
 12 | ```
 13 | 
 14 | **Expected BioMCP Usage:**
 15 | 
 16 | 1. `think` - Plan safety assessment approach
 17 | 2. `openfda_adverse_searcher(drug="pembrolizumab", limit=50)`
 18 | 3. Analyze and summarize top reactions
 19 | 
 20 | ### Comparative Safety Analysis
 21 | 
 22 | ```
 23 | Compare the adverse event profiles of imatinib and dasatinib for CML treatment.
 24 | Focus on serious events and their frequencies.
 25 | ```
 26 | 
 27 | **Expected BioMCP Usage:**
 28 | 
 29 | 1. `think` - Plan comparative analysis
 30 | 2. `openfda_adverse_searcher(drug="imatinib", serious=True)`
 31 | 3. `openfda_adverse_searcher(drug="dasatinib", serious=True)`
 32 | 4. Compare and contrast findings
 33 | 
 34 | ### Drug Interaction Investigation
 35 | 
 36 | ```
 37 | A patient on warfarin needs to start erlotinib for NSCLC. What drug interactions
 38 | and adverse events should we monitor based on FDA data?
 39 | ```
 40 | 
 41 | **Expected BioMCP Usage:**
 42 | 
 43 | 1. `think` - Consider interaction risks
 44 | 2. `openfda_label_searcher(name="erlotinib")` - Check drug interactions section
 45 | 3. `openfda_adverse_searcher(drug="erlotinib", reaction="bleeding")`
 46 | 4. `openfda_adverse_searcher(drug="erlotinib", reaction="INR")`
 47 | 
 48 | ## Treatment Planning
 49 | 
 50 | ### Indication Verification
 51 | 
 52 | ```
 53 | Is trastuzumab deruxtecan FDA-approved for HER2-low breast cancer?
 54 | What are the specific approved indications?
 55 | ```
 56 | 
 57 | **Expected BioMCP Usage:**
 58 | 
 59 | 1. `think` - Plan indication search
 60 | 2. `openfda_label_searcher(name="trastuzumab deruxtecan")`
 61 | 3. `openfda_label_getter(set_id="...")` - Get full indications section
 62 | 4. Extract and summarize approved uses
 63 | 
 64 | ### Contraindication Screening
 65 | 
 66 | ```
 67 | Patient has severe hepatic impairment. Which targeted therapy drugs for
 68 | melanoma have contraindications or warnings for liver dysfunction?
 69 | ```
 70 | 
 71 | **Expected BioMCP Usage:**
 72 | 
 73 | 1. `think` - Identify melanoma drugs to check
 74 | 2. `openfda_label_searcher(indication="melanoma")`
 75 | 3. For each drug: `openfda_label_getter(set_id="...", sections=["contraindications", "warnings_and_precautions"])`
 76 | 4. Summarize liver-related contraindications
 77 | 
 78 | ### Dosing Guidelines
 79 | 
 80 | ```
 81 | What is the FDA-recommended dosing for osimertinib in EGFR-mutated NSCLC,
 82 | including dose modifications for adverse events?
 83 | ```
 84 | 
 85 | **Expected BioMCP Usage:**
 86 | 
 87 | 1. `think` - Plan dosing information retrieval
 88 | 2. `openfda_label_searcher(name="osimertinib")`
 89 | 3. `openfda_label_getter(set_id="...", sections=["dosage_and_administration", "dose_modifications"])`
 90 | 4. Extract dosing guidelines
 91 | 
 92 | ## Device Reliability Assessment
 93 | 
 94 | ### Genomic Test Reliability
 95 | 
 96 | ```
 97 | What adverse events have been reported for NGS-based cancer diagnostic devices?
 98 | Show me any false positive or accuracy issues.
 99 | ```
100 | 
101 | **Expected BioMCP Usage:**
102 | 
103 | 1. `think` - Consider test reliability factors
104 | 2. `openfda_device_searcher(genomics_only=True, limit=25)` - Get all genomic device events
105 | 3. `openfda_device_searcher(problem="false positive", genomics_only=True)`
106 | 4. `openfda_device_searcher(problem="accuracy", genomics_only=True)`
107 | 5. For significant events: `openfda_device_getter(mdr_report_key="...")`
108 | 
109 | **Note:** The FDA database uses abbreviated names (e.g., "F1CDX" instead of "FoundationOne CDx").
110 | For specific devices, try: `openfda_device_searcher(device="F1CDX")` or search by key terms.
111 | 
112 | ### Laboratory Equipment Issues
113 | 
114 | ```
115 | Our lab uses Illumina sequencers. What device malfunctions have been
116 | reported that could impact our genomic testing workflow?
117 | ```
118 | 
119 | **Expected BioMCP Usage:**
120 | 
121 | 1. `think` - Assess potential workflow impacts
122 | 2. `openfda_device_searcher(manufacturer="Illumina", genomics_only=True)`
123 | 3. Analyze problem patterns
124 | 4. `openfda_device_getter(mdr_report_key="...")` for critical issues
125 | 
126 | ## Comprehensive Drug Evaluation
127 | 
128 | ### New Drug Assessment
129 | 
130 | ```
131 | Provide a comprehensive safety and efficacy profile for sotorasib (Lumakras)
132 | including FDA approval, indications, major warnings, and post-market adverse events.
133 | ```
134 | 
135 | **Expected BioMCP Usage:**
136 | 
137 | 1. `think` - Plan comprehensive assessment
138 | 2. `drug_getter("sotorasib")` - Basic drug info
139 | 3. `openfda_label_searcher(name="sotorasib")`
140 | 4. `openfda_label_getter(set_id="...")` - Full label
141 | 5. `openfda_adverse_searcher(drug="sotorasib", serious=True)`
142 | 6. `trial_searcher(interventions=["sotorasib"])` - Ongoing trials
143 | 
144 | ### Risk-Benefit Analysis
145 | 
146 | ```
147 | For a 75-year-old patient with metastatic melanoma, analyze the risk-benefit
148 | profile of nivolumab plus ipilimumab combination therapy based on FDA data.
149 | ```
150 | 
151 | **Expected BioMCP Usage:**
152 | 
153 | 1. `think` - Structure risk-benefit analysis
154 | 2. `openfda_label_searcher(name="nivolumab")`
155 | 3. `openfda_label_searcher(name="ipilimumab")`
156 | 4. `openfda_label_getter(set_id="...", sections=["geriatric_use", "warnings_and_precautions"])`
157 | 5. `openfda_adverse_searcher(drug="nivolumab", serious=True)`
158 | 6. `openfda_adverse_searcher(drug="ipilimumab", serious=True)`
159 | 
160 | ## Special Populations
161 | 
162 | ### Pregnancy Considerations
163 | 
164 | ```
165 | Which FDA-approved lung cancer treatments have pregnancy category data
166 | or specific warnings for pregnant patients?
167 | ```
168 | 
169 | **Expected BioMCP Usage:**
170 | 
171 | 1. `think` - Plan pregnancy safety search
172 | 2. `openfda_label_searcher(indication="lung cancer")`
173 | 3. For each drug: `openfda_label_getter(set_id="...", sections=["pregnancy", "use_in_specific_populations"])`
174 | 4. Compile pregnancy categories and warnings
175 | 
176 | ### Pediatric Oncology
177 | 
178 | ```
179 | What FDA-approved indications and safety data exist for using
180 | checkpoint inhibitors in pediatric cancer patients?
181 | ```
182 | 
183 | **Expected BioMCP Usage:**
184 | 
185 | 1. `think` - Identify checkpoint inhibitors
186 | 2. `openfda_label_searcher(name="pembrolizumab")`
187 | 3. `openfda_label_getter(set_id="...", sections=["pediatric_use"])`
188 | 4. `openfda_adverse_searcher(drug="pembrolizumab")` - Filter for pediatric patients if possible
189 | 5. Repeat for other checkpoint inhibitors
190 | 
191 | ## Complex Queries
192 | 
193 | ### Multi-Drug Regimen Safety
194 | 
195 | ```
196 | Analyze potential safety concerns for the FOLFOX chemotherapy regimen
197 | (5-FU, leucovorin, oxaliplatin) based on FDA adverse event data.
198 | ```
199 | 
200 | **Expected BioMCP Usage:**
201 | 
202 | 1. `think` - Plan multi-drug analysis
203 | 2. `openfda_adverse_searcher(drug="fluorouracil")`
204 | 3. `openfda_adverse_searcher(drug="leucovorin")`
205 | 4. `openfda_adverse_searcher(drug="oxaliplatin")`
206 | 5. Identify overlapping toxicities
207 | 6. `openfda_label_searcher(name="oxaliplatin")` - Check for combination warnings
208 | 
209 | ### Biomarker-Driven Treatment Selection
210 | 
211 | ```
212 | For a patient with BRAF V600E mutant melanoma with brain metastases,
213 | what FDA-approved treatments are available and what are their CNS-specific
214 | efficacy and safety considerations?
215 | ```
216 | 
217 | **Expected BioMCP Usage:**
218 | 
219 | 1. `think` - Structure biomarker-driven search
220 | 2. `article_searcher(genes=["BRAF"], variants=["V600E"], diseases=["melanoma"])`
221 | 3. `openfda_label_searcher(indication="melanoma")`
222 | 4. For BRAF inhibitors: `openfda_label_getter(set_id="...", sections=["clinical_studies", "warnings_and_precautions"])`
223 | 5. `openfda_adverse_searcher(drug="dabrafenib", reaction="seizure")`
224 | 6. `openfda_adverse_searcher(drug="vemurafenib", reaction="brain")`
225 | 
226 | ### Treatment Failure Analysis
227 | 
228 | ```
229 | A patient's lung adenocarcinoma progressed on osimertinib. Based on FDA data,
230 | what are the documented resistance mechanisms and alternative approved treatments?
231 | ```
232 | 
233 | **Expected BioMCP Usage:**
234 | 
235 | 1. `think` - Analyze resistance and alternatives
236 | 2. `openfda_label_getter(set_id="...", sections=["clinical_studies"])` for osimertinib (look up the `set_id` first with `openfda_label_searcher(name="osimertinib")`)
237 | 3. `article_searcher(genes=["EGFR"], keywords=["resistance", "osimertinib"])`
238 | 4. `openfda_label_searcher(indication="non-small cell lung cancer")`
239 | 5. `trial_searcher(conditions=["NSCLC"], keywords=["osimertinib resistant"])`
240 | 
241 | ## Safety Monitoring
242 | 
243 | ### Post-Market Surveillance
244 | 
245 | ```
246 | Have there been any new safety signals for CDK4/6 inhibitors
247 | (palbociclib, ribociclib, abemaciclib) in the past year?
248 | ```
249 | 
250 | **Expected BioMCP Usage:**
251 | 
252 | 1. `think` - Plan safety signal detection
253 | 2. `openfda_adverse_searcher(drug="palbociclib", limit=100)`
254 | 3. `openfda_adverse_searcher(drug="ribociclib", limit=100)`
255 | 4. `openfda_adverse_searcher(drug="abemaciclib", limit=100)`
256 | 5. Analyze for unusual patterns or frequencies
257 | 
258 | ### Rare Adverse Event Investigation
259 | 
260 | ```
261 | Investigate reports of pneumonitis associated with immune checkpoint inhibitors.
262 | Which drugs have the highest frequency and what are the typical outcomes?
263 | ```
264 | 
265 | **Expected BioMCP Usage:**
266 | 
267 | 1. `think` - Structure pneumonitis investigation
268 | 2. `openfda_adverse_searcher(drug="pembrolizumab", reaction="pneumonitis")`
269 | 3. `openfda_adverse_searcher(drug="nivolumab", reaction="pneumonitis")`
270 | 4. `openfda_adverse_searcher(drug="atezolizumab", reaction="pneumonitis")`
271 | 5. Compare frequencies and outcomes
272 | 6. `openfda_adverse_getter(report_id="...")` for severe cases
273 | 
274 | ## Quality Assurance
275 | 
276 | ### Diagnostic Test Validation
277 | 
278 | ```
279 | What quality issues have been reported for liquid biopsy ctDNA tests
280 | that could affect treatment decisions?
281 | ```
282 | 
283 | **Expected BioMCP Usage:**
284 | 
285 | 1. `think` - Identify quality factors
286 | 2. `openfda_device_searcher(device="liquid biopsy", genomics_only=True)`
287 | 3. `openfda_device_searcher(device="ctDNA", genomics_only=True)`
288 | 4. `openfda_device_searcher(device="circulating tumor", genomics_only=True)`
289 | 5. Analyze failure modes
290 | 
291 | ## Tips for Effective Prompts
292 | 
293 | 1. **Be specific about the data needed**: Specify if you want adverse events, labels, or device data
294 | 2. **Include relevant filters**: Mention if focusing on serious events, specific populations, or genomic devices
295 | 3. **Request appropriate analysis**: Ask for comparisons, trends, or specific data points
296 | 4. **Consider multiple data sources**: Combine OpenFDA with literature and trial data for comprehensive answers
297 | 5. **Include time frames when relevant**: The OpenFDA tool calls shown in this guide do not take a date-range parameter, but you can still ask the AI to focus its analysis on recent reports
298 | 
299 | ## Integration Examples
300 | 
301 | ### Combining with Literature Search
302 | 
303 | ```
304 | Find FDA adverse events for venetoclax in CLL, then search for published
305 | case reports that provide more clinical context for the most serious events.
306 | ```
307 | 
308 | ### Combining with Clinical Trials
309 | 
310 | ```
311 | What adverse events are reported for FDA-approved CAR-T therapies, and how
312 | do these compare to adverse events being monitored in current clinical trials?
313 | ```
314 | 
315 | ### Combining with Variant Data
316 | 
317 | ```
318 | For patients with RET fusion-positive cancers, what FDA-approved targeted
319 | therapies are available and what are their mutation-specific response rates?
320 | ```
321 | 
322 | ## Using Your OpenFDA API Key
323 | 
324 | The OpenFDA API has rate limits: 40 requests/minute without a key, or 240 requests/minute with a key. You can get a free API key at https://open.fda.gov/apis/authentication/
325 | 
326 | ### Method 1: Include API Key in Your Prompt
327 | 
328 | You can provide your API key directly in your conversation with the AI:
329 | 
330 | ```
331 | My OpenFDA API key is: YOUR_API_KEY_HERE
332 | 
333 | Search for all serious adverse events reported for pembrolizumab in the last year.
334 | Include both death and hospitalization events.
335 | ```
336 | 
337 | ```
338 | Using my OpenFDA API key YOUR_API_KEY_HERE, compare the safety profiles of
339 | all FDA-approved BRAF inhibitors for melanoma treatment.
340 | ```
341 | 
342 | ```
343 | I have an OpenFDA API key: YOUR_API_KEY_HERE
344 | 
345 | Analyze device malfunction reports for all NGS-based cancer diagnostic tests.
346 | Focus on false positive and false negative results that could impact treatment decisions.
347 | ```
348 | 
349 | ### Method 2: Set Environment Variable (Recommended for Regular Use)
350 | 
351 | If you're using BioMCP regularly, set the environment variable once:
352 | 
353 | ```bash
354 | export OPENFDA_API_KEY="YOUR_API_KEY_HERE"
355 | ```
356 | 
357 | Then your prompts don't need to include the key:
358 | 
359 | ```
360 | Retrieve the complete FDA label for trastuzumab deruxtecan, including all
361 | warnings and contraindications for HER2-low breast cancer patients.
362 | ```
363 | 
364 | ### Method 3: CLI Usage with API Key
365 | 
366 | When using the BioMCP CLI directly:
367 | 
368 | ```bash
369 | # Pass API key as parameter
370 | biomcp openfda adverse search --drug pembrolizumab --serious --api-key YOUR_API_KEY_HERE
371 | 
372 | # Or set environment variable first
373 | export OPENFDA_API_KEY="YOUR_API_KEY_HERE"
374 | biomcp openfda device search --manufacturer Illumina --genomics-only
375 | ```
376 | 
377 | ### Example Prompts with API Key
378 | 
379 | #### Comprehensive Drug Safety Analysis
380 | 
381 | ```
382 | My OpenFDA API key is: YOUR_API_KEY_HERE
383 | 
384 | Perform a comprehensive safety analysis of sotorasib (Lumakras) including:
385 | 1. All serious adverse events from post-market surveillance
386 | 2. Complete FDA label with all sections
387 | 3. Any device issues reported for its companion diagnostic, if it has one
388 | 4. Compare its safety profile to other KRAS G12C inhibitors if available
389 | 
390 | This is for a clinical review, so I need detailed data from all available FDA sources.
391 | ```
392 | 
393 | #### Large-Scale Adverse Event Analysis
394 | 
395 | ```
396 | Using my OpenFDA API key YOUR_API_KEY_HERE, analyze adverse events for all
397 | FDA-approved checkpoint inhibitors (pembrolizumab, nivolumab, atezolizumab,
398 | durvalumab, avelumab, cemiplimab).
399 | 
400 | For each drug:
401 | - Get the top 20 most frequent adverse events
402 | - Identify immune-related adverse events
403 | - Check for any black box warnings in their labels
404 | - Note any fatal events
405 | 
406 | This requires many API calls, so please use my API key for higher rate limits.
407 | ```
408 | 
409 | #### Multi-Device Comparison
410 | 
411 | ```
412 | I have an OpenFDA API key: YOUR_API_KEY_HERE
413 | 
414 | Compare all FDA adverse event reports for NGS-based companion diagnostic devices
415 | from major manufacturers (Foundation Medicine, Guardant Health, Tempus, etc.).
416 | Focus on:
417 | - Test failure rates
418 | - Sample quality issues
419 | - False positive/negative reports
420 | - Software-related problems
421 | 
422 | This analysis requires querying multiple device records, so the API key will help
423 | avoid rate limiting.
424 | ```
425 | 
426 | #### Batch Label Retrieval
427 | 
428 | ```
429 | My OpenFDA API key is YOUR_API_KEY_HERE.
430 | 
431 | Retrieve the complete FDA labels for all CDK4/6 inhibitors (palbociclib,
432 | ribociclib, abemaciclib) and extract:
433 | - Approved indications
434 | - Dose modifications for adverse events
435 | - Drug-drug interactions
436 | - Special population considerations
437 | 
438 | Then create a comparison table of their safety profiles and dosing guidelines.
439 | ```
440 | 
441 | ### When to Provide an API Key
442 | 
443 | You should provide your API key when:
444 | 
445 | 1. **Performing large-scale analyses** requiring many API calls
446 | 2. **Conducting comprehensive safety reviews** across multiple drugs/devices
447 | 3. **Running batch operations** like comparing multiple products
448 | 4. **Doing rapid iterative searches** that might hit rate limits
449 | 5. **Performing systematic reviews** requiring extensive data retrieval
450 | 
451 | ### API Key Security Notes
452 | 
453 | - Never share your actual API key in public forums or repositories
454 | - The AI will use your key only for the current session
455 | - Keys passed as parameters override environment variables
456 | - The FDA API key is free and can be regenerated if compromised
457 | 
458 | ## Important Notes
459 | 
460 | - Always expect the AI to use the `think` tool first for complex queries
461 | - The AI should include appropriate disclaimers that adverse event reports do not prove causation
462 | - Results are limited by FDA's data availability and reporting patterns
463 | - The AI should suggest when additional data sources might provide complementary information
464 | - With an API key, you can make 240 requests/minute vs 40 without
465 | 
466 | ## Known Limitations
467 | 
468 | ### Drug Shortage Data
469 | 
470 | **Important:** The FDA does not currently provide a machine-readable API for drug shortage data. The shortage search tools will return an informative message directing users to the FDA's web-based shortage database. This is a limitation of FDA's current data infrastructure, not a bug in BioMCP.
471 | 
472 | Alternative resources for drug shortage information:
473 | 
474 | - FDA Drug Shortages Database: https://www.accessdata.fda.gov/scripts/drugshortages/
475 | - ASHP Drug Shortages: https://www.ashp.org/drug-shortages/current-shortages
476 | 
477 | ### Other Limitations
478 | 
479 | - Device adverse event reports use abbreviated device names (e.g., "F1CDX" instead of "FoundationOne CDx")
480 | - Adverse event reports represent voluntary submissions and may not reflect true incidence rates
481 | - Recall information may have a delay of 24-48 hours from initial FDA announcement
482 | 
```

--------------------------------------------------------------------------------
/docs/tutorials/pydantic-ai-integration.md:
--------------------------------------------------------------------------------

```markdown
  1 | # Pydantic AI Integration Guide
  2 | 
  3 | This guide explains how to integrate BioMCP with Pydantic AI for building biomedical AI agents.
  4 | 
  5 | ## Server Modes and Endpoints
  6 | 
  7 | BioMCP supports two primary transports for Pydantic AI integration (STDIO and streamable HTTP), exposed through three server modes:
  8 | 
  9 | ### Available Transport Modes
 10 | 
 11 | | Mode              | Endpoints                  | Pydantic AI Client        | Use Case                        |
 12 | | ----------------- | -------------------------- | ------------------------- | ------------------------------- |
 13 | | `stdio`           | N/A (subprocess)           | `MCPServerStdio`          | Local development, testing      |
 14 | | `streamable_http` | `POST /mcp`, `GET /health` | `MCPServerStreamableHTTP` | Production HTTP deployments     |
 15 | | `worker`          | `POST /mcp`, `GET /health` | `MCPServerStreamableHTTP` | HTTP mode using streamable HTTP |
 16 | 
 17 | Both `streamable_http` and `worker` modes now use FastMCP's native streamable HTTP implementation for full MCP protocol compliance. The SSE-based transport has been deprecated.
 18 | 
 19 | ## Working Examples for Pydantic AI
 20 | 
 21 | Here are the recommended configurations for connecting Pydantic AI to BioMCP:
 22 | 
 23 | ### 1. STDIO Mode (Recommended for Local Development)
 24 | 
 25 | This mode runs BioMCP as a subprocess without needing an HTTP server:
 26 | 
 27 | ```python
 28 | import asyncio
 29 | import os
 30 | from pydantic_ai import Agent
 31 | from pydantic_ai.mcp import MCPServerStdio
 32 | 
 33 | async def main():
 34 |     # Run BioMCP as a subprocess
 35 |     server = MCPServerStdio(
 36 |         "python",
 37 |         args=["-m", "biomcp", "run", "--mode", "stdio"]
 38 |     )
 39 | 
 40 |     # Use a real LLM model (requires API key)
 41 |     model = "openai:gpt-4o-mini"  # Set OPENAI_API_KEY environment variable
 42 | 
 43 |     agent = Agent(model, toolsets=[server])
 44 | 
 45 |     async with agent:
 46 |         # Example query that returns real results
 47 |         result = await agent.run(
 48 |             "Find articles about BRAF V600E mutations in melanoma"
 49 |         )
 50 |         print(result.output)
 51 | 
 52 | if __name__ == "__main__":
 53 |     asyncio.run(main())
 54 | ```
 55 | 
 56 | ### 2. Streamable HTTP Mode (Recommended for Production)
 57 | 
 58 | For production deployments with proper MCP compliance (requires pydantic-ai>=0.6.9):
 59 | 
 60 | ```python
 61 | import asyncio
 62 | import os
 63 | from pydantic_ai import Agent
 64 | from pydantic_ai.mcp import MCPServerStreamableHTTP
 65 | 
 66 | async def main():
 67 |     # Connect to the /mcp endpoint
 68 |     server = MCPServerStreamableHTTP("http://localhost:8000/mcp")
 69 | 
 70 |     # Use a real LLM model (requires API key)
 71 |     # Options: openai:gpt-4o-mini, anthropic:claude-3-haiku-20240307, groq:llama-3.1-70b-versatile
 72 |     model = "openai:gpt-4o-mini"  # Set OPENAI_API_KEY environment variable
 73 | 
 74 |     agent = Agent(model, toolsets=[server])
 75 | 
 76 |     async with agent:
 77 |         # Example queries that return real results
 78 |         result = await agent.run(
 79 |             "Find recent articles about BRAF V600E in melanoma"
 80 |         )
 81 |         print(result.output)
 82 | 
 83 | if __name__ == "__main__":
 84 |     asyncio.run(main())
 85 | ```
 86 | 
 87 | To run the server for this mode:
 88 | 
 89 | ```bash
 90 | # Using streamable_http mode (recommended)
 91 | biomcp run --mode streamable_http --host 0.0.0.0 --port 8000
 92 | 
 93 | # Or using worker mode (also uses streamable HTTP)
 94 | biomcp run --mode worker --host 0.0.0.0 --port 8000
 95 | 
 96 | # Or using Docker
 97 | docker run -p 8000:8000 genomoncology/biomcp:latest biomcp run --mode streamable_http
 98 | ```
 99 | 
100 | ### 3. Direct JSON-RPC Mode (Alternative HTTP)
101 | 
102 | You can also use the JSON-RPC endpoint at the root path:
103 | 
104 | ```python
105 | import httpx
106 | import json
107 | 
108 | async def call_biomcp_jsonrpc(method, params=None):
109 |     """Direct JSON-RPC calls to BioMCP"""
110 |     async with httpx.AsyncClient() as client:
111 |         response = await client.post(
112 |             "http://localhost:8000/",
113 |             json={
114 |                 "jsonrpc": "2.0",
115 |                 "id": 1,
116 |                 "method": method,
117 |                 "params": params or {}
118 |             }
119 |         )
120 |         return response.json()
121 | 
122 | # Example usage
123 | result = await call_biomcp_jsonrpc("tools/list")
124 | print("Available tools:", result)
125 | ```
126 | 
127 | ## Troubleshooting Common Issues
128 | 
129 | ### Issue: TestModel returns empty results
130 | 
131 | **Cause**: TestModel is a mock model for testing - it doesn't execute real searches.
132 | 
133 | **Solution**: This is expected behavior. TestModel returns `{"search":{"results":[]}}` by design. To get real results:
134 | 
135 | - Use a real LLM model with API key: `Agent("openai:gpt-4o-mini", toolsets=[server])`
136 | - Use Groq for free tier: Sign up at console.groq.com, get API key, use `Agent("groq:llama-3.1-70b-versatile", toolsets=[server])`
137 | - Or use BioMCP CLI directly (no API key needed): `biomcp article search --gene BRAF`
138 | 
139 | ### Issue: Connection refused
140 | 
141 | **Solution**: Ensure the server is running with the correct host binding:
142 | 
143 | ```bash
144 | biomcp run --mode worker --host 0.0.0.0 --port 8000
145 | ```
146 | 
147 | ### Issue: CORS errors in browser
148 | 
149 | **Solution**: The server includes CORS headers by default. If you still have issues, check if a proxy or firewall is blocking the headers.
150 | 
151 | ### Issue: Health endpoint returns 404
152 | 
153 | **Solution**: The health endpoint is available at `GET /health` in both worker and streamable_http modes. Ensure you're using the latest version:
154 | 
155 | ```bash
156 | pip install --upgrade biomcp-python
157 | ```
158 | 
159 | ### Issue: SSE endpoint not found
160 | 
161 | **Solution**: The SSE transport has been deprecated. Use streamable HTTP mode instead:
162 | 
163 | ```python
164 | # Old (deprecated)
165 | # from pydantic_ai.mcp import MCPServerSSE
166 | # server = MCPServerSSE("http://localhost:8000/sse")
167 | 
168 | # New (recommended)
169 | from pydantic_ai.mcp import MCPServerStreamableHTTP
170 | server = MCPServerStreamableHTTP("http://localhost:8000/mcp")
171 | ```
172 | 
173 | ## Testing Your Connection
174 | 
175 | Here are test scripts to verify your setup for different modes:
176 | 
177 | ### Testing STDIO Mode (Local Development)
178 | 
179 | ```python
180 | import asyncio
181 | from pydantic_ai import Agent
182 | from pydantic_ai.models.test import TestModel
183 | from pydantic_ai.mcp import MCPServerStdio
184 | 
185 | async def test_stdio_connection():
186 |     # Use TestModel to verify connection (won't return real data)
187 |     server = MCPServerStdio(
188 |         "python",
189 |         args=["-m", "biomcp", "run", "--mode", "stdio"]
190 |     )
191 | 
192 |     agent = Agent(
193 |         model=TestModel(call_tools=["search"]),
194 |         toolsets=[server]
195 |     )
196 | 
197 |     async with agent:
198 |         print(f"✅ STDIO Connection successful!")
199 | 
200 |         # Test a simple search (returns mock data)
201 |         result = await agent.run("Test search for BRAF")
202 |         print(f"✅ Tool execution successful!")
203 |         print(f"Note: TestModel returns mock data: {result.output}")
204 | 
205 | if __name__ == "__main__":
206 |     asyncio.run(test_stdio_connection())
207 | ```
208 | 
209 | ### Testing Streamable HTTP Mode (Production)
210 | 
211 | First, ensure the server is running:
212 | 
213 | ```bash
214 | # Start the server in a separate terminal
215 | biomcp run --mode streamable_http --port 8000
216 | ```
217 | 
218 | Then test the connection:
219 | 
220 | ```python
221 | import asyncio
222 | from pydantic_ai import Agent
223 | from pydantic_ai.models.test import TestModel
224 | from pydantic_ai.mcp import MCPServerStreamableHTTP
225 | 
226 | async def test_streamable_http_connection():
227 |     # Connect to the running server's /mcp endpoint
228 |     server = MCPServerStreamableHTTP("http://localhost:8000/mcp")
229 | 
230 |     # Create agent with TestModel (no API keys needed)
231 |     agent = Agent(
232 |         model=TestModel(call_tools=["search"]),
233 |         toolsets=[server]
234 |     )
235 | 
236 |     async with agent:
237 |         print("✅ Streamable HTTP Connection successful!")
238 | 
239 |         # Test a query
240 |         result = await agent.run("Find articles about BRAF")
241 |         print("✅ Tool execution successful!")
242 |         if result.output:
243 |             print(f"📄 Received {len(result.output)} characters of output")
244 | 
245 | if __name__ == "__main__":
246 |     asyncio.run(test_streamable_http_connection())
247 | ```
248 | 
249 | ### Important: Understanding TestModel vs Real Results
250 | 
251 | **TestModel is a MOCK model** - it doesn't execute real searches:
252 | 
253 | - TestModel simulates tool calls but returns empty results: `{"search":{"results":[]}}`
254 | - This is by design - TestModel is for testing the connection flow, not getting real data
255 | - To get actual search results, you need to use a real LLM model
256 | 
257 | **To get real results:**
258 | 
259 | 1. **Use a real LLM model** (requires API key):
260 | 
261 | ```python
262 | # Replace TestModel with a real model
263 | agent = Agent(
264 |     "openai:gpt-4o-mini",  # or "anthropic:claude-3-haiku"
265 |     toolsets=[server]
266 | )
267 | ```
268 | 
269 | 2. **Use BioMCP CLI directly** (no API key needed):
270 | 
271 | ```bash
272 | # Get real search results via CLI
273 | biomcp article search --gene BRAF --disease melanoma --json
274 | ```
275 | 
276 | 3. **For integration testing** without API keys:
277 | 
278 | ```python
279 | import subprocess
280 | import json
281 | 
282 | # Use CLI to get real results
283 | result = subprocess.run(
284 |     ["biomcp", "article", "search", "--gene", "BRAF", "--json"],
285 |     capture_output=True,
286 |     text=True
287 | )
288 | data = json.loads(result.stdout)
289 | print(f"Found {len(data['articles'])} real articles")
290 | ```
291 | 
292 | **Note**: The Streamable HTTP tests in our test suite verify this functionality works correctly. If you encounter connection issues, ensure:
293 | 
294 | 1. The server is fully started before connecting
295 | 2. You're using pydantic-ai >= 0.6.9
296 | 3. The port is not blocked by a firewall
297 | 
298 | ### Complete Working Example with Real Results
299 | 
300 | Here's a complete example that connects to BioMCP via Streamable HTTP and retrieves real biomedical data:
301 | 
302 | ```python
303 | #!/usr/bin/env python3
304 | """
305 | Working example of Pydantic AI + BioMCP with Streamable HTTP.
306 | This will get real search results from your BioMCP server.
307 | 
308 | Requires one of:
309 | - export OPENAI_API_KEY='your-key'
310 | - export ANTHROPIC_API_KEY='your-key'
311 | - export GROQ_API_KEY='your-key'  (free tier at console.groq.com)
312 | """
313 | 
314 | import asyncio
315 | import os
316 | from pydantic_ai import Agent
317 | from pydantic_ai.mcp import MCPServerStreamableHTTP
318 | 
319 | 
320 | async def main():
321 |     # Server configuration
322 |     SERVER_URL = "http://localhost:8000/mcp"  # Adjust port as needed
323 | 
324 |     # Detect which API key is available
325 |     if os.getenv("OPENAI_API_KEY"):
326 |         model = "openai:gpt-4o-mini"
327 |         print("Using OpenAI GPT-4o-mini")
328 |     elif os.getenv("ANTHROPIC_API_KEY"):
329 |         model = "anthropic:claude-3-haiku-20240307"
330 |         print("Using Claude 3 Haiku")
331 |     elif os.getenv("GROQ_API_KEY"):
332 |         model = "groq:llama-3.1-70b-versatile"  # Free tier available
333 |         print("Using Groq Llama 3.1")
334 |     else:
335 |         print("No API key found! Please set OPENAI_API_KEY, ANTHROPIC_API_KEY, or GROQ_API_KEY")
336 |         return
337 | 
338 |     # Connect to BioMCP server
339 |     server = MCPServerStreamableHTTP(SERVER_URL)
340 |     agent = Agent(model, toolsets=[server])
341 | 
342 |     async with agent:
343 |         print("Connected to BioMCP!\n")
344 | 
345 |         # Search for articles (includes cBioPortal data for genes)
346 |         result = await agent.run(
347 |             "Search for 2 recent articles about BRAF V600E mutations in melanoma. "
348 |             "List the title and first author for each."
349 |         )
350 |         print("Article Search Results:")
351 |         print(result.output)
352 |         print("\n" + "="*60 + "\n")
353 | 
354 |         # Search for clinical trials
355 |         result2 = await agent.run(
356 |             "Find 2 clinical trials for melanoma with BRAF mutations "
357 |             "that are currently recruiting. Show NCT ID and title."
358 |         )
359 |         print("Clinical Trial Results:")
360 |         print(result2.output)
361 |         print("\n" + "="*60 + "\n")
362 | 
363 |         # Search for variant information
364 |         result3 = await agent.run(
365 |             "Search for pathogenic TP53 variants. Show 2 examples."
366 |         )
367 |         print("Variant Search Results:")
368 |         print(result3.output)
369 | 
370 | 
371 | if __name__ == "__main__":
372 |     # Start your BioMCP server first:
373 |     # biomcp run --mode streamable_http --port 8000
374 | 
375 |     asyncio.run(main())
376 | ```
377 | 
378 | **Running this example:**
379 | 
380 | 1. Start the BioMCP server:
381 | 
382 | ```bash
383 | biomcp run --mode streamable_http --port 8000
384 | ```
385 | 
386 | 2. Set your API key (choose one):
387 | 
388 | ```bash
389 | export OPENAI_API_KEY='your-key'        # OpenAI
390 | export ANTHROPIC_API_KEY='your-key'     # Anthropic
391 | export GROQ_API_KEY='your-key'          # Groq (free tier available)
392 | ```
393 | 
394 | 3. Run the script:
395 | 
396 | ```bash
397 | python biomcp_example.py
398 | ```
399 | 
400 | This will return actual biomedical data from PubMed, ClinicalTrials.gov, and variant databases!
401 | 
402 | ## Using BioMCP Tools with Pydantic AI
403 | 
404 | Once connected, you can use BioMCP's biomedical research tools:
405 | 
406 | ```python
407 | import os
408 | from pydantic_ai import Agent
409 | from pydantic_ai.mcp import MCPServerStdio
410 | 
411 | async def biomedical_research_example():
412 |     server = MCPServerStdio(
413 |         "python",
414 |         args=["-m", "biomcp", "run", "--mode", "stdio"]
415 |     )
416 | 
417 |     # Choose model based on available API key
418 |     if os.getenv("OPENAI_API_KEY"):
419 |         model = "openai:gpt-4o-mini"
420 |     elif os.getenv("GROQ_API_KEY"):
421 |         model = "groq:llama-3.1-70b-versatile"  # Free tier available
422 |     else:
423 |         raise ValueError("Please set OPENAI_API_KEY or GROQ_API_KEY")
424 | 
425 |     agent = Agent(model, toolsets=[server])
426 | 
427 |     async with agent:
428 |         # Important: Always use the think tool first for complex queries
429 |         result = await agent.run("""
430 |             First use the think tool to plan your approach, then:
431 |             1. Search for articles about immunotherapy resistance in melanoma
432 |             2. Find clinical trials testing combination therapies
433 |             3. Look up genetic markers associated with treatment response
434 |         """)
435 | 
436 |         print(result.output)
437 | ```
438 | 
439 | ## Production Deployment Considerations
440 | 
441 | For production deployments:
442 | 
443 | 1. **Use STDIO mode** for local development or when running in containerized environments where the agent and BioMCP can run in the same container
444 | 2. **Use Streamable HTTP mode** when you need HTTP-based communication between separate services (recommended for production)
445 | 3. **Both `worker` and `streamable_http` modes** now use the same underlying streamable HTTP transport
446 | 4. **Require a real LLM model** - TestModel won't work for production as it only returns mock data
447 | 5. **Consider API costs** - Use cheaper models like `gpt-4o-mini` or Groq's free tier for testing
448 | 6. **Implement proper error handling** and retry logic for network failures
449 | 7. **Set appropriate timeouts** for long-running biomedical searches
450 | 8. **Cache frequently accessed data** to reduce API calls to backend services
451 | 
452 | ### Important Notes
453 | 
454 | - **Real LLM required for results**: TestModel is only for testing connections - use a real LLM (OpenAI, Anthropic, Groq) to get actual biomedical data
455 | - **SSE transport is deprecated**: The old SSE-based transport (`/sse` endpoint) has been removed in favor of streamable HTTP
456 | - **Worker mode now uses streamable HTTP**: The `worker` mode has been updated to use streamable HTTP transport internally
457 | - **Health endpoint**: The `/health` endpoint is available in both HTTP modes for monitoring
458 | - **Free tier option**: Groq offers a free API tier at console.groq.com for testing without costs
459 | 
460 | ## Migration Guide from SSE to Streamable HTTP
461 | 
462 | If you're upgrading from an older version that used SSE transport:
463 | 
464 | ### Code Changes
465 | 
466 | ```python
467 | # Old code (deprecated)
468 | from pydantic_ai.mcp import MCPServerSSE
469 | server = MCPServerSSE("http://localhost:8000/sse")
470 | 
471 | # New code (recommended)
472 | from pydantic_ai.mcp import MCPServerStreamableHTTP
473 | server = MCPServerStreamableHTTP("http://localhost:8000/mcp")
474 | ```
475 | 
476 | ### Server Command Changes
477 | 
478 | ```bash
479 | # Old: SSE endpoints were at /sse
480 | # biomcp run --mode worker  # Used to expose /sse endpoint
481 | 
482 | # New: Both modes now use /mcp endpoint with streamable HTTP
483 | biomcp run --mode worker         # Now uses /mcp with streamable HTTP
484 | biomcp run --mode streamable_http # Also uses /mcp with streamable HTTP
485 | ```
486 | 
487 | ### Key Differences
488 | 
489 | 1. **Endpoint Change**: `/sse` → `/mcp`
490 | 2. **Protocol**: Server-Sent Events → Streamable HTTP (supports both JSON and SSE)
491 | 3. **Client Library**: `MCPServerSSE` → `MCPServerStreamableHTTP`
492 | 4. **Compatibility**: Requires pydantic-ai >= 0.6.9 for `MCPServerStreamableHTTP`
493 | 
494 | ## Next Steps
495 | 
496 | - Review the [MCP Tools Reference](../user-guides/02-mcp-tools-reference.md) for available biomedical research tools
497 | - See [CLI Guide](../user-guides/01-command-line-interface.md) for more server configuration options
498 | - Check [Transport Protocol Guide](../developer-guides/04-transport-protocol.md) for detailed protocol information
499 | 
500 | ## Support
501 | 
502 | If you continue to experience issues:
503 | 
504 | 1. Verify your BioMCP version: `biomcp --version`
505 | 2. Check server logs for error messages
506 | 3. Open an issue on [GitHub](https://github.com/genomoncology/biomcp/issues) with:
507 |    - Your BioMCP version
508 |    - Server startup command
509 |    - Complete error messages
510 |    - Minimal reproduction code
511 | 
```

--------------------------------------------------------------------------------
/docs/how-to-guides/04-predict-variant-effects-with-alphagenome.md:
--------------------------------------------------------------------------------

```markdown
  1 | # How to Predict Variant Effects with AlphaGenome
  2 | 
  3 | This guide demonstrates how to use Google DeepMind's AlphaGenome to predict regulatory effects of genetic variants on gene expression, chromatin accessibility, and splicing.
  4 | 
  5 | ## Overview
  6 | 
  7 | AlphaGenome predicts how DNA variants affect:
  8 | 
  9 | - **Gene Expression**: Log-fold changes in nearby genes
 10 | - **Chromatin Accessibility**: ATAC-seq/DNase-seq signal changes
 11 | - **Splicing**: Effects on splice sites and exon inclusion
 12 | - **Regulatory Elements**: Impact on enhancers, promoters, and transcription factor binding sites (TFBS)
 13 | - **3D Chromatin**: Changes in chromatin interactions
 14 | 
 15 | For technical details on the AlphaGenome integration, see the [AlphaGenome API Reference](../backend-services-reference/07-alphagenome.md).
 16 | 
 17 | ## Setup and API Key
 18 | 
 19 | ### Get Your API Key
 20 | 
 21 | 1. Visit [AlphaGenome Portal](https://deepmind.google.com/science/alphagenome)
 22 | 2. Register for non-commercial use
 23 | 3. Receive API key via email
 24 | 
 25 | For detailed setup instructions, see [Authentication and API Keys](../getting-started/03-authentication-and-api-keys.md#alphagenome).
 26 | 
 27 | ### Configure API Key
 28 | 
 29 | **Option 1: Environment Variable (Personal Use)**
 30 | 
 31 | ```bash
 32 | export ALPHAGENOME_API_KEY="your-key-here"
 33 | ```
 34 | 
 35 | **Option 2: Per-Request (AI Assistants)**
 36 | 
 37 | ```
 38 | "Predict effects of BRAF V600E. My AlphaGenome API key is YOUR_KEY_HERE"
 39 | ```
 40 | 
 41 | **Option 3: Configuration File**
 42 | 
 43 | ```bash
 44 | # .env file
 45 | ALPHAGENOME_API_KEY=your-key-here
 46 | ```
 47 | 
 48 | ### Install AlphaGenome (Optional)
 49 | 
 50 | For local predictions:
 51 | 
 52 | ```bash
 53 | git clone https://github.com/google-deepmind/alphagenome.git
 54 | cd alphagenome && pip install .
 55 | ```
 56 | 
 57 | ## Basic Variant Prediction
 58 | 
 59 | ### Simple Prediction
 60 | 
 61 | Predict the effects of the BRAF V600E mutation:
 62 | 
 63 | ```bash
 64 | # CLI
 65 | biomcp variant predict chr7 140753336 A T
 66 | 
 67 | # Python
 68 | result = await client.variants.predict(
 69 |     chromosome="chr7",
 70 |     position=140753336,
 71 |     reference="A",
 72 |     alternate="T"
 73 | )
 74 | 
 75 | # MCP Tool
 76 | result = await alphagenome_predictor(
 77 |     chromosome="chr7",
 78 |     position=140753336,
 79 |     reference="A",
 80 |     alternate="T"
 81 | )
 82 | ```
 83 | 
 84 | ### Understanding Results
 85 | 
 86 | ```python
 87 | # Gene expression changes
 88 | for gene in result.gene_expression:
 89 |     print(f"{gene.name}: {gene.log2_fold_change}")
 90 |     # Positive = increased expression
 91 |     # Negative = decreased expression
 92 |     # |value| > 1.0 = strong effect
 93 | 
 94 | # Chromatin accessibility
 95 | for region in result.chromatin:
 96 |     print(f"{region.type}: {region.change}")
 97 |     # Positive = more open chromatin
 98 |     # Negative = more closed chromatin
 99 | 
100 | # Splicing effects
101 | for splice in result.splicing:
102 |     print(f"{splice.event}: {splice.delta_psi}")
103 |     # PSI = Percent Spliced In
104 |     # Positive = increased inclusion
105 | ```
106 | 
107 | ## Tissue-Specific Predictions
108 | 
109 | ### Single Tissue Analysis
110 | 
111 | Predict effects in specific tissues using UBERON terms:
112 | 
113 | ```python
114 | # Breast tissue analysis
115 | result = await alphagenome_predictor(
116 |     chromosome="chr17",
117 |     position=41246481,
118 |     reference="G",
119 |     alternate="A",
120 |     tissue_types=["UBERON:0000310"]  # breast
121 | )
122 | 
123 | # Common tissue codes:
124 | # UBERON:0000310 - breast
125 | # UBERON:0002107 - liver
126 | # UBERON:0002367 - prostate
127 | # UBERON:0000955 - brain
128 | # UBERON:0002048 - lung
129 | # UBERON:0001155 - colon
130 | ```
131 | 
132 | ### Multi-Tissue Comparison
133 | 
134 | Compare effects across tissues:
135 | 
136 | ```python
137 | tissues = [
138 |     "UBERON:0000310",  # breast
139 |     "UBERON:0002107",  # liver
140 |     "UBERON:0002048"   # lung
141 | ]
142 | 
143 | results = {}
144 | for tissue in tissues:
145 |     results[tissue] = await alphagenome_predictor(
146 |         chromosome="chr17",
147 |         position=41246481,
148 |         reference="G",
149 |         alternate="A",
150 |         tissue_types=[tissue]
151 |     )
152 | 
153 | # Compare gene expression across tissues
154 | for tissue, result in results.items():
155 |     print(f"\n{tissue}:")
156 |     for gene in result.gene_expression[:3]:
157 |         print(f"  {gene.name}: {gene.log2_fold_change}")
158 | ```
159 | 
160 | ## Analysis Window Sizes
161 | 
162 | ### Choosing the Right Interval
163 | 
164 | Different interval sizes capture different regulatory effects:
165 | 
166 | ```python
167 | # Short-range (promoter effects)
168 | result_2kb = await alphagenome_predictor(
169 |     chromosome="chr7",
170 |     position=140753336,
171 |     reference="A",
172 |     alternate="T",
173 |     interval_size=2048  # 2kb
174 | )
175 | 
176 | # Medium-range (enhancer-promoter)
177 | result_128kb = await alphagenome_predictor(
178 |     chromosome="chr7",
179 |     position=140753336,
180 |     reference="A",
181 |     alternate="T",
182 |     interval_size=131072  # 128kb (default)
183 | )
184 | 
185 | # Long-range (TAD-level effects)
186 | result_1mb = await alphagenome_predictor(
187 |     chromosome="chr7",
188 |     position=140753336,
189 |     reference="A",
190 |     alternate="T",
191 |     interval_size=1048576  # 1Mb
192 | )
193 | ```
194 | 
195 | **Interval Size Guide:**
196 | 
197 | - **2kb**: Promoter variants, TSS mutations
198 | - **16kb**: Local regulatory elements
199 | - **128kb**: Enhancer-promoter interactions (default)
200 | - **512kb**: Long-range regulatory interactions
201 | - **1Mb**: TAD boundaries, super-enhancers
202 | 
203 | ## Clinical Workflows
204 | 
205 | ### Workflow 1: VUS (Variant of Uncertain Significance) Analysis
206 | 
207 | ```python
208 | async def analyze_vus(chromosome: str, position: int, ref: str, alt: str):
209 |     # Step 1: Think about the analysis
210 |     await think(
211 |         thought=f"Analyzing VUS at {chromosome}:{position} {ref}>{alt}",
212 |         thoughtNumber=1
213 |     )
214 | 
215 |     # Step 2: Get variant annotations
216 |     variant_id = f"{chromosome}:g.{position}{ref}>{alt}"
217 |     try:
218 |         known_variant = await variant_getter(variant_id)
219 |         if known_variant.clinical_significance:
220 |             return f"Already classified: {known_variant.clinical_significance}"
221 |     except:
222 |         pass  # Variant not in databases
223 | 
224 |     # Step 3: Predict regulatory effects
225 |     prediction = await alphagenome_predictor(
226 |         chromosome=chromosome,
227 |         position=position,
228 |         reference=ref,
229 |         alternate=alt,
230 |         interval_size=131072
231 |     )
232 | 
233 |     # Step 4: Analyze impact
234 |     high_impact_genes = [
235 |         g for g in prediction.gene_expression
236 |         if abs(g.log2_fold_change) > 1.0
237 |     ]
238 | 
239 |     # Step 5: Search literature
240 |     if high_impact_genes:
241 |         gene_symbols = [g.name for g in high_impact_genes[:3]]
242 |         articles = await article_searcher(
243 |             genes=gene_symbols,
244 |             keywords=["pathogenic", "disease", "mutation"]
245 |         )
246 | 
247 |     return {
248 |         "variant": f"{chromosome}:{position} {ref}>{alt}",
249 |         "high_impact_genes": high_impact_genes,
250 |         "regulatory_assessment": assess_regulatory_impact(prediction),
251 |         "literature_support": len(articles) if high_impact_genes else 0
252 |     }
253 | 
254 | def assess_regulatory_impact(prediction):
255 |     """Classify regulatory impact severity"""
256 |     max_expression_change = max(
257 |         abs(g.log2_fold_change) for g in prediction.gene_expression
258 |     ) if prediction.gene_expression else 0
259 | 
260 |     if max_expression_change > 2.0:
261 |         return "HIGH - Strong regulatory effect"
262 |     elif max_expression_change > 1.0:
263 |         return "MODERATE - Significant regulatory effect"
264 |     elif max_expression_change > 0.5:
265 |         return "LOW - Mild regulatory effect"
266 |     else:
267 |         return "MINIMAL - No significant regulatory effect"
268 | ```
269 | 
270 | ### Workflow 2: Non-coding Variant Prioritization
271 | 
272 | ```python
273 | async def prioritize_noncoding_variants(variants: list[dict], disease_genes: list[str]):
274 |     """Rank non-coding variants by predicted impact on disease genes"""
275 | 
276 |     results = []
277 | 
278 |     for variant in variants:
279 |         # Predict effects
280 |         prediction = await alphagenome_predictor(
281 |             chromosome=variant["chr"],
282 |             position=variant["pos"],
283 |             reference=variant["ref"],
284 |             alternate=variant["alt"]
285 |         )
286 | 
287 |         # Check impact on disease genes
288 |         disease_impact = {}
289 |         for gene in prediction.gene_expression:
290 |             if gene.name in disease_genes:
291 |                 disease_impact[gene.name] = gene.log2_fold_change
292 | 
293 |         # Calculate priority score
294 |         if disease_impact:
295 |             max_impact = max(abs(v) for v in disease_impact.values())
296 |             results.append({
297 |                 "variant": variant,
298 |                 "disease_genes_affected": disease_impact,
299 |                 "priority_score": max_impact,
300 |                 "chromatin_changes": len([c for c in prediction.chromatin if c.change > 0.5])
301 |             })
302 | 
303 |     # Sort by priority
304 |     results.sort(key=lambda x: x["priority_score"], reverse=True)
305 |     return results
306 | 
307 | # Example usage
308 | variants_to_test = [
309 |     {"chr": "chr17", "pos": 41246000, "ref": "A", "alt": "G"},
310 |     {"chr": "chr17", "pos": 41246500, "ref": "C", "alt": "T"},
311 |     {"chr": "chr17", "pos": 41247000, "ref": "G", "alt": "A"}
312 | ]
313 | 
314 | breast_cancer_genes = ["BRCA1", "BRCA2", "TP53", "PTEN"]
315 | prioritized = await prioritize_noncoding_variants(variants_to_test, breast_cancer_genes)
316 | ```
317 | 
318 | ### Workflow 3: Splicing Analysis
319 | 
320 | ```python
321 | async def analyze_splicing_variant(gene: str, exon: int, variant_pos: int, ref: str, alt: str):
322 |     """Analyze potential splicing effects of a variant"""
323 | 
324 |     # Get gene information
325 |     gene_info = await gene_getter(gene)
326 |     chromosome = f"chr{gene_info.genomic_location.chr}"
327 | 
328 |     # Predict splicing effects
329 |     prediction = await alphagenome_predictor(
330 |         chromosome=chromosome,
331 |         position=variant_pos,
332 |         reference=ref,
333 |         alternate=alt,
334 |         interval_size=16384  # Smaller window for splicing
335 |     )
336 | 
337 |     # Analyze splicing predictions
338 |     splicing_effects = []
339 |     for event in prediction.splicing:
340 |         if abs(event.delta_psi) > 0.1:  # 10% change in splicing
341 |             splicing_effects.append({
342 |                 "type": event.event_type,
343 |                 "change": event.delta_psi,
344 |                 "affected_exon": event.exon,
345 |                 "interpretation": interpret_splicing(event)
346 |             })
347 | 
348 |     # Search for similar splicing variants
349 |     articles = await article_searcher(
350 |         genes=[gene],
351 |         keywords=[f"exon {exon}", "splicing", "splice site"]
352 |     )
353 | 
354 |     return {
355 |         "variant": f"{gene} exon {exon} {ref}>{alt}",
356 |         "splicing_effects": splicing_effects,
357 |         "likely_consequence": predict_consequence(splicing_effects),
358 |         "literature_precedent": len(articles)
359 |     }
360 | 
361 | def interpret_splicing(event):
362 |     """Interpret splicing changes"""
363 |     if event.delta_psi > 0.5:
364 |         return "Strong increase in exon inclusion"
365 |     elif event.delta_psi > 0.1:
366 |         return "Moderate increase in exon inclusion"
367 |     elif event.delta_psi < -0.5:
368 |         return "Strong exon skipping"
369 |     elif event.delta_psi < -0.1:
370 |         return "Moderate exon skipping"
371 |     else:
372 |         return "Minimal splicing change"
373 | ```
374 | 
375 | ## Research Applications
376 | 
377 | ### Enhancer Variant Analysis
378 | 
379 | ```python
380 | async def analyze_enhancer_variant(chr: str, pos: int, ref: str, alt: str, target_gene: str):
381 |     """Analyze variant in potential enhancer region"""
382 | 
383 |     # Use larger window to capture enhancer-promoter interactions
384 |     prediction = await alphagenome_predictor(
385 |         chromosome=chr,
386 |         position=pos,
387 |         reference=ref,
388 |         alternate=alt,
389 |         interval_size=524288  # 512kb
390 |     )
391 | 
392 |     # Find target gene effect
393 |     target_effect = None
394 |     for gene in prediction.gene_expression:
395 |         if gene.name == target_gene:
396 |             target_effect = gene.log2_fold_change
397 |             break
398 | 
399 |     # Analyze chromatin changes
400 |     chromatin_opening = sum(
401 |         1 for c in prediction.chromatin
402 |         if c.change > 0 and c.type == "enhancer"
403 |     )
404 | 
405 |     return {
406 |         "variant_location": f"{chr}:{pos}",
407 |         "target_gene": target_gene,
408 |         "expression_change": target_effect,
409 |         "enhancer_activity": "increased" if chromatin_opening > 0 else "decreased",
410 |         "likely_enhancer": abs(target_effect or 0) > 0.5 and chromatin_opening > 0
411 |     }
412 | ```
413 | 
414 | ### Pharmacogenomic Predictions
415 | 
416 | ```python
417 | async def predict_drug_response_variant(drug_target: str, variant: dict):
418 |     """Predict how variant affects drug target expression"""
419 | 
420 |     # Get drug information
421 |     drug_info = await drug_getter(drug_target)
422 |     target_genes = drug_info.targets
423 | 
424 |     # Predict variant effects
425 |     prediction = await alphagenome_predictor(
426 |         chromosome=variant["chr"],
427 |         position=variant["pos"],
428 |         reference=variant["ref"],
429 |         alternate=variant["alt"],
430 |         tissue_types=["UBERON:0002107"]  # liver for drug metabolism
431 |     )
432 | 
433 |     # Check effects on drug targets
434 |     target_effects = {}
435 |     for gene in prediction.gene_expression:
436 |         if gene.name in target_genes:
437 |             target_effects[gene.name] = gene.log2_fold_change
438 | 
439 |     # Interpret results
440 |     if any(abs(effect) > 1.0 for effect in target_effects.values()):
441 |         response = "Likely altered drug response"
442 |     elif any(abs(effect) > 0.5 for effect in target_effects.values()):
443 |         response = "Possible altered drug response"
444 |     else:
445 |         response = "Unlikely to affect drug response"
446 | 
447 |     return {
448 |         "drug": drug_target,
449 |         "variant": variant,
450 |         "target_effects": target_effects,
451 |         "prediction": response,
452 |         "recommendation": "Consider dose adjustment" if "altered" in response else "Standard dosing"
453 |     }
454 | ```
455 | 
456 | ## Best Practices
457 | 
458 | ### 1. Validate Input Coordinates
459 | 
460 | ```python
461 | # Always use "chr" prefix
462 | chromosome = "chr7"  # ✅ Correct
463 | # chromosome = "7"   # ❌ Wrong
464 | 
465 | # Use 1-based positions (not 0-based)
466 | position = 140753336  # ✅ 1-based
467 | ```
468 | 
469 | ### 2. Handle API Errors Gracefully
470 | 
471 | ```python
472 | try:
473 |     result = await alphagenome_predictor(...)
474 | except Exception as e:
475 |     if "API key" in str(e):
476 |         print("Please provide AlphaGenome API key")
477 |     elif "Invalid sequence" in str(e):
478 |         print("Check chromosome and position")
479 |     else:
480 |         print(f"Prediction failed: {e}")
481 | ```
482 | 
483 | ### 3. Combine with Other Tools
484 | 
485 | ```python
486 | # Complete variant analysis pipeline
487 | async def comprehensive_variant_analysis(variant_id: str):
488 |     # 1. Get known annotations
489 |     known = await variant_getter(variant_id)
490 | 
491 |     # 2. Predict regulatory effects
492 |     prediction = await alphagenome_predictor(
493 |         chromosome=f"chr{known.chr}",
494 |         position=known.pos,
495 |         reference=known.ref,
496 |         alternate=known.alt
497 |     )
498 | 
499 |     # 3. Search literature
500 |     articles = await article_searcher(
501 |         variants=[variant_id],
502 |         genes=[known.gene.symbol]
503 |     )
504 | 
505 |     # 4. Find relevant trials
506 |     trials = await trial_searcher(
507 |         other_terms=[known.gene.symbol, "mutation"]
508 |     )
509 | 
510 |     return {
511 |         "annotations": known,
512 |         "predictions": prediction,
513 |         "literature": articles,
514 |         "trials": trials
515 |     }
516 | ```
517 | 
518 | ### 4. Interpret Results Appropriately
519 | 
520 | ```python
521 | def interpret_expression_change(log2_fc):
522 |     """Convert log2 fold change to interpretation"""
523 |     if log2_fc > 2.0:
524 |         return "Very strong increase (>4x)"
525 |     elif log2_fc > 1.0:
526 |         return "Strong increase (2-4x)"
527 |     elif log2_fc > 0.5:
528 |         return "Moderate increase (1.4-2x)"
529 |     elif log2_fc < -2.0:
530 |         return "Very strong decrease (<0.25x)"
531 |     elif log2_fc < -1.0:
532 |         return "Strong decrease (0.25-0.5x)"
533 |     elif log2_fc < -0.5:
534 |         return "Moderate decrease (0.5-0.7x)"
535 |     else:
536 |         return "Minimal change"
537 | ```
538 | 
539 | ## Limitations and Considerations
540 | 
541 | ### Technical Limitations
542 | 
543 | - **Human only**: GRCh38 reference genome
544 | - **SNVs only**: No indels or structural variants
545 | - **Exact coordinates**: Must have precise genomic position
546 | - **Sequence context**: Requires reference sequence match
547 | 
548 | ### Interpretation Caveats
549 | 
550 | - **Predictions not certainties**: Validate with functional studies
551 | - **Context matters**: Cell type, developmental stage affect outcomes
552 | - **Indirect effects**: May miss complex regulatory cascades
553 | - **Population variation**: Individual genetic background can influence variant effects
554 | 
555 | ## Troubleshooting
556 | 
557 | ### Common Issues
558 | 
559 | **"API key required"**
560 | 
561 | - Set the API key as an environment variable or provide it per request
562 | - Check key validity at AlphaGenome portal
563 | 
564 | **"Invalid sequence length"**
565 | 
566 | - Verify chromosome format (use "chr" prefix)
567 | - Check position is within chromosome bounds
568 | - Ensure ref/alt are single nucleotides
569 | 
570 | **"No results returned"**
571 | 
572 | - There may be no genes in the analysis window
573 | - Try larger interval size
574 | - Check if variant is in gene desert
575 | 
576 | **Installation issues**
577 | 
578 | - Ensure Python 3.10+
579 | - Try `pip install --upgrade pip` first
580 | - Check for conflicting protobuf versions
581 | 
582 | ## Next Steps
583 | 
584 | - Explore [comprehensive variant annotations](03-get-comprehensive-variant-annotations.md)
585 | - Learn about [article searches](01-find-articles-and-cbioportal-data.md) for variants
586 | - Set up [logging and monitoring](05-logging-and-monitoring-with-bigquery.md)
587 | 
```

--------------------------------------------------------------------------------
/docs/how-to-guides/06-search-nci-organizations-and-interventions.md:
--------------------------------------------------------------------------------

```markdown
  1 | # How to Search NCI Organizations and Interventions
  2 | 
  3 | This guide demonstrates how to use BioMCP's NCI-specific tools to search for cancer research organizations, interventions (drugs, devices, procedures), and biomarkers.
  4 | 
  5 | ## Prerequisites
  6 | 
  7 | All NCI tools require an API key from [api.cancer.gov](https://api.cancer.gov):
  8 | 
  9 | ```bash
 10 | # Set as environment variable
 11 | export NCI_API_KEY="your-key-here"
 12 | 
 13 | # Or provide per-request in your prompts
 14 | "Find cancer centers in Boston, my NCI API key is YOUR_KEY"
 15 | ```
 16 | 
 17 | ## Organization Search and Lookup
 18 | 
 19 | ### Understanding Organization Search
 20 | 
 21 | The NCI Organization database contains:
 22 | 
 23 | - Cancer research centers and hospitals
 24 | - Clinical trial sponsors
 25 | - Academic institutions
 26 | - Pharmaceutical companies
 27 | - Government facilities
 28 | 
 29 | ### Basic Organization Search
 30 | 
 31 | Find organizations by name:
 32 | 
 33 | ```bash
 34 | # CLI
 35 | biomcp organization search --name "MD Anderson" --api-key YOUR_KEY
 36 | 
 37 | # Python
 38 | orgs = await nci_organization_searcher(
 39 |     name="MD Anderson",
 40 |     api_key="your-key"
 41 | )
 42 | 
 43 | # MCP/AI Assistant
 44 | "Search for MD Anderson Cancer Center, my NCI API key is YOUR_KEY"
 45 | ```
 46 | 
 47 | ### Location-Based Search
 48 | 
 49 | **CRITICAL**: Always use city AND state together to avoid Elasticsearch errors!
 50 | 
 51 | ```python
 52 | # ✅ CORRECT - City and state together
 53 | orgs = await nci_organization_searcher(
 54 |     city="Houston",
 55 |     state="TX",
 56 |     api_key="your-key"
 57 | )
 58 | 
 59 | # ❌ WRONG - Will cause API error
 60 | orgs = await nci_organization_searcher(
 61 |     city="Houston",  # Missing state!
 62 |     api_key="your-key"
 63 | )
 64 | 
 65 | # ❌ WRONG - Will cause API error
 66 | orgs = await nci_organization_searcher(
 67 |     state="TX",  # Missing city!
 68 |     api_key="your-key"
 69 | )
 70 | ```
 71 | 
 72 | ### Organization Types
 73 | 
 74 | Search by organization type:
 75 | 
 76 | ```python
 77 | # Find academic cancer centers
 78 | academic_centers = await nci_organization_searcher(
 79 |     organization_type="Academic",
 80 |     api_key="your-key"
 81 | )
 82 | 
 83 | # Find pharmaceutical companies
 84 | pharma_companies = await nci_organization_searcher(
 85 |     organization_type="Industry",
 86 |     api_key="your-key"
 87 | )
 88 | 
 89 | # Find government research facilities
 90 | gov_facilities = await nci_organization_searcher(
 91 |     organization_type="Government",
 92 |     api_key="your-key"
 93 | )
 94 | ```
 95 | 
 96 | Valid organization types:
 97 | 
 98 | - `Academic` - Universities and medical schools
 99 | - `Industry` - Pharmaceutical and biotech companies
100 | - `Government` - NIH, FDA, VA hospitals
101 | - `Community` - Community hospitals and clinics
102 | - `Network` - Research networks and consortiums
103 | - `Other` - Other organization types
104 | 
105 | ### Getting Organization Details
106 | 
107 | Retrieve complete information about a specific organization:
108 | 
109 | ```python
110 | # Get organization by ID
111 | org_details = await nci_organization_getter(
112 |     organization_id="NCI-2011-03337",
113 |     api_key="your-key"
114 | )
115 | 
116 | # Returns:
117 | # - Full name and aliases
118 | # - Contact information
119 | # - Address and location
120 | # - Associated clinical trials
121 | # - Organization type and status
122 | ```
123 | 
124 | ### Practical Organization Workflows
125 | 
126 | #### Find Regional Cancer Centers
127 | 
128 | ```python
129 | async def find_cancer_centers_by_region(state: str, cities: list[str]):
130 |     """Find all cancer centers in specific cities within a state"""
131 | 
132 |     all_centers = []
133 | 
134 |     for city in cities:
135 |         # ALWAYS use city + state together
136 |         centers = await nci_organization_searcher(
137 |             city=city,
138 |             state=state,
139 |             organization_type="Academic",
140 |             api_key=os.getenv("NCI_API_KEY")
141 |         )
142 |         all_centers.extend(centers)
143 | 
144 |     # Remove duplicates
145 |     unique_centers = {org['id']: org for org in all_centers}
146 | 
147 |     return list(unique_centers.values())
148 | 
149 | # Example: Find cancer centers in major Texas cities
150 | texas_centers = await find_cancer_centers_by_region(
151 |     state="TX",
152 |     cities=["Houston", "Dallas", "San Antonio", "Austin"]
153 | )
154 | ```
155 | 
156 | #### Find Trial Sponsors
157 | 
158 | ```python
159 | async def find_trial_sponsors_by_type(org_type: str, name_filter: str = None):
160 |     """Find organizations sponsoring trials"""
161 | 
162 |     # Search organizations
163 |     orgs = await nci_organization_searcher(
164 |         name=name_filter,
165 |         organization_type=org_type,
166 |         api_key=os.getenv("NCI_API_KEY")
167 |     )
168 | 
169 |     # For each org, get details including trial count
170 |     sponsors = []
171 |     for org in orgs[:10]:  # Limit to avoid rate limits
172 |         details = await nci_organization_getter(
173 |             organization_id=org['id'],
174 |             api_key=os.getenv("NCI_API_KEY")
175 |         )
176 |         if details.get('trial_count', 0) > 0:
177 |             sponsors.append(details)
178 | 
179 |     return sorted(sponsors, key=lambda x: x.get('trial_count', 0), reverse=True)
180 | 
181 | # Find pharmaceutical companies with active trials
182 | pharma_sponsors = await find_trial_sponsors_by_type("Industry")
183 | ```
184 | 
185 | ## Intervention Search and Lookup
186 | 
187 | ### Understanding Interventions
188 | 
189 | Interventions in clinical trials include:
190 | 
191 | - **Drugs**: Chemotherapy, targeted therapy, immunotherapy
192 | - **Devices**: Medical devices, diagnostic tools
193 | - **Procedures**: Surgical techniques, radiation protocols
194 | - **Biologicals**: Cell therapies, vaccines, antibodies
195 | - **Behavioral**: Lifestyle interventions, counseling
196 | - **Other**: Dietary supplements, alternative therapies
197 | 
198 | ### Drug Search
199 | 
200 | Find specific drugs or drug classes:
201 | 
202 | ```bash
203 | # CLI - Find a specific drug
204 | biomcp intervention search --name pembrolizumab --type Drug --api-key YOUR_KEY
205 | 
206 | # CLI - Find drug class
207 | biomcp intervention search --name "PD-1 inhibitor" --type Drug --api-key YOUR_KEY
208 | ```
209 | 
210 | ```python
211 | # Python - Search with synonyms
212 | drugs = await nci_intervention_searcher(
213 |     name="pembrolizumab",
214 |     intervention_type="Drug",
215 |     synonyms=True,  # Include Keytruda, MK-3475, etc.
216 |     api_key="your-key"
217 | )
218 | 
219 | # Search for drug combinations
220 | combos = await nci_intervention_searcher(
221 |     name="nivolumab AND ipilimumab",
222 |     intervention_type="Drug",
223 |     api_key="your-key"
224 | )
225 | ```
226 | 
227 | ### Device and Procedure Search
228 | 
229 | ```python
230 | # Find medical devices
231 | devices = await nci_intervention_searcher(
232 |     intervention_type="Device",
233 |     name="robot",  # Surgical robots
234 |     api_key="your-key"
235 | )
236 | 
237 | # Find procedures
238 | procedures = await nci_intervention_searcher(
239 |     intervention_type="Procedure",
240 |     name="minimally invasive",
241 |     api_key="your-key"
242 | )
243 | 
244 | # Find radiation protocols
245 | radiation = await nci_intervention_searcher(
246 |     intervention_type="Radiation",
247 |     name="proton beam",
248 |     api_key="your-key"
249 | )
250 | ```
251 | 
252 | ### Getting Intervention Details
253 | 
254 | ```python
255 | # Get complete intervention information
256 | intervention = await nci_intervention_getter(
257 |     intervention_id="INT123456",
258 |     api_key="your-key"
259 | )
260 | 
261 | # Returns:
262 | # - Official name and synonyms
263 | # - Intervention type and subtype
264 | # - Mechanism of action (for drugs)
265 | # - FDA approval status
266 | # - Associated clinical trials
267 | # - Manufacturer information
268 | ```
269 | 
270 | ### Practical Intervention Workflows
271 | 
272 | #### Drug Development Pipeline
273 | 
274 | ```python
275 | async def analyze_drug_pipeline(drug_target: str):
276 |     """Analyze drugs in development for a specific target"""
277 | 
278 |     # Search for drugs targeting specific pathway
279 |     drugs = await nci_intervention_searcher(
280 |         name=drug_target,
281 |         intervention_type="Drug",
282 |         api_key=os.getenv("NCI_API_KEY")
283 |     )
284 | 
285 |     pipeline = {
286 |         "preclinical": [],
287 |         "phase1": [],
288 |         "phase2": [],
289 |         "phase3": [],
290 |         "approved": []
291 |     }
292 | 
293 |     for drug in drugs:
294 |         # Get detailed information
295 |         details = await nci_intervention_getter(
296 |             intervention_id=drug['id'],
297 |             api_key=os.getenv("NCI_API_KEY")
298 |         )
299 | 
300 |         # Categorize by development stage
301 |         if details.get('fda_approved'):
302 |             pipeline['approved'].append(details)
303 |         else:
304 |             # Check associated trials for phase
305 |             trial_phases = details.get('trial_phases', [])
306 |             if 'PHASE3' in trial_phases:
307 |                 pipeline['phase3'].append(details)
308 |             elif 'PHASE2' in trial_phases:
309 |                 pipeline['phase2'].append(details)
310 |             elif 'PHASE1' in trial_phases:
311 |                 pipeline['phase1'].append(details)
312 |             else:
313 |                 pipeline['preclinical'].append(details)
314 | 
315 |     return pipeline
316 | 
317 | # Analyze PD-1/PD-L1 inhibitor pipeline
318 | pd1_pipeline = await analyze_drug_pipeline("PD-1 inhibitor")
319 | ```
320 | 
321 | #### Compare Similar Interventions
322 | 
323 | ```python
324 | async def compare_interventions(intervention_names: list[str]):
325 |     """Compare multiple interventions side by side"""
326 | 
327 |     comparisons = []
328 | 
329 |     for name in intervention_names:
330 |         # Search for intervention
331 |         results = await nci_intervention_searcher(
332 |             name=name,
333 |             synonyms=True,
334 |             api_key=os.getenv("NCI_API_KEY")
335 |         )
336 | 
337 |         if results:
338 |             # Get detailed info for first match
339 |             details = await nci_intervention_getter(
340 |                 intervention_id=results[0]['id'],
341 |                 api_key=os.getenv("NCI_API_KEY")
342 |             )
343 | 
344 |             comparisons.append({
345 |                 "name": details['name'],
346 |                 "type": details['type'],
347 |                 "synonyms": details.get('synonyms', []),
348 |                 "fda_approved": details.get('fda_approved', False),
349 |                 "trial_count": len(details.get('trials', [])),
350 |                 "mechanism": details.get('mechanism_of_action', 'Not specified')
351 |             })
352 | 
353 |     return comparisons
354 | 
355 | # Compare checkpoint inhibitors
356 | comparison = await compare_interventions([
357 |     "pembrolizumab",
358 |     "nivolumab",
359 |     "atezolizumab",
360 |     "durvalumab"
361 | ])
362 | ```
363 | 
364 | ## Biomarker Search
365 | 
366 | ### Understanding Biomarker Types
367 | 
368 | The NCI API supports two biomarker types:
369 | 
370 | - `reference_gene` - Gene-based biomarkers (e.g., EGFR, BRAF)
371 | - `branch` - Pathway/branch biomarkers
372 | 
373 | **Note**: You cannot search by gene symbol directly; use the name parameter.
374 | 
375 | ### Basic Biomarker Search
376 | 
377 | ```python
378 | # Search for PD-L1 biomarkers
379 | pdl1_biomarkers = await nci_biomarker_searcher(
380 |     name="PD-L1",
381 |     api_key="your-key"
382 | )
383 | 
384 | # Search for specific biomarker type
385 | gene_biomarkers = await nci_biomarker_searcher(
386 |     biomarker_type="reference_gene",
387 |     api_key="your-key"
388 | )
389 | ```
390 | 
391 | ### Biomarker Analysis Workflow
392 | 
393 | ```python
394 | async def analyze_trial_biomarkers(disease: str):
395 |     """Find biomarkers used in trials for a disease"""
396 | 
397 |     # Get all biomarkers
398 |     all_biomarkers = await nci_biomarker_searcher(
399 |         biomarker_type="reference_gene",
400 |         api_key=os.getenv("NCI_API_KEY")
401 |     )
402 | 
403 |     # Filter by disease association
404 |     disease_biomarkers = []
405 |     for biomarker in all_biomarkers:
406 |         if disease.lower() in str(biomarker).lower():
407 |             disease_biomarkers.append(biomarker)
408 | 
409 |     # Group by frequency
410 |     biomarker_counts = {}
411 |     for bio in disease_biomarkers:
412 |         name = bio.get('name', 'Unknown')
413 |         biomarker_counts[name] = biomarker_counts.get(name, 0) + 1
414 | 
415 |     # Sort by frequency
416 |     return sorted(
417 |         biomarker_counts.items(),
418 |         key=lambda x: x[1],
419 |         reverse=True
420 |     )
421 | 
422 | # Find most common biomarkers in lung cancer trials
423 | lung_biomarkers = await analyze_trial_biomarkers("lung cancer")
424 | ```
425 | 
426 | ## Combined Workflows
427 | 
428 | ### Regional Drug Development Analysis
429 | 
430 | ```python
431 | async def analyze_regional_drug_development(
432 |     state: str,
433 |     cities: list[str],
434 |     drug_class: str
435 | ):
436 |     """Analyze drug development in a specific region"""
437 | 
438 |     # Step 1: Find organizations in the region
439 |     organizations = []
440 |     for city in cities:
441 |         orgs = await nci_organization_searcher(
442 |             city=city,
443 |             state=state,
444 |             organization_type="Industry",
445 |             api_key=os.getenv("NCI_API_KEY")
446 |         )
447 |         organizations.extend(orgs)
448 | 
449 |     # Step 2: Find drugs of interest
450 |     drugs = await nci_intervention_searcher(
451 |         name=drug_class,
452 |         intervention_type="Drug",
453 |         api_key=os.getenv("NCI_API_KEY")
454 |     )
455 | 
456 |     # Step 3: Cross-reference trials
457 |     regional_development = []
458 |     for drug in drugs[:10]:  # Limit for performance
459 |         drug_details = await nci_intervention_getter(
460 |             intervention_id=drug['id'],
461 |             api_key=os.getenv("NCI_API_KEY")
462 |         )
463 | 
464 |         # Check if any trials are sponsored by regional orgs
465 |         for trial in drug_details.get('trials', []):
466 |             for org in organizations:
467 |                 if org['id'] in str(trial):
468 |                     regional_development.append({
469 |                         'drug': drug_details['name'],
470 |                         'organization': org['name'],
471 |                         'location': f"{org.get('city', '')}, {org.get('state', '')}",
472 |                         'trial': trial
473 |                     })
474 | 
475 |     return regional_development
476 | 
477 | # Analyze immunotherapy development in California
478 | ca_immuno = await analyze_regional_drug_development(
479 |     state="CA",
480 |     cities=["San Francisco", "San Diego", "Los Angeles"],
481 |     drug_class="immunotherapy"
482 | )
483 | ```
484 | 
485 | ### Organization to Intervention Pipeline
486 | 
487 | ```python
488 | async def org_to_intervention_pipeline(org_name: str):
489 |     """Trace from organization to their interventions"""
490 | 
491 |     # Find organization
492 |     orgs = await nci_organization_searcher(
493 |         name=org_name,
494 |         api_key=os.getenv("NCI_API_KEY")
495 |     )
496 | 
497 |     if not orgs:
498 |         return None
499 | 
500 |     # Get organization details
501 |     org_details = await nci_organization_getter(
502 |         organization_id=orgs[0]['id'],
503 |         api_key=os.getenv("NCI_API_KEY")
504 |     )
505 | 
506 |     # Get their trials
507 |     org_trials = org_details.get('trials', [])
508 | 
509 |     # Extract unique interventions
510 |     interventions = set()
511 |     for trial_id in org_trials[:20]:  # Sample trials
512 |         trial = await trial_getter(
513 |             nct_id=trial_id,
514 |             source="nci",
515 |             api_key=os.getenv("NCI_API_KEY")
516 |         )
517 | 
518 |         if trial.get('interventions'):
519 |             interventions.update(trial['interventions'])
520 | 
521 |     # Get details for each intervention
522 |     intervention_details = []
523 |     for intervention_name in interventions:
524 |         results = await nci_intervention_searcher(
525 |             name=intervention_name,
526 |             api_key=os.getenv("NCI_API_KEY")
527 |         )
528 |         if results:
529 |             intervention_details.append(results[0])
530 | 
531 |     return {
532 |         'organization': org_details,
533 |         'trial_count': len(org_trials),
534 |         'interventions': intervention_details
535 |     }
536 | 
537 | # Analyze Genentech's intervention portfolio
538 | genentech_portfolio = await org_to_intervention_pipeline("Genentech")
539 | ```
540 | 
541 | ## Best Practices
542 | 
543 | ### 1. Always Use City + State Together
544 | 
545 | ```python
546 | # ✅ GOOD - Prevents API errors
547 | await nci_organization_searcher(city="Boston", state="MA")
548 | 
549 | # ❌ BAD - Will cause Elasticsearch error
550 | await nci_organization_searcher(city="Boston")
551 | ```
552 | 
553 | ### 2. Handle Rate Limits
554 | 
555 | ```python
556 | import asyncio
557 | 
558 | async def search_with_rate_limit(searches: list):
559 |     """Execute searches with rate limiting"""
560 |     results = []
561 | 
562 |     for search in searches:
563 |         result = await search()
564 |         results.append(result)
565 | 
566 |         # Add delay to respect rate limits
567 |         await asyncio.sleep(0.1)  # 10 requests per second
568 | 
569 |     return results
570 | ```
571 | 
572 | ### 3. Use Pagination for Large Results
573 | 
574 | ```python
575 | async def get_all_organizations(org_type: str):
576 |     """Get all organizations of a type using pagination"""
577 | 
578 |     all_orgs = []
579 |     page = 1
580 | 
581 |     while True:
582 |         orgs = await nci_organization_searcher(
583 |             organization_type=org_type,
584 |             page=page,
585 |             page_size=100,  # Maximum allowed
586 |             api_key=os.getenv("NCI_API_KEY")
587 |         )
588 | 
589 |         if not orgs:
590 |             break
591 | 
592 |         all_orgs.extend(orgs)
593 |         page += 1
594 | 
595 |         # Note: Total count may not be available
596 |         if len(orgs) < 100:
597 |             break
598 | 
599 |     return all_orgs
600 | ```
601 | 
602 | ### 4. Cache Results
603 | 
604 | ```python
605 | from functools import lru_cache
606 | import hashlib
607 | 
608 | @lru_cache(maxsize=100)
609 | async def cached_org_search(city: str, state: str, org_type: str):
610 |     """Cache organization searches to reduce API calls"""
611 | 
612 |     return await nci_organization_searcher(
613 |         city=city,
614 |         state=state,
615 |         organization_type=org_type,
616 |         api_key=os.getenv("NCI_API_KEY")
617 |     )
618 | ```
619 | 
620 | ## Troubleshooting
621 | 
622 | ### Common Errors and Solutions
623 | 
624 | 1. **"Search Too Broad" Error**
625 | 
626 |    - Always use city + state together for location searches
627 |    - Add more specific filters (name, type)
628 |    - Reduce page_size parameter
629 | 
630 | 2. **"NCI API key required"**
631 | 
632 |    - Set NCI_API_KEY environment variable
633 |    - Or provide api_key parameter in function calls
634 |    - Or include in prompt: "my NCI API key is YOUR_KEY"
635 | 
636 | 3. **No Results Found**
637 | 
638 |    - Check spelling of organization/drug names
639 |    - Try partial name matches
640 |    - Remove filters and broaden search
641 |    - Enable synonyms for intervention searches
642 | 
643 | 4. **Rate Limit Exceeded**
644 |    - Add delays between requests
645 |    - Reduce concurrent requests
646 |    - Cache frequently accessed data
647 |    - Consider upgrading API key tier
648 | 
649 | ### Debugging Tips
650 | 
651 | ```python
652 | # Enable debug logging
653 | import logging
654 | logging.basicConfig(level=logging.DEBUG)
655 | 
656 | # Test API key
657 | async def test_nci_connection():
658 |     try:
659 |         result = await nci_organization_searcher(
660 |             name="Mayo",
661 |             api_key=os.getenv("NCI_API_KEY")
662 |         )
663 |         print(f"✅ API key valid, found {len(result)} results")
664 |     except Exception as e:
665 |         print(f"❌ API key error: {e}")
666 | 
667 | # Check specific organization exists
668 | async def verify_org_id(org_id: str):
669 |     try:
670 |         org = await nci_organization_getter(
671 |             organization_id=org_id,
672 |             api_key=os.getenv("NCI_API_KEY")
673 |         )
674 |         print(f"✅ Organization found: {org['name']}")
675 |     except:
676 |         print(f"❌ Organization ID not found: {org_id}")
677 | ```
678 | 
679 | ## Next Steps
680 | 
681 | - Review [NCI prompts examples](../tutorials/nci-prompts.md) for AI assistant usage
682 | - Explore [trial search with biomarkers](02-find-trials-with-nci-and-biothings.md)
683 | - Learn about [variant effect prediction](04-predict-variant-effects-with-alphagenome.md)
684 | - Set up [API authentication](../getting-started/03-authentication-and-api-keys.md)
685 | 
```

--------------------------------------------------------------------------------
/tests/tdd/test_router.py:
--------------------------------------------------------------------------------

```python
  1 | """Comprehensive tests for the unified router module."""
  2 | 
  3 | import json
  4 | from unittest.mock import patch
  5 | 
  6 | import pytest
  7 | 
  8 | from biomcp.exceptions import (
  9 |     InvalidDomainError,
 10 |     InvalidParameterError,
 11 |     QueryParsingError,
 12 |     SearchExecutionError,
 13 | )
 14 | from biomcp.router import fetch, format_results, search
 15 | 
 16 | 
 17 | class TestFormatResults:
 18 |     """Test the format_results function."""
 19 | 
 20 |     def test_format_article_results(self):
 21 |         """Test formatting article results."""
 22 |         results = [
 23 |             {
 24 |                 "pmid": "12345",
 25 |                 "title": "Test Article",
 26 |                 "abstract": "This is a test abstract",
 27 |                 # Note: url in input is ignored, always generates PubMed URL
 28 |             }
 29 |         ]
 30 | 
 31 |         # Mock thinking tracker to prevent reminder
 32 |         with patch("biomcp.router.get_thinking_reminder", return_value=""):
 33 |             formatted = format_results(results, "article", 1, 10, 1)
 34 | 
 35 |         assert "results" in formatted
 36 |         assert len(formatted["results"]) == 1
 37 |         result = formatted["results"][0]
 38 |         assert result["id"] == "12345"
 39 |         assert result["title"] == "Test Article"
 40 |         assert "test abstract" in result["text"]
 41 |         assert result["url"] == "https://pubmed.ncbi.nlm.nih.gov/12345/"
 42 | 
 43 |     def test_format_trial_results_api_v2(self):
 44 |         """Test formatting trial results with API v2 structure."""
 45 |         results = [
 46 |             {
 47 |                 "protocolSection": {
 48 |                     "identificationModule": {
 49 |                         "nctId": "NCT12345",
 50 |                         "briefTitle": "Test Trial",
 51 |                     },
 52 |                     "descriptionModule": {
 53 |                         "briefSummary": "This is a test trial summary"
 54 |                     },
 55 |                     "statusModule": {"overallStatus": "RECRUITING"},
 56 |                     "designModule": {"phases": ["PHASE3"]},
 57 |                 }
 58 |             }
 59 |         ]
 60 | 
 61 |         # Mock thinking tracker to prevent reminder
 62 |         with patch("biomcp.router.get_thinking_reminder", return_value=""):
 63 |             formatted = format_results(results, "trial", 1, 10, 1)
 64 | 
 65 |         assert "results" in formatted
 66 |         assert len(formatted["results"]) == 1
 67 |         result = formatted["results"][0]
 68 |         assert result["id"] == "NCT12345"
 69 |         assert result["title"] == "Test Trial"
 70 |         assert "test trial summary" in result["text"]
 71 |         assert "NCT12345" in result["url"]
 72 | 
 73 |     def test_format_trial_results_legacy(self):
 74 |         """Test formatting trial results with legacy structure."""
 75 |         results = [
 76 |             {
 77 |                 "NCT Number": "NCT67890",
 78 |                 "Study Title": "Legacy Trial",
 79 |                 "Brief Summary": "Legacy trial summary",
 80 |                 "Study Status": "COMPLETED",
 81 |                 "Phases": "Phase 2",
 82 |             }
 83 |         ]
 84 | 
 85 |         # Mock thinking tracker to prevent reminder
 86 |         with patch("biomcp.router.get_thinking_reminder", return_value=""):
 87 |             formatted = format_results(results, "trial", 1, 10, 1)
 88 | 
 89 |         assert "results" in formatted
 90 |         assert len(formatted["results"]) == 1
 91 |         result = formatted["results"][0]
 92 |         assert result["id"] == "NCT67890"
 93 |         assert result["title"] == "Legacy Trial"
 94 |         assert "Legacy trial summary" in result["text"]
 95 | 
 96 |     def test_format_variant_results(self):
 97 |         """Test formatting variant results."""
 98 |         results = [
 99 |             {
100 |                 "_id": "chr7:g.140453136A>T",
101 |                 "dbsnp": {"rsid": "rs121913529"},
102 |                 "dbnsfp": {"genename": "BRAF"},
103 |                 "clinvar": {"rcv": {"clinical_significance": "Pathogenic"}},
104 |             }
105 |         ]
106 | 
107 |         # Mock thinking tracker to prevent reminder
108 |         with patch("biomcp.router.get_thinking_reminder", return_value=""):
109 |             formatted = format_results(results, "variant", 1, 10, 1)
110 | 
111 |         assert "results" in formatted
112 |         assert len(formatted["results"]) == 1
113 |         result = formatted["results"][0]
114 |         assert result["id"] == "chr7:g.140453136A>T"
115 |         assert "BRAF" in result["title"]
116 |         assert "Pathogenic" in result["text"]
117 |         assert "rs121913529" in result["url"]
118 | 
119 |     def test_format_results_invalid_domain(self):
120 |         """Test format_results with invalid domain."""
121 |         with pytest.raises(InvalidDomainError) as exc_info:
122 |             format_results([], "invalid_domain", 1, 10, 0)
123 | 
124 |         assert "Unknown domain: invalid_domain" in str(exc_info.value)
125 | 
126 |     def test_format_results_malformed_data(self):
127 |         """Test format_results handles malformed data gracefully."""
128 |         results = [
129 |             {"title": "Good Article", "pmid": "123"},
130 |             None,  # Malformed - will be skipped
131 |             {
132 |                 "invalid": "data"
133 |             },  # Missing required fields but won't fail (treated as preprint)
134 |         ]
135 | 
136 |         # Mock thinking tracker to prevent reminder
137 |         with patch("biomcp.router.get_thinking_reminder", return_value=""):
138 |             formatted = format_results(results, "article", 1, 10, 3)
139 | 
140 |         # Should skip None but include the third (treated as preprint with empty fields)
141 |         assert len(formatted["results"]) == 2
142 |         assert formatted["results"][0]["id"] == "123"
143 |         assert formatted["results"][1]["id"] == ""  # Empty ID for invalid data
144 | 
145 | 
146 | @pytest.mark.asyncio
147 | class TestSearchFunction:
148 |     """Test the unified search function."""
149 | 
150 |     async def test_search_article_domain(self):
151 |         """Test search with article domain."""
152 |         mock_result = json.dumps([
153 |             {"pmid": "123", "title": "Test", "abstract": "Abstract"}
154 |         ])
155 | 
156 |         with patch(
157 |             "biomcp.articles.unified.search_articles_unified"
158 |         ) as mock_search:
159 |             mock_search.return_value = mock_result
160 | 
161 |             # Mock thinking tracker to prevent reminder
162 |             with patch("biomcp.router.get_thinking_reminder", return_value=""):
163 |                 result = await search(
164 |                     query="",
165 |                     domain="article",
166 |                     genes="BRAF",
167 |                     diseases=["cancer"],
168 |                     page_size=10,
169 |                 )
170 | 
171 |             assert "results" in result
172 |             assert len(result["results"]) == 1
173 |             assert result["results"][0]["id"] == "123"
174 | 
175 |     async def test_search_trial_domain(self):
176 |         """Test search with trial domain."""
177 |         mock_result = json.dumps({
178 |             "studies": [
179 |                 {
180 |                     "protocolSection": {
181 |                         "identificationModule": {"nctId": "NCT123"},
182 |                     }
183 |                 }
184 |             ]
185 |         })
186 | 
187 |         with patch("biomcp.trials.search.search_trials") as mock_search:
188 |             mock_search.return_value = mock_result
189 | 
190 |             # Mock thinking tracker to prevent reminder
191 |             with patch("biomcp.router.get_thinking_reminder", return_value=""):
192 |                 result = await search(
193 |                     query="",
194 |                     domain="trial",
195 |                     conditions=["cancer"],
196 |                     phase="Phase 3",
197 |                     page_size=20,
198 |                 )
199 | 
200 |             assert "results" in result
201 |             mock_search.assert_called_once()
202 | 
203 |     async def test_search_variant_domain(self):
204 |         """Test search with variant domain."""
205 |         mock_result = json.dumps([
206 |             {"_id": "rs123", "gene": {"symbol": "BRAF"}}
207 |         ])
208 | 
209 |         with patch("biomcp.variants.search.search_variants") as mock_search:
210 |             mock_search.return_value = mock_result
211 | 
212 |             # Mock thinking tracker to prevent reminder
213 |             with patch("biomcp.router.get_thinking_reminder", return_value=""):
214 |                 result = await search(
215 |                     query="",
216 |                     domain="variant",
217 |                     genes="BRAF",
218 |                     significance="pathogenic",
219 |                     page_size=10,
220 |                 )
221 | 
222 |             assert "results" in result
223 |             assert len(result["results"]) == 1
224 | 
225 |     async def test_search_unified_query(self):
226 |         """Test search with unified query language."""
227 |         with patch("biomcp.router._unified_search") as mock_unified:
228 |             mock_unified.return_value = {
229 |                 "results": [{"id": "1", "title": "Test"}]
230 |             }
231 | 
232 |             result = await search(
233 |                 query="gene:BRAF AND disease:cancer",
234 |                 max_results_per_domain=20,
235 |             )
236 | 
237 |             assert "results" in result
238 |             mock_unified.assert_called_once_with(
239 |                 query="gene:BRAF AND disease:cancer",
240 |                 max_results_per_domain=20,
241 |                 domains=None,
242 |                 explain_query=False,
243 |             )
244 | 
245 |     async def test_search_no_domain_or_query(self):
246 |         """Test search without domain or query raises error."""
247 |         with pytest.raises(InvalidParameterError) as exc_info:
248 |             await search(query="")
249 | 
250 |         assert "query or domain" in str(exc_info.value)
251 | 
252 |     async def test_search_invalid_domain(self):
253 |         """Test search with invalid domain."""
254 |         with pytest.raises(InvalidDomainError):
255 |             await search(query="", domain="invalid_domain")
256 | 
257 |     async def test_search_get_schema(self):
258 |         """Test search with get_schema flag."""
259 |         result = await search(query="", get_schema=True)
260 | 
261 |         assert "domains" in result
262 |         assert "cross_domain_fields" in result
263 |         assert "domain_fields" in result
264 |         assert isinstance(result["cross_domain_fields"], dict)
265 | 
266 |     async def test_search_pagination_validation(self):
267 |         """Test search with invalid pagination parameters."""
268 |         with pytest.raises(InvalidParameterError) as exc_info:
269 |             await search(
270 |                 query="",
271 |                 domain="article",
272 |                 page=0,  # Invalid - must be >= 1
273 |                 page_size=10,
274 |             )
275 | 
276 |         assert "page" in str(exc_info.value)
277 | 
278 |     async def test_search_parameter_parsing(self):
279 |         """Test parameter parsing for list inputs."""
280 |         mock_result = json.dumps([])
281 | 
282 |         with patch(
283 |             "biomcp.articles.unified.search_articles_unified"
284 |         ) as mock_search:
285 |             mock_search.return_value = mock_result
286 | 
287 |             # Test with JSON array string
288 |             await search(
289 |                 query="",
290 |                 domain="article",
291 |                 genes='["BRAF", "KRAS"]',
292 |                 diseases="cancer,melanoma",  # Comma-separated
293 |             )
294 | 
295 |             # Check the request was parsed correctly
296 |             call_args = mock_search.call_args[0][0]
297 |             assert call_args.genes == ["BRAF", "KRAS"]
298 |             assert call_args.diseases == ["cancer", "melanoma"]
299 | 
300 | 
301 | @pytest.mark.asyncio
302 | class TestFetchFunction:
303 |     """Test the unified fetch function."""
304 | 
305 |     async def test_fetch_article(self):
306 |         """Test fetching article details."""
307 |         mock_result = json.dumps([
308 |             {
309 |                 "pmid": 12345,
310 |                 "title": "Test Article",
311 |                 "abstract": "Full abstract",
312 |                 "full_text": "Full text content",
313 |             }
314 |         ])
315 | 
316 |         with patch("biomcp.articles.fetch.fetch_articles") as mock_fetch:
317 |             mock_fetch.return_value = mock_result
318 | 
319 |             result = await fetch(
320 |                 domain="article",
321 |                 id="12345",
322 |             )
323 | 
324 |             assert result["id"] == "12345"
325 |             assert result["title"] == "Test Article"
326 |             assert result["text"] == "Full text content"
327 |             assert "metadata" in result
328 | 
329 |     async def test_fetch_article_invalid_pmid(self):
330 |         """Test fetching article with invalid identifier."""
331 |         result = await fetch(domain="article", id="not_a_number")
332 | 
333 |         # Should return an error since "not_a_number" is neither a valid PMID nor DOI
334 |         assert "error" in result
335 |         assert "Invalid identifier format" in result["error"]
336 |         assert "not_a_number" in result["error"]
337 | 
338 |     async def test_fetch_trial_all_sections(self):
339 |         """Test fetching trial with all sections."""
340 |         mock_protocol = json.dumps({
341 |             "title": "Test Trial",
342 |             "nct_id": "NCT123",
343 |             "brief_summary": "Summary",
344 |         })
345 |         mock_locations = json.dumps({"locations": [{"city": "Boston"}]})
346 |         mock_outcomes = json.dumps({
347 |             "outcomes": {"primary_outcomes": ["Outcome1"]}
348 |         })
349 |         mock_references = json.dumps({"references": [{"pmid": "456"}]})
350 | 
351 |         with (
352 |             patch("biomcp.trials.getter._trial_protocol") as mock_p,
353 |             patch("biomcp.trials.getter._trial_locations") as mock_l,
354 |             patch("biomcp.trials.getter._trial_outcomes") as mock_o,
355 |             patch("biomcp.trials.getter._trial_references") as mock_r,
356 |         ):
357 |             mock_p.return_value = mock_protocol
358 |             mock_l.return_value = mock_locations
359 |             mock_o.return_value = mock_outcomes
360 |             mock_r.return_value = mock_references
361 | 
362 |             result = await fetch(domain="trial", id="NCT123", detail="all")
363 | 
364 |             assert result["id"] == "NCT123"
365 |             assert "metadata" in result
366 |             assert "locations" in result["metadata"]
367 |             assert "outcomes" in result["metadata"]
368 |             assert "references" in result["metadata"]
369 | 
370 |     async def test_fetch_trial_invalid_detail(self):
371 |         """Test fetching trial with invalid detail parameter."""
372 |         with pytest.raises(InvalidParameterError) as exc_info:
373 |             await fetch(
374 |                 domain="trial",
375 |                 id="NCT123",
376 |                 detail="invalid_section",
377 |             )
378 | 
379 |         assert "one of:" in str(exc_info.value)
380 | 
381 |     async def test_fetch_variant(self):
382 |         """Test fetching variant details."""
383 |         mock_result = json.dumps([
384 |             {
385 |                 "_id": "rs123",
386 |                 "gene": {"symbol": "BRAF"},
387 |                 "clinvar": {"clinical_significance": "Pathogenic"},
388 |                 "tcga": {"cancer_types": {}},
389 |                 "external_links": {"dbSNP": "https://example.com"},
390 |             }
391 |         ])
392 | 
393 |         with patch("biomcp.variants.getter.get_variant") as mock_get:
394 |             mock_get.return_value = mock_result
395 | 
396 |             result = await fetch(domain="variant", id="rs123")
397 | 
398 |             assert result["id"] == "rs123"
399 |             assert "TCGA Data: Available" in result["text"]
400 |             assert "external_links" in result["metadata"]
401 | 
402 |     async def test_fetch_variant_list_response(self):
403 |         """Test fetching variant when API returns list."""
404 |         mock_result = json.dumps([
405 |             {"_id": "rs123", "gene": {"symbol": "BRAF"}}
406 |         ])
407 | 
408 |         with patch("biomcp.variants.getter.get_variant") as mock_get:
409 |             mock_get.return_value = mock_result
410 | 
411 |             result = await fetch(domain="variant", id="rs123")
412 | 
413 |             assert result["id"] == "rs123"
414 | 
415 |     async def test_fetch_invalid_domain(self):
416 |         """Test fetch with invalid domain."""
417 |         with pytest.raises(InvalidDomainError):
418 |             await fetch(domain="invalid", id="123")
419 | 
420 |     async def test_fetch_error_handling(self):
421 |         """Test fetch error handling."""
422 |         with patch("biomcp.articles.fetch.fetch_articles") as mock_fetch:
423 |             mock_fetch.side_effect = Exception("API Error")
424 | 
425 |             with pytest.raises(SearchExecutionError) as exc_info:
426 |                 await fetch(domain="article", id="123")
427 | 
428 |             assert "Failed to execute search" in str(exc_info.value)
429 | 
430 |     async def test_fetch_domain_auto_detection_pmid(self):
431 |         """Test domain auto-detection for PMID."""
432 |         with patch("biomcp.articles.fetch._article_details") as mock_fetch:
433 |             mock_fetch.return_value = json.dumps([
434 |                 {"pmid": "12345", "title": "Test"}
435 |             ])
436 | 
437 |             # Numeric ID should auto-detect as article
438 |             result = await fetch(id="12345")
439 |             assert result["id"] == "12345"
440 |             mock_fetch.assert_called_once()
441 | 
442 |     async def test_fetch_domain_auto_detection_nct(self):
443 |         """Test domain auto-detection for NCT ID."""
444 |         with patch("biomcp.trials.getter.get_trial") as mock_get:
445 |             mock_get.return_value = json.dumps({
446 |                 "protocolSection": {
447 |                     "identificationModule": {"briefTitle": "Test Trial"}
448 |                 }
449 |             })
450 | 
451 |             # NCT ID should auto-detect as trial
452 |             result = await fetch(id="NCT12345")
453 |             assert "NCT12345" in result["url"]
454 |             mock_get.assert_called()
455 | 
456 |     async def test_fetch_domain_auto_detection_doi(self):
457 |         """Test domain auto-detection for DOI."""
458 |         with patch("biomcp.articles.fetch._article_details") as mock_fetch:
459 |             mock_fetch.return_value = json.dumps([
460 |                 {"doi": "10.1038/nature12345", "title": "Test"}
461 |             ])
462 | 
463 |             # DOI should auto-detect as article
464 |             await fetch(id="10.1038/nature12345")
465 |             mock_fetch.assert_called_once()
466 | 
467 |     async def test_fetch_domain_auto_detection_variant(self):
468 |         """Test domain auto-detection for variant IDs."""
469 |         with patch("biomcp.variants.getter.get_variant") as mock_get:
470 |             mock_get.return_value = json.dumps([{"_id": "rs12345"}])
471 | 
472 |             # rsID should auto-detect as variant
473 |             await fetch(id="rs12345")
474 |             mock_get.assert_called_once()
475 | 
476 |         # Test HGVS notation
477 |         with patch("biomcp.variants.getter.get_variant") as mock_get:
478 |             mock_get.return_value = json.dumps([
479 |                 {"_id": "chr7:g.140453136A>T"}
480 |             ])
481 | 
482 |             await fetch(id="chr7:g.140453136A>T")
483 |             mock_get.assert_called_once()
484 | 
485 | 
486 | @pytest.mark.asyncio
487 | class TestUnifiedSearch:
488 |     """Test the _unified_search internal function."""
489 | 
490 |     async def test_unified_search_explain_query(self):
491 |         """Test unified search with explain_query flag."""
492 |         from biomcp.router import _unified_search
493 | 
494 |         result = await _unified_search(
495 |             query="gene:BRAF AND disease:cancer", explain_query=True
496 |         )
497 | 
498 |         assert "original_query" in result
499 |         assert "parsed_structure" in result
500 |         assert "routing_plan" in result
501 |         assert "schema" in result
502 | 
503 |     async def test_unified_search_execution(self):
504 |         """Test unified search normal execution."""
505 |         from biomcp.router import _unified_search
506 | 
507 |         with patch("biomcp.query_router.execute_routing_plan") as mock_execute:
508 |             mock_execute.return_value = {
509 |                 "articles": json.dumps([{"pmid": "123", "title": "Article 1"}])
510 |             }
511 | 
512 |             result = await _unified_search(
513 |                 query="gene:BRAF", max_results_per_domain=10
514 |             )
515 | 
516 |             assert "results" in result
517 |             assert isinstance(result["results"], list)
518 | 
519 |     async def test_unified_search_parse_error(self):
520 |         """Test unified search with invalid query."""
521 |         from biomcp.router import _unified_search
522 | 
523 |         with patch("biomcp.query_parser.QueryParser.parse") as mock_parse:
524 |             mock_parse.side_effect = Exception("Parse error")
525 | 
526 |             with pytest.raises(QueryParsingError):
527 |                 await _unified_search(
528 |                     query="invalid::query", max_results_per_domain=10
529 |                 )
530 | 
```

--------------------------------------------------------------------------------
/src/biomcp/integrations/biothings_client.py:
--------------------------------------------------------------------------------

```python
  1 | """BioThings API client for unified access to the BioThings suite.
  2 | 
  3 | The BioThings suite (https://biothings.io) provides high-performance biomedical
  4 | data APIs including:
  5 | - MyGene.info - Gene annotations and information
  6 | - MyVariant.info - Genetic variant annotations (existing integration enhanced)
  7 | - MyDisease.info - Disease ontology and synonyms
  8 | - MyChem.info - Drug/chemical annotations and information
  9 | 
 10 | This module provides a centralized client for interacting with all BioThings APIs,
 11 | handling common concerns like error handling, rate limiting, and response parsing.
 12 | While MyVariant.info has specialized modules for complex variant operations, this
 13 | client provides the base layer for all BioThings API interactions.
 14 | """
 15 | 
 16 | import logging
 17 | from typing import Any
 18 | from urllib.parse import quote
 19 | 
 20 | from pydantic import BaseModel, Field
 21 | 
 22 | from .. import http_client
 23 | from ..constants import (
 24 |     MYVARIANT_GET_URL,
 25 | )
 26 | 
 27 | logger = logging.getLogger(__name__)
 28 | 
 29 | # BioThings API endpoints: per service, a base URL plus its query and get-by-ID URLs
 30 | MYGENE_BASE_URL = "https://mygene.info/v3"
 31 | MYGENE_QUERY_URL = f"{MYGENE_BASE_URL}/query"
 32 | MYGENE_GET_URL = f"{MYGENE_BASE_URL}/gene"
 33 | 
 34 | MYDISEASE_BASE_URL = "https://mydisease.info/v1"
 35 | MYDISEASE_QUERY_URL = f"{MYDISEASE_BASE_URL}/query"
 36 | MYDISEASE_GET_URL = f"{MYDISEASE_BASE_URL}/disease"
 37 | 
 38 | MYCHEM_BASE_URL = "https://mychem.info/v1"
 39 | MYCHEM_QUERY_URL = f"{MYCHEM_BASE_URL}/query"
 40 | MYCHEM_GET_URL = f"{MYCHEM_BASE_URL}/chem"
 41 | 
 42 | 
 43 | class GeneInfo(BaseModel):
 44 |     """Gene record from MyGene.info; only the ``_id`` key is required."""
 45 | 
 46 |     gene_id: str = Field(alias="_id")  # read from the "_id" key of the payload
 47 |     symbol: str | None = None
 48 |     name: str | None = None
 49 |     summary: str | None = None
 50 |     alias: list[str] | None = Field(default_factory=list)  # empty list if absent
 51 |     entrezgene: int | str | None = None  # accepted as either an int or a str
 52 |     ensembl: dict[str, Any] | None = None
 53 |     refseq: dict[str, Any] | None = None
 54 |     type_of_gene: str | None = None
 55 |     taxid: int | None = None
 56 | 
 57 | 
 58 | class DiseaseInfo(BaseModel):
 59 |     """Disease record from MyDisease.info; only the ``_id`` key is required."""
 60 | 
 61 |     disease_id: str = Field(alias="_id")  # read from the "_id" key of the payload
 62 |     name: str | None = None
 63 |     mondo: dict[str, Any] | None = None
 64 |     definition: str | None = None
 65 |     synonyms: list[str] | None = Field(default_factory=list)  # empty list if absent
 66 |     xrefs: dict[str, Any] | None = None
 67 |     phenotypes: list[dict[str, Any]] | None = None
 68 | 
 69 | 
 70 | class DrugInfo(BaseModel):
 71 |     """Drug/chemical record from MyChem.info; only the ``_id`` key is required."""
 72 | 
 73 |     drug_id: str = Field(alias="_id")  # read from the "_id" key of the payload
 74 |     name: str | None = None
 75 |     tradename: list[str] | None = Field(default_factory=list)  # empty list if absent
 76 |     drugbank_id: str | None = None
 77 |     chebi_id: str | None = None
 78 |     chembl_id: str | None = None
 79 |     pubchem_cid: str | None = None
 80 |     unii: str | dict[str, Any] | None = None  # accepted as either a str or a dict
 81 |     inchikey: str | None = None
 82 |     formula: str | None = None
 83 |     description: str | None = None
 84 |     indication: str | None = None
 85 |     pharmacology: dict[str, Any] | None = None
 86 |     mechanism_of_action: str | None = None
 87 | 
 88 | 
 89 | class BioThingsClient:
 90 |     """Unified client for BioThings APIs (MyGene, MyVariant, MyDisease, MyChem)."""
 91 | 
 92 |     def __init__(self):
 93 |         """Initialize the BioThings client; it reuses this module's logger."""
 94 |         self.logger = logger
 95 | 
 96 |     async def get_gene_info(
 97 |         self, gene_id_or_symbol: str, fields: list[str] | None = None
 98 |     ) -> GeneInfo | None:
 99 |         """Get gene information from MyGene.info.
100 | 
101 |         Args:
102 |             gene_id_or_symbol: Gene ID (Entrez, Ensembl) or symbol (e.g., "TP53")
103 |             fields: Optional list of fields to return
104 | 
105 |         Returns:
106 |             GeneInfo object or None if not found
107 |         """
108 |         try:
109 |             # First, try direct GET (works for Entrez IDs)
110 |             if gene_id_or_symbol.isdigit():
111 |                 return await self._get_gene_by_id(gene_id_or_symbol, fields)
112 | 
113 |             # For symbols, we need to query first
114 |             query_result = await self._query_gene(gene_id_or_symbol)
115 |             if not query_result:
116 |                 return None
117 | 
118 |             # Get the best match
119 |             gene_id = query_result[0].get("_id")
120 |             if not gene_id:
121 |                 return None
122 | 
123 |             # Now get full details
124 |             return await self._get_gene_by_id(gene_id, fields)
125 | 
126 |         except Exception as e:
127 |             self.logger.warning(
128 |                 f"Failed to get gene info for {gene_id_or_symbol}: {e}"
129 |             )
130 |             return None
131 | 
132 |     async def _query_gene(self, symbol: str) -> list[dict[str, Any]] | None:
133 |         """Query MyGene.info for a gene symbol."""
134 |         params = {
135 |             "q": f"symbol:{quote(symbol)}",
136 |             "species": "human",
137 |             "fields": "_id,symbol,name,taxid",
138 |             "size": 5,
139 |         }
140 | 
141 |         response, error = await http_client.request_api(
142 |             url=MYGENE_QUERY_URL,
143 |             request=params,
144 |             method="GET",
145 |             domain="mygene",
146 |         )
147 | 
148 |         if error or not response:
149 |             return None
150 | 
151 |         hits = response.get("hits", [])
152 |         # Filter for human genes (taxid 9606)
153 |         human_hits = [h for h in hits if h.get("taxid") == 9606]
154 |         return human_hits if human_hits else hits
155 | 
156 |     async def _get_gene_by_id(
157 |         self, gene_id: str, fields: list[str] | None = None
158 |     ) -> GeneInfo | None:
159 |         """Get gene details by ID from MyGene.info."""
160 |         if fields is None:
161 |             fields = [
162 |                 "symbol",
163 |                 "name",
164 |                 "summary",
165 |                 "alias",
166 |                 "type_of_gene",
167 |                 "ensembl",
168 |                 "refseq",
169 |                 "entrezgene",
170 |             ]
171 | 
172 |         params = {"fields": ",".join(fields)}
173 | 
174 |         response, error = await http_client.request_api(
175 |             url=f"{MYGENE_GET_URL}/{gene_id}",
176 |             request=params,
177 |             method="GET",
178 |             domain="mygene",
179 |         )
180 | 
181 |         if error or not response:
182 |             return None
183 | 
184 |         try:
185 |             return GeneInfo(**response)
186 |         except Exception as e:
187 |             self.logger.warning(f"Failed to parse gene response: {e}")
188 |             return None
189 | 
190 |     async def batch_get_genes(
191 |         self, gene_ids: list[str], fields: list[str] | None = None
192 |     ) -> list[GeneInfo]:
193 |         """Get multiple genes in a single request.
194 | 
195 |         Args:
196 |             gene_ids: List of gene IDs or symbols
197 |             fields: Optional list of fields to return
198 | 
199 |         Returns:
200 |             List of GeneInfo objects
201 |         """
202 |         if not gene_ids:
203 |             return []
204 | 
205 |         if fields is None:
206 |             fields = ["symbol", "name", "summary", "alias", "type_of_gene"]
207 | 
208 |         # MyGene supports POST for batch queries
209 |         data = {
210 |             "ids": ",".join(gene_ids),
211 |             "fields": ",".join(fields),
212 |             "species": "human",
213 |         }
214 | 
215 |         response, error = await http_client.request_api(
216 |             url=MYGENE_GET_URL,
217 |             request=data,
218 |             method="POST",
219 |             domain="mygene",
220 |         )
221 | 
222 |         if error or not response:
223 |             return []
224 | 
225 |         results = []
226 |         for item in response:
227 |             try:
228 |                 if "notfound" not in item:
229 |                     results.append(GeneInfo(**item))
230 |             except Exception as e:
231 |                 self.logger.warning(f"Failed to parse gene in batch: {e}")
232 |                 continue
233 | 
234 |         return results
235 | 
236 |     async def get_disease_info(
237 |         self, disease_id_or_name: str, fields: list[str] | None = None
238 |     ) -> DiseaseInfo | None:
239 |         """Get disease information from MyDisease.info.
240 | 
241 |         Args:
242 |             disease_id_or_name: Disease ID (MONDO, DOID) or name
243 |             fields: Optional list of fields to return
244 | 
245 |         Returns:
246 |             DiseaseInfo object or None if not found
247 |         """
248 |         try:
249 |             # Check if it's an ID (starts with known prefixes)
250 |             if any(
251 |                 disease_id_or_name.upper().startswith(prefix)
252 |                 for prefix in ["MONDO:", "DOID:", "OMIM:", "MESH:"]
253 |             ):
254 |                 return await self._get_disease_by_id(
255 |                     disease_id_or_name, fields
256 |                 )
257 | 
258 |             # Otherwise, query by name
259 |             query_result = await self._query_disease(disease_id_or_name)
260 |             if not query_result:
261 |                 return None
262 | 
263 |             # Get the best match
264 |             disease_id = query_result[0].get("_id")
265 |             if not disease_id:
266 |                 return None
267 | 
268 |             # Now get full details
269 |             return await self._get_disease_by_id(disease_id, fields)
270 | 
271 |         except Exception as e:
272 |             self.logger.warning(
273 |                 f"Failed to get disease info for {disease_id_or_name}: {e}"
274 |             )
275 |             return None
276 | 
277 |     async def _query_disease(self, name: str) -> list[dict[str, Any]] | None:
278 |         """Query MyDisease.info for a disease name."""
279 |         params = {
280 |             "q": quote(name),
281 |             "fields": "_id,name,mondo",
282 |             "size": 10,
283 |         }
284 | 
285 |         response, error = await http_client.request_api(
286 |             url=MYDISEASE_QUERY_URL,
287 |             request=params,
288 |             method="GET",
289 |             domain="mydisease",
290 |         )
291 | 
292 |         if error or not response:
293 |             return None
294 | 
295 |         return response.get("hits", [])
296 | 
297 |     async def _get_disease_by_id(
298 |         self, disease_id: str, fields: list[str] | None = None
299 |     ) -> DiseaseInfo | None:
300 |         """Get disease details by ID from MyDisease.info."""
301 |         if fields is None:
302 |             fields = [
303 |                 "name",
304 |                 "mondo",
305 |                 "definition",
306 |                 "synonyms",
307 |                 "xrefs",
308 |                 "phenotypes",
309 |             ]
310 | 
311 |         params = {"fields": ",".join(fields)}
312 | 
313 |         response, error = await http_client.request_api(
314 |             url=f"{MYDISEASE_GET_URL}/{quote(disease_id, safe='')}",
315 |             request=params,
316 |             method="GET",
317 |             domain="mydisease",
318 |         )
319 | 
320 |         if error or not response:
321 |             return None
322 | 
323 |         try:
324 |             # Extract definition from mondo if available
325 |             if "mondo" in response and isinstance(response["mondo"], dict):
326 |                 if (
327 |                     "definition" in response["mondo"]
328 |                     and "definition" not in response
329 |                 ):
330 |                     response["definition"] = response["mondo"]["definition"]
331 |                 # Extract synonyms from mondo
332 |                 if "synonym" in response["mondo"]:
333 |                     mondo_synonyms = response["mondo"]["synonym"]
334 |                     if isinstance(mondo_synonyms, dict):
335 |                         # Handle exact synonyms
336 |                         exact = mondo_synonyms.get("exact", [])
337 |                         if isinstance(exact, list):
338 |                             response["synonyms"] = exact
339 |                     elif isinstance(mondo_synonyms, list):
340 |                         response["synonyms"] = mondo_synonyms
341 | 
342 |             return DiseaseInfo(**response)
343 |         except Exception as e:
344 |             self.logger.warning(f"Failed to parse disease response: {e}")
345 |             return None
346 | 
347 |     async def get_disease_synonyms(self, disease_id_or_name: str) -> list[str]:
348 |         """Get disease synonyms for query expansion.
349 | 
350 |         Args:
351 |             disease_id_or_name: Disease ID or name
352 | 
353 |         Returns:
354 |             List of synonyms including the original term
355 |         """
356 |         disease_info = await self.get_disease_info(disease_id_or_name)
357 |         if not disease_info:
358 |             return [disease_id_or_name]
359 | 
360 |         synonyms = [disease_id_or_name]
361 |         if disease_info.name and disease_info.name != disease_id_or_name:
362 |             synonyms.append(disease_info.name)
363 | 
364 |         if disease_info.synonyms:
365 |             synonyms.extend(disease_info.synonyms)
366 | 
367 |         # Remove duplicates while preserving order
368 |         seen = set()
369 |         unique_synonyms = []
370 |         for syn in synonyms:
371 |             if syn.lower() not in seen:
372 |                 seen.add(syn.lower())
373 |                 unique_synonyms.append(syn)
374 | 
375 |         return unique_synonyms[
376 |             :5
377 |         ]  # Limit to top 5 to avoid overly broad searches
378 | 
379 |     async def get_drug_info(
380 |         self, drug_id_or_name: str, fields: list[str] | None = None
381 |     ) -> DrugInfo | None:
382 |         """Get drug/chemical information from MyChem.info.
383 | 
384 |         Args:
385 |             drug_id_or_name: Drug ID (DrugBank, ChEMBL, etc.) or name
386 |             fields: Optional list of fields to return
387 | 
388 |         Returns:
389 |             DrugInfo object or None if not found
390 |         """
391 |         try:
392 |             # Check if it's an ID (starts with known prefixes)
393 |             if any(
394 |                 drug_id_or_name.upper().startswith(prefix)
395 |                 for prefix in ["DRUGBANK:", "DB", "CHEMBL", "CHEBI:", "CID"]
396 |             ):
397 |                 return await self._get_drug_by_id(drug_id_or_name, fields)
398 | 
399 |             # Otherwise, query by name
400 |             query_result = await self._query_drug(drug_id_or_name)
401 |             if not query_result:
402 |                 return None
403 | 
404 |             # Get the best match
405 |             drug_id = query_result[0].get("_id")
406 |             if not drug_id:
407 |                 return None
408 | 
409 |             # Now get full details
410 |             return await self._get_drug_by_id(drug_id, fields)
411 | 
412 |         except Exception as e:
413 |             self.logger.warning(
414 |                 f"Failed to get drug info for {drug_id_or_name}: {e}"
415 |             )
416 |             return None
417 | 
418 |     async def _query_drug(self, name: str) -> list[dict[str, Any]] | None:
419 |         """Query MyChem.info for a drug name."""
420 |         params = {
421 |             "q": quote(name),
422 |             "fields": "_id,name,drugbank.name,chebi.name,chembl.pref_name,unii.display_name",
423 |             "size": 10,
424 |         }
425 | 
426 |         response, error = await http_client.request_api(
427 |             url=MYCHEM_QUERY_URL,
428 |             request=params,
429 |             method="GET",
430 |             domain="mychem",
431 |         )
432 | 
433 |         if error or not response:
434 |             return None
435 | 
436 |         hits = response.get("hits", [])
437 | 
438 |         # Sort hits to prioritize those with actual drug names
439 |         def score_hit(hit):
440 |             score = hit.get("_score", 0)
441 |             # Boost score if hit has drug name fields
442 |             if hit.get("drugbank", {}).get("name"):
443 |                 score += 10
444 |             if hit.get("chembl", {}).get("pref_name"):
445 |                 score += 5
446 |             if hit.get("unii", {}).get("display_name"):
447 |                 score += 3
448 |             return score
449 | 
450 |         hits.sort(key=score_hit, reverse=True)
451 |         return hits
452 | 
453 |     async def _get_drug_by_id(
454 |         self, drug_id: str, fields: list[str] | None = None
455 |     ) -> DrugInfo | None:
456 |         """Get drug details by ID from MyChem.info."""
457 |         if fields is None:
458 |             fields = [
459 |                 "name",
460 |                 "drugbank",
461 |                 "chebi",
462 |                 "chembl",
463 |                 "pubchem",
464 |                 "unii",
465 |                 "inchikey",
466 |                 "formula",
467 |                 "description",
468 |                 "indication",
469 |                 "pharmacology",
470 |                 "mechanism_of_action",
471 |             ]
472 | 
473 |         params = {"fields": ",".join(fields)}
474 | 
475 |         response, error = await http_client.request_api(
476 |             url=f"{MYCHEM_GET_URL}/{quote(drug_id, safe='')}",
477 |             request=params,
478 |             method="GET",
479 |             domain="mychem",
480 |         )
481 | 
482 |         if error or not response:
483 |             return None
484 | 
485 |         try:
486 |             # Handle array response (multiple results)
487 |             if isinstance(response, list):
488 |                 if not response:
489 |                     return None
490 |                 # Take the first result
491 |                 response = response[0]
492 | 
493 |             # Extract fields from nested structures
494 |             self._extract_drugbank_fields(response)
495 |             self._extract_chebi_fields(response)
496 |             self._extract_chembl_fields(response)
497 |             self._extract_pubchem_fields(response)
498 |             self._extract_unii_fields(response)
499 | 
500 |             return DrugInfo(**response)
501 |         except Exception as e:
502 |             self.logger.warning(f"Failed to parse drug response: {e}")
503 |             return None
504 | 
505 |     def _extract_drugbank_fields(self, response: dict[str, Any]) -> None:
506 |         """Extract DrugBank fields from response."""
507 |         if "drugbank" in response and isinstance(response["drugbank"], dict):
508 |             db = response["drugbank"]
509 |             response["drugbank_id"] = db.get("id")
510 |             response["name"] = response.get("name") or db.get("name")
511 |             response["tradename"] = db.get("products", {}).get("name", [])
512 |             if isinstance(response["tradename"], str):
513 |                 response["tradename"] = [response["tradename"]]
514 |             response["indication"] = db.get("indication")
515 |             response["mechanism_of_action"] = db.get("mechanism_of_action")
516 |             response["description"] = db.get("description")
517 | 
518 |     def _extract_chebi_fields(self, response: dict[str, Any]) -> None:
519 |         """Extract ChEBI fields from response."""
520 |         if "chebi" in response and isinstance(response["chebi"], dict):
521 |             response["chebi_id"] = response["chebi"].get("id")
522 |             if not response.get("name"):
523 |                 response["name"] = response["chebi"].get("name")
524 | 
525 |     def _extract_chembl_fields(self, response: dict[str, Any]) -> None:
526 |         """Extract ChEMBL fields from response."""
527 |         if "chembl" in response and isinstance(response["chembl"], dict):
528 |             response["chembl_id"] = response["chembl"].get(
529 |                 "molecule_chembl_id"
530 |             )
531 |             if not response.get("name"):
532 |                 response["name"] = response["chembl"].get("pref_name")
533 | 
534 |     def _extract_pubchem_fields(self, response: dict[str, Any]) -> None:
535 |         """Extract PubChem fields from response."""
536 |         if "pubchem" in response and isinstance(response["pubchem"], dict):
537 |             response["pubchem_cid"] = str(response["pubchem"].get("cid", ""))
538 | 
539 |     def _extract_unii_fields(self, response: dict[str, Any]) -> None:
540 |         """Extract UNII fields from response."""
541 |         if "unii" in response and isinstance(response["unii"], dict):
542 |             unii_data = response["unii"]
543 |             # Set UNII code
544 |             response["unii"] = unii_data.get("unii", "")
545 |             # Use display name as drug name if not already set
546 |             if not response.get("name") and unii_data.get("display_name"):
547 |                 response["name"] = unii_data["display_name"]
548 |             # Use NCIT description if no description
549 |             if not response.get("description") and unii_data.get(
550 |                 "ncit_description"
551 |             ):
552 |                 response["description"] = unii_data["ncit_description"]
553 | 
554 |     async def get_variant_info(
555 |         self, variant_id: str, fields: list[str] | None = None
556 |     ) -> dict[str, Any] | None:
557 |         """Get variant information from MyVariant.info.
558 | 
559 |         This is a wrapper around the existing MyVariant integration.
560 | 
561 |         Args:
562 |             variant_id: Variant ID (rsID, HGVS)
563 |             fields: Optional list of fields to return
564 | 
565 |         Returns:
566 |             Variant data dictionary or None if not found
567 |         """
568 |         params = {"fields": "all" if fields is None else ",".join(fields)}
569 | 
570 |         response, error = await http_client.request_api(
571 |             url=f"{MYVARIANT_GET_URL}/{variant_id}",
572 |             request=params,
573 |             method="GET",
574 |             domain="myvariant",
575 |         )
576 | 
577 |         if error or not response:
578 |             return None
579 | 
580 |         return response
581 | 
```

--------------------------------------------------------------------------------
/docs/user-guides/02-mcp-tools-reference.md:
--------------------------------------------------------------------------------

```markdown
  1 | # MCP Tools Reference
  2 | 
  3 | BioMCP provides 35 specialized tools for biomedical research through the Model Context Protocol (MCP). This reference covers all available tools, their parameters, and usage patterns.
  4 | 
  5 | ## Related Guides
  6 | 
  7 | - **Conceptual Overview**: [Sequential Thinking with the Think Tool](../concepts/03-sequential-thinking-with-the-think-tool.md)
  8 | - **Practical Examples**: See the [How-to Guides](../how-to-guides/01-find-articles-and-cbioportal-data.md) for real-world usage patterns
  9 | - **Integration Setup**: [Claude Desktop Integration](../getting-started/02-claude-desktop-integration.md)
 10 | 
 11 | ## Tool Categories
 12 | 
 13 | | Category            | Count | Tools                                                          |
 14 | | ------------------- | ----- | -------------------------------------------------------------- |
 15 | | **Core Tools**      | 3     | `search`, `fetch`, `think`                                     |
 16 | | **Article Tools**   | 2     | `article_searcher`, `article_getter`                           |
 17 | | **Trial Tools**     | 6     | `trial_searcher`, `trial_getter`, + 4 detail getters           |
 18 | | **Variant Tools**   | 3     | `variant_searcher`, `variant_getter`, `alphagenome_predictor`  |
 19 | | **BioThings Tools** | 3     | `gene_getter`, `disease_getter`, `drug_getter`                 |
 20 | | **NCI Tools**       | 6     | Organization, intervention, biomarker, and disease tools       |
 21 | | **OpenFDA Tools**   | 12    | Adverse events, labels, devices, approvals, recalls, shortages |
 22 | 
 23 | ## Core Unified Tools
 24 | 
 25 | ### 1. search
 26 | 
 27 | **Universal search across all biomedical domains with unified query language.**
 28 | 
 29 | ```python
 30 | search(
 31 |     query: str = None,              # Unified query syntax
 32 |     domain: str = None,             # Target domain
 33 |     genes: list[str] = None,        # Gene symbols
 34 |     diseases: list[str] = None,     # Disease/condition terms
 35 |     variants: list[str] = None,     # Variant notations
 36 |     chemicals: list[str] = None,    # Drug/chemical names
 37 |     keywords: list[str] = None,     # Additional keywords
 38 |     conditions: list[str] = None,   # Trial conditions
 39 |     interventions: list[str] = None,# Trial interventions
 40 |     lat: float = None,              # Latitude for trials
 41 |     long: float = None,             # Longitude for trials
 42 |     page: int = 1,                  # Page number
 43 |     page_size: int = 10,            # Results per page
 44 |     api_key: str = None             # For NCI domains
 45 | ) -> dict
 46 | ```
 47 | 
 48 | **Domains:** `article`, `trial`, `variant`, `gene`, `drug`, `disease`, `nci_organization`, `nci_intervention`, `nci_biomarker`, `nci_disease`, `fda_adverse`, `fda_label`, `fda_device`, `fda_approval`, `fda_recall`, `fda_shortage`
 49 | 
 50 | **Query Language Examples:**
 51 | 
 52 | - `"gene:BRAF AND disease:melanoma"`
 53 | - `"drugs.tradename:gleevec"`
 54 | - `"gene:TP53 AND (mutation OR variant)"`
 55 | 
 56 | **Usage Examples:**
 57 | 
 58 | ```python
 59 | # Domain-specific search
 60 | search(domain="article", genes=["BRAF"], diseases=["melanoma"])
 61 | 
 62 | # Unified query language
 63 | search(query="gene:EGFR AND mutation:T790M")
 64 | 
 65 | # Clinical trials by location
 66 | search(domain="trial", conditions=["lung cancer"], lat=40.7128, long=-74.0060)
 67 | 
 68 | # FDA adverse events
 69 | search(domain="fda_adverse", chemicals=["aspirin"])
 70 | 
 71 | # FDA drug approvals
 72 | search(domain="fda_approval", chemicals=["keytruda"])
 73 | ```
 74 | 
 75 | ### 2. fetch
 76 | 
 77 | **Retrieve detailed information for any biomedical record.**
 78 | 
 79 | ```python
 80 | fetch(
 81 |     id: str,                    # Record identifier
 82 |     domain: str = None,         # Domain (auto-detected if not provided)
 83 |     detail: str = None,         # Specific section for trials
 84 |     api_key: str = None         # For NCI records
 85 | ) -> dict
 86 | ```
 87 | 
 88 | **Supported IDs:**
 89 | 
 90 | - Articles: PMID (e.g., "38768446"), DOI (e.g., "10.1101/2024.01.20.23288905")
 91 | - Trials: NCT ID (e.g., "NCT03006926")
 92 | - Variants: HGVS, rsID, genomic coordinates
 93 | - Genes/Drugs/Diseases: Names or database IDs
 94 | - FDA Records: Report IDs, Application Numbers (e.g., "BLA125514"), Recall Numbers, etc.
 95 | 
 96 | **Detail Options for Trials:** `protocol`, `locations`, `outcomes`, `references`, `all`
 97 | 
 98 | **Usage Examples:**
 99 | 
100 | ```python
101 | # Fetch article by PMID
102 | fetch(id="38768446", domain="article")
103 | 
104 | # Fetch trial with specific details
105 | fetch(id="NCT03006926", domain="trial", detail="locations")
106 | 
107 | # Auto-detect domain
108 | fetch(id="rs121913529")  # Variant
109 | fetch(id="BRAF")         # Gene
110 | 
111 | # Fetch FDA records
112 | fetch(id="BLA125514", domain="fda_approval")  # Drug approval
113 | fetch(id="D-0001-2023", domain="fda_recall")   # Drug recall
114 | ```
115 | 
116 | ### 3. think
117 | 
118 | **Sequential thinking tool for structured problem-solving.**
119 | 
120 | ```python
121 | think(
122 |     thought: str,               # Current reasoning step
123 |     thoughtNumber: int,         # Sequential number (1, 2, 3...)
124 |     totalThoughts: int = None,  # Estimated total thoughts
125 |     nextThoughtNeeded: bool = True  # Continue thinking?
126 | ) -> str
127 | ```
128 | 
129 | **CRITICAL:** Always use `think` BEFORE any other BioMCP operation!
130 | 
131 | **Usage Pattern:**
132 | 
133 | ```python
134 | # Step 1: Problem decomposition
135 | think(
136 |     thought="Breaking down query: need to find BRAF inhibitor trials...",
137 |     thoughtNumber=1,
138 |     nextThoughtNeeded=True
139 | )
140 | 
141 | # Step 2: Search strategy
142 | think(
143 |     thought="Will search trials for BRAF V600E melanoma, then articles...",
144 |     thoughtNumber=2,
145 |     nextThoughtNeeded=True
146 | )
147 | 
148 | # Final step: Synthesis
149 | think(
150 |     thought="Ready to synthesize findings from 5 trials and 12 articles...",
151 |     thoughtNumber=3,
152 |     nextThoughtNeeded=False  # Analysis complete
153 | )
154 | ```
155 | 
156 | ## Article Tools
157 | 
158 | ### 4. article_searcher
159 | 
160 | **Search PubMed/PubTator3 for biomedical literature.**
161 | 
162 | ```python
163 | article_searcher(
164 |     chemicals: list[str] = None,
165 |     diseases: list[str] = None,
166 |     genes: list[str] = None,
167 |     keywords: list[str] = None,    # Supports OR with "|"
168 |     variants: list[str] = None,
169 |     include_preprints: bool = True,
170 |     include_cbioportal: bool = True,
171 |     page: int = 1,
172 |     page_size: int = 10
173 | ) -> str
174 | ```
175 | 
176 | **Features:**
177 | 
178 | - Automatic cBioPortal integration for gene searches
179 | - Preprint inclusion from bioRxiv/medRxiv
180 | - OR logic in keywords: `"V600E|p.V600E|c.1799T>A"`
181 | 
182 | **Example:**
183 | 
184 | ```python
185 | # Search with multiple filters
186 | article_searcher(
187 |     genes=["BRAF"],
188 |     diseases=["melanoma"],
189 |     keywords=["resistance|resistant"],
190 |     include_cbioportal=True
191 | )
192 | ```
193 | 
194 | ### 5. article_getter
195 | 
196 | **Fetch detailed article information.**
197 | 
198 | ```python
199 | article_getter(
200 |     pmid: str  # PubMed ID, PMC ID, or DOI
201 | ) -> str
202 | ```
203 | 
204 | **Supports:**
205 | 
206 | - PubMed IDs: "38768446"
207 | - PMC IDs: "PMC7498215"
208 | - DOIs: "10.1101/2024.01.20.23288905"
209 | 
210 | ## Trial Tools
211 | 
212 | ### 6. trial_searcher
213 | 
214 | **Search ClinicalTrials.gov with comprehensive filters.**
215 | 
216 | ```python
217 | trial_searcher(
218 |     conditions: list[str] = None,
219 |     interventions: list[str] = None,
220 |     other_terms: list[str] = None,
221 |     recruiting_status: str = "ANY",  # "OPEN", "CLOSED", "ANY"
222 |     phase: str = None,               # "PHASE1", "PHASE2", etc.
223 |     lat: float = None,               # Location-based search
224 |     long: float = None,
225 |     distance: int = None,            # Miles from coordinates
226 |     age_group: str = None,           # "CHILD", "ADULT", "OLDER_ADULT"
227 |     sex: str = None,                 # "MALE", "FEMALE", "ALL"
228 |     study_type: str = None,          # "INTERVENTIONAL", "OBSERVATIONAL"
229 |     funder_type: str = None,         # "NIH", "INDUSTRY", etc.
230 |     page: int = 1,
231 |     page_size: int = 10
232 | ) -> str
233 | ```
234 | 
235 | **Location Search Example:**
236 | 
237 | ```python
238 | # Trials near Boston
239 | trial_searcher(
240 |     conditions=["breast cancer"],
241 |     lat=42.3601,
242 |     long=-71.0589,
243 |     distance=50,
244 |     recruiting_status="OPEN"
245 | )
246 | ```
247 | 
248 | ### 7-11. Trial Detail Getters
249 | 
250 | ```python
251 | # Get complete trial information
252 | trial_getter(nct_id: str) -> str
253 | 
254 | # Get specific sections
255 | trial_protocol_getter(nct_id: str) -> str     # Core protocol info
256 | trial_locations_getter(nct_id: str) -> str    # Sites and contacts
257 | trial_outcomes_getter(nct_id: str) -> str     # Outcome measures
258 | trial_references_getter(nct_id: str) -> str   # Publications
259 | ```
260 | 
261 | ## Variant Tools
262 | 
263 | ### 12. variant_searcher
264 | 
265 | **Search MyVariant.info for genetic variants.**
266 | 
267 | ```python
268 | variant_searcher(
269 |     gene: str = None,
270 |     hgvs: str = None,
271 |     hgvsp: str = None,              # Protein HGVS
272 |     hgvsc: str = None,              # Coding DNA HGVS
273 |     rsid: str = None,
274 |     region: str = None,             # "chr7:140753336-140753337"
275 |     significance: str = None,        # Clinical significance
276 |     frequency_min: float = None,
277 |     frequency_max: float = None,
278 |     cadd_score_min: float = None,
279 |     sift_prediction: str = None,
280 |     polyphen_prediction: str = None,
281 |     sources: list[str] = None,
282 |     include_cbioportal: bool = True,
283 |     page: int = 1,
284 |     page_size: int = 10
285 | ) -> str
286 | ```
287 | 
288 | **Significance Options:** `pathogenic`, `likely_pathogenic`, `uncertain_significance`, `likely_benign`, `benign`
289 | 
290 | **Example:**
291 | 
292 | ```python
293 | # Find rare pathogenic BRCA1 variants
294 | variant_searcher(
295 |     gene="BRCA1",
296 |     significance="pathogenic",
297 |     frequency_max=0.001,
298 |     cadd_score_min=20
299 | )
300 | ```
301 | 
302 | ### 13. variant_getter
303 | 
304 | **Fetch comprehensive variant details.**
305 | 
306 | ```python
307 | variant_getter(
308 |     variant_id: str,              # HGVS, rsID, or MyVariant ID
309 |     include_external: bool = True  # Include TCGA, 1000 Genomes
310 | ) -> str
311 | ```
312 | 
313 | ### 14. alphagenome_predictor
314 | 
315 | **Predict variant effects using Google DeepMind's AlphaGenome.**
316 | 
317 | ```python
318 | alphagenome_predictor(
319 |     chromosome: str,              # e.g., "chr7"
320 |     position: int,                # 1-based position
321 |     reference: str,               # Reference allele
322 |     alternate: str,               # Alternate allele
323 |     interval_size: int = 131072,  # Analysis window
324 |     tissue_types: list[str] = None,  # UBERON terms
325 |     significance_threshold: float = 0.5,
326 |     api_key: str = None          # AlphaGenome API key
327 | ) -> str
328 | ```
329 | 
330 | **Requires:** AlphaGenome API key (environment variable or per-request)
331 | 
332 | **Tissue Examples:**
333 | 
334 | - `UBERON:0002367` - prostate gland
335 | - `UBERON:0001155` - colon
336 | - `UBERON:0002048` - lung
337 | 
338 | **Example:**
339 | 
340 | ```python
341 | # Predict BRAF V600E effects
342 | alphagenome_predictor(
343 |     chromosome="chr7",
344 |     position=140753336,
345 |     reference="A",
346 |     alternate="T",
347 |     tissue_types=["UBERON:0002367"],  # prostate
348 |     api_key="your-key"
349 | )
350 | ```
351 | 
352 | ## BioThings Tools
353 | 
354 | ### 15. gene_getter
355 | 
356 | **Get gene information from MyGene.info.**
357 | 
358 | ```python
359 | gene_getter(
360 |     gene_id_or_symbol: str  # Gene symbol or Entrez ID
361 | ) -> str
362 | ```
363 | 
364 | **Returns:** Official name, aliases, summary, genomic location, database links
365 | 
366 | ### 16. disease_getter
367 | 
368 | **Get disease information from MyDisease.info.**
369 | 
370 | ```python
371 | disease_getter(
372 |     disease_id_or_name: str  # Disease name or ontology ID
373 | ) -> str
374 | ```
375 | 
376 | **Returns:** Definition, synonyms, MONDO/DOID IDs, associated phenotypes
377 | 
378 | ### 17. drug_getter
379 | 
380 | **Get drug/chemical information from MyChem.info.**
381 | 
382 | ```python
383 | drug_getter(
384 |     drug_id_or_name: str  # Drug name or database ID
385 | ) -> str
386 | ```
387 | 
388 | **Returns:** Chemical structure, mechanism, indications, trade names, identifiers
389 | 
390 | ## NCI-Specific Tools
391 | 
392 | All NCI tools require an API key from [api.cancer.gov](https://api.cancer.gov).
393 | 
394 | ### 18-19. Organization Tools
395 | 
396 | ```python
397 | # Search organizations
398 | nci_organization_searcher(
399 |     name: str = None,
400 |     organization_type: str = None,
401 |     city: str = None,              # Must use with state
402 |     state: str = None,             # Must use with city
403 |     api_key: str = None
404 | ) -> str
405 | 
406 | # Get organization details
407 | nci_organization_getter(
408 |     organization_id: str,
409 |     api_key: str = None
410 | ) -> str
411 | ```
412 | 
413 | ### 20-21. Intervention Tools
414 | 
415 | ```python
416 | # Search interventions
417 | nci_intervention_searcher(
418 |     name: str = None,
419 |     intervention_type: str = None,  # "Drug", "Device", etc.
420 |     synonyms: bool = True,
421 |     api_key: str = None
422 | ) -> str
423 | 
424 | # Get intervention details
425 | nci_intervention_getter(
426 |     intervention_id: str,
427 |     api_key: str = None
428 | ) -> str
429 | ```
430 | 
431 | ### 22. Biomarker Search
432 | 
433 | ```python
434 | nci_biomarker_searcher(
435 |     name: str = None,
436 |     biomarker_type: str = None,
437 |     api_key: str = None
438 | ) -> str
439 | ```
440 | 
441 | ### 23. Disease Search (NCI)
442 | 
443 | ```python
444 | nci_disease_searcher(
445 |     name: str = None,
446 |     include_synonyms: bool = True,
447 |     category: str = None,
448 |     api_key: str = None
449 | ) -> str
450 | ```
451 | 
452 | ## OpenFDA Tools
453 | 
454 | All OpenFDA tools support optional API keys for higher rate limits (240/min vs 40/min). Get a free key at [open.fda.gov/apis/authentication](https://open.fda.gov/apis/authentication/).
455 | 
456 | ### 24. openfda_adverse_searcher
457 | 
458 | **Search FDA Adverse Event Reporting System (FAERS).**
459 | 
460 | ```python
461 | openfda_adverse_searcher(
462 |     drug: str = None,
463 |     reaction: str = None,
464 |     serious: bool = None,        # Filter serious events only
465 |     limit: int = 25,
466 |     skip: int = 0,
467 |     api_key: str = None          # Optional OpenFDA API key
468 | ) -> str
469 | ```
470 | 
471 | **Example:**
472 | 
473 | ```python
474 | # Find serious bleeding events for warfarin
475 | openfda_adverse_searcher(
476 |     drug="warfarin",
477 |     reaction="bleeding",
478 |     serious=True,
479 |     api_key="your-key"  # Optional
480 | )
481 | ```
482 | 
483 | ### 25. openfda_adverse_getter
484 | 
485 | **Get detailed adverse event report.**
486 | 
487 | ```python
488 | openfda_adverse_getter(
489 |     report_id: str,              # Safety report ID
490 |     api_key: str = None
491 | ) -> str
492 | ```
493 | 
494 | ### 26. openfda_label_searcher
495 | 
496 | **Search FDA drug product labels.**
497 | 
498 | ```python
499 | openfda_label_searcher(
500 |     name: str = None,
501 |     indication: str = None,      # Search by indication
502 |     boxed_warning: bool = False, # Filter for boxed warnings
503 |     section: str = None,         # Specific label section
504 |     limit: int = 25,
505 |     skip: int = 0,
506 |     api_key: str = None
507 | ) -> str
508 | ```
509 | 
510 | ### 27. openfda_label_getter
511 | 
512 | **Get complete drug label information.**
513 | 
514 | ```python
515 | openfda_label_getter(
516 |     set_id: str,                 # Label set ID
517 |     sections: list[str] = None,  # Specific sections to retrieve
518 |     api_key: str = None
519 | ) -> str
520 | ```
521 | 
522 | **Label Sections:** `indications_and_usage`, `contraindications`, `warnings_and_precautions`, `dosage_and_administration`, `adverse_reactions`, `drug_interactions`, `pregnancy`, `pediatric_use`, `geriatric_use`
523 | 
524 | ### 28. openfda_device_searcher
525 | 
526 | **Search FDA device adverse event reports (MAUDE).**
527 | 
528 | ```python
529 | openfda_device_searcher(
530 |     device: str = None,
531 |     manufacturer: str = None,
532 |     problem: str = None,
533 |     product_code: str = None,    # FDA product code
534 |     genomics_only: bool = True,  # Filter genomic/diagnostic devices
535 |     limit: int = 25,
536 |     skip: int = 0,
537 |     api_key: str = None
538 | ) -> str
539 | ```
540 | 
541 | **Note:** FDA uses abbreviated device names (e.g., "F1CDX" for "FoundationOne CDx").
542 | 
543 | ### 29. openfda_device_getter
544 | 
545 | **Get detailed device event report.**
546 | 
547 | ```python
548 | openfda_device_getter(
549 |     mdr_report_key: str,         # MDR report key
550 |     api_key: str = None
551 | ) -> str
552 | ```
553 | 
554 | ### 30. openfda_approval_searcher
555 | 
556 | **Search FDA drug approval records (Drugs@FDA).**
557 | 
558 | ```python
559 | openfda_approval_searcher(
560 |     drug: str = None,
561 |     application_number: str = None,  # NDA/BLA number
562 |     approval_year: str = None,       # YYYY format
563 |     limit: int = 25,
564 |     skip: int = 0,
565 |     api_key: str = None
566 | ) -> str
567 | ```
568 | 
569 | ### 31. openfda_approval_getter
570 | 
571 | **Get drug approval details.**
572 | 
573 | ```python
574 | openfda_approval_getter(
575 |     application_number: str,     # NDA/BLA number
576 |     api_key: str = None
577 | ) -> str
578 | ```
579 | 
580 | ### 32. openfda_recall_searcher
581 | 
582 | **Search FDA drug recall records.**
583 | 
584 | ```python
585 | openfda_recall_searcher(
586 |     drug: str = None,
587 |     recall_class: str = None,    # "1", "2", or "3"
588 |     status: str = None,          # "ongoing" or "completed"
589 |     reason: str = None,
590 |     since_date: str = None,      # YYYYMMDD format
591 |     limit: int = 25,
592 |     skip: int = 0,
593 |     api_key: str = None
594 | ) -> str
595 | ```
596 | 
597 | **Recall Classes:**
598 | 
599 | - Class 1: Dangerous or defective products that could cause serious health problems or death
600 | - Class 2: Products that might cause temporary health problems or pose a slight threat
601 | - Class 3: Products unlikely to cause adverse health consequences
602 | 
603 | ### 33. openfda_recall_getter
604 | 
605 | **Get drug recall details.**
606 | 
607 | ```python
608 | openfda_recall_getter(
609 |     recall_number: str,          # FDA recall number
610 |     api_key: str = None
611 | ) -> str
612 | ```
613 | 
614 | ### 34. openfda_shortage_searcher
615 | 
616 | **Search FDA drug shortage database.**
617 | 
618 | ```python
619 | openfda_shortage_searcher(
620 |     drug: str = None,
621 |     status: str = None,          # "current" or "resolved"
622 |     therapeutic_category: str = None,
623 |     limit: int = 25,
624 |     skip: int = 0,
625 |     api_key: str = None
626 | ) -> str
627 | ```
628 | 
629 | ### 35. openfda_shortage_getter
630 | 
631 | **Get drug shortage details.**
632 | 
633 | ```python
634 | openfda_shortage_getter(
635 |     drug_name: str,
636 |     api_key: str = None
637 | ) -> str
638 | ```
639 | 
640 | ## Best Practices
641 | 
642 | ### 1. Always Think First
643 | 
644 | ```python
645 | # ✅ CORRECT - Think before searching
646 | think(thought="Planning BRAF melanoma research...", thoughtNumber=1)
647 | results = article_searcher(genes=["BRAF"], diseases=["melanoma"])
648 | 
649 | # ❌ INCORRECT - Skipping think tool
650 | results = article_searcher(genes=["BRAF"])  # Poor results!
651 | ```
652 | 
653 | ### 2. Use Unified Tools for Flexibility
654 | 
655 | ```python
656 | # Unified search supports complex queries
657 | results = search(query="gene:EGFR AND (mutation:T790M OR mutation:C797S)")
658 | 
659 | # Unified fetch auto-detects domain
660 | details = fetch(id="NCT03006926")  # Knows it's a trial
661 | ```
662 | 
663 | ### 3. Leverage Domain-Specific Features
664 | 
665 | ```python
666 | # Article search with cBioPortal
667 | articles = article_searcher(
668 |     genes=["KRAS"],
669 |     include_cbioportal=True  # Adds cancer genomics context
670 | )
671 | 
672 | # Variant search with multiple filters
673 | variants = variant_searcher(
674 |     gene="TP53",
675 |     significance="pathogenic",
676 |     frequency_max=0.01,
677 |     cadd_score_min=25
678 | )
679 | ```
680 | 
681 | ### 4. Handle API Keys Properly
682 | 
683 | ```python
684 | # For personal use - environment variable
685 | # export NCI_API_KEY="your-key"
686 | nci_results = search(domain="nci_organization", name="Mayo Clinic")
687 | 
688 | # For shared environments - per-request
689 | nci_results = search(
690 |     domain="nci_organization",
691 |     name="Mayo Clinic",
692 |     api_key="user-provided-key"
693 | )
694 | ```
695 | 
696 | ### 5. Use Appropriate Page Sizes
697 | 
698 | ```python
699 | # Large result sets - increase page_size
700 | results = article_searcher(
701 |     genes=["TP53"],
702 |     page_size=50  # Get more results at once
703 | )
704 | 
705 | # Iterative exploration - use pagination
706 | page1 = trial_searcher(conditions=["cancer"], page=1, page_size=10)
707 | page2 = trial_searcher(conditions=["cancer"], page=2, page_size=10)
708 | ```
709 | 
710 | ## Error Handling
711 | 
712 | All tools include comprehensive error handling:
713 | 
714 | - **Invalid parameters**: Clear error messages with valid options
715 | - **API failures**: Graceful degradation with informative messages
716 | - **Rate limits**: Automatic retry with exponential backoff
717 | - **Missing API keys**: Helpful instructions for obtaining keys
718 | 
719 | ## Tool Selection Guide
720 | 
721 | | If you need to...              | Use this tool                                     |
722 | | ------------------------------ | ------------------------------------------------- |
723 | | Search across multiple domains | `search` with query language                      |
724 | | Get any record by ID           | `fetch` with auto-detection                       |
725 | | Plan your research approach    | `think` (always first!)                           |
726 | | Find recent papers             | `article_searcher`                                |
727 | | Locate clinical trials         | `trial_searcher`                                  |
728 | | Analyze genetic variants       | `variant_searcher` + `variant_getter`             |
729 | | Predict variant effects        | `alphagenome_predictor`                           |
730 | | Get gene/drug/disease info     | `gene_getter`, `drug_getter`, `disease_getter`    |
731 | | Access NCI databases           | `nci_*` tools with API key                        |
732 | | Check drug adverse events      | `openfda_adverse_searcher`                        |
733 | | Review FDA drug labels         | `openfda_label_searcher` + `openfda_label_getter` |
734 | | Investigate device issues      | `openfda_device_searcher`                         |
735 | | Find drug approvals            | `openfda_approval_searcher`                       |
736 | | Check drug recalls             | `openfda_recall_searcher`                         |
737 | | Monitor drug shortages         | `openfda_shortage_searcher`                       |
738 | 
739 | ## Next Steps
740 | 
741 | - Review [Sequential Thinking](../concepts/03-sequential-thinking-with-the-think-tool.md) methodology
742 | - Explore [How-to Guides](../how-to-guides/01-find-articles-and-cbioportal-data.md) for complex workflows
743 | - Set up [API Keys](../getting-started/03-authentication-and-api-keys.md) for enhanced features
744 | 
```

--------------------------------------------------------------------------------
/src/biomcp/domain_handlers.py:
--------------------------------------------------------------------------------

```python
  1 | """Domain-specific result handlers for BioMCP.
  2 | 
  3 | This module contains formatting functions for converting raw API responses
  4 | from different biomedical data sources into a standardized format.
  5 | """
  6 | 
  7 | import logging
  8 | from typing import Any
  9 | 
 10 | from biomcp.constants import (
 11 |     DEFAULT_SIGNIFICANCE,
 12 |     DEFAULT_TITLE,
 13 |     METADATA_AUTHORS,
 14 |     METADATA_COMPLETION_DATE,
 15 |     METADATA_CONSEQUENCE,
 16 |     METADATA_GENE,
 17 |     METADATA_JOURNAL,
 18 |     METADATA_PHASE,
 19 |     METADATA_RSID,
 20 |     METADATA_SIGNIFICANCE,
 21 |     METADATA_SOURCE,
 22 |     METADATA_START_DATE,
 23 |     METADATA_STATUS,
 24 |     METADATA_YEAR,
 25 |     RESULT_ID,
 26 |     RESULT_METADATA,
 27 |     RESULT_SNIPPET,
 28 |     RESULT_TITLE,
 29 |     RESULT_URL,
 30 |     SNIPPET_LENGTH,
 31 | )
 32 | 
 33 | logger = logging.getLogger(__name__)
 34 | 
 35 | 
 36 | class ArticleHandler:
 37 |     """Handles formatting for article/publication results."""
 38 | 
 39 |     @staticmethod
 40 |     def format_result(result: dict[str, Any]) -> dict[str, Any]:
 41 |         """Format a single article result.
 42 | 
 43 |         Args:
 44 |             result: Raw article data from PubTator3 or preprint APIs
 45 | 
 46 |         Returns:
 47 |             Standardized article result with id, title, snippet, url, and metadata
 48 |         """
 49 |         if "pmid" in result:
 50 |             # PubMed article
 51 |             # Clean up title - remove extra spaces
 52 |             title = result.get("title", "").strip()
 53 |             title = " ".join(title.split())  # Normalize whitespace
 54 | 
 55 |             # Use default if empty
 56 |             if not title:
 57 |                 title = DEFAULT_TITLE
 58 | 
 59 |             return {
 60 |                 RESULT_ID: result["pmid"],
 61 |                 RESULT_TITLE: title,
 62 |                 RESULT_SNIPPET: result.get("abstract", "")[:SNIPPET_LENGTH]
 63 |                 + "..."
 64 |                 if result.get("abstract")
 65 |                 else "",
 66 |                 RESULT_URL: f"https://pubmed.ncbi.nlm.nih.gov/{result['pmid']}/",
 67 |                 RESULT_METADATA: {
 68 |                     METADATA_YEAR: result.get("pub_year")
 69 |                     or (
 70 |                         result.get("date", "")[:4]
 71 |                         if result.get("date")
 72 |                         else None
 73 |                     ),
 74 |                     METADATA_JOURNAL: result.get("journal", ""),
 75 |                     METADATA_AUTHORS: result.get("authors", [])[:3],
 76 |                 },
 77 |             }
 78 |         else:
 79 |             # Preprint result
 80 |             return {
 81 |                 RESULT_ID: result.get("doi", result.get("id", "")),
 82 |                 RESULT_TITLE: result.get("title", ""),
 83 |                 RESULT_SNIPPET: result.get("abstract", "")[:SNIPPET_LENGTH]
 84 |                 + "..."
 85 |                 if result.get("abstract")
 86 |                 else "",
 87 |                 RESULT_URL: result.get("url", ""),
 88 |                 RESULT_METADATA: {
 89 |                     METADATA_YEAR: result.get("pub_year"),
 90 |                     METADATA_SOURCE: result.get("source", ""),
 91 |                     METADATA_AUTHORS: result.get("authors", [])[:3],
 92 |                 },
 93 |             }
 94 | 
 95 | 
 96 | class TrialHandler:
 97 |     """Handles formatting for clinical trial results."""
 98 | 
 99 |     @staticmethod
100 |     def format_result(result: dict[str, Any]) -> dict[str, Any]:
101 |         """Format a single trial result.
102 | 
103 |         Handles both ClinicalTrials.gov API v2 nested structure and legacy formats.
104 | 
105 |         Args:
106 |             result: Raw trial data from ClinicalTrials.gov API
107 | 
108 |         Returns:
109 |             Standardized trial result with id, title, snippet, url, and metadata
110 |         """
111 |         # Handle ClinicalTrials.gov API v2 nested structure
112 |         if "protocolSection" in result:
113 |             # API v2 format - extract from nested modules
114 |             protocol = result.get("protocolSection", {})
115 |             identification = protocol.get("identificationModule", {})
116 |             status = protocol.get("statusModule", {})
117 |             description = protocol.get("descriptionModule", {})
118 | 
119 |             nct_id = identification.get("nctId", "")
120 |             brief_title = identification.get("briefTitle", "")
121 |             official_title = identification.get("officialTitle", "")
122 |             brief_summary = description.get("briefSummary", "")
123 |             overall_status = status.get("overallStatus", "")
124 |             start_date = status.get("startDateStruct", {}).get("date", "")
125 |             completion_date = status.get(
126 |                 "primaryCompletionDateStruct", {}
127 |             ).get("date", "")
128 | 
129 |             # Extract phase from designModule
130 |             design = protocol.get("designModule", {})
131 |             phases = design.get("phases", [])
132 |             phase = phases[0] if phases else ""
133 |         elif "NCT Number" in result:
134 |             # Legacy flat format from search results
135 |             nct_id = result.get("NCT Number", "")
136 |             brief_title = result.get("Study Title", "")
137 |             official_title = ""  # Not available in this format
138 |             brief_summary = result.get("Brief Summary", "")
139 |             overall_status = result.get("Study Status", "")
140 |             phase = result.get("Phases", "")
141 |             start_date = result.get("Start Date", "")
142 |             completion_date = result.get("Completion Date", "")
143 |         else:
144 |             # Original legacy format or simplified structure
145 |             nct_id = result.get("nct_id", "")
146 |             brief_title = result.get("brief_title", "")
147 |             official_title = result.get("official_title", "")
148 |             brief_summary = result.get("brief_summary", "")
149 |             overall_status = result.get("overall_status", "")
150 |             phase = result.get("phase", "")
151 |             start_date = result.get("start_date", "")
152 |             completion_date = result.get("primary_completion_date", "")
153 | 
154 |         return {
155 |             RESULT_ID: nct_id,
156 |             RESULT_TITLE: brief_title or official_title or DEFAULT_TITLE,
157 |             RESULT_SNIPPET: brief_summary[:SNIPPET_LENGTH] + "..."
158 |             if brief_summary
159 |             else "",
160 |             RESULT_URL: f"https://clinicaltrials.gov/study/{nct_id}",
161 |             RESULT_METADATA: {
162 |                 METADATA_STATUS: overall_status,
163 |                 METADATA_PHASE: phase,
164 |                 METADATA_START_DATE: start_date,
165 |                 METADATA_COMPLETION_DATE: completion_date,
166 |             },
167 |         }
168 | 
169 | 
170 | class VariantHandler:
171 |     """Handles formatting for genetic variant results."""
172 | 
173 |     @staticmethod
174 |     def format_result(result: dict[str, Any]) -> dict[str, Any]:
175 |         """Format a single variant result.
176 | 
177 |         Args:
178 |             result: Raw variant data from MyVariant.info API
179 | 
180 |         Returns:
181 |             Standardized variant result with id, title, snippet, url, and metadata
182 |         """
183 |         # Extract gene symbol - MyVariant.info stores this in multiple locations
184 |         gene = (
185 |             result.get("dbnsfp", {}).get("genename", "")
186 |             or result.get("dbsnp", {}).get("gene", {}).get("symbol", "")
187 |             or ""
188 |         )
189 |         # Handle case where gene is a list
190 |         if isinstance(gene, list):
191 |             gene = gene[0] if gene else ""
192 | 
193 |         # Extract rsid
194 |         rsid = result.get("dbsnp", {}).get("rsid", "") or ""
195 | 
196 |         # Extract clinical significance
197 |         clinvar = result.get("clinvar", {})
198 |         significance = ""
199 |         if isinstance(clinvar.get("rcv"), dict):
200 |             significance = clinvar["rcv"].get("clinical_significance", "")
201 |         elif isinstance(clinvar.get("rcv"), list) and clinvar["rcv"]:
202 |             significance = clinvar["rcv"][0].get("clinical_significance", "")
203 | 
204 |         # Build a meaningful title
205 |         hgvs = ""
206 |         if "dbnsfp" in result and "hgvsp" in result["dbnsfp"]:
207 |             hgvs = result["dbnsfp"]["hgvsp"]
208 |             if isinstance(hgvs, list):
209 |                 hgvs = hgvs[0] if hgvs else ""
210 | 
211 |         title = f"{gene} {hgvs}".strip() or result.get("_id", DEFAULT_TITLE)
212 | 
213 |         return {
214 |             RESULT_ID: result.get("_id", ""),
215 |             RESULT_TITLE: title,
216 |             RESULT_SNIPPET: f"Clinical significance: {significance or DEFAULT_SIGNIFICANCE}",
217 |             RESULT_URL: f"https://www.ncbi.nlm.nih.gov/snp/{rsid}"
218 |             if rsid
219 |             else "",
220 |             RESULT_METADATA: {
221 |                 METADATA_GENE: gene,
222 |                 METADATA_RSID: rsid,
223 |                 METADATA_SIGNIFICANCE: significance,
224 |                 METADATA_CONSEQUENCE: result.get("cadd", {}).get(
225 |                     "consequence", ""
226 |                 ),
227 |             },
228 |         }
229 | 
230 | 
231 | class GeneHandler:
232 |     """Handles formatting for gene information results from MyGene.info."""
233 | 
234 |     @staticmethod
235 |     def format_result(result: dict[str, Any]) -> dict[str, Any]:
236 |         """Format a single gene result.
237 | 
238 |         Args:
239 |             result: Raw gene data from MyGene.info API
240 | 
241 |         Returns:
242 |             Standardized gene result with id, title, snippet, url, and metadata
243 |         """
244 |         # Extract gene information
245 |         gene_id = result.get("_id", result.get("entrezgene", ""))
246 |         symbol = result.get("symbol", "")
247 |         name = result.get("name", "")
248 |         summary = result.get("summary", "")
249 | 
250 |         # Build title
251 |         title = (
252 |             f"{symbol}: {name}"
253 |             if symbol and name
254 |             else symbol or name or DEFAULT_TITLE
255 |         )
256 | 
257 |         # Create snippet from summary
258 |         snippet = (
259 |             summary[:SNIPPET_LENGTH] + "..."
260 |             if summary and len(summary) > SNIPPET_LENGTH
261 |             else summary
262 |         )
263 | 
264 |         return {
265 |             RESULT_ID: str(gene_id),
266 |             RESULT_TITLE: title,
267 |             RESULT_SNIPPET: snippet or "No summary available",
268 |             RESULT_URL: f"https://www.genenames.org/data/gene-symbol-report/#!/symbol/{symbol}"
269 |             if symbol
270 |             else "",
271 |             RESULT_METADATA: {
272 |                 "entrezgene": result.get("entrezgene"),
273 |                 "symbol": symbol,
274 |                 "name": name,
275 |                 "type_of_gene": result.get("type_of_gene", ""),
276 |                 "ensembl": result.get("ensembl", {}).get("gene")
277 |                 if isinstance(result.get("ensembl"), dict)
278 |                 else None,
279 |                 "refseq": result.get("refseq", {}),
280 |             },
281 |         }
282 | 
283 | 
284 | class DrugHandler:
285 |     """Handles formatting for drug/chemical information results from MyChem.info."""
286 | 
287 |     @staticmethod
288 |     def format_result(result: dict[str, Any]) -> dict[str, Any]:
289 |         """Format a single drug result.
290 | 
291 |         Args:
292 |             result: Raw drug data from MyChem.info API
293 | 
294 |         Returns:
295 |             Standardized drug result with id, title, snippet, url, and metadata
296 |         """
297 |         # Extract drug information
298 |         drug_id = result.get("_id", "")
299 |         name = result.get("name", "")
300 |         drugbank_id = result.get("drugbank_id", "")
301 |         description = result.get("description", "")
302 |         indication = result.get("indication", "")
303 | 
304 |         # Build title
305 |         title = name or drug_id or DEFAULT_TITLE
306 | 
307 |         # Create snippet from description or indication
308 |         snippet_text = indication or description
309 |         snippet = (
310 |             snippet_text[:SNIPPET_LENGTH] + "..."
311 |             if snippet_text and len(snippet_text) > SNIPPET_LENGTH
312 |             else snippet_text
313 |         )
314 | 
315 |         # Determine URL based on available IDs
316 |         url = ""
317 |         if drugbank_id:
318 |             url = f"https://www.drugbank.ca/drugs/{drugbank_id}"
319 |         elif result.get("pubchem_cid"):
320 |             url = f"https://pubchem.ncbi.nlm.nih.gov/compound/{result['pubchem_cid']}"
321 | 
322 |         return {
323 |             RESULT_ID: drug_id,
324 |             RESULT_TITLE: title,
325 |             RESULT_SNIPPET: snippet or "No description available",
326 |             RESULT_URL: url,
327 |             RESULT_METADATA: {
328 |                 "drugbank_id": drugbank_id,
329 |                 "chembl_id": result.get("chembl_id", ""),
330 |                 "pubchem_cid": result.get("pubchem_cid", ""),
331 |                 "chebi_id": result.get("chebi_id", ""),
332 |                 "formula": result.get("formula", ""),
333 |                 "tradename": result.get("tradename", []),
334 |             },
335 |         }
336 | 
337 | 
338 | class DiseaseHandler:
339 |     """Handles formatting for disease information results from MyDisease.info."""
340 | 
341 |     @staticmethod
342 |     def format_result(result: dict[str, Any]) -> dict[str, Any]:
343 |         """Format a single disease result.
344 | 
345 |         Args:
346 |             result: Raw disease data from MyDisease.info API
347 | 
348 |         Returns:
349 |             Standardized disease result with id, title, snippet, url, and metadata
350 |         """
351 |         # Extract disease information
352 |         disease_id = result.get("_id", "")
353 |         name = result.get("name", "")
354 |         definition = result.get("definition", "")
355 |         mondo_info = result.get("mondo", {})
356 | 
357 |         # Build title
358 |         title = name or disease_id or DEFAULT_TITLE
359 | 
360 |         # Create snippet from definition
361 |         snippet = (
362 |             definition[:SNIPPET_LENGTH] + "..."
363 |             if definition and len(definition) > SNIPPET_LENGTH
364 |             else definition
365 |         )
366 | 
367 |         # Extract MONDO ID for URL
368 |         mondo_id = mondo_info.get("id") if isinstance(mondo_info, dict) else ""
369 |         url = (
370 |             f"https://monarchinitiative.org/disease/{mondo_id}"
371 |             if mondo_id
372 |             else ""
373 |         )
374 | 
375 |         return {
376 |             RESULT_ID: disease_id,
377 |             RESULT_TITLE: title,
378 |             RESULT_SNIPPET: snippet or "No definition available",
379 |             RESULT_URL: url,
380 |             RESULT_METADATA: {
381 |                 "mondo_id": mondo_id,
382 |                 "definition": definition,
383 |                 "synonyms": result.get("synonyms", []),
384 |                 "xrefs": result.get("xrefs", {}),
385 |                 "phenotypes": len(result.get("phenotypes", [])),
386 |             },
387 |         }
388 | 
389 | 
390 | class NCIOrganizationHandler:
391 |     """Handles formatting for NCI organization results."""
392 | 
393 |     @staticmethod
394 |     def format_result(result: dict[str, Any]) -> dict[str, Any]:
395 |         """Format a single NCI organization result.
396 | 
397 |         Args:
398 |             result: Raw organization data from NCI CTS API
399 | 
400 |         Returns:
401 |             Standardized organization result with id, title, snippet, url, and metadata
402 |         """
403 |         org_id = result.get("id", result.get("org_id", ""))
404 |         name = result.get("name", "Unknown Organization")
405 |         org_type = result.get("type", result.get("category", ""))
406 |         city = result.get("city", "")
407 |         state = result.get("state", "")
408 | 
409 |         # Build location string
410 |         location_parts = [p for p in [city, state] if p]
411 |         location = ", ".join(location_parts) if location_parts else ""
412 | 
413 |         # Create snippet
414 |         snippet_parts = []
415 |         if org_type:
416 |             snippet_parts.append(f"Type: {org_type}")
417 |         if location:
418 |             snippet_parts.append(f"Location: {location}")
419 |         snippet = " | ".join(snippet_parts) or "No details available"
420 | 
421 |         return {
422 |             RESULT_ID: org_id,
423 |             RESULT_TITLE: name,
424 |             RESULT_SNIPPET: snippet,
425 |             RESULT_URL: "",  # NCI doesn't provide direct URLs to organizations
426 |             RESULT_METADATA: {
427 |                 "type": org_type,
428 |                 "city": city,
429 |                 "state": state,
430 |                 "country": result.get("country", ""),
431 |             },
432 |         }
433 | 
434 | 
435 | class NCIInterventionHandler:
436 |     """Handles formatting for NCI intervention results."""
437 | 
438 |     @staticmethod
439 |     def format_result(result: dict[str, Any]) -> dict[str, Any]:
440 |         """Format a single NCI intervention result.
441 | 
442 |         Args:
443 |             result: Raw intervention data from NCI CTS API
444 | 
445 |         Returns:
446 |             Standardized intervention result with id, title, snippet, url, and metadata
447 |         """
448 |         int_id = result.get("id", result.get("intervention_id", ""))
449 |         name = result.get("name", "Unknown Intervention")
450 |         int_type = result.get("type", result.get("category", ""))
451 |         synonyms = result.get("synonyms", [])
452 | 
453 |         # Create snippet
454 |         snippet_parts = []
455 |         if int_type:
456 |             snippet_parts.append(f"Type: {int_type}")
457 |         if synonyms:
458 |             if isinstance(synonyms, list) and synonyms:
459 |                 snippet_parts.append(
460 |                     f"Also known as: {', '.join(synonyms[:3])}"
461 |                 )
462 |             elif isinstance(synonyms, str):
463 |                 snippet_parts.append(f"Also known as: {synonyms}")
464 |         snippet = " | ".join(snippet_parts) or "No details available"
465 | 
466 |         return {
467 |             RESULT_ID: int_id,
468 |             RESULT_TITLE: name,
469 |             RESULT_SNIPPET: snippet,
470 |             RESULT_URL: "",  # NCI doesn't provide direct URLs to interventions
471 |             RESULT_METADATA: {
472 |                 "type": int_type,
473 |                 "synonyms": synonyms,
474 |                 "description": result.get("description", ""),
475 |             },
476 |         }
477 | 
478 | 
479 | class NCIBiomarkerHandler:
480 |     """Handles formatting for NCI biomarker results."""
481 | 
482 |     @staticmethod
483 |     def format_result(result: dict[str, Any]) -> dict[str, Any]:
484 |         """Format a single NCI biomarker result.
485 | 
486 |         Args:
487 |             result: Raw biomarker data from NCI CTS API
488 | 
489 |         Returns:
490 |             Standardized biomarker result with id, title, snippet, url, and metadata
491 |         """
492 |         bio_id = result.get("id", result.get("biomarker_id", ""))
493 |         name = result.get("name", "Unknown Biomarker")
494 |         gene = result.get("gene", result.get("gene_symbol", ""))
495 |         bio_type = result.get("type", result.get("category", ""))
496 |         assay_type = result.get("assay_type", "")
497 | 
498 |         # Build title
499 |         title = name
500 |         if gene and gene not in name:
501 |             title = f"{gene} - {name}"
502 | 
503 |         # Create snippet
504 |         snippet_parts = []
505 |         if bio_type:
506 |             snippet_parts.append(f"Type: {bio_type}")
507 |         if assay_type:
508 |             snippet_parts.append(f"Assay: {assay_type}")
509 |         snippet = (
510 |             " | ".join(snippet_parts) or "Biomarker for trial eligibility"
511 |         )
512 | 
513 |         return {
514 |             RESULT_ID: bio_id,
515 |             RESULT_TITLE: title,
516 |             RESULT_SNIPPET: snippet,
517 |             RESULT_URL: "",  # NCI doesn't provide direct URLs to biomarkers
518 |             RESULT_METADATA: {
519 |                 "gene": gene,
520 |                 "type": bio_type,
521 |                 "assay_type": assay_type,
522 |                 "trial_count": result.get("trial_count", 0),
523 |             },
524 |         }
525 | 
526 | 
527 | class NCIDiseaseHandler:
528 |     """Handles formatting for NCI disease vocabulary results."""
529 | 
530 |     @staticmethod
531 |     def format_result(result: dict[str, Any]) -> dict[str, Any]:
532 |         """Format a single NCI disease result.
533 | 
534 |         Args:
535 |             result: Raw disease data from NCI CTS API
536 | 
537 |         Returns:
538 |             Standardized disease result with id, title, snippet, url, and metadata
539 |         """
540 |         disease_id = result.get("id", result.get("disease_id", ""))
541 |         name = result.get(
542 |             "name", result.get("preferred_name", "Unknown Disease")
543 |         )
544 |         category = result.get("category", result.get("type", ""))
545 |         synonyms = result.get("synonyms", [])
546 | 
547 |         # Create snippet
548 |         snippet_parts = []
549 |         if category:
550 |             snippet_parts.append(f"Category: {category}")
551 |         if synonyms:
552 |             if isinstance(synonyms, list) and synonyms:
553 |                 snippet_parts.append(
554 |                     f"Also known as: {', '.join(synonyms[:3])}"
555 |                 )
556 |                 if len(synonyms) > 3:
557 |                     snippet_parts.append(f"and {len(synonyms) - 3} more")
558 |             elif isinstance(synonyms, str):
559 |                 snippet_parts.append(f"Also known as: {synonyms}")
560 |         snippet = " | ".join(snippet_parts) or "NCI cancer vocabulary term"
561 | 
562 |         return {
563 |             RESULT_ID: disease_id,
564 |             RESULT_TITLE: name,
565 |             RESULT_SNIPPET: snippet,
566 |             RESULT_URL: "",  # NCI doesn't provide direct URLs to disease terms
567 |             RESULT_METADATA: {
568 |                 "category": category,
569 |                 "synonyms": synonyms,
570 |                 "codes": result.get("codes", {}),
571 |             },
572 |         }
573 | 
574 | 
575 | def get_domain_handler(
576 |     domain: str,
577 | ) -> (
578 |     type[ArticleHandler]
579 |     | type[TrialHandler]
580 |     | type[VariantHandler]
581 |     | type[GeneHandler]
582 |     | type[DrugHandler]
583 |     | type[DiseaseHandler]
584 |     | type[NCIOrganizationHandler]
585 |     | type[NCIInterventionHandler]
586 |     | type[NCIBiomarkerHandler]
587 |     | type[NCIDiseaseHandler]
588 | ):
589 |     """Get the appropriate handler class for a domain.
590 | 
591 |     Args:
592 |         domain: The domain name ('article', 'trial', 'variant', 'gene', 'drug', 'disease',
593 |                                'nci_organization', 'nci_intervention', 'nci_biomarker', 'nci_disease')
594 | 
595 |     Returns:
596 |         The handler class for the domain
597 | 
598 |     Raises:
599 |         ValueError: If domain is not recognized
600 |     """
601 |     handlers: dict[
602 |         str,
603 |         type[ArticleHandler]
604 |         | type[TrialHandler]
605 |         | type[VariantHandler]
606 |         | type[GeneHandler]
607 |         | type[DrugHandler]
608 |         | type[DiseaseHandler]
609 |         | type[NCIOrganizationHandler]
610 |         | type[NCIInterventionHandler]
611 |         | type[NCIBiomarkerHandler]
612 |         | type[NCIDiseaseHandler],
613 |     ] = {
614 |         "article": ArticleHandler,
615 |         "trial": TrialHandler,
616 |         "variant": VariantHandler,
617 |         "gene": GeneHandler,
618 |         "drug": DrugHandler,
619 |         "disease": DiseaseHandler,
620 |         "nci_organization": NCIOrganizationHandler,
621 |         "nci_intervention": NCIInterventionHandler,
622 |         "nci_biomarker": NCIBiomarkerHandler,
623 |         "nci_disease": NCIDiseaseHandler,
624 |     }
625 | 
626 |     handler = handlers.get(domain)
627 |     if handler is None:
628 |         raise ValueError(f"Unknown domain: {domain}")
629 | 
630 |     return handler
631 | 
```