This is page 9 of 19. Use http://codebase.md/genomoncology/biomcp?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .github │ ├── actions │ │ └── setup-python-env │ │ └── action.yml │ ├── dependabot.yml │ └── workflows │ ├── ci.yml │ ├── deploy-docs.yml │ ├── main.yml.disabled │ ├── on-release-main.yml │ └── validate-codecov-config.yml ├── .gitignore ├── .pre-commit-config.yaml ├── BIOMCP_DATA_FLOW.md ├── CHANGELOG.md ├── CNAME ├── codecov.yaml ├── docker-compose.yml ├── Dockerfile ├── docs │ ├── apis │ │ ├── error-codes.md │ │ ├── overview.md │ │ └── python-sdk.md │ ├── assets │ │ ├── biomcp-cursor-locations.png │ │ ├── favicon.ico │ │ ├── icon.png │ │ ├── logo.png │ │ ├── mcp_architecture.txt │ │ └── remote-connection │ │ ├── 00_connectors.png │ │ ├── 01_add_custom_connector.png │ │ ├── 02_connector_enabled.png │ │ ├── 03_connect_to_biomcp.png │ │ ├── 04_select_google_oauth.png │ │ └── 05_success_connect.png │ ├── backend-services-reference │ │ ├── 01-overview.md │ │ ├── 02-biothings-suite.md │ │ ├── 03-cbioportal.md │ │ ├── 04-clinicaltrials-gov.md │ │ ├── 05-nci-cts-api.md │ │ ├── 06-pubtator3.md │ │ └── 07-alphagenome.md │ ├── blog │ │ ├── ai-assisted-clinical-trial-search-analysis.md │ │ ├── images │ │ │ ├── deep-researcher-video.png │ │ │ ├── researcher-announce.png │ │ │ ├── researcher-drop-down.png │ │ │ ├── researcher-prompt.png │ │ │ ├── trial-search-assistant.png │ │ │ └── what_is_biomcp_thumbnail.png │ │ └── researcher-persona-resource.md │ ├── changelog.md │ ├── CNAME │ ├── concepts │ │ ├── 01-what-is-biomcp.md │ │ ├── 02-the-deep-researcher-persona.md │ │ └── 03-sequential-thinking-with-the-think-tool.md │ ├── developer-guides │ │ ├── 01-server-deployment.md │ │ ├── 02-contributing-and-testing.md │ │ ├── 03-third-party-endpoints.md │ │ ├── 04-transport-protocol.md │ │ ├── 05-error-handling.md │ │ ├── 06-http-client-and-caching.md │ │ ├── 07-performance-optimizations.md │ │ └── generate_endpoints.py │ ├── faq-condensed.md │ ├── FDA_SECURITY.md │ ├── genomoncology.md │ ├── getting-started │ │ ├── 01-quickstart-cli.md │ │ ├── 02-claude-desktop-integration.md │ │ └── 03-authentication-and-api-keys.md │ ├── how-to-guides │ │ ├── 01-find-articles-and-cbioportal-data.md │ │ ├── 02-find-trials-with-nci-and-biothings.md │ │ ├── 03-get-comprehensive-variant-annotations.md │ │ ├── 04-predict-variant-effects-with-alphagenome.md │ │ ├── 05-logging-and-monitoring-with-bigquery.md │ │ └── 06-search-nci-organizations-and-interventions.md │ ├── index.md │ ├── policies.md │ ├── reference │ │ ├── architecture-diagrams.md │ │ ├── quick-architecture.md │ │ ├── quick-reference.md │ │ └── visual-architecture.md │ ├── robots.txt │ ├── stylesheets │ │ ├── announcement.css │ │ └── extra.css │ ├── troubleshooting.md │ ├── tutorials │ │ ├── biothings-prompts.md │ │ ├── claude-code-biomcp-alphagenome.md │ │ ├── nci-prompts.md │ │ ├── openfda-integration.md │ │ ├── openfda-prompts.md │ │ ├── pydantic-ai-integration.md │ │ └── remote-connection.md │ ├── user-guides │ │ ├── 01-command-line-interface.md │ │ ├── 02-mcp-tools-reference.md │ │ └── 03-integrating-with-ides-and-clients.md │ └── workflows │ └── all-workflows.md ├── example_scripts │ ├── mcp_integration.py │ └── python_sdk.py ├── glama.json ├── LICENSE ├── lzyank.toml ├── Makefile ├── mkdocs.yml ├── package-lock.json ├── package.json ├── pyproject.toml ├── README.md ├── scripts │ ├── check_docs_in_mkdocs.py │ ├── check_http_imports.py │ └── generate_endpoints_doc.py ├── smithery.yaml ├── src │ └── biomcp │ ├── __init__.py 
│ ├── __main__.py │ ├── articles │ │ ├── __init__.py │ │ ├── autocomplete.py │ │ ├── fetch.py │ │ ├── preprints.py │ │ ├── search_optimized.py │ │ ├── search.py │ │ └── unified.py │ ├── biomarkers │ │ ├── __init__.py │ │ └── search.py │ ├── cbioportal_helper.py │ ├── circuit_breaker.py │ ├── cli │ │ ├── __init__.py │ │ ├── articles.py │ │ ├── biomarkers.py │ │ ├── diseases.py │ │ ├── health.py │ │ ├── interventions.py │ │ ├── main.py │ │ ├── openfda.py │ │ ├── organizations.py │ │ ├── server.py │ │ ├── trials.py │ │ └── variants.py │ ├── connection_pool.py │ ├── constants.py │ ├── core.py │ ├── diseases │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── domain_handlers.py │ ├── drugs │ │ ├── __init__.py │ │ └── getter.py │ ├── exceptions.py │ ├── genes │ │ ├── __init__.py │ │ └── getter.py │ ├── http_client_simple.py │ ├── http_client.py │ ├── individual_tools.py │ ├── integrations │ │ ├── __init__.py │ │ ├── biothings_client.py │ │ └── cts_api.py │ ├── interventions │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── logging_filter.py │ ├── metrics_handler.py │ ├── metrics.py │ ├── openfda │ │ ├── __init__.py │ │ ├── adverse_events_helpers.py │ │ ├── adverse_events.py │ │ ├── cache.py │ │ ├── constants.py │ │ ├── device_events_helpers.py │ │ ├── device_events.py │ │ ├── drug_approvals.py │ │ ├── drug_labels_helpers.py │ │ ├── drug_labels.py │ │ ├── drug_recalls_helpers.py │ │ ├── drug_recalls.py │ │ ├── drug_shortages_detail_helpers.py │ │ ├── drug_shortages_helpers.py │ │ ├── drug_shortages.py │ │ ├── exceptions.py │ │ ├── input_validation.py │ │ ├── rate_limiter.py │ │ ├── utils.py │ │ └── validation.py │ ├── organizations │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── parameter_parser.py │ ├── prefetch.py │ ├── query_parser.py │ ├── query_router.py │ ├── rate_limiter.py │ ├── render.py │ ├── request_batcher.py │ ├── resources │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── instructions.md │ │ └── researcher.md │ ├── retry.py │ ├── router_handlers.py │ ├── router.py │ ├── shared_context.py │ ├── thinking │ │ ├── __init__.py │ │ ├── sequential.py │ │ └── session.py │ ├── thinking_tool.py │ ├── thinking_tracker.py │ ├── trials │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── nci_getter.py │ │ ├── nci_search.py │ │ └── search.py │ ├── utils │ │ ├── __init__.py │ │ ├── cancer_types_api.py │ │ ├── cbio_http_adapter.py │ │ ├── endpoint_registry.py │ │ ├── gene_validator.py │ │ ├── metrics.py │ │ ├── mutation_filter.py │ │ ├── query_utils.py │ │ ├── rate_limiter.py │ │ └── request_cache.py │ ├── variants │ │ ├── __init__.py │ │ ├── alphagenome.py │ │ ├── cancer_types.py │ │ ├── cbio_external_client.py │ │ ├── cbioportal_mutations.py │ │ ├── cbioportal_search_helpers.py │ │ ├── cbioportal_search.py │ │ ├── constants.py │ │ ├── external.py │ │ ├── filters.py │ │ ├── getter.py │ │ ├── links.py │ │ └── search.py │ └── workers │ ├── __init__.py │ ├── worker_entry_stytch.js │ ├── worker_entry.js │ └── worker.py ├── tests │ ├── bdd │ │ ├── cli_help │ │ │ ├── help.feature │ │ │ └── test_help.py │ │ ├── conftest.py │ │ ├── features │ │ │ └── alphagenome_integration.feature │ │ ├── fetch_articles │ │ │ ├── fetch.feature │ │ │ └── test_fetch.py │ │ ├── get_trials │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── get_variants │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── search_articles │ │ │ ├── autocomplete.feature │ │ │ ├── search.feature │ │ │ ├── test_autocomplete.py │ │ │ └── test_search.py │ │ ├── search_trials │ │ │ ├── search.feature │ │ │ └── test_search.py 
│ │ ├── search_variants │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ └── steps │ │ └── test_alphagenome_steps.py │ ├── config │ │ └── test_smithery_config.py │ ├── conftest.py │ ├── data │ │ ├── ct_gov │ │ │ ├── clinical_trials_api_v2.yaml │ │ │ ├── trials_NCT04280705.json │ │ │ └── trials_NCT04280705.txt │ │ ├── myvariant │ │ │ ├── myvariant_api.yaml │ │ │ ├── myvariant_field_descriptions.csv │ │ │ ├── variants_full_braf_v600e.json │ │ │ ├── variants_full_braf_v600e.txt │ │ │ └── variants_part_braf_v600_multiple.json │ │ ├── openfda │ │ │ ├── drugsfda_detail.json │ │ │ ├── drugsfda_search.json │ │ │ ├── enforcement_detail.json │ │ │ └── enforcement_search.json │ │ └── pubtator │ │ ├── pubtator_autocomplete.json │ │ └── pubtator3_paper.txt │ ├── integration │ │ ├── test_openfda_integration.py │ │ ├── test_preprints_integration.py │ │ ├── test_simple.py │ │ └── test_variants_integration.py │ ├── tdd │ │ ├── articles │ │ │ ├── test_autocomplete.py │ │ │ ├── test_cbioportal_integration.py │ │ │ ├── test_fetch.py │ │ │ ├── test_preprints.py │ │ │ ├── test_search.py │ │ │ └── test_unified.py │ │ ├── conftest.py │ │ ├── drugs │ │ │ ├── __init__.py │ │ │ └── test_drug_getter.py │ │ ├── openfda │ │ │ ├── __init__.py │ │ │ ├── test_adverse_events.py │ │ │ ├── test_device_events.py │ │ │ ├── test_drug_approvals.py │ │ │ ├── test_drug_labels.py │ │ │ ├── test_drug_recalls.py │ │ │ ├── test_drug_shortages.py │ │ │ └── test_security.py │ │ ├── test_biothings_integration_real.py │ │ ├── test_biothings_integration.py │ │ ├── test_circuit_breaker.py │ │ ├── test_concurrent_requests.py │ │ ├── test_connection_pool.py │ │ ├── test_domain_handlers.py │ │ ├── test_drug_approvals.py │ │ ├── test_drug_recalls.py │ │ ├── test_drug_shortages.py │ │ ├── test_endpoint_documentation.py │ │ ├── test_error_scenarios.py │ │ ├── test_europe_pmc_fetch.py │ │ ├── test_mcp_integration.py │ │ ├── test_mcp_tools.py │ │ ├── test_metrics.py │ │ ├── test_nci_integration.py │ │ ├── test_nci_mcp_tools.py │ │ ├── test_network_policies.py │ │ ├── test_offline_mode.py │ │ ├── test_openfda_unified.py │ │ ├── test_pten_r173_search.py │ │ ├── test_render.py │ │ ├── test_request_batcher.py.disabled │ │ ├── test_retry.py │ │ ├── test_router.py │ │ ├── test_shared_context.py.disabled │ │ ├── test_unified_biothings.py │ │ ├── thinking │ │ │ ├── __init__.py │ │ │ └── test_sequential.py │ │ ├── trials │ │ │ ├── test_backward_compatibility.py │ │ │ ├── test_getter.py │ │ │ └── test_search.py │ │ ├── utils │ │ │ ├── test_gene_validator.py │ │ │ ├── test_mutation_filter.py │ │ │ ├── test_rate_limiter.py │ │ │ └── test_request_cache.py │ │ ├── variants │ │ │ ├── constants.py │ │ │ ├── test_alphagenome_api_key.py │ │ │ ├── test_alphagenome_comprehensive.py │ │ │ ├── test_alphagenome.py │ │ │ ├── test_cbioportal_mutations.py │ │ │ ├── test_cbioportal_search.py │ │ │ ├── test_external_integration.py │ │ │ ├── test_external.py │ │ │ ├── test_extract_gene_aa_change.py │ │ │ ├── test_filters.py │ │ │ ├── test_getter.py │ │ │ ├── test_links.py │ │ │ └── test_search.py │ │ └── workers │ │ └── test_worker_sanitization.js │ └── test_pydantic_ai_integration.py ├── THIRD_PARTY_ENDPOINTS.md ├── tox.ini ├── uv.lock └── wrangler.toml ``` # Files -------------------------------------------------------------------------------- /docs/how-to-guides/02-find-trials-with-nci-and-biothings.md: -------------------------------------------------------------------------------- ```markdown 1 | # How to Find Trials with NCI and BioThings 2 | 3 | This guide 
demonstrates how to search for clinical trials using BioMCP's dual data sources and automatic disease synonym expansion. 4 | 5 | ## Overview 6 | 7 | BioMCP provides access to clinical trials through: 8 | 9 | - **ClinicalTrials.gov**: Default source with comprehensive U.S. and international trials ([API Reference](../backend-services-reference/04-clinicaltrials-gov.md)) 10 | - **NCI CTS API**: Advanced cancer trial search with biomarker filtering (requires API key) ([API Reference](../backend-services-reference/05-nci-cts-api.md)) 11 | - **BioThings Integration**: Automatic disease synonym expansion for better coverage ([BioThings Reference](../backend-services-reference/02-biothings-suite.md)) 12 | 13 | ## Basic Trial Search 14 | 15 | ### Simple Disease Search 16 | 17 | Find trials for a specific condition: 18 | 19 | ```bash 20 | # CLI 21 | biomcp trial search --condition melanoma --status RECRUITING 22 | 23 | # Python 24 | trials = await client.trials.search( 25 | conditions=["melanoma"], 26 | recruiting_status="RECRUITING" 27 | ) 28 | 29 | # MCP Tool 30 | trial_searcher( 31 | conditions=["melanoma"], 32 | recruiting_status="OPEN" 33 | ) 34 | ``` 35 | 36 | ### Search by Intervention 37 | 38 | Find trials testing specific drugs: 39 | 40 | ```bash 41 | # CLI 42 | biomcp trial search --intervention pembrolizumab --phase PHASE3 43 | 44 | # Python 45 | trials = await client.trials.search( 46 | interventions=["pembrolizumab"], 47 | phase="PHASE3" 48 | ) 49 | ``` 50 | 51 | ## Location-Based Search 52 | 53 | ### Finding Nearby Trials 54 | 55 | **Important**: Location searches require latitude and longitude coordinates. 56 | 57 | ```python 58 | # Find trials near Cleveland, Ohio 59 | trials = await trial_searcher( 60 | conditions=["lung cancer"], 61 | lat=41.4993, 62 | long=-81.6944, 63 | distance=50 # 50 miles radius 64 | ) 65 | 66 | # Find trials near Boston 67 | trials = await trial_searcher( 68 | conditions=["breast cancer"], 69 | lat=42.3601, 70 | long=-71.0589, 71 | distance=25 72 | ) 73 | ``` 74 | 75 | ### Getting Coordinates 76 | 77 | For common locations: 78 | 79 | - Cleveland: lat=41.4993, long=-81.6944 80 | - Boston: lat=42.3601, long=-71.0589 81 | - New York: lat=40.7128, long=-74.0060 82 | - Los Angeles: lat=34.0522, long=-118.2437 83 | - Houston: lat=29.7604, long=-95.3698 84 | 85 | ## Advanced Filtering 86 | 87 | ### Multiple Criteria 88 | 89 | Combine multiple filters for precise results: 90 | 91 | ```python 92 | # Complex search example 93 | trials = await trial_searcher( 94 | conditions=["non-small cell lung cancer", "NSCLC"], 95 | interventions=["pembrolizumab", "immunotherapy"], 96 | phase="PHASE3", 97 | recruiting_status="OPEN", 98 | age_group="ADULT", 99 | study_type="INTERVENTIONAL", 100 | funder_type="INDUSTRY" 101 | ) 102 | ``` 103 | 104 | ### Date-Based Filtering 105 | 106 | Find recently started trials: 107 | 108 | ```bash 109 | # CLI - Trials started in 2024 110 | biomcp trial search \ 111 | --condition cancer \ 112 | --start-date 2024-01-01 \ 113 | --status RECRUITING 114 | ``` 115 | 116 | ## Using NCI API Advanced Features 117 | 118 | ### Setup NCI API Key 119 | 120 | Get your key from [api.cancer.gov](https://api.cancer.gov). 
For detailed setup instructions, see [Authentication and API Keys](../getting-started/03-authentication-and-api-keys.md#nci-clinical-trials-api): 121 | 122 | ```bash 123 | export NCI_API_KEY="your-key-here" 124 | ``` 125 | 126 | ### Biomarker-Based Search 127 | 128 | Find trials for specific mutations: 129 | 130 | ```python 131 | # Search using NCI source 132 | trials = await search( 133 | domain="trial", 134 | source="nci", 135 | conditions=["melanoma"], 136 | required_mutations=["BRAF V600E"], 137 | allow_brain_mets=True, 138 | api_key="your-key" 139 | ) 140 | ``` 141 | 142 | ### NCI-Specific Parameters 143 | 144 | ```python 145 | # Advanced NCI search 146 | trials = await trial_searcher( 147 | source="nci", 148 | conditions=["lung cancer"], 149 | required_mutations=["EGFR L858R", "EGFR exon 19 deletion"], 150 | prior_therapy_required=False, 151 | allow_brain_mets=True, 152 | allow_prior_immunotherapy=False, 153 | api_key="your-key" 154 | ) 155 | ``` 156 | 157 | ## BioThings Integration for Enhanced Search 158 | 159 | For technical details on the BioThings APIs, see: 160 | 161 | - [BioThings Suite Reference](../backend-services-reference/02-biothings-suite.md) 162 | 163 | ### Automatic Disease Synonym Expansion 164 | 165 | BioMCP automatically expands disease terms using MyDisease.info: 166 | 167 | ```python 168 | # Searching for "GIST" automatically includes: 169 | # - "gastrointestinal stromal tumor" 170 | # - "gastrointestinal stromal tumour" 171 | # - "GI stromal tumor" 172 | trials = await trial_searcher(conditions=["GIST"]) 173 | ``` 174 | 175 | ### Manual Disease Lookup 176 | 177 | Get all synonyms for a disease: 178 | 179 | ```python 180 | # Get disease information 181 | disease_info = await disease_getter("melanoma") 182 | 183 | # Extract synonyms 184 | synonyms = disease_info.synonyms 185 | # Returns: ["malignant melanoma", "melanoma, malignant", ...] 
186 | 187 | # Use in trial search 188 | trials = await trial_searcher(conditions=synonyms) 189 | 190 | ## Practical Workflows 191 | 192 | ### Workflow 1: Patient-Centric Trial Search 193 | 194 | Find trials for a specific patient profile: 195 | 196 | ```python 197 | async def find_trials_for_patient( 198 | disease: str, 199 | mutations: list[str], 200 | location: tuple[float, float], 201 | prior_treatments: list[str] 202 | ): 203 | # Step 1: Think about the search 204 | await think( 205 | thought=f"Searching trials for {disease} with {mutations}", 206 | thoughtNumber=1 207 | ) 208 | 209 | # Step 2: Get disease synonyms 210 | disease_info = await disease_getter(disease) 211 | all_conditions = [disease] + disease_info.synonyms 212 | 213 | # Step 3: Search both sources 214 | # ClinicalTrials.gov 215 | ctgov_trials = await trial_searcher( 216 | conditions=all_conditions, 217 | other_terms=mutations, 218 | lat=location[0], 219 | long=location[1], 220 | distance=100, 221 | recruiting_status="OPEN" 222 | ) 223 | 224 | nci_trials = None # NCI (only if API key available) 225 | if os.getenv("NCI_API_KEY"): 226 | nci_trials = await trial_searcher( 227 | source="nci", 228 | conditions=all_conditions, 229 | required_mutations=mutations, 230 | exclude_prior_therapy=prior_treatments, 231 | api_key=os.getenv("NCI_API_KEY") 232 | ) 233 | 234 | return { 235 | "clinicaltrials_gov": ctgov_trials, 236 | "nci": nci_trials 237 | } 238 | 239 | # Example usage 240 | trials = await find_trials_for_patient( 241 | disease="melanoma", 242 | mutations=["BRAF V600E"], 243 | location=(40.7128, -74.0060), # New York 244 | prior_treatments=["vemurafenib"] 245 | ) 246 | ``` 247 | 248 | ### Workflow 2: Research Landscape Analysis 249 | 250 | Understand ongoing research in a field: 251 | 252 | ```python 253 | async def analyze_research_landscape(gene: str, disease: str): 254 | # Get gene information 255 | gene_info = await gene_getter(gene) 256 | 257 | # Find all active trials 258 | all_trials = await trial_searcher( 259 | conditions=[disease], 260 | other_terms=[gene, f"{gene} mutation", f"{gene} positive"], 261 | recruiting_status="OPEN", 262 | page_size=50 263 | ) 264 | 265 | # Categorize by phase 266 | phase_distribution = {} 267 | for trial in all_trials: 268 | phase = trial.phase or "Not specified" 269 | phase_distribution[phase] = phase_distribution.get(phase, 0) + 1 270 | 271 | # Extract unique interventions 272 | interventions = set() 273 | for trial in all_trials: 274 | if trial.interventions: 275 | interventions.update(trial.interventions) 276 | 277 | return { 278 | "total_trials": len(all_trials), 279 | "phase_distribution": phase_distribution, 280 | "unique_interventions": list(interventions), 281 | "gene_info": gene_info 282 | } 283 | 284 | # Example 285 | landscape = await analyze_research_landscape("ALK", "lung cancer") 286 | ``` 287 | 288 | ### Workflow 3: Biomarker-Driven Search 289 | 290 | Find trials based on specific biomarkers: 291 | 292 | ```python 293 | async def biomarker_trial_search(biomarkers: list[str], cancer_type: str): 294 | # Search NCI biomarker database 295 | biomarker_results = [] 296 | for biomarker in biomarkers: 297 | result = await nci_biomarker_searcher( 298 | name=biomarker, 299 | api_key=os.getenv("NCI_API_KEY") 300 | ) 301 | biomarker_results.extend(result) 302 | 303 | # Extract associated trials 304 | trial_ids = set() 305 | for bio in biomarker_results: 306 | if bio.get("associated_trials"): 307 | trial_ids.update(bio["associated_trials"]) 308 | 309 | # Get trial details 310 | trials = [] 311 | 312
| for nct_id in trial_ids: 313 | trial = await trial_getter(nct_id) 314 | trials.append(trial) 315 | 316 | return trials 317 | 318 | # Example 319 | trials = await biomarker_trial_search( 320 | biomarkers=["PD-L1", "TMB-high", "MSI-H"], 321 | cancer_type="colorectal cancer" 322 | ) 323 | ``` 324 | 325 | ## Working with Trial Results 326 | 327 | ### Extracting Key Information 328 | 329 | ```python 330 | # Process trial results 331 | for trial in trials: 332 | print(f"NCT ID: {trial.nct_id}") 333 | print(f"Title: {trial.title}") 334 | print(f"Status: {trial.status}") 335 | print(f"Phase: {trial.phase}") 336 | 337 | # Locations 338 | if trial.locations: 339 | print("Locations:") 340 | for loc in trial.locations: 341 | print(f" - {loc.facility}, {loc.city}, {loc.state}") 342 | 343 | # Eligibility 344 | if trial.eligibility: 345 | print(f"Age: {trial.eligibility.minimum_age} - {trial.eligibility.maximum_age}") 346 | print(f"Sex: {trial.eligibility.sex}") 347 | ``` 348 | 349 | ### Getting Detailed Trial Information 350 | 351 | ```python 352 | # Get complete trial details 353 | full_trial = await trial_getter("NCT03006926") 354 | 355 | # Get specific sections 356 | protocol = await trial_protocol_getter("NCT03006926") 357 | locations = await trial_locations_getter("NCT03006926") 358 | outcomes = await trial_outcomes_getter("NCT03006926") 359 | references = await trial_references_getter("NCT03006926") 360 | ``` 361 | 362 | ## Tips for Effective Trial Searches 363 | 364 | ### 1. Use Multiple Search Terms 365 | 366 | ```python 367 | # Cover variations 368 | trials = await trial_searcher( 369 | conditions=["NSCLC", "non-small cell lung cancer", "lung adenocarcinoma"], 370 | interventions=["anti-PD-1", "pembrolizumab", "Keytruda"] 371 | ) 372 | ``` 373 | 374 | ### 2. Check Both Data Sources 375 | 376 | ```python 377 | # Some trials may only be in one database 378 | ctgov_count = len(await trial_searcher(source="ctgov", conditions=["melanoma"])) 379 | nci_count = len(await trial_searcher(source="nci", conditions=["melanoma"])) 380 | ``` 381 | 382 | ### 3. Use Appropriate Filters 383 | 384 | - **recruiting_status**: Focus on trials accepting patients 385 | - **phase**: Later phases for established treatments 386 | - **age_group**: Match patient demographics 387 | - **study_type**: INTERVENTIONAL vs OBSERVATIONAL 388 | 389 | ### 4. Leverage Location Search 390 | 391 | Always include location for patient-specific searches: 392 | 393 | ```python 394 | # Bad - no location 395 | trials = await trial_searcher(conditions=["cancer"]) 396 | 397 | # Good - includes location 398 | trials = await trial_searcher( 399 | conditions=["cancer"], 400 | lat=40.7128, 401 | long=-74.0060, 402 | distance=50 403 | ) 404 | ``` 405 | 406 | ## Troubleshooting 407 | 408 | ### No Results Found 409 | 410 | 1. **Broaden search terms**: Remove specific filters 411 | 2. **Check synonyms**: Use disease_getter to find alternatives 412 | 3. **Expand location**: Increase distance parameter 413 | 4. 
**Try both sources**: Some trials only in NCI or ClinicalTrials.gov 414 | 415 | ### Location Search Issues 416 | 417 | - Ensure both latitude AND longitude are provided 418 | - Use decimal degrees (not degrees/minutes/seconds) 419 | - Check coordinate signs (negative for West/South) 420 | 421 | ### NCI API Errors 422 | 423 | - Verify API key is valid 424 | - Check rate limits (1000 requests/day with key) 425 | - Some features require specific API key permissions 426 | 427 | ## Next Steps 428 | 429 | - Learn about [variant annotations](03-get-comprehensive-variant-annotations.md) 430 | - Explore [AlphaGenome predictions](04-predict-variant-effects-with-alphagenome.md) 431 | - Set up [monitoring and logging](05-logging-and-monitoring-with-bigquery.md) 432 | ``` -------------------------------------------------------------------------------- /src/biomcp/variants/search.py: -------------------------------------------------------------------------------- ```python 1 | import json 2 | import logging 3 | from typing import Annotated, Any 4 | 5 | from pydantic import BaseModel, Field, model_validator 6 | 7 | from .. import StrEnum, ensure_list, http_client, render 8 | from ..constants import MYVARIANT_QUERY_URL, SYSTEM_PAGE_SIZE 9 | from .filters import filter_variants 10 | from .links import inject_links 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | class ClinicalSignificance(StrEnum): 16 | PATHOGENIC = "pathogenic" 17 | LIKELY_PATHOGENIC = "likely pathogenic" 18 | UNCERTAIN_SIGNIFICANCE = "uncertain significance" 19 | LIKELY_BENIGN = "likely benign" 20 | BENIGN = "benign" 21 | 22 | 23 | class PolyPhenPrediction(StrEnum): 24 | PROBABLY_DAMAGING = "D" 25 | POSSIBLY_DAMAGING = "P" 26 | BENIGN = "B" 27 | 28 | 29 | class SiftPrediction(StrEnum): 30 | DELETERIOUS = "D" 31 | TOLERATED = "T" 32 | 33 | 34 | class VariantSources(StrEnum): 35 | CADD = "cadd" 36 | CGI = "cgi" 37 | CIVIC = "civic" 38 | CLINVAR = "clinvar" 39 | COSMIC = "cosmic" 40 | DBNSFP = "dbnsfp" 41 | DBSNP = "dbsnp" 42 | DOCM = "docm" 43 | EMV = "evm" 44 | EXAC = "exac" 45 | GNOMAD_EXOME = "gnomad_exome" 46 | HG19 = "hg19" 47 | MUTDB = "mutdb" 48 | SNPEFF = "snpeff" 49 | VCF = "vcf" 50 | 51 | 52 | MYVARIANT_FIELDS = [ 53 | "_id", 54 | "chrom", 55 | "vcf.position", 56 | "vcf.ref", 57 | "vcf.alt", 58 | "cadd.phred", 59 | "civic.id", 60 | "civic.openCravatUrl", 61 | "clinvar.rcv.clinical_significance", 62 | "clinvar.variant_id", 63 | "cosmic.cosmic_id", 64 | "dbnsfp.genename", 65 | "dbnsfp.hgvsc", 66 | "dbnsfp.hgvsp", 67 | "dbnsfp.polyphen2.hdiv.pred", 68 | "dbnsfp.polyphen2.hdiv.score", 69 | "dbnsfp.sift.pred", 70 | "dbnsfp.sift.score", 71 | "dbsnp.rsid", 72 | "exac.af", 73 | "gnomad_exome.af.af", 74 | ] 75 | 76 | 77 | class VariantQuery(BaseModel): 78 | """Search parameters for querying variant data from MyVariant.info.""" 79 | 80 | gene: str | None = Field( 81 | default=None, 82 | description="Gene symbol to search for (e.g. BRAF, TP53)", 83 | ) 84 | hgvsp: str | None = Field( 85 | default=None, 86 | description="Protein change notation (e.g., p.V600E, p.Arg557His)", 87 | ) 88 | hgvsc: str | None = Field( 89 | default=None, 90 | description="cDNA notation (e.g., c.1799T>A)", 91 | ) 92 | rsid: str | None = Field( 93 | default=None, 94 | description="dbSNP rsID (e.g., rs113488022)", 95 | ) 96 | region: str | None = Field( 97 | default=None, 98 | description="Genomic region as chr:start-end (e.g. 
chr1:12345-67890)", 99 | ) 100 | significance: ClinicalSignificance | None = Field( 101 | default=None, 102 | description="ClinVar clinical significance", 103 | ) 104 | max_frequency: float | None = Field( 105 | default=None, 106 | description="Maximum population allele frequency threshold", 107 | ) 108 | min_frequency: float | None = Field( 109 | default=None, 110 | description="Minimum population allele frequency threshold", 111 | ) 112 | cadd: float | None = Field( 113 | default=None, 114 | description="Minimum CADD phred score", 115 | ) 116 | polyphen: PolyPhenPrediction | None = Field( 117 | default=None, 118 | description="PolyPhen-2 prediction", 119 | ) 120 | sift: SiftPrediction | None = Field( 121 | default=None, 122 | description="SIFT prediction", 123 | ) 124 | sources: list[VariantSources] = Field( 125 | description="Include only specific data sources", 126 | default_factory=list, 127 | ) 128 | size: int = Field( 129 | default=SYSTEM_PAGE_SIZE, 130 | description="Number of results to return", 131 | ) 132 | offset: int = Field( 133 | default=0, 134 | description="Result offset for pagination", 135 | ) 136 | 137 | @model_validator(mode="after") 138 | def validate_query_params(self) -> "VariantQuery": 139 | if not self.model_dump(exclude_none=True, exclude_defaults=True): 140 | raise ValueError("At least one search parameter is required") 141 | return self 142 | 143 | 144 | def _construct_query_part( 145 | field: str, 146 | val: Any | None, 147 | operator: str | None = None, 148 | quoted: bool = False, 149 | ) -> str | None: 150 | if val is not None: 151 | val = str(val) 152 | val = f'"{val}"' if quoted else val 153 | operator = operator or "" 154 | val = f"{field}:{operator}{val}" 155 | return val 156 | 157 | 158 | def build_query_string(query: VariantQuery) -> str: 159 | query_parts: list[str] = list(filter(None, [query.region, query.rsid])) 160 | 161 | query_params = [ 162 | ("dbnsfp.genename", query.gene, None, True), 163 | ("dbnsfp.hgvsp", query.hgvsp, None, True), 164 | ("dbnsfp.hgvsc", query.hgvsc, None, True), 165 | ("dbsnp.rsid", query.rsid, None, True), 166 | ("clinvar.rcv.clinical_significance", query.significance, None, True), 167 | ("gnomad_exome.af.af", query.max_frequency, "<=", False), 168 | ("gnomad_exome.af.af", query.min_frequency, ">=", False), 169 | ("cadd.phred", query.cadd, ">=", False), 170 | ("dbnsfp.polyphen2.hdiv.pred", query.polyphen, None, True), 171 | ("dbnsfp.sift.pred", query.sift, None, True), 172 | ] 173 | 174 | for field, val, operator, quoted in query_params: 175 | part = _construct_query_part(field, val, operator, quoted) 176 | if part is not None: 177 | query_parts.append(part) 178 | 179 | return " AND ".join(query_parts) if query_parts else "*" 180 | 181 | 182 | async def convert_query(query: VariantQuery) -> dict[str, Any]: 183 | """Convert a VariantQuery to parameters for the MyVariant.info API.""" 184 | fields = MYVARIANT_FIELDS[:] + [f"{s}.*" for s in query.sources] 185 | 186 | # Optimize common queries to prevent timeouts 187 | query_string = build_query_string(query) 188 | 189 | # Special handling for common BRAF V600E query 190 | if query.gene == "BRAF" and query.hgvsp == "V600E": 191 | # Use a more specific query that performs better 192 | query_string = 'dbnsfp.genename:"BRAF" AND (dbnsfp.aaref:"V" AND dbnsfp.aapos:600 AND dbnsfp.aaalt:"E")' 193 | 194 | return { 195 | "q": query_string, 196 | "size": query.size, 197 | "from": query.offset, 198 | "fields": ",".join(fields), 199 | } 200 | 201 | 202 | async def search_variants( 203 | 
query: VariantQuery, 204 | output_json: bool = False, 205 | include_cbioportal: bool = True, 206 | ) -> str: 207 | """Search variants using the MyVariant.info API with optional cBioPortal summary.""" 208 | 209 | params = await convert_query(query) 210 | 211 | response, error = await http_client.request_api( 212 | url=MYVARIANT_QUERY_URL, 213 | request=params, 214 | method="GET", 215 | domain="myvariant", 216 | ) 217 | data: list = response.get("hits", []) if response else [] 218 | 219 | if error: 220 | # Provide more specific error messages for common issues 221 | if "timed out" in error.message.lower(): 222 | error_msg = ( 223 | "MyVariant.info API request timed out. This can happen with complex queries. " 224 | "Try narrowing your search criteria or searching by specific identifiers (rsID, HGVS)." 225 | ) 226 | else: 227 | error_msg = f"Error {error.code}: {error.message}" 228 | data = [{"error": error_msg}] 229 | else: 230 | data = inject_links(data) 231 | data = filter_variants(data) 232 | 233 | # Get cBioPortal summary if searching by gene 234 | cbioportal_summary = None 235 | if include_cbioportal and query.gene and not error: 236 | try: 237 | from .cbioportal_search import ( 238 | CBioPortalSearchClient, 239 | format_cbioportal_search_summary, 240 | ) 241 | 242 | client = CBioPortalSearchClient() 243 | summary = await client.get_gene_search_summary(query.gene) 244 | if summary: 245 | cbioportal_summary = format_cbioportal_search_summary(summary) 246 | except Exception as e: 247 | logger.warning(f"Failed to get cBioPortal summary: {e}") 248 | 249 | if not output_json: 250 | result = render.to_markdown(data) 251 | if cbioportal_summary: 252 | result = cbioportal_summary + "\n\n" + result 253 | return result 254 | else: 255 | if cbioportal_summary: 256 | return json.dumps( 257 | {"cbioportal_summary": cbioportal_summary, "variants": data}, 258 | indent=2, 259 | ) 260 | return json.dumps(data, indent=2) 261 | 262 | 263 | async def _variant_searcher( 264 | call_benefit: Annotated[ 265 | str, 266 | "Define and summarize why this function is being called and the intended benefit", 267 | ], 268 | gene: Annotated[ 269 | str | None, "Gene symbol to search for (e.g. BRAF, TP53)" 270 | ] = None, 271 | hgvsp: Annotated[ 272 | str | None, "Protein change notation (e.g., p.V600E, p.Arg557His)" 273 | ] = None, 274 | hgvsc: Annotated[str | None, "cDNA notation (e.g., c.1799T>A)"] = None, 275 | rsid: Annotated[str | None, "dbSNP rsID (e.g., rs113488022)"] = None, 276 | region: Annotated[ 277 | str | None, "Genomic region as chr:start-end (e.g. 
chr1:12345-67890)" 278 | ] = None, 279 | significance: Annotated[ 280 | ClinicalSignificance | str | None, "ClinVar clinical significance" 281 | ] = None, 282 | max_frequency: Annotated[ 283 | float | None, "Maximum population allele frequency threshold" 284 | ] = None, 285 | min_frequency: Annotated[ 286 | float | None, "Minimum population allele frequency threshold" 287 | ] = None, 288 | cadd: Annotated[float | None, "Minimum CADD phred score"] = None, 289 | polyphen: Annotated[ 290 | PolyPhenPrediction | str | None, "PolyPhen-2 prediction" 291 | ] = None, 292 | sift: Annotated[SiftPrediction | str | None, "SIFT prediction"] = None, 293 | sources: Annotated[ 294 | list[VariantSources] | list[str] | str | None, 295 | "Include only specific data sources (list or comma-separated string)", 296 | ] = None, 297 | size: Annotated[int, "Number of results to return"] = SYSTEM_PAGE_SIZE, 298 | offset: Annotated[int, "Result offset for pagination"] = 0, 299 | ) -> str: 300 | """ 301 | Searches for genetic variants based on specified criteria. 302 | 303 | Parameters: 304 | - call_benefit: Define and summarize why this function is being called and the intended benefit 305 | - gene: Gene symbol to search for (e.g. BRAF, TP53) 306 | - hgvsp: Protein change notation (e.g., p.V600E, p.Arg557His) 307 | - hgvsc: cDNA notation (e.g., c.1799T>A) 308 | - rsid: dbSNP rsID (e.g., rs113488022) 309 | - region: Genomic region as chr:start-end (e.g. chr1:12345-67890) 310 | - significance: ClinVar clinical significance 311 | - max_frequency: Maximum population allele frequency threshold 312 | - min_frequency: Minimum population allele frequency threshold 313 | - cadd: Minimum CADD phred score 314 | - polyphen: PolyPhen-2 prediction 315 | - sift: SIFT prediction 316 | - sources: Include only specific data sources (list or comma-separated string) 317 | - size: Number of results to return (default: 10) 318 | - offset: Result offset for pagination (default: 0) 319 | 320 | Returns: 321 | Markdown formatted list of matching variants with key annotations 322 | """ 323 | # Convert individual parameters to a VariantQuery object 324 | query = VariantQuery( 325 | gene=gene, 326 | hgvsp=hgvsp, 327 | hgvsc=hgvsc, 328 | rsid=rsid, 329 | region=region, 330 | significance=significance, 331 | max_frequency=max_frequency, 332 | min_frequency=min_frequency, 333 | cadd=cadd, 334 | polyphen=polyphen, 335 | sift=sift, 336 | sources=ensure_list(sources, split_strings=True), 337 | size=size, 338 | offset=offset, 339 | ) 340 | return await search_variants( 341 | query, output_json=False, include_cbioportal=True 342 | ) 343 | ``` -------------------------------------------------------------------------------- /tests/tdd/test_mcp_integration.py: -------------------------------------------------------------------------------- ```python 1 | """Integration tests for MCP server functionality.""" 2 | 3 | import json 4 | from unittest.mock import patch 5 | 6 | import pytest 7 | 8 | from biomcp.core import mcp_app 9 | 10 | 11 | @pytest.mark.asyncio 12 | class TestMCPIntegration: 13 | """Integration tests for the MCP server.""" 14 | 15 | async def test_mcp_server_tools_registered(self): 16 | """Test that MCP tools are properly registered.""" 17 | # Get the registered tools 18 | tools = await mcp_app.list_tools() 19 | 20 | # Should have 35 tools (2 unified + 1 think + 32 individual including OpenFDA) 21 | assert len(tools) == 35 22 | 23 | # Check tool names 24 | tool_names = [tool.name for tool in tools] 25 | # Unified tools 26 | assert "search" in 
tool_names 27 | assert "fetch" in tool_names 28 | assert "think" in tool_names 29 | # Individual tools 30 | assert "article_searcher" in tool_names 31 | assert "article_getter" in tool_names 32 | assert "trial_searcher" in tool_names 33 | assert "trial_getter" in tool_names 34 | assert "trial_protocol_getter" in tool_names 35 | assert "trial_references_getter" in tool_names 36 | assert "trial_outcomes_getter" in tool_names 37 | assert "trial_locations_getter" in tool_names 38 | assert "variant_searcher" in tool_names 39 | assert "variant_getter" in tool_names 40 | assert "alphagenome_predictor" in tool_names 41 | assert "gene_getter" in tool_names 42 | assert "drug_getter" in tool_names 43 | assert "disease_getter" in tool_names 44 | # OpenFDA tools 45 | assert "openfda_adverse_searcher" in tool_names 46 | assert "openfda_adverse_getter" in tool_names 47 | assert "openfda_label_searcher" in tool_names 48 | assert "openfda_label_getter" in tool_names 49 | assert "openfda_device_searcher" in tool_names 50 | assert "openfda_device_getter" in tool_names 51 | assert "openfda_approval_searcher" in tool_names 52 | assert "openfda_approval_getter" in tool_names 53 | assert "openfda_recall_searcher" in tool_names 54 | assert "openfda_recall_getter" in tool_names 55 | assert "openfda_shortage_searcher" in tool_names 56 | assert "openfda_shortage_getter" in tool_names 57 | 58 | async def test_mcp_search_tool_schema(self): 59 | """Test the search tool schema.""" 60 | tools = await mcp_app.list_tools() 61 | search_tool = next(t for t in tools if t.name == "search") 62 | 63 | # Check required parameters 64 | assert "query" in search_tool.inputSchema["properties"] 65 | assert "domain" in search_tool.inputSchema["properties"] 66 | assert "call_benefit" in search_tool.inputSchema["properties"] 67 | # Verify query is required (no default value) 68 | assert "query" in search_tool.inputSchema.get("required", []) 69 | # Verify call_benefit is optional 70 | assert "call_benefit" not in search_tool.inputSchema.get( 71 | "required", [] 72 | ) 73 | 74 | # Check domain enum values 75 | domain_schema = search_tool.inputSchema["properties"]["domain"] 76 | # The enum is nested in anyOf 77 | enum_values = domain_schema["anyOf"][0]["enum"] 78 | assert "article" in enum_values 79 | assert "trial" in enum_values 80 | assert "variant" in enum_values 81 | # thinking domain was removed from search tool 82 | # assert "thinking" in enum_values 83 | 84 | async def test_mcp_fetch_tool_schema(self): 85 | """Test the fetch tool schema.""" 86 | tools = await mcp_app.list_tools() 87 | fetch_tool = next(t for t in tools if t.name == "fetch") 88 | 89 | # Check required parameters - only id should be required 90 | required = fetch_tool.inputSchema["required"] 91 | assert "id" in required 92 | assert len(required) == 1 # Only id should be required 93 | # Check optional parameters are present 94 | assert "domain" in fetch_tool.inputSchema["properties"] 95 | assert "call_benefit" in fetch_tool.inputSchema["properties"] 96 | assert "detail" in fetch_tool.inputSchema["properties"] 97 | 98 | # Check domain enum values (no thinking for fetch) 99 | domain_schema = fetch_tool.inputSchema["properties"]["domain"] 100 | # For required enums, the structure is different 101 | if "enum" in domain_schema: 102 | enum_values = domain_schema["enum"] 103 | else: 104 | # Check if it's in anyOf structure 105 | enum_values = domain_schema.get("anyOf", [{}])[0].get("enum", []) 106 | assert "article" in enum_values 107 | assert "trial" in enum_values 108 | 
assert "variant" in enum_values 109 | assert "thinking" not in enum_values 110 | 111 | async def test_mcp_search_article_integration(self): 112 | """Test end-to-end article search through MCP.""" 113 | mock_result = json.dumps([ 114 | { 115 | "pmid": "12345", 116 | "title": "Test Article", 117 | "abstract": "Test abstract", 118 | } 119 | ]) 120 | 121 | with patch( 122 | "biomcp.articles.unified.search_articles_unified" 123 | ) as mock_search: 124 | mock_search.return_value = mock_result 125 | 126 | # Import search function directly since we can't test through MCP without Context 127 | from biomcp.router import search 128 | 129 | # Call the search function 130 | result = await search( 131 | query="", 132 | domain="article", 133 | genes="BRAF", 134 | page_size=10, 135 | ) 136 | 137 | # Verify the result structure 138 | assert "results" in result 139 | # May include thinking reminder as first result 140 | actual_results = [ 141 | r for r in result["results"] if r["id"] != "thinking-reminder" 142 | ] 143 | assert len(actual_results) == 1 144 | assert actual_results[0]["id"] == "12345" 145 | 146 | async def test_mcp_fetch_variant_integration(self): 147 | """Test end-to-end variant fetch through MCP.""" 148 | mock_result = json.dumps([ 149 | { 150 | "_id": "rs121913529", 151 | "gene": {"symbol": "BRAF"}, 152 | "clinvar": {"clinical_significance": "Pathogenic"}, 153 | } 154 | ]) 155 | 156 | with patch("biomcp.variants.getter.get_variant") as mock_get: 157 | mock_get.return_value = mock_result 158 | 159 | from biomcp.router import fetch 160 | 161 | # Call the fetch function 162 | result = await fetch( 163 | domain="variant", 164 | id="rs121913529", 165 | ) 166 | 167 | # Verify the result structure 168 | assert result["id"] == "rs121913529" 169 | assert "title" in result 170 | assert "text" in result 171 | assert "url" in result 172 | assert "metadata" in result 173 | 174 | async def test_mcp_unified_query_integration(self): 175 | """Test unified query through MCP.""" 176 | with patch("biomcp.query_router.execute_routing_plan") as mock_execute: 177 | mock_execute.return_value = { 178 | "articles": json.dumps([ 179 | {"pmid": "111", "title": "Article 1"} 180 | ]), 181 | "variants": json.dumps([ 182 | {"_id": "rs222", "gene": {"symbol": "TP53"}} 183 | ]), 184 | } 185 | 186 | from biomcp.router import search 187 | 188 | # Call search with unified query 189 | result = await search( 190 | query="gene:BRAF AND disease:cancer", 191 | max_results_per_domain=10, 192 | ) 193 | 194 | # Should get results from multiple domains 195 | assert "results" in result 196 | # May include thinking reminder 197 | actual_results = [ 198 | r for r in result["results"] if r["id"] != "thinking-reminder" 199 | ] 200 | assert len(actual_results) >= 2 201 | 202 | async def test_mcp_thinking_integration(self): 203 | """Test sequential thinking through MCP.""" 204 | with patch( 205 | "biomcp.thinking.sequential._sequential_thinking" 206 | ) as mock_think: 207 | mock_think.return_value = { 208 | "thought": "Processed thought", 209 | "analysis": "Test analysis", 210 | } 211 | 212 | from biomcp.thinking_tool import think 213 | 214 | # Call the think tool directly 215 | result = await think( 216 | thought="Test thought", 217 | thoughtNumber=1, 218 | totalThoughts=3, 219 | nextThoughtNeeded=True, 220 | ) 221 | 222 | # Verify thinking result 223 | assert result["domain"] == "thinking" 224 | assert result["thoughtNumber"] == 1 225 | assert result["nextThoughtNeeded"] is True 226 | 227 | async def test_mcp_error_handling(self): 228 | 
"""Test MCP error handling.""" 229 | from biomcp.exceptions import InvalidDomainError 230 | from biomcp.router import search 231 | 232 | # Test with invalid domain 233 | with pytest.raises(InvalidDomainError) as exc_info: 234 | await search( 235 | query="", 236 | domain="invalid_domain", 237 | ) 238 | 239 | assert "Unknown domain" in str(exc_info.value) 240 | 241 | async def test_mcp_fetch_all_trial_sections(self): 242 | """Test fetching trial with all sections through MCP.""" 243 | mock_protocol = {"title": "Test Trial", "nct_id": "NCT123"} 244 | mock_locations = {"locations": [{"city": "Boston"}]} 245 | 246 | with ( 247 | patch("biomcp.trials.getter._trial_protocol") as mock_p, 248 | patch("biomcp.trials.getter._trial_locations") as mock_l, 249 | patch("biomcp.trials.getter._trial_outcomes") as mock_o, 250 | patch("biomcp.trials.getter._trial_references") as mock_r, 251 | ): 252 | mock_p.return_value = json.dumps(mock_protocol) 253 | mock_l.return_value = json.dumps(mock_locations) 254 | mock_o.return_value = json.dumps({"outcomes": {}}) 255 | mock_r.return_value = json.dumps({"references": []}) 256 | 257 | from biomcp.router import fetch 258 | 259 | result = await fetch( 260 | domain="trial", 261 | id="NCT123", 262 | detail="all", 263 | ) 264 | 265 | # Verify all sections are included 266 | assert result["id"] == "NCT123" 267 | assert "locations" in result["metadata"] 268 | assert "outcomes" in result["metadata"] 269 | assert "references" in result["metadata"] 270 | 271 | async def test_mcp_parameter_parsing(self): 272 | """Test parameter parsing through MCP.""" 273 | mock_result = json.dumps([]) 274 | 275 | with patch( 276 | "biomcp.articles.unified.search_articles_unified" 277 | ) as mock_search: 278 | mock_search.return_value = mock_result 279 | 280 | from biomcp.router import search 281 | 282 | # Test with various parameter formats 283 | await search( 284 | query="", 285 | domain="article", 286 | genes='["BRAF", "KRAS"]', # JSON string 287 | diseases="cancer,melanoma", # Comma-separated 288 | keywords=["test1", "test2"], # Already a list 289 | ) 290 | 291 | # Verify parameters were parsed correctly 292 | call_args = mock_search.call_args[0][0] 293 | assert call_args.genes == ["BRAF", "KRAS"] 294 | assert call_args.diseases == ["cancer", "melanoma"] 295 | assert call_args.keywords == ["test1", "test2"] 296 | ``` -------------------------------------------------------------------------------- /tests/tdd/test_biothings_integration_real.py: -------------------------------------------------------------------------------- ```python 1 | """Integration tests for BioThings API - calls real APIs.""" 2 | 3 | import pytest 4 | 5 | from biomcp.integrations import BioThingsClient 6 | 7 | 8 | @pytest.mark.integration 9 | class TestRealBioThingsAPIs: 10 | """Integration tests that call real BioThings APIs.""" 11 | 12 | @pytest.fixture 13 | def client(self): 14 | """Create a real BioThings client.""" 15 | return BioThingsClient() 16 | 17 | @pytest.mark.asyncio 18 | async def test_mygene_tp53(self, client): 19 | """Test real MyGene.info API with TP53.""" 20 | result = await client.get_gene_info("TP53") 21 | 22 | assert result is not None 23 | assert result.symbol == "TP53" 24 | assert result.name == "tumor protein p53" 25 | assert result.entrezgene in ["7157", 7157] 26 | assert "tumor suppressor" in result.summary.lower() 27 | # Check for either lowercase or uppercase P53 in aliases 28 | assert any("p53" in alias.lower() for alias in result.alias) 29 | 30 | @pytest.mark.asyncio 31 | async def 
test_mygene_braf(self, client): 32 | """Test real MyGene.info API with BRAF.""" 33 | result = await client.get_gene_info("BRAF") 34 | 35 | assert result is not None 36 | assert result.symbol == "BRAF" 37 | assert "proto-oncogene" in result.name.lower() 38 | assert result.type_of_gene == "protein-coding" 39 | 40 | @pytest.mark.asyncio 41 | async def test_mygene_by_entrez_id(self, client): 42 | """Test real MyGene.info API with Entrez ID.""" 43 | result = await client.get_gene_info("673") # BRAF 44 | 45 | assert result is not None 46 | assert result.symbol == "BRAF" 47 | assert result.gene_id == "673" 48 | 49 | @pytest.mark.asyncio 50 | async def test_mydisease_melanoma(self, client): 51 | """Test real MyDisease.info API with melanoma.""" 52 | result = await client.get_disease_info("melanoma") 53 | 54 | if result is None: 55 | # API might be down or melanoma might not be found directly 56 | # Try a more specific search 57 | result = await client.get_disease_info( 58 | "MONDO:0005105" 59 | ) # MONDO ID for melanoma 60 | 61 | assert result is not None, "Disease info should be returned" 62 | # The API may return subtypes of melanoma 63 | if result.name: 64 | assert "melanoma" in result.name.lower() or ( 65 | result.definition and "melanoma" in result.definition.lower() 66 | ) 67 | assert result.disease_id is not None 68 | # Synonyms might be empty for specific subtypes 69 | assert result.synonyms is not None 70 | 71 | @pytest.mark.asyncio 72 | async def test_mydisease_gist(self, client): 73 | """Test real MyDisease.info API with GIST.""" 74 | result = await client.get_disease_info("GIST") 75 | 76 | if result is None: 77 | # API might be down or GIST might not be found directly 78 | # Try the full name 79 | result = await client.get_disease_info( 80 | "gastrointestinal stromal tumor" 81 | ) 82 | 83 | assert result is not None, "Disease info should be returned" 84 | # GIST might return as a variant name 85 | if result.name: 86 | assert ( 87 | "gist" in result.name.lower() 88 | or "stromal" in result.name.lower() 89 | ) 90 | assert result.disease_id is not None 91 | # GIST should have synonyms including full name if available 92 | assert result.synonyms is not None 93 | 94 | @pytest.mark.asyncio 95 | async def test_mydisease_by_mondo_id(self, client): 96 | """Test real MyDisease.info API with MONDO ID.""" 97 | result = await client.get_disease_info("MONDO:0005105") # melanoma 98 | 99 | assert result is not None 100 | assert result.disease_id == "MONDO:0005105" 101 | # The result should have mondo data 102 | assert result.mondo is not None 103 | assert result.mondo.get("mondo") == "MONDO:0005105" 104 | # Name field might come from different sources in the API 105 | if result.name: 106 | assert "melanoma" in result.name.lower() 107 | 108 | @pytest.mark.asyncio 109 | async def test_disease_synonyms_expansion(self, client): 110 | """Test disease synonym expansion.""" 111 | synonyms = await client.get_disease_synonyms("lung cancer") 112 | 113 | assert len(synonyms) >= 1 # At least includes the original term 114 | assert "lung cancer" in [s.lower() for s in synonyms] 115 | # May or may not include formal terms depending on API results 116 | # Just check we got some results back 117 | assert synonyms is not None and len(synonyms) > 0 118 | 119 | @pytest.mark.asyncio 120 | async def test_batch_genes(self, client): 121 | """Test batch gene retrieval.""" 122 | # Test single gene retrieval as a workaround since batch requires special POST encoding 123 | # This validates the gene getter can handle multiple 
calls efficiently 124 | genes = ["TP53", "BRAF", "EGFR"] 125 | results = [] 126 | 127 | for gene in genes: 128 | result = await client.get_gene_info(gene) 129 | if result: 130 | results.append(result) 131 | 132 | assert len(results) == 3 133 | gene_symbols = [r.symbol for r in results] 134 | assert "TP53" in gene_symbols 135 | assert "BRAF" in gene_symbols 136 | assert "EGFR" in gene_symbols 137 | 138 | @pytest.mark.asyncio 139 | async def test_invalid_gene(self, client): 140 | """Test handling of invalid gene.""" 141 | result = await client.get_gene_info("INVALID_GENE_XYZ123") 142 | assert result is None 143 | 144 | @pytest.mark.asyncio 145 | async def test_invalid_disease(self, client): 146 | """Test handling of invalid disease.""" 147 | result = await client.get_disease_info("INVALID_DISEASE_XYZ123") 148 | assert result is None 149 | 150 | @pytest.mark.asyncio 151 | async def test_mychem_aspirin(self, client): 152 | """Test real MyChem.info API with aspirin.""" 153 | # Use DrugBank ID for reliable results 154 | result = await client.get_drug_info("DB00945") 155 | 156 | assert result is not None 157 | # API returns various forms - could be aspirin or acetylsalicylic acid 158 | assert result.name is not None 159 | assert result.drugbank_id == "DB00945" 160 | # Should have at least one identifier 161 | assert any([ 162 | result.drugbank_id, 163 | result.chembl_id, 164 | result.chebi_id, 165 | result.pubchem_cid, 166 | ]) 167 | 168 | @pytest.mark.asyncio 169 | async def test_mychem_imatinib(self, client): 170 | """Test real MyChem.info API with imatinib.""" 171 | # Use DrugBank ID for reliable results 172 | result = await client.get_drug_info("DB00619") 173 | 174 | assert result is not None 175 | assert result.name is not None 176 | assert "imatinib" in result.name.lower() 177 | assert result.drugbank_id == "DB00619" 178 | # Should have at least one identifier 179 | assert any([ 180 | result.drugbank_id, 181 | result.chembl_id, 182 | result.chebi_id, 183 | result.pubchem_cid, 184 | ]) 185 | 186 | @pytest.mark.asyncio 187 | async def test_mychem_by_drugbank_id(self, client): 188 | """Test real MyChem.info API with DrugBank ID.""" 189 | result = await client.get_drug_info("DB00945") # Aspirin 190 | 191 | assert result is not None 192 | assert result.drugbank_id == "DB00945" 193 | assert ( 194 | result.name is not None 195 | ) # Could be Acetylsalicylic acid or similar 196 | 197 | @pytest.mark.asyncio 198 | async def test_invalid_drug(self, client): 199 | """Test handling of invalid drug.""" 200 | result = await client.get_drug_info("INVALID_DRUG_XYZ123") 201 | assert result is None 202 | 203 | @pytest.mark.asyncio 204 | async def test_mychem_pembrolizumab(self, client): 205 | """Test real MyChem.info API with pembrolizumab.""" 206 | result = await client.get_drug_info("pembrolizumab") 207 | 208 | assert result is not None 209 | assert result.name == "Pembrolizumab" 210 | assert result.drugbank_id == "DB09037" 211 | assert result.unii == "DPT0O3T46P" 212 | assert "PD-1" in result.description 213 | assert "antibody" in result.description.lower() 214 | 215 | 216 | @pytest.mark.integration 217 | class TestGeneToolIntegration: 218 | """Test the gene getter tool with real APIs.""" 219 | 220 | @pytest.mark.asyncio 221 | async def test_gene_getter_tool(self): 222 | """Test the gene_getter tool function.""" 223 | from biomcp.genes.getter import get_gene 224 | 225 | result = await get_gene("TP53", output_json=False) 226 | 227 | assert "TP53" in result 228 | assert "tumor protein p53" in result 229 | 
assert "tumor suppressor" in result.lower() 230 | # Links might be formatted differently 231 | assert "ncbi" in result.lower() or "gene" in result.lower() 232 | 233 | @pytest.mark.asyncio 234 | async def test_gene_getter_json(self): 235 | """Test gene_getter with JSON output.""" 236 | import json 237 | 238 | from biomcp.genes.getter import get_gene 239 | 240 | result = await get_gene("BRAF", output_json=True) 241 | data = json.loads(result) 242 | 243 | assert data["symbol"] == "BRAF" 244 | assert "_links" in data 245 | assert "NCBI Gene" in data["_links"] 246 | 247 | 248 | @pytest.mark.integration 249 | class TestDiseaseToolIntegration: 250 | """Test the disease getter tool with real APIs.""" 251 | 252 | @pytest.mark.asyncio 253 | async def test_disease_getter_tool(self): 254 | """Test the disease_getter tool function.""" 255 | from biomcp.diseases.getter import get_disease 256 | 257 | result = await get_disease("melanoma", output_json=False) 258 | 259 | assert "melanoma" in result.lower() 260 | assert "MONDO:" in result 261 | # In markdown format, links are shown as "MONDO Browser:" not "_links" 262 | assert "Browser:" in result or "https://" in result 263 | 264 | @pytest.mark.asyncio 265 | async def test_disease_getter_json(self): 266 | """Test disease_getter with JSON output.""" 267 | import json 268 | 269 | from biomcp.diseases.getter import get_disease 270 | 271 | result = await get_disease("GIST", output_json=True) 272 | data = json.loads(result) 273 | 274 | # API might return error or different structure 275 | if "error" in data: 276 | pytest.skip("Disease not found in API") 277 | else: 278 | # Check for key fields 279 | assert "disease_id" in data or "id" in data or "_id" in data 280 | assert "MONDO:" in str(data) 281 | 282 | 283 | @pytest.mark.integration 284 | class TestDrugToolIntegration: 285 | """Test the drug getter tool with real APIs.""" 286 | 287 | @pytest.mark.asyncio 288 | async def test_drug_getter_tool(self): 289 | """Test the drug_getter tool function.""" 290 | from biomcp.drugs.getter import get_drug 291 | 292 | result = await get_drug("DB00945", output_json=False) # Aspirin 293 | 294 | assert "Drug:" in result 295 | assert "DrugBank ID" in result 296 | assert "DB00945" in result 297 | assert "External Links" in result 298 | 299 | @pytest.mark.asyncio 300 | async def test_drug_getter_json(self): 301 | """Test drug_getter with JSON output.""" 302 | import json 303 | 304 | from biomcp.drugs.getter import get_drug 305 | 306 | result = await get_drug("DB00619", output_json=True) # Imatinib 307 | data = json.loads(result) 308 | 309 | # Check for basic fields 310 | assert "drug_id" in data 311 | assert "drugbank_id" in data 312 | assert data["drugbank_id"] == "DB00619" 313 | assert "_links" in data 314 | # Should have at least one database link 315 | assert any( 316 | key in data["_links"] 317 | for key in ["DrugBank", "ChEMBL", "PubChem", "ChEBI"] 318 | ) 319 | ``` -------------------------------------------------------------------------------- /tests/tdd/test_domain_handlers.py: -------------------------------------------------------------------------------- ```python 1 | """Tests for domain handlers module.""" 2 | 3 | import pytest 4 | 5 | from biomcp.constants import DEFAULT_TITLE 6 | from biomcp.domain_handlers import ( 7 | ArticleHandler, 8 | TrialHandler, 9 | VariantHandler, 10 | get_domain_handler, 11 | ) 12 | 13 | 14 | class TestArticleHandler: 15 | """Test ArticleHandler class.""" 16 | 17 | def test_format_pubmed_article(self): 18 | """Test formatting a PubMed 
article.""" 19 | article = { 20 | "pmid": "12345", 21 | "title": "Test Article Title", 22 | "abstract": "This is a test abstract that is longer than 200 characters. " 23 | * 5, 24 | "pub_year": "2023", 25 | "journal": "Test Journal", 26 | "authors": ["Smith J", "Doe J", "Johnson A", "Williams B"], 27 | } 28 | 29 | result = ArticleHandler.format_result(article) 30 | 31 | assert result["id"] == "12345" 32 | assert result["title"] == "Test Article Title" 33 | assert len(result["snippet"]) == 203 # 200 + "..." 34 | assert result["snippet"].endswith("...") 35 | assert result["url"] == "https://pubmed.ncbi.nlm.nih.gov/12345/" 36 | assert result["metadata"]["year"] == "2023" 37 | assert result["metadata"]["journal"] == "Test Journal" 38 | assert len(result["metadata"]["authors"]) == 3 # Only first 3 39 | 40 | def test_format_preprint_article(self): 41 | """Test formatting a preprint article.""" 42 | preprint = { 43 | "doi": "10.1101/2023.01.01.12345", 44 | "id": "biorxiv-123", 45 | "title": "Preprint Title", 46 | "abstract": "Short abstract", 47 | "url": "https://www.biorxiv.org/content/10.1101/2023.01.01.12345", 48 | "pub_year": "2023", 49 | "source": "bioRxiv", 50 | "authors": ["Author A", "Author B"], 51 | } 52 | 53 | result = ArticleHandler.format_result(preprint) 54 | 55 | assert result["id"] == "10.1101/2023.01.01.12345" 56 | assert result["title"] == "Preprint Title" 57 | assert result["snippet"] == "Short abstract..." 58 | assert ( 59 | result["url"] 60 | == "https://www.biorxiv.org/content/10.1101/2023.01.01.12345" 61 | ) 62 | assert result["metadata"]["source"] == "bioRxiv" 63 | 64 | def test_format_article_missing_fields(self): 65 | """Test formatting article with missing fields.""" 66 | article = { 67 | "pmid": "67890", 68 | # Missing title, abstract, etc. 69 | } 70 | 71 | result = ArticleHandler.format_result(article) 72 | 73 | assert result["id"] == "67890" 74 | assert ( 75 | result["title"] == DEFAULT_TITLE 76 | ) # Should use default for missing title 77 | assert result["snippet"] == "" # Empty when no abstract 78 | assert result["url"] == "https://pubmed.ncbi.nlm.nih.gov/67890/" 79 | 80 | def test_format_article_with_date_field(self): 81 | """Test formatting article with date field instead of pub_year.""" 82 | article = { 83 | "pmid": "123", 84 | "title": "Test", 85 | "date": "2023-05-15", 86 | } 87 | 88 | result = ArticleHandler.format_result(article) 89 | 90 | assert result["metadata"]["year"] == "2023" 91 | 92 | def test_format_article_title_normalization(self): 93 | """Test that article title whitespace is normalized.""" 94 | article = { 95 | "pmid": "123", 96 | "title": " Test Article\n\nWith Extra Spaces ", 97 | } 98 | 99 | result = ArticleHandler.format_result(article) 100 | 101 | assert result["title"] == "Test Article With Extra Spaces" 102 | 103 | 104 | class TestTrialHandler: 105 | """Test TrialHandler class.""" 106 | 107 | def test_format_trial_api_v2(self): 108 | """Test formatting trial with API v2 structure.""" 109 | trial = { 110 | "protocolSection": { 111 | "identificationModule": { 112 | "nctId": "NCT12345", 113 | "briefTitle": "Brief Title", 114 | "officialTitle": "Official Title", 115 | }, 116 | "statusModule": { 117 | "overallStatus": "RECRUITING", 118 | "startDateStruct": {"date": "2023-01-01"}, 119 | "primaryCompletionDateStruct": {"date": "2024-12-31"}, 120 | }, 121 | "descriptionModule": { 122 | "briefSummary": "This is a brief summary of the trial." 
123 | }, 124 | "designModule": { 125 | "phases": ["PHASE3"], 126 | }, 127 | } 128 | } 129 | 130 | result = TrialHandler.format_result(trial) 131 | 132 | assert result["id"] == "NCT12345" 133 | assert result["title"] == "Brief Title" 134 | assert "brief summary" in result["snippet"] 135 | assert result["url"] == "https://clinicaltrials.gov/study/NCT12345" 136 | assert result["metadata"]["status"] == "RECRUITING" 137 | assert result["metadata"]["phase"] == "PHASE3" 138 | assert result["metadata"]["start_date"] == "2023-01-01" 139 | assert result["metadata"]["primary_completion_date"] == "2024-12-31" 140 | 141 | def test_format_trial_legacy_flat(self): 142 | """Test formatting trial with legacy flat structure.""" 143 | trial = { 144 | "NCT Number": "NCT67890", 145 | "Study Title": "Legacy Trial Title", 146 | "Brief Summary": "Legacy summary", 147 | "Study Status": "COMPLETED", 148 | "Phases": "Phase 2", 149 | "Start Date": "2022-01-01", 150 | "Completion Date": "2023-12-31", 151 | } 152 | 153 | result = TrialHandler.format_result(trial) 154 | 155 | assert result["id"] == "NCT67890" 156 | assert result["title"] == "Legacy Trial Title" 157 | assert result["snippet"].startswith("Legacy summary") 158 | assert result["url"] == "https://clinicaltrials.gov/study/NCT67890" 159 | assert result["metadata"]["status"] == "COMPLETED" 160 | assert result["metadata"]["phase"] == "Phase 2" 161 | 162 | def test_format_trial_legacy_simple(self): 163 | """Test formatting trial with legacy simple structure.""" 164 | trial = { 165 | "nct_id": "NCT11111", 166 | "brief_title": "Simple Trial", 167 | "overall_status": "ACTIVE", 168 | "phase": "PHASE1", 169 | } 170 | 171 | result = TrialHandler.format_result(trial) 172 | 173 | assert result["id"] == "NCT11111" 174 | assert result["title"] == "Simple Trial" 175 | assert result["metadata"]["status"] == "ACTIVE" 176 | assert result["metadata"]["phase"] == "PHASE1" 177 | 178 | def test_format_trial_missing_title(self): 179 | """Test formatting trial with missing brief title.""" 180 | trial = { 181 | "protocolSection": { 182 | "identificationModule": { 183 | "nctId": "NCT99999", 184 | "officialTitle": "Only Official Title", 185 | }, 186 | } 187 | } 188 | 189 | result = TrialHandler.format_result(trial) 190 | 191 | assert result["id"] == "NCT99999" 192 | assert result["title"] == "Only Official Title" 193 | 194 | def test_format_trial_empty_phases(self): 195 | """Test formatting trial with empty phases array.""" 196 | trial = { 197 | "protocolSection": { 198 | "identificationModule": {"nctId": "NCT123"}, 199 | "designModule": {"phases": []}, 200 | } 201 | } 202 | 203 | result = TrialHandler.format_result(trial) 204 | 205 | assert result["metadata"]["phase"] == "" 206 | 207 | 208 | class TestVariantHandler: 209 | """Test VariantHandler class.""" 210 | 211 | def test_format_variant_complete(self): 212 | """Test formatting variant with complete data.""" 213 | variant = { 214 | "_id": "chr7:g.140453136A>T", 215 | "dbnsfp": { 216 | "genename": "BRAF", 217 | "hgvsp": ["BRAF:p.V600E"], 218 | }, 219 | "dbsnp": { 220 | "rsid": "rs121913529", 221 | "gene": {"symbol": "BRAF"}, 222 | }, 223 | "clinvar": { 224 | "rcv": { 225 | "clinical_significance": "Pathogenic", 226 | } 227 | }, 228 | "cadd": { 229 | "consequence": "missense_variant", 230 | }, 231 | } 232 | 233 | result = VariantHandler.format_result(variant) 234 | 235 | assert result["id"] == "chr7:g.140453136A>T" 236 | assert result["title"] == "BRAF BRAF:p.V600E" 237 | assert "Pathogenic" in result["snippet"] 238 | assert 
"rs121913529" in result["url"] 239 | assert result["metadata"]["gene"] == "BRAF" 240 | assert result["metadata"]["rsid"] == "rs121913529" 241 | assert result["metadata"]["clinical_significance"] == "Pathogenic" 242 | assert result["metadata"]["consequence"] == "missense_variant" 243 | 244 | def test_format_variant_gene_list(self): 245 | """Test formatting variant when gene is a list.""" 246 | variant = { 247 | "_id": "rs123", 248 | "dbnsfp": {"genename": ["GENE1", "GENE2"]}, 249 | } 250 | 251 | result = VariantHandler.format_result(variant) 252 | 253 | assert result["metadata"]["gene"] == "GENE1" 254 | 255 | def test_format_variant_clinvar_list(self): 256 | """Test formatting variant when clinvar RCV is a list.""" 257 | variant = { 258 | "_id": "rs456", 259 | "clinvar": { 260 | "rcv": [ 261 | {"clinical_significance": "Pathogenic"}, 262 | {"clinical_significance": "Likely pathogenic"}, 263 | ] 264 | }, 265 | } 266 | 267 | result = VariantHandler.format_result(variant) 268 | 269 | assert result["metadata"]["clinical_significance"] == "Pathogenic" 270 | 271 | def test_format_variant_minimal(self): 272 | """Test formatting variant with minimal data.""" 273 | variant = { 274 | "_id": "chr1:g.12345A>G", 275 | } 276 | 277 | result = VariantHandler.format_result(variant) 278 | 279 | assert result["id"] == "chr1:g.12345A>G" 280 | assert result["title"] == "chr1:g.12345A>G" 281 | assert "Unknown" in result["snippet"] 282 | assert result["url"] == "" 283 | 284 | def test_format_variant_hgvsp_list(self): 285 | """Test formatting variant when HGVS protein is a list.""" 286 | variant = { 287 | "_id": "rs789", 288 | "dbnsfp": { 289 | "genename": "TP53", 290 | "hgvsp": ["TP53:p.R175H", "TP53:p.R175C"], 291 | }, 292 | } 293 | 294 | result = VariantHandler.format_result(variant) 295 | 296 | assert result["title"] == "TP53 TP53:p.R175H" 297 | 298 | def test_format_variant_no_rsid_url(self): 299 | """Test variant URL generation without rsID.""" 300 | variant = { 301 | "_id": "chr2:g.234567C>T", 302 | } 303 | 304 | result = VariantHandler.format_result(variant) 305 | 306 | assert result["url"] == "" 307 | 308 | 309 | class TestGetDomainHandler: 310 | """Test get_domain_handler function.""" 311 | 312 | def test_get_article_handler(self): 313 | """Test getting article handler.""" 314 | handler = get_domain_handler("article") 315 | assert handler == ArticleHandler 316 | 317 | def test_get_trial_handler(self): 318 | """Test getting trial handler.""" 319 | handler = get_domain_handler("trial") 320 | assert handler == TrialHandler 321 | 322 | def test_get_variant_handler(self): 323 | """Test getting variant handler.""" 324 | handler = get_domain_handler("variant") 325 | assert handler == VariantHandler 326 | 327 | def test_get_invalid_handler(self): 328 | """Test getting handler for invalid domain.""" 329 | with pytest.raises(ValueError) as exc_info: 330 | get_domain_handler("invalid") 331 | 332 | assert "Unknown domain: invalid" in str(exc_info.value) 333 | 334 | def test_get_handler_case_sensitive(self): 335 | """Test that domain names are case sensitive.""" 336 | # Should work with lowercase 337 | handler = get_domain_handler("article") 338 | assert handler == ArticleHandler 339 | 340 | # Should fail with uppercase 341 | with pytest.raises(ValueError): 342 | get_domain_handler("ARTICLE") 343 | ``` -------------------------------------------------------------------------------- /src/biomcp/cli/health.py: -------------------------------------------------------------------------------- ```python 1 | """Health check 
command for BioMCP CLI. 2 | 3 | This module provides a command to check the health of API endpoints and system resources. 4 | """ 5 | 6 | import asyncio 7 | import platform 8 | import socket 9 | from typing import Any 10 | 11 | import typer 12 | from rich.console import Console 13 | from rich.panel import Panel 14 | from rich.table import Table 15 | 16 | from .. import http_client 17 | from ..constants import ( 18 | CLINICAL_TRIALS_BASE_URL, 19 | MYVARIANT_BASE_URL, 20 | PUBTATOR3_BASE_URL, 21 | ) 22 | 23 | # Try to import psutil, but handle case where it's not installed 24 | try: 25 | import psutil 26 | 27 | PSUTIL_AVAILABLE = True 28 | except ImportError: 29 | PSUTIL_AVAILABLE = False 30 | 31 | health_app = typer.Typer(help="Health check operations") 32 | console = Console() 33 | 34 | 35 | async def check_api_endpoint( 36 | url: str, 37 | name: str, 38 | params: dict[Any, Any] | None = None, 39 | method: str = "GET", 40 | ) -> dict: 41 | """Check if an API endpoint is accessible and responding.""" 42 | try: 43 | status, content = await http_client.call_http( 44 | method, url, params or {} 45 | ) 46 | return { 47 | "name": name, 48 | "url": url, 49 | "status": status, 50 | "accessible": status == 200, 51 | "message": "OK" if status == 200 else f"Error: HTTP {status}", 52 | "content": content[:500] 53 | if len(content) > 500 54 | else content, # Truncate long responses 55 | } 56 | except Exception as e: 57 | return { 58 | "name": name, 59 | "url": url, 60 | "status": 0, 61 | "accessible": False, 62 | "message": f"Error: {e!s}", 63 | "content": str(e), 64 | } 65 | 66 | 67 | async def check_all_api_endpoints() -> list[dict]: 68 | """Check all known API endpoints.""" 69 | endpoints: list[dict[str, Any]] = [ 70 | # PubTator3 API endpoints 71 | { 72 | "url": f"{PUBTATOR3_BASE_URL}/entity/autocomplete/", 73 | "name": "PubTator3 Autocomplete", 74 | "params": {"query": "BRAF", "concept": "gene", "limit": 2}, 75 | }, 76 | { 77 | "url": f"{PUBTATOR3_BASE_URL}/publications/export/biocjson", 78 | "name": "PubTator3 Publications", 79 | "params": {"pmids": "29355051", "full": "false"}, 80 | }, 81 | { 82 | "url": f"{PUBTATOR3_BASE_URL}/search/", 83 | "name": "PubTator3 Search", 84 | "params": { 85 | "query": "BRAF", 86 | "concepts": "gene", 87 | "page": 1, 88 | "size": 1, 89 | "text": "@CHEMICAL_remdesivir", 90 | }, 91 | }, 92 | # ClinicalTrials.gov API endpoints 93 | { 94 | "url": f"{CLINICAL_TRIALS_BASE_URL}", 95 | "name": "ClinicalTrials.gov Search API", 96 | "params": {"query.term": "cancer", "pageSize": "1"}, 97 | }, 98 | { 99 | "url": f"{CLINICAL_TRIALS_BASE_URL}/NCT04280705", 100 | "name": "ClinicalTrials.gov Study API", 101 | "params": {"fields": "IdentificationModule,StatusModule"}, 102 | }, 103 | # MyVariant.info API endpoints 104 | { 105 | "url": f"{MYVARIANT_BASE_URL}/query", 106 | "name": "MyVariant.info Query API", 107 | "params": {"q": "rs113488022", "size": 1}, 108 | }, 109 | { 110 | "url": f"{MYVARIANT_BASE_URL}/variant/rs113488022", 111 | "name": "MyVariant.info Variant API", 112 | "params": {"fields": "all"}, 113 | }, 114 | ] 115 | 116 | tasks = [] 117 | for endpoint in endpoints: 118 | url = endpoint["url"] 119 | name = endpoint["name"] 120 | params = endpoint.get("params") 121 | tasks.append(check_api_endpoint(url, name, params)) 122 | 123 | return await asyncio.gather(*tasks) 124 | 125 | 126 | def check_network_connectivity() -> dict: 127 | """Check basic network connectivity.""" 128 | try: 129 | # Try to connect to Google's DNS to check internet connectivity 130 | 
socket.create_connection(("8.8.8.8", 53), timeout=3) 131 | return { 132 | "status": "Connected", 133 | "message": "Internet connection is available", 134 | } 135 | except OSError: 136 | return { 137 | "status": "Disconnected", 138 | "message": "No internet connection detected", 139 | } 140 | 141 | 142 | def check_system_resources() -> dict: 143 | """Check system resources like CPU, memory, and disk space.""" 144 | if not PSUTIL_AVAILABLE: 145 | return { 146 | "error": "psutil package not installed. Install with: pip install psutil" 147 | } 148 | 149 | return { 150 | "cpu_usage": psutil.cpu_percent(interval=1), 151 | "memory": { 152 | "total": psutil.virtual_memory().total / (1024**3), # GB 153 | "available": psutil.virtual_memory().available / (1024**3), # GB 154 | "percent_used": psutil.virtual_memory().percent, 155 | }, 156 | "disk": { 157 | "total": psutil.disk_usage("/").total / (1024**3), # GB 158 | "free": psutil.disk_usage("/").free / (1024**3), # GB 159 | "percent_used": psutil.disk_usage("/").percent, 160 | }, 161 | } 162 | 163 | 164 | def check_python_environment() -> dict: 165 | """Check Python environment and installed packages.""" 166 | env_info = { 167 | "python_version": platform.python_version(), 168 | "platform": platform.platform(), 169 | "system": platform.system(), 170 | } 171 | 172 | # Check for httpx version without importing it 173 | try: 174 | import importlib.metadata 175 | 176 | env_info["httpx_version"] = importlib.metadata.version("httpx") 177 | except (ImportError, importlib.metadata.PackageNotFoundError): 178 | env_info["httpx_version"] = "Unknown" 179 | 180 | if PSUTIL_AVAILABLE: 181 | env_info["psutil_version"] = psutil.__version__ 182 | else: 183 | env_info["psutil_version"] = "Not installed" 184 | 185 | return env_info 186 | 187 | 188 | def display_api_health(results: list[dict], verbose: bool = False) -> None: 189 | """Display API health check results in a table.""" 190 | table = Table(title="API Endpoints Health") 191 | table.add_column("Endpoint", style="cyan") 192 | table.add_column("URL", style="blue") 193 | table.add_column("Status", style="magenta") 194 | table.add_column("Message", style="green") 195 | 196 | for result in results: 197 | "green" if result["accessible"] else "red" 198 | table.add_row( 199 | result["name"], 200 | result["url"], 201 | f"{result['status']}", 202 | result["message"], 203 | style=None if result["accessible"] else "red", 204 | ) 205 | 206 | console.print(table) 207 | 208 | # Display detailed response content if verbose mode is enabled 209 | if verbose: 210 | for result in results: 211 | if not result["accessible"]: 212 | console.print( 213 | f"\n[bold red]Detailed error for {result['name']}:[/bold red]" 214 | ) 215 | console.print( 216 | Panel( 217 | result["content"], 218 | title=f"{result['name']} Response", 219 | border_style="red", 220 | ) 221 | ) 222 | 223 | 224 | def display_system_health( 225 | system_info: dict, network_info: dict, env_info: dict 226 | ) -> None: 227 | """Display system health information in a table.""" 228 | # System resources table 229 | resource_table = Table(title="System Resources") 230 | resource_table.add_column("Resource", style="cyan") 231 | resource_table.add_column("Value", style="green") 232 | 233 | if "error" in system_info: 234 | resource_table.add_row("Error", system_info["error"], style="red") 235 | else: 236 | resource_table.add_row("CPU Usage", f"{system_info['cpu_usage']}%") 237 | resource_table.add_row( 238 | "Memory Total", f"{system_info['memory']['total']:.2f} GB" 239 | ) 240 
| resource_table.add_row( 241 | "Memory Available", f"{system_info['memory']['available']:.2f} GB" 242 | ) 243 | resource_table.add_row( 244 | "Memory Usage", 245 | f"{system_info['memory']['percent_used']}%", 246 | style="green" 247 | if system_info["memory"]["percent_used"] < 90 248 | else "red", 249 | ) 250 | resource_table.add_row( 251 | "Disk Total", f"{system_info['disk']['total']:.2f} GB" 252 | ) 253 | resource_table.add_row( 254 | "Disk Free", f"{system_info['disk']['free']:.2f} GB" 255 | ) 256 | resource_table.add_row( 257 | "Disk Usage", 258 | f"{system_info['disk']['percent_used']}%", 259 | style="green" 260 | if system_info["disk"]["percent_used"] < 90 261 | else "red", 262 | ) 263 | 264 | console.print(resource_table) 265 | 266 | # Network and environment table 267 | env_table = Table(title="Network & Environment") 268 | env_table.add_column("Component", style="cyan") 269 | env_table.add_column("Status/Version", style="green") 270 | 271 | env_table.add_row( 272 | "Network", 273 | network_info["status"], 274 | style=None if network_info["status"] == "Connected" else "red", 275 | ) 276 | env_table.add_row("Python Version", env_info["python_version"]) 277 | env_table.add_row("Platform", env_info["platform"]) 278 | env_table.add_row("System", env_info["system"]) 279 | env_table.add_row("HTTPX Version", env_info["httpx_version"]) 280 | env_table.add_row( 281 | "Psutil Version", 282 | env_info["psutil_version"], 283 | style="red" if env_info["psutil_version"] == "Not installed" else None, 284 | ) 285 | 286 | console.print(env_table) 287 | 288 | 289 | @health_app.callback(invoke_without_command=True) 290 | def health_callback(ctx: typer.Context): 291 | """Health check callback.""" 292 | if ctx.invoked_subcommand is None: 293 | # If no subcommand is provided, run the default health check 294 | check() 295 | 296 | 297 | @health_app.command() 298 | def check( 299 | api_only: bool = typer.Option( 300 | False, "--api-only", help="Check only API endpoints" 301 | ), 302 | system_only: bool = typer.Option( 303 | False, "--system-only", help="Check only system health" 304 | ), 305 | verbose: bool = typer.Option( 306 | False, 307 | "--verbose", 308 | "-v", 309 | help="Show detailed error information and API responses", 310 | ), 311 | ): 312 | """ 313 | Run a comprehensive health check on API endpoints and system resources. 314 | 315 | This command checks: 316 | - API endpoints connectivity and response 317 | - Network connectivity 318 | - System resources (CPU, memory, disk) 319 | - Python environment 320 | 321 | Note: For full system resource checks, the 'psutil' package is required. 
322 | Install with: pip install psutil 323 | """ 324 | with console.status("[bold green]Running health checks...") as status: 325 | # Check API endpoints 326 | if not system_only: 327 | status.update("[bold green]Checking API endpoints...") 328 | api_results = asyncio.run(check_all_api_endpoints()) 329 | display_api_health(api_results, verbose) 330 | 331 | # Check system health 332 | if not api_only: 333 | status.update("[bold green]Checking system resources...") 334 | system_info = check_system_resources() 335 | network_info = check_network_connectivity() 336 | env_info = check_python_environment() 337 | display_system_health(system_info, network_info, env_info) 338 | 339 | # Overall status 340 | if not api_only and not system_only: 341 | api_health = all(result["accessible"] for result in api_results) 342 | 343 | if "error" in system_info: 344 | system_health = False 345 | else: 346 | system_health = ( 347 | network_info["status"] == "Connected" 348 | and system_info["memory"]["percent_used"] < 90 349 | and system_info["disk"]["percent_used"] < 90 350 | ) 351 | 352 | if api_health and system_health: 353 | console.print( 354 | "\n[bold green]✓ All systems operational![/bold green]" 355 | ) 356 | else: 357 | console.print( 358 | "\n[bold red]⚠ Some health checks failed. See details above.[/bold red]" 359 | ) 360 | if verbose: 361 | console.print( 362 | "[yellow]Run with --verbose flag to see detailed error information[/yellow]" 363 | ) 364 | ``` -------------------------------------------------------------------------------- /src/biomcp/metrics.py: -------------------------------------------------------------------------------- ```python 1 | """Performance monitoring and metrics collection for BioMCP.""" 2 | 3 | import asyncio 4 | import functools 5 | import logging 6 | import os 7 | import time 8 | from collections import defaultdict 9 | from dataclasses import dataclass, field 10 | from datetime import datetime 11 | 12 | from .constants import ( 13 | MAX_METRIC_SAMPLES, 14 | METRIC_PERCENTILE_50, 15 | METRIC_PERCENTILE_95, 16 | METRIC_PERCENTILE_99, 17 | ) 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | # Check if metrics are enabled via environment variable 22 | METRICS_ENABLED = ( 23 | os.getenv("BIOMCP_METRICS_ENABLED", "false").lower() == "true" 24 | ) 25 | 26 | 27 | @dataclass 28 | class MetricSample: 29 | """Single metric measurement.""" 30 | 31 | timestamp: datetime 32 | duration: float 33 | success: bool 34 | error: str | None = None 35 | tags: dict[str, str] = field(default_factory=dict) 36 | 37 | 38 | @dataclass 39 | class MetricSummary: 40 | """Summary statistics for a metric.""" 41 | 42 | name: str 43 | count: int 44 | success_count: int 45 | error_count: int 46 | total_duration: float 47 | min_duration: float 48 | max_duration: float 49 | avg_duration: float 50 | p50_duration: float 51 | p95_duration: float 52 | p99_duration: float 53 | error_rate: float 54 | 55 | @classmethod 56 | def from_samples( 57 | cls, name: str, samples: list[MetricSample] 58 | ) -> "MetricSummary": 59 | """Calculate summary statistics from samples.""" 60 | if not samples: 61 | return cls( 62 | name=name, 63 | count=0, 64 | success_count=0, 65 | error_count=0, 66 | total_duration=0.0, 67 | min_duration=0.0, 68 | max_duration=0.0, 69 | avg_duration=0.0, 70 | p50_duration=0.0, 71 | p95_duration=0.0, 72 | p99_duration=0.0, 73 | error_rate=0.0, 74 | ) 75 | 76 | durations = sorted([s.duration for s in samples]) 77 | success_count = sum(1 for s in samples if s.success) 78 | error_count = 
len(samples) - success_count 79 | 80 | def percentile(data: list[float], p: float) -> float: 81 | """Calculate percentile.""" 82 | if not data: 83 | return 0.0 84 | k = (len(data) - 1) * p 85 | f = int(k) 86 | c = k - f 87 | if f >= len(data) - 1: 88 | return data[-1] 89 | return data[f] + c * (data[f + 1] - data[f]) 90 | 91 | return cls( 92 | name=name, 93 | count=len(samples), 94 | success_count=success_count, 95 | error_count=error_count, 96 | total_duration=sum(durations), 97 | min_duration=min(durations), 98 | max_duration=max(durations), 99 | avg_duration=sum(durations) / len(durations), 100 | p50_duration=percentile(durations, METRIC_PERCENTILE_50), 101 | p95_duration=percentile(durations, METRIC_PERCENTILE_95), 102 | p99_duration=percentile(durations, METRIC_PERCENTILE_99), 103 | error_rate=error_count / len(samples) if samples else 0.0, 104 | ) 105 | 106 | 107 | class MetricsCollector: 108 | """Collects and manages performance metrics.""" 109 | 110 | def __init__(self, max_samples_per_metric: int = MAX_METRIC_SAMPLES): 111 | """Initialize metrics collector. 112 | 113 | Args: 114 | max_samples_per_metric: Maximum samples to keep per metric 115 | """ 116 | self._metrics: dict[str, list[MetricSample]] = defaultdict(list) 117 | self._max_samples = max_samples_per_metric 118 | self._lock = asyncio.Lock() 119 | 120 | async def record( 121 | self, 122 | name: str, 123 | duration: float, 124 | success: bool = True, 125 | error: str | None = None, 126 | tags: dict[str, str] | None = None, 127 | ) -> None: 128 | """Record a metric sample. 129 | 130 | Args: 131 | name: Metric name 132 | duration: Duration in seconds 133 | success: Whether operation succeeded 134 | error: Error message if failed 135 | tags: Additional metadata tags 136 | """ 137 | sample = MetricSample( 138 | timestamp=datetime.now(), 139 | duration=duration, 140 | success=success, 141 | error=error, 142 | tags=tags or {}, 143 | ) 144 | 145 | async with self._lock: 146 | samples = self._metrics[name] 147 | samples.append(sample) 148 | 149 | # Keep only the most recent samples 150 | if len(samples) > self._max_samples: 151 | self._metrics[name] = samples[-self._max_samples :] 152 | 153 | async def get_summary(self, name: str) -> MetricSummary | None: 154 | """Get summary statistics for a metric. 155 | 156 | Args: 157 | name: Metric name 158 | 159 | Returns: 160 | Summary statistics or None if metric not found 161 | """ 162 | async with self._lock: 163 | samples = self._metrics.get(name, []) 164 | if not samples: 165 | return None 166 | return MetricSummary.from_samples(name, samples) 167 | 168 | async def get_all_summaries(self) -> dict[str, MetricSummary]: 169 | """Get summaries for all metrics. 170 | 171 | Returns: 172 | Dictionary of metric name to summary 173 | """ 174 | async with self._lock: 175 | return { 176 | name: MetricSummary.from_samples(name, samples) 177 | for name, samples in self._metrics.items() 178 | } 179 | 180 | async def clear(self, name: str | None = None) -> None: 181 | """Clear metrics. 
182 | 183 | Args: 184 | name: Specific metric to clear, or None to clear all 185 | """ 186 | async with self._lock: 187 | if name: 188 | self._metrics.pop(name, None) 189 | else: 190 | self._metrics.clear() 191 | 192 | 193 | # Global metrics collector instance 194 | _metrics_collector = MetricsCollector() 195 | 196 | 197 | async def record_metric( 198 | name: str, 199 | duration: float, 200 | success: bool = True, 201 | error: str | None = None, 202 | tags: dict[str, str] | None = None, 203 | ) -> None: 204 | """Record a metric to the global collector. 205 | 206 | Note: This is a no-op if BIOMCP_METRICS_ENABLED is not set to true. 207 | 208 | Args: 209 | name: Metric name 210 | duration: Duration in seconds 211 | success: Whether operation succeeded 212 | error: Error message if failed 213 | tags: Additional metadata tags 214 | """ 215 | if METRICS_ENABLED: 216 | await _metrics_collector.record(name, duration, success, error, tags) 217 | 218 | 219 | async def get_metric_summary(name: str) -> MetricSummary | None: 220 | """Get summary statistics for a metric. 221 | 222 | Args: 223 | name: Metric name 224 | 225 | Returns: 226 | Summary statistics or None if metric not found 227 | """ 228 | return await _metrics_collector.get_summary(name) 229 | 230 | 231 | async def get_all_metrics() -> dict[str, MetricSummary]: 232 | """Get summaries for all metrics. 233 | 234 | Returns: 235 | Dictionary of metric name to summary 236 | """ 237 | return await _metrics_collector.get_all_summaries() 238 | 239 | 240 | def track_performance(metric_name: str | None = None): 241 | """Decorator to track function performance. 242 | 243 | Args: 244 | metric_name: Custom metric name (defaults to function name) 245 | 246 | Returns: 247 | Decorated function 248 | """ 249 | 250 | def decorator(func): 251 | name = metric_name or f"{func.__module__}.{func.__name__}" 252 | 253 | @functools.wraps(func) 254 | async def async_wrapper(*args, **kwargs): 255 | start_time = time.perf_counter() 256 | success = True 257 | error_msg = None 258 | 259 | try: 260 | result = await func(*args, **kwargs) 261 | return result 262 | except Exception as exc: 263 | success = False 264 | error_msg = str(exc) 265 | raise 266 | finally: 267 | duration = time.perf_counter() - start_time 268 | await record_metric( 269 | name=name, 270 | duration=duration, 271 | success=success, 272 | error=error_msg, 273 | ) 274 | 275 | @functools.wraps(func) 276 | def sync_wrapper(*args, **kwargs): 277 | start_time = time.perf_counter() 278 | success = True 279 | error_msg = None 280 | 281 | try: 282 | result = func(*args, **kwargs) 283 | return result 284 | except Exception as exc: 285 | success = False 286 | error_msg = str(exc) 287 | raise 288 | finally: 289 | duration = time.perf_counter() - start_time 290 | # Schedule metric recording in the event loop 291 | try: 292 | loop = asyncio.get_running_loop() 293 | # Fire and forget the metric recording 294 | task = loop.create_task( 295 | record_metric( 296 | name=name, 297 | duration=duration, 298 | success=success, 299 | error=error_msg, 300 | ) 301 | ) 302 | # Add error handler to prevent unhandled exceptions 303 | task.add_done_callback( 304 | lambda t: t.exception() if t.done() else None 305 | ) 306 | except RuntimeError: 307 | # No event loop running, log instead 308 | logger.debug( 309 | f"Metric {name}: duration={duration:.3f}s, " 310 | f"success={success}, error={error_msg}" 311 | ) 312 | 313 | # Return appropriate wrapper based on function type 314 | if asyncio.iscoroutinefunction(func): 315 | return 
async_wrapper 316 | else: 317 | return sync_wrapper 318 | 319 | return decorator 320 | 321 | 322 | # Context manager for timing operations 323 | class Timer: 324 | """Context manager for timing operations.""" 325 | 326 | def __init__(self, metric_name: str, tags: dict[str, str] | None = None): 327 | """Initialize timer. 328 | 329 | Args: 330 | metric_name: Name for the metric 331 | tags: Additional metadata tags 332 | """ 333 | self.metric_name = metric_name 334 | self.tags = tags or {} 335 | self.start_time: float | None = None 336 | 337 | def __enter__(self): 338 | """Start timing.""" 339 | self.start_time = time.perf_counter() 340 | return self 341 | 342 | def __exit__(self, exc_type, exc_val, exc_tb): 343 | """Stop timing and record metric.""" 344 | if self.start_time is None or not METRICS_ENABLED: 345 | return False 346 | 347 | duration = time.perf_counter() - self.start_time 348 | success = exc_type is None 349 | error_msg = str(exc_val) if exc_val else None 350 | 351 | # Schedule metric recording 352 | try: 353 | loop = asyncio.get_running_loop() 354 | # Fire and forget the metric recording 355 | task = loop.create_task( 356 | record_metric( 357 | name=self.metric_name, 358 | duration=duration, 359 | success=success, 360 | error=error_msg, 361 | tags=self.tags, 362 | ) 363 | ) 364 | # Add error handler to prevent unhandled exceptions 365 | task.add_done_callback( 366 | lambda t: t.exception() if t.done() else None 367 | ) 368 | except RuntimeError: 369 | # No event loop running, log instead 370 | logger.debug( 371 | f"Metric {self.metric_name}: duration={duration:.3f}s, " 372 | f"success={success}, error={error_msg}, tags={self.tags}" 373 | ) 374 | 375 | # Don't suppress exceptions 376 | return False 377 | 378 | async def __aenter__(self): 379 | """Async enter.""" 380 | self.start_time = time.perf_counter() 381 | return self 382 | 383 | async def __aexit__(self, exc_type, exc_val, exc_tb): 384 | """Async exit.""" 385 | if self.start_time is None or not METRICS_ENABLED: 386 | return False 387 | 388 | duration = time.perf_counter() - self.start_time 389 | success = exc_type is None 390 | error_msg = str(exc_val) if exc_val else None 391 | 392 | await record_metric( 393 | name=self.metric_name, 394 | duration=duration, 395 | success=success, 396 | error=error_msg, 397 | tags=self.tags, 398 | ) 399 | 400 | # Don't suppress exceptions 401 | return False 402 | ``` -------------------------------------------------------------------------------- /src/biomcp/openfda/device_events_helpers.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Helper functions for OpenFDA device events to reduce complexity. 
3 | """ 4 | 5 | from collections import Counter 6 | from typing import Any 7 | 8 | from .utils import clean_text, truncate_text 9 | 10 | 11 | def analyze_device_problems( 12 | results: list[dict[str, Any]], 13 | ) -> tuple[list, list, list]: 14 | """Analyze problems, devices, and manufacturers from results.""" 15 | all_problems = [] 16 | all_device_names = [] 17 | all_manufacturers = [] 18 | 19 | for result in results: 20 | devices = result.get("device", []) 21 | for dev in devices: 22 | # Collect device names 23 | if "brand_name" in dev: 24 | all_device_names.append(dev["brand_name"]) 25 | elif "generic_name" in dev: 26 | all_device_names.append(dev["generic_name"]) 27 | 28 | # Collect manufacturers 29 | if "manufacturer_d_name" in dev: 30 | all_manufacturers.append(dev["manufacturer_d_name"]) 31 | 32 | # Collect problems 33 | if "device_problem_text" in dev: 34 | problems = dev["device_problem_text"] 35 | if isinstance(problems, str): 36 | all_problems.append(problems) 37 | elif isinstance(problems, list): 38 | all_problems.extend(problems) 39 | 40 | return all_problems, all_device_names, all_manufacturers 41 | 42 | 43 | def format_top_problems(all_problems: list, results: list) -> list[str]: 44 | """Format top reported device problems.""" 45 | output = [] 46 | 47 | if len(results) > 1 and all_problems: 48 | problem_counts = Counter(all_problems) 49 | top_problems = problem_counts.most_common(5) 50 | 51 | output.append("### Top Reported Problems:") 52 | for prob, count in top_problems: 53 | percentage = (count / len(results)) * 100 54 | output.append(f"- **{prob}**: {count} reports ({percentage:.1f}%)") 55 | output.append("") 56 | 57 | return output 58 | 59 | 60 | def format_device_distribution( 61 | all_device_names: list, results: list 62 | ) -> list[str]: 63 | """Format device distribution for problem searches.""" 64 | output = [] 65 | 66 | if len(results) > 1 and all_device_names: 67 | device_counts = Counter(all_device_names) 68 | top_devices = device_counts.most_common(5) 69 | 70 | output.append("### Devices with This Problem:") 71 | for dev_name, count in top_devices: 72 | output.append(f"- **{dev_name}**: {count} reports") 73 | output.append("") 74 | 75 | return output 76 | 77 | 78 | def format_device_report_summary( 79 | result: dict[str, Any], report_num: int 80 | ) -> list[str]: 81 | """Format a single device event report summary.""" 82 | output = [f"#### Report {report_num}"] 83 | 84 | # Event type 85 | event_type_map = { 86 | "D": "Death", 87 | "IN": "Injury", 88 | "IL": "Illness", 89 | "M": "Malfunction", 90 | "O": "Other", 91 | } 92 | event_type_code = result.get("event_type") or "Unknown" 93 | event_type = event_type_map.get(event_type_code, "Unknown") 94 | output.append(f"**Event Type**: {event_type}") 95 | 96 | # Date 97 | if date_received := result.get("date_received"): 98 | output.append(f"**Date Received**: {date_received}") 99 | 100 | # Device information 101 | devices = result.get("device", []) 102 | for j, dev in enumerate(devices, 1): 103 | output.extend(_format_device_info(dev, j, len(devices))) 104 | 105 | # Event description 106 | if event_desc := result.get("event_description"): 107 | output.append("\n**Event Description**:") 108 | cleaned_desc = clean_text(event_desc) 109 | output.append(truncate_text(cleaned_desc, 500)) 110 | 111 | # Patient impact 112 | output.extend(_format_patient_impact(result.get("patient", []))) 113 | 114 | # MDR report number 115 | if mdr_key := result.get("mdr_report_key"): 116 | output.append(f"\n*MDR Report #: {mdr_key}*") 117 | 
118 | output.append("") 119 | return output 120 | 121 | 122 | def _format_device_info( 123 | dev: dict, device_num: int, total_devices: int 124 | ) -> list[str]: 125 | """Format individual device information.""" 126 | output = [] 127 | 128 | if total_devices > 1: 129 | output.append(f"\n**Device {device_num}:**") 130 | 131 | # Basic device info 132 | output.extend(_format_device_basic_info(dev)) 133 | 134 | # Problem 135 | if "device_problem_text" in dev: 136 | problems = dev["device_problem_text"] 137 | if isinstance(problems, str): 138 | problems = [problems] 139 | if problems: 140 | output.append(f"- **Problem**: {', '.join(problems[:3])}") 141 | 142 | # OpenFDA info 143 | output.extend(_format_device_class_info(dev.get("openfda", {}))) 144 | 145 | return output 146 | 147 | 148 | def _format_device_basic_info(dev: dict) -> list[str]: 149 | """Format basic device information.""" 150 | output = [] 151 | 152 | # Device name 153 | dev_name = dev.get("brand_name") or dev.get("generic_name") or "Unknown" 154 | output.append(f"- **Device**: {dev_name}") 155 | 156 | # Manufacturer 157 | if "manufacturer_d_name" in dev: 158 | output.append(f"- **Manufacturer**: {dev['manufacturer_d_name']}") 159 | 160 | # Model/Catalog 161 | if "model_number" in dev: 162 | output.append(f"- **Model**: {dev['model_number']}") 163 | if "catalog_number" in dev: 164 | output.append(f"- **Catalog #**: {dev['catalog_number']}") 165 | 166 | return output 167 | 168 | 169 | def _format_device_class_info(openfda: dict) -> list[str]: 170 | """Format device class and specialty information.""" 171 | output = [] 172 | 173 | if "device_class" in openfda: 174 | dev_class = openfda["device_class"] 175 | class_map = {"1": "Class I", "2": "Class II", "3": "Class III"} 176 | output.append( 177 | f"- **FDA Class**: {class_map.get(dev_class, dev_class)}" 178 | ) 179 | 180 | if "medical_specialty_description" in openfda: 181 | specialties = openfda["medical_specialty_description"] 182 | if specialties: 183 | output.append(f"- **Medical Specialty**: {specialties[0]}") 184 | 185 | return output 186 | 187 | 188 | def _format_patient_impact(patient_list: list) -> list[str]: 189 | """Format patient impact information.""" 190 | output = [] 191 | 192 | if patient_list: 193 | patient_info = patient_list[0] 194 | outcomes = [] 195 | 196 | if patient_info.get("date_of_death"): 197 | outcomes.append("Death") 198 | if patient_info.get("life_threatening") == "Y": 199 | outcomes.append("Life-threatening") 200 | if patient_info.get("disability") == "Y": 201 | outcomes.append("Disability") 202 | 203 | if outcomes: 204 | output.append(f"\n**Patient Impact**: {', '.join(outcomes)}") 205 | 206 | return output 207 | 208 | 209 | def format_device_detail_header( 210 | result: dict[str, Any], mdr_report_key: str 211 | ) -> list[str]: 212 | """Format device event detail header.""" 213 | output = [f"## Device Event Report: {mdr_report_key}\n"] 214 | output.append("### Event Overview") 215 | 216 | event_type_map = { 217 | "D": "Death", 218 | "IN": "Injury", 219 | "IL": "Illness", 220 | "M": "Malfunction", 221 | "O": "Other", 222 | } 223 | event_type_code = result.get("event_type") or "Unknown" 224 | event_type = event_type_map.get(event_type_code, "Unknown") 225 | output.append(f"**Event Type**: {event_type}") 226 | 227 | if date_received := result.get("date_received"): 228 | output.append(f"**Date Received**: {date_received}") 229 | 230 | if date_of_event := result.get("date_of_event"): 231 | output.append(f"**Date of Event**: {date_of_event}") 232 | 233 | # 
Report source 234 | source_map = { 235 | "P": "Physician", 236 | "O": "Other health professional", 237 | "U": "User facility", 238 | "C": "Distributor", 239 | "M": "Manufacturer", 240 | } 241 | source_type = result.get("source_type") 242 | if isinstance(source_type, list): 243 | # Handle case where source_type is a list 244 | sources: list[str] = [] 245 | for st in source_type: 246 | if st: 247 | mapped = source_map.get(st) 248 | sources.append(mapped if mapped else st) 249 | else: 250 | sources.append("Unknown") 251 | output.append(f"**Report Source**: {', '.join(sources)}") 252 | elif source_type: 253 | source = source_map.get(source_type, source_type) 254 | output.append(f"**Report Source**: {source}") 255 | else: 256 | output.append("**Report Source**: Unknown") 257 | 258 | output.append("") 259 | return output 260 | 261 | 262 | def format_detailed_device_info(devices: list[dict[str, Any]]) -> list[str]: 263 | """Format detailed device information.""" 264 | output = ["### Device Information"] 265 | 266 | for i, dev in enumerate(devices, 1): 267 | if len(devices) > 1: 268 | output.append(f"\n#### Device {i}") 269 | 270 | # Basic info 271 | dev_name = ( 272 | dev.get("brand_name") or dev.get("generic_name") or "Unknown" 273 | ) 274 | output.append(f"**Device Name**: {dev_name}") 275 | 276 | for field, label in [ 277 | ("manufacturer_d_name", "Manufacturer"), 278 | ("model_number", "Model Number"), 279 | ("catalog_number", "Catalog Number"), 280 | ("lot_number", "Lot Number"), 281 | ("date_received", "Device Received Date"), 282 | ("expiration_date_of_device", "Expiration Date"), 283 | ]: 284 | if value := dev.get(field): 285 | output.append(f"**{label}**: {value}") 286 | 287 | # Problems 288 | if "device_problem_text" in dev: 289 | problems = dev["device_problem_text"] 290 | if isinstance(problems, str): 291 | problems = [problems] 292 | output.append(f"**Device Problems**: {', '.join(problems)}") 293 | 294 | # OpenFDA data 295 | output.extend(_format_device_openfda(dev.get("openfda", {}))) 296 | 297 | # Evaluation 298 | if "device_evaluated_by_manufacturer" in dev: 299 | evaluated = ( 300 | "Yes" 301 | if dev["device_evaluated_by_manufacturer"] == "Y" 302 | else "No" 303 | ) 304 | output.append(f"**Evaluated by Manufacturer**: {evaluated}") 305 | 306 | output.append("") 307 | return output 308 | 309 | 310 | def _format_device_openfda(openfda: dict) -> list[str]: 311 | """Format OpenFDA device data.""" 312 | output = [] 313 | 314 | if "device_class" in openfda: 315 | dev_class = openfda["device_class"] 316 | class_map = {"1": "Class I", "2": "Class II", "3": "Class III"} 317 | output.append( 318 | f"**FDA Device Class**: {class_map.get(dev_class, dev_class)}" 319 | ) 320 | 321 | if specialties := openfda.get("medical_specialty_description"): 322 | if isinstance(specialties, list): 323 | output.append(f"**Medical Specialty**: {', '.join(specialties)}") 324 | else: 325 | output.append(f"**Medical Specialty**: {specialties}") 326 | 327 | if "product_code" in openfda: 328 | output.append(f"**Product Code**: {openfda['product_code']}") 329 | 330 | return output 331 | 332 | 333 | def format_patient_details(patient_list: list) -> list[str]: 334 | """Format detailed patient information.""" 335 | output: list[str] = [] 336 | 337 | if not patient_list: 338 | return output 339 | 340 | output.append("### Patient Information") 341 | patient_info = patient_list[0] 342 | 343 | # Demographics 344 | output.extend(_format_patient_demographics(patient_info)) 345 | 346 | # Outcomes 347 | outcomes = 
_collect_patient_outcomes(patient_info) 348 | if outcomes: 349 | output.append(f"**Outcomes**: {', '.join(outcomes)}") 350 | 351 | output.append("") 352 | return output 353 | 354 | 355 | def _format_patient_demographics(patient_info: dict) -> list[str]: 356 | """Format patient demographic information.""" 357 | output = [] 358 | 359 | if "patient_age" in patient_info: 360 | output.append(f"**Age**: {patient_info['patient_age']} years") 361 | 362 | if "patient_sex" in patient_info: 363 | sex_map = {"M": "Male", "F": "Female", "U": "Unknown"} 364 | sex = sex_map.get(patient_info["patient_sex"], "Unknown") 365 | output.append(f"**Sex**: {sex}") 366 | 367 | return output 368 | 369 | 370 | def _collect_patient_outcomes(patient_info: dict) -> list[str]: 371 | """Collect patient outcome information.""" 372 | outcomes = [] 373 | 374 | if date_of_death := patient_info.get("date_of_death"): 375 | outcomes.append(f"Death ({date_of_death})") 376 | if patient_info.get("life_threatening") == "Y": 377 | outcomes.append("Life-threatening") 378 | if patient_info.get("disability") == "Y": 379 | outcomes.append("Disability") 380 | if patient_info.get("hospitalization") == "Y": 381 | outcomes.append("Hospitalization") 382 | if patient_info.get("congenital_anomaly") == "Y": 383 | outcomes.append("Congenital anomaly") 384 | if patient_info.get("required_intervention") == "Y": 385 | outcomes.append("Required intervention") 386 | 387 | return outcomes 388 | ``` -------------------------------------------------------------------------------- /docs/backend-services-reference/07-alphagenome.md: -------------------------------------------------------------------------------- ```markdown 1 | # AlphaGenome API Reference 2 | 3 | Google DeepMind's AlphaGenome provides AI-powered predictions of variant effects on gene regulation, chromatin accessibility, and splicing. 4 | 5 | ## Usage Guide 6 | 7 | For a step-by-step tutorial on using AlphaGenome for variant effect prediction, see [How to Predict Variant Effects with AlphaGenome](../how-to-guides/04-predict-variant-effects-with-alphagenome.md). 8 | 9 | ## Overview 10 | 11 | AlphaGenome predicts regulatory effects of genetic variants by analyzing: 12 | 13 | - Gene expression changes in nearby genes 14 | - Chromatin accessibility alterations 15 | - Splicing pattern modifications 16 | - Enhancer and promoter activity 17 | - Transcription factor binding 18 | - 3D chromatin interactions 19 | 20 | **Note:** AlphaGenome is an optional integration requiring separate installation and API key. 21 | 22 | ## Authentication 23 | 24 | ### Obtaining an API Key 25 | 26 | 1. Visit [https://deepmind.google.com/science/alphagenome](https://deepmind.google.com/science/alphagenome) 27 | 2. Register for non-commercial research use 28 | 3. Accept terms of service 29 | 4. Receive API key via email 30 | 31 | ### API Key Usage 32 | 33 | **Environment Variable:** 34 | 35 | ```bash 36 | export ALPHAGENOME_API_KEY="your-key-here" 37 | ``` 38 | 39 | **Per-Request:** 40 | 41 | ```python 42 | result = alphagenome_predictor( 43 | chromosome="chr7", 44 | position=140753336, 45 | reference="A", 46 | alternate="T", 47 | api_key="your-key-here" # Overrides environment 48 | ) 49 | ``` 50 | 51 | ## Installation 52 | 53 | AlphaGenome requires separate installation: 54 | 55 | ```bash 56 | # Clone and install 57 | git clone https://github.com/google-deepmind/alphagenome.git 58 | cd alphagenome 59 | pip install . 
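# Note (general Python packaging behavior, not an AlphaGenome-specific rule):
# install into the same Python environment that runs BioMCP, otherwise the
# "import alphagenome" verification step below will not find the package.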
60 | 61 | # Verify installation 62 | python -c "import alphagenome; print('AlphaGenome installed')" 63 | ``` 64 | 65 | ## API Interface 66 | 67 | ### Prediction Endpoint 68 | 69 | The AlphaGenome API is accessed through the BioMCP `alphagenome_predictor` tool. 70 | 71 | #### Parameters 72 | 73 | | Parameter | Type | Required | Description | 74 | | ------------------------ | --------- | -------- | --------------------------------- | 75 | | `chromosome` | str | Yes | Chromosome (e.g., "chr7") | 76 | | `position` | int | Yes | 1-based genomic position | 77 | | `reference` | str | Yes | Reference allele | 78 | | `alternate` | str | Yes | Alternate allele | 79 | | `interval_size` | int | No | Analysis window (default: 131072) | 80 | | `tissue_types` | list[str] | No | UBERON tissue codes | 81 | | `significance_threshold` | float | No | Log2FC threshold (default: 0.5) | 82 | | `api_key` | str | No | AlphaGenome API key | 83 | 84 | #### Interval Sizes 85 | 86 | | Size | Use Case | Description | 87 | | --------- | ---------- | ------------------------------ | 88 | | 2,048 | Promoter | TSS and promoter variants | 89 | | 16,384 | Local | Proximal regulatory elements | 90 | | 131,072 | Standard | Enhancer-promoter interactions | 91 | | 524,288 | Long-range | Distal regulatory elements | 92 | | 1,048,576 | TAD-level | Topological domain effects | 93 | 94 | ## Tissue Codes 95 | 96 | AlphaGenome supports tissue-specific predictions using UBERON ontology: 97 | 98 | | Tissue | UBERON Code | Description | 99 | | -------- | -------------- | -------------------- | 100 | | Breast | UBERON:0000310 | Mammary gland tissue | 101 | | Liver | UBERON:0002107 | Hepatic tissue | 102 | | Prostate | UBERON:0002367 | Prostate gland | 103 | | Brain | UBERON:0000955 | Neural tissue | 104 | | Lung | UBERON:0002048 | Pulmonary tissue | 105 | | Colon | UBERON:0001155 | Colonic mucosa | 106 | 107 | ## Response Format 108 | 109 | ### Gene Expression Predictions 110 | 111 | ```json 112 | { 113 | "gene_expression": [ 114 | { 115 | "gene_name": "BRAF", 116 | "gene_id": "ENSG00000157764", 117 | "distance_to_tss": 1234, 118 | "log2_fold_change": 1.25, 119 | "confidence": 0.89, 120 | "tissue": "UBERON:0000310" 121 | } 122 | ] 123 | } 124 | ``` 125 | 126 | **Interpretation:** 127 | 128 | - `log2_fold_change > 1.0`: Strong increase (2x+) 129 | - `log2_fold_change > 0.5`: Moderate increase 130 | - `log2_fold_change < -1.0`: Strong decrease (0.5x) 131 | - `log2_fold_change < -0.5`: Moderate decrease 132 | 133 | ### Chromatin Accessibility 134 | 135 | ```json 136 | { 137 | "chromatin_accessibility": [ 138 | { 139 | "region_type": "enhancer", 140 | "coordinates": "chr7:140450000-140451000", 141 | "accessibility_change": 0.75, 142 | "peak_height_change": 1.2, 143 | "tissue": "UBERON:0000310" 144 | } 145 | ] 146 | } 147 | ``` 148 | 149 | **Interpretation:** 150 | 151 | - Positive values: Increased accessibility (open chromatin) 152 | - Negative values: Decreased accessibility (closed chromatin) 153 | 154 | ### Splicing Predictions 155 | 156 | ```json 157 | { 158 | "splicing": [ 159 | { 160 | "event_type": "exon_skipping", 161 | "affected_exon": "ENST00000288602.6:exon14", 162 | "delta_psi": -0.35, 163 | "splice_site_strength_change": -2.1 164 | } 165 | ] 166 | } 167 | ``` 168 | 169 | **PSI (Percent Spliced In):** 170 | 171 | - `delta_psi > 0`: Increased exon inclusion 172 | - `delta_psi < 0`: Increased exon skipping 173 | - `|delta_psi| > 0.1`: Biologically significant 174 | 175 | ## Usage Examples 176 | 177 | ### Basic Prediction 178 | 179 
| ```python 180 | # Predict BRAF V600E effects 181 | result = await alphagenome_predictor( 182 | chromosome="chr7", 183 | position=140753336, 184 | reference="A", 185 | alternate="T" 186 | ) 187 | 188 | # Process results 189 | for gene in result.gene_expression: 190 | if abs(gene.log2_fold_change) > 1.0: 191 | print(f"{gene.gene_name}: {gene.log2_fold_change:.2f} log2FC") 192 | ``` 193 | 194 | ### Tissue-Specific Analysis 195 | 196 | ```python 197 | # Compare effects across tissues 198 | tissues = { 199 | "breast": "UBERON:0000310", 200 | "lung": "UBERON:0002048", 201 | "brain": "UBERON:0000955" 202 | } 203 | 204 | results = {} 205 | for tissue_name, tissue_code in tissues.items(): 206 | results[tissue_name] = await alphagenome_predictor( 207 | chromosome="chr17", 208 | position=7577120, 209 | reference="G", 210 | alternate="A", 211 | tissue_types=[tissue_code] 212 | ) 213 | ``` 214 | 215 | ### Promoter Variant Analysis 216 | 217 | ```python 218 | # Use small window for promoter variants 219 | result = await alphagenome_predictor( 220 | chromosome="chr7", 221 | position=5569100, # Near ACTB promoter 222 | reference="C", 223 | alternate="T", 224 | interval_size=2048 # 2kb window 225 | ) 226 | 227 | # Check for promoter effects 228 | promoter_effects = [ 229 | g for g in result.gene_expression 230 | if abs(g.distance_to_tss) < 1000 231 | ] 232 | ``` 233 | 234 | ### Enhancer Variant Analysis 235 | 236 | ```python 237 | # Use larger window for enhancer variants 238 | result = await alphagenome_predictor( 239 | chromosome="chr8", 240 | position=128748315, # MYC enhancer region 241 | reference="G", 242 | alternate="A", 243 | interval_size=524288 # 512kb window 244 | ) 245 | 246 | # Analyze chromatin changes 247 | enhancer_changes = [ 248 | c for c in result.chromatin_accessibility 249 | if c.region_type == "enhancer" and abs(c.accessibility_change) > 0.5 250 | ] 251 | ``` 252 | 253 | ## Best Practices 254 | 255 | ### 1. Choose Appropriate Interval Size 256 | 257 | ```python 258 | def select_interval_size(variant_type): 259 | """Select interval based on variant type""" 260 | intervals = { 261 | "promoter": 2048, 262 | "splice_site": 16384, 263 | "enhancer": 131072, 264 | "intergenic": 524288, 265 | "structural": 1048576 266 | } 267 | return intervals.get(variant_type, 131072) 268 | ``` 269 | 270 | ### 2. Handle Missing Predictions 271 | 272 | ```python 273 | # Not all variants affect gene expression 274 | if not result.gene_expression: 275 | print("No gene expression changes predicted") 276 | # Check chromatin or splicing effects instead 277 | ``` 278 | 279 | ### 3. Filter by Significance 280 | 281 | ```python 282 | # Focus on significant changes 283 | significant_genes = [ 284 | g for g in result.gene_expression 285 | if abs(g.log2_fold_change) > significance_threshold 286 | and g.confidence > 0.8 287 | ] 288 | ``` 289 | 290 | ### 4. 
Validate Input 291 | 292 | ```python 293 | def validate_variant(chr, pos, ref, alt): 294 | """Validate variant format""" 295 | # Check chromosome format 296 | if not chr.startswith("chr"): 297 | raise ValueError("Chromosome must start with 'chr'") 298 | 299 | # Check alleles 300 | valid_bases = set("ACGT") 301 | if ref not in valid_bases or alt not in valid_bases: 302 | raise ValueError("Invalid nucleotide") 303 | 304 | # Check position 305 | if pos < 1: 306 | raise ValueError("Position must be 1-based") 307 | ``` 308 | 309 | ## Integration Patterns 310 | 311 | ### VUS Classification Pipeline 312 | 313 | ```python 314 | async def classify_vus(variant): 315 | """Classify variant of unknown significance""" 316 | 317 | # 1. Predict regulatory effects 318 | predictions = await alphagenome_predictor( 319 | chromosome=variant.chr, 320 | position=variant.pos, 321 | reference=variant.ref, 322 | alternate=variant.alt 323 | ) 324 | 325 | # 2. Score impact 326 | max_expression = max( 327 | abs(g.log2_fold_change) for g in predictions.gene_expression 328 | ) if predictions.gene_expression else 0 329 | 330 | max_chromatin = max( 331 | abs(c.accessibility_change) for c in predictions.chromatin_accessibility 332 | ) if predictions.chromatin_accessibility else 0 333 | 334 | # 3. Classify 335 | if max_expression > 2.0 or max_chromatin > 1.5: 336 | return "High regulatory impact" 337 | elif max_expression > 1.0 or max_chromatin > 0.75: 338 | return "Moderate regulatory impact" 339 | else: 340 | return "Low regulatory impact" 341 | ``` 342 | 343 | ### Multi-Variant Analysis 344 | 345 | ```python 346 | async def analyze_variant_set(variants, target_gene): 347 | """Analyze multiple variants affecting a gene""" 348 | 349 | results = [] 350 | for variant in variants: 351 | prediction = await alphagenome_predictor( 352 | chromosome=variant["chr"], 353 | position=variant["pos"], 354 | reference=variant["ref"], 355 | alternate=variant["alt"] 356 | ) 357 | 358 | # Find target gene effect 359 | for gene in prediction.gene_expression: 360 | if gene.gene_name == target_gene: 361 | results.append({ 362 | "variant": f"{variant['chr']}:{variant['pos']}", 363 | "effect": gene.log2_fold_change, 364 | "confidence": gene.confidence 365 | }) 366 | break 367 | 368 | # Sort by effect size 369 | return sorted(results, key=lambda x: abs(x["effect"]), reverse=True) 370 | ``` 371 | 372 | ## Limitations 373 | 374 | ### Technical Limitations 375 | 376 | - **Species**: Human only (GRCh38) 377 | - **Variant Types**: SNVs only (no indels/SVs) 378 | - **Sequence Context**: Requires reference match 379 | - **Computation Time**: 1-3 seconds per variant 380 | 381 | ### Biological Limitations 382 | 383 | - **Cell Type**: Predictions are tissue-specific approximations 384 | - **Environmental Factors**: Does not account for conditions 385 | - **Epistasis**: Single variant effects only 386 | - **Temporal**: No developmental stage consideration 387 | 388 | ## Error Handling 389 | 390 | ### Common Errors 391 | 392 | ```python 393 | try: 394 | result = await alphagenome_predictor(...) 
395 | except AlphaGenomeError as e: 396 | if "API key" in str(e): 397 | # Handle missing/invalid key 398 | pass 399 | elif "Invalid sequence" in str(e): 400 | # Handle sequence errors 401 | pass 402 | elif "Rate limit" in str(e): 403 | # Handle rate limiting 404 | pass 405 | ``` 406 | 407 | ### Retry Logic 408 | 409 | ```python 410 | async def predict_with_retry(params, max_retries=3): 411 | """Retry on transient failures""" 412 | for attempt in range(max_retries): 413 | try: 414 | return await alphagenome_predictor(**params) 415 | except Exception as e: 416 | if attempt == max_retries - 1: 417 | raise 418 | await asyncio.sleep(2 ** attempt) # Exponential backoff 419 | ``` 420 | 421 | ## Performance Optimization 422 | 423 | ### Batch Processing 424 | 425 | ```python 426 | async def batch_predict(variants, batch_size=10): 427 | """Process variants in batches""" 428 | results = [] 429 | 430 | for i in range(0, len(variants), batch_size): 431 | batch = variants[i:i + batch_size] 432 | batch_results = await asyncio.gather(*[ 433 | alphagenome_predictor(**v) for v in batch 434 | ]) 435 | results.extend(batch_results) 436 | 437 | # Rate limiting 438 | if i + batch_size < len(variants): 439 | await asyncio.sleep(1) 440 | 441 | return results 442 | ``` 443 | 444 | ### Caching Strategy 445 | 446 | ```python 447 | from functools import lru_cache 448 | 449 | @lru_cache(maxsize=1000) 450 | def get_cached_prediction(chr, pos, ref, alt, interval): 451 | """Cache predictions for repeated queries""" 452 | return alphagenome_predictor( 453 | chromosome=chr, 454 | position=pos, 455 | reference=ref, 456 | alternate=alt, 457 | interval_size=interval 458 | ) 459 | ``` 460 | 461 | ## Support Resources 462 | 463 | - **Documentation**: [AlphaGenome GitHub](https://github.com/google-deepmind/alphagenome) 464 | - **Paper**: [Nature Publication](https://www.nature.com/alphagenome) 465 | - **Support**: Via GitHub issues 466 | - **Terms**: Non-commercial research use only 467 | ``` -------------------------------------------------------------------------------- /docs/how-to-guides/03-get-comprehensive-variant-annotations.md: -------------------------------------------------------------------------------- ```markdown 1 | # How to Get Comprehensive Variant Annotations 2 | 3 | This guide demonstrates how to retrieve and interpret genetic variant information using BioMCP's integrated databases. 
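For a quick sense of what comes back before working through the sections below, a single lookup already surfaces most of the annotations this guide covers. The minimal sketch below reuses the field names from the examples later in the guide (`clinical_significance`, `frequencies.gnomad`, `gene.symbol`); treat them as illustrative of the shape of the data rather than a fixed schema.

```python
# Minimal sketch: one lookup, a few of the annotations discussed in this guide.
variant = await variant_getter("rs121913529")  # BRAF V600E, used throughout this guide

print(variant.clinical_significance)  # e.g., "Pathogenic"
print(variant.frequencies.gnomad)     # population allele frequency
print(variant.gene.symbol)            # "BRAF"
```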
4 | 5 | ## Overview 6 | 7 | BioMCP provides variant annotations from multiple sources: 8 | 9 | - **MyVariant.info**: Core variant database with clinical significance ([BioThings Reference](../backend-services-reference/02-biothings-suite.md)) 10 | - **External Annotations**: TCGA cancer data, 1000 Genomes population frequencies 11 | - **cBioPortal Integration**: Cancer-specific mutation context ([API Reference](../backend-services-reference/03-cbioportal.md)) 12 | - **BioThings Links**: Connected gene, disease, and drug information ([BioThings Suite](../backend-services-reference/02-biothings-suite.md)) 13 | 14 | ## Basic Variant Lookup 15 | 16 | ### Search by rsID 17 | 18 | Find variant information using dbSNP identifiers: 19 | 20 | ```bash 21 | # CLI 22 | biomcp variant get rs121913529 23 | 24 | # Python 25 | variant = await client.variants.get("rs121913529") 26 | 27 | # MCP Tool 28 | variant_getter(variant_id="rs121913529") 29 | ``` 30 | 31 | ### Search by HGVS Notation 32 | 33 | Use standard HGVS notation: 34 | 35 | ```python 36 | # Protein change 37 | variant = await variant_getter("NP_004324.2:p.Val600Glu") 38 | 39 | # Coding DNA change 40 | variant = await variant_getter("NM_004333.4:c.1799T>A") 41 | 42 | # Genomic coordinates 43 | variant = await variant_getter("NC_000007.13:g.140453136A>T") 44 | ``` 45 | 46 | ### Search by Genomic Position 47 | 48 | ```python 49 | # Search by coordinates 50 | variants = await variant_searcher( 51 | chromosome="7", 52 | start=140453136, 53 | end=140453136, 54 | assembly="hg38" # or hg19 55 | ) 56 | ``` 57 | 58 | ## Understanding Variant Annotations 59 | 60 | ### Clinical Significance 61 | 62 | ```python 63 | # Get variant details 64 | variant = await variant_getter("rs121913529") 65 | 66 | # Check clinical significance 67 | print(f"Clinical Significance: {variant.clinical_significance}") 68 | # Output: "Pathogenic" 69 | 70 | print(f"ClinVar Review Status: {variant.review_status}") 71 | # Output: "reviewed by expert panel" 72 | ``` 73 | 74 | ### Population Frequencies 75 | 76 | ```python 77 | # Access frequency data 78 | if variant.frequencies: 79 | print("Population Frequencies:") 80 | print(f" gnomAD: {variant.frequencies.gnomad}") 81 | print(f" 1000 Genomes: {variant.frequencies.thousand_genomes}") 82 | print(f" ExAC: {variant.frequencies.exac}") 83 | ``` 84 | 85 | ### Functional Predictions 86 | 87 | ```python 88 | # In silico predictions 89 | if variant.predictions: 90 | print(f"CADD Score: {variant.predictions.cadd}") 91 | print(f"PolyPhen: {variant.predictions.polyphen}") 92 | print(f"SIFT: {variant.predictions.sift}") 93 | ``` 94 | 95 | ## Advanced Variant Searches 96 | 97 | ### Filter by Clinical Significance 98 | 99 | ```python 100 | # Find pathogenic BRCA1 variants 101 | pathogenic_variants = await variant_searcher( 102 | gene="BRCA1", 103 | significance="pathogenic", 104 | limit=20 105 | ) 106 | 107 | # Multiple significance levels 108 | variants = await variant_searcher( 109 | gene="TP53", 110 | significance=["pathogenic", "likely_pathogenic"] 111 | ) 112 | ``` 113 | 114 | ### Filter by Frequency 115 | 116 | Find rare variants: 117 | 118 | ```python 119 | # Rare variants (MAF < 1%) 120 | rare_variants = await variant_searcher( 121 | gene="CFTR", 122 | frequency_max=0.01, 123 | significance="pathogenic" 124 | ) 125 | 126 | # Ultra-rare variants 127 | ultra_rare = await variant_searcher( 128 | gene="SCN1A", 129 | frequency_max=0.0001 130 | ) 131 | ``` 132 | 133 | ### Filter by Prediction Scores 134 | 135 | ```python 136 | # High-impact 
variants 137 | high_impact = await variant_searcher( 138 | gene="MLH1", 139 | cadd_score_min=20, # CADD > 20 suggests deleteriousness 140 | polyphen_prediction="probably_damaging" 141 | ) 142 | ``` 143 | 144 | ## External Database Integration 145 | 146 | For technical details on external data sources, see the [BioThings Suite Reference](../backend-services-reference/02-biothings-suite.md). 147 | 148 | ### TCGA Cancer Data 149 | 150 | Variants automatically include TCGA annotations when available: 151 | 152 | ```python 153 | variant = await variant_getter("rs121913529", include_external=True) 154 | 155 | # Check TCGA data 156 | if variant.external_data.get("tcga"): 157 | tcga = variant.external_data["tcga"] 158 | print(f"TCGA Studies: {tcga['study_count']}") 159 | print(f"Cancer Types: {', '.join(tcga['cancer_types'])}") 160 | print(f"Sample Count: {tcga['sample_count']}") 161 | ``` 162 | 163 | ### 1000 Genomes Project 164 | 165 | Population-specific frequencies: 166 | 167 | ```python 168 | # Access 1000 Genomes data 169 | if variant.external_data.get("thousand_genomes"): 170 | tg_data = variant.external_data["thousand_genomes"] 171 | print("Population Frequencies:") 172 | for pop, freq in tg_data["populations"].items(): 173 | print(f" {pop}: {freq}") 174 | ``` 175 | 176 | ### Ensembl VEP Annotations 177 | 178 | ```python 179 | # Consequence predictions 180 | if variant.consequences: 181 | for consequence in variant.consequences: 182 | print(f"Gene: {consequence.gene}") 183 | print(f"Impact: {consequence.impact}") 184 | print(f"Consequence: {consequence.consequence_terms}") 185 | ``` 186 | 187 | ## Integration with Other BioMCP Tools 188 | 189 | BioMCP's unified architecture allows seamless integration between variant data and other biomedical information. For implementation details, see the [Transport Protocol Guide](../developer-guides/04-transport-protocol.md). 
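Before walking through each hand-off individually, here is a minimal end-to-end sketch. It is illustrative only: it reuses the `variant_getter`, `gene_getter`, and `trial_searcher` helpers shown elsewhere in this guide and assumes they are available in scope. The subsections below break these steps down one at a time.

```python
# Illustrative chain: variant -> gene context -> open trials
variant = await variant_getter("rs121913529")   # BRAF V600E
gene = await gene_getter(variant.gene.symbol)   # gene-level summary

trials = await trial_searcher(
    other_terms=[variant.gene.symbol, variant.protein_change],
    recruiting_status="OPEN",
)
print(f"{gene.name}: collected gene context and open trials for {variant.protein_change}")
```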
190 | 191 | ### Variant to Gene Information 192 | 193 | ```python 194 | # Get variant 195 | variant = await variant_getter("rs121913529") 196 | 197 | # Get associated gene details 198 | gene_symbol = variant.gene.symbol # "BRAF" 199 | gene_info = await gene_getter(gene_symbol) 200 | 201 | print(f"Gene: {gene_info.name}") 202 | print(f"Function: {gene_info.summary}") 203 | ``` 204 | 205 | ### Variant to Disease Context 206 | 207 | ```python 208 | # Find disease associations 209 | diseases = variant.disease_associations 210 | 211 | for disease in diseases: 212 | # Get detailed disease info 213 | disease_info = await disease_getter(disease.name) 214 | print(f"Disease: {disease_info.name}") 215 | print(f"Definition: {disease_info.definition}") 216 | print(f"Synonyms: {', '.join(disease_info.synonyms)}") 217 | ``` 218 | 219 | ### Variant to Clinical Trials 220 | 221 | ```python 222 | # Search trials for specific variant 223 | gene = variant.gene.symbol 224 | mutation = variant.protein_change # e.g., "V600E" 225 | 226 | trials = await trial_searcher( 227 | other_terms=[f"{gene} {mutation}", f"{gene} mutation"], 228 | recruiting_status="OPEN" 229 | ) 230 | ``` 231 | 232 | ## Practical Workflows 233 | 234 | ### Workflow 1: Cancer Variant Analysis 235 | 236 | ```python 237 | async def analyze_cancer_variant(hgvs: str): 238 | # Think about the analysis 239 | await think( 240 | thought=f"Analyzing cancer variant {hgvs}", 241 | thoughtNumber=1 242 | ) 243 | 244 | # Get variant details 245 | variant = await variant_getter(hgvs, include_external=True) 246 | 247 | # Get gene context 248 | gene = await gene_getter(variant.gene.symbol) 249 | 250 | # Search for targeted therapies 251 | drugs = await search( 252 | query=f"drugs.targets:{variant.gene.symbol}", 253 | domain="drug" 254 | ) 255 | 256 | # Find relevant trials 257 | trials = await trial_searcher( 258 | other_terms=[ 259 | variant.gene.symbol, 260 | variant.protein_change, 261 | "targeted therapy" 262 | ], 263 | recruiting_status="OPEN" 264 | ) 265 | 266 | # Search literature 267 | articles = await article_searcher( 268 | genes=[variant.gene.symbol], 269 | variants=[hgvs], 270 | keywords=["therapy", "treatment", "resistance"] 271 | ) 272 | 273 | return { 274 | "variant": variant, 275 | "gene": gene, 276 | "potential_drugs": drugs, 277 | "clinical_trials": trials, 278 | "literature": articles 279 | } 280 | ``` 281 | 282 | ### Workflow 2: Rare Disease Variant 283 | 284 | ```python 285 | async def rare_disease_variant_analysis(gene: str, phenotype: str): 286 | # Find all pathogenic variants 287 | variants = await variant_searcher( 288 | gene=gene, 289 | significance=["pathogenic", "likely_pathogenic"], 290 | frequency_max=0.001 # Rare 291 | ) 292 | 293 | # Analyze each variant 294 | results = [] 295 | for v in variants[:10]: # Top 10 296 | # Get full annotations 297 | full_variant = await variant_getter(v.id) 298 | 299 | # Check phenotype associations 300 | if phenotype.lower() in str(full_variant.phenotypes).lower(): 301 | results.append({ 302 | "variant": full_variant, 303 | "phenotype_match": True, 304 | "frequency": full_variant.frequencies.gnomad or 0 305 | }) 306 | 307 | # Sort by relevance 308 | results.sort(key=lambda x: x["frequency"]) 309 | return results 310 | ``` 311 | 312 | ### Workflow 3: Pharmacogenomics 313 | 314 | ```python 315 | async def pharmacogenomic_analysis(drug_name: str): 316 | # Get drug information 317 | drug = await drug_getter(drug_name) 318 | 319 | # Find pharmGKB annotations 320 | pgx_variants = [] 321 | 322 | # Search 
for drug-related variants 323 | if drug.targets: 324 | for target in drug.targets: 325 | variants = await variant_searcher( 326 | gene=target, 327 | keywords=[drug_name, "pharmacogenomics", "drug response"] 328 | ) 329 | pgx_variants.extend(variants) 330 | 331 | # Get detailed annotations 332 | annotated = [] 333 | for v in pgx_variants: 334 | full = await variant_getter(v.id) 335 | if full.pharmacogenomics: 336 | annotated.append(full) 337 | 338 | return { 339 | "drug": drug, 340 | "pgx_variants": annotated, 341 | "affected_genes": list(set(v.gene.symbol for v in annotated)) 342 | } 343 | ``` 344 | 345 | ## Interpreting Results 346 | 347 | ### Clinical Actionability 348 | 349 | ```python 350 | def assess_actionability(variant): 351 | """Determine if variant is clinically actionable""" 352 | 353 | actionable = False 354 | reasons = [] 355 | 356 | # Check pathogenicity 357 | if variant.clinical_significance in ["pathogenic", "likely_pathogenic"]: 358 | actionable = True 359 | reasons.append("Pathogenic variant") 360 | 361 | # Check for drug associations 362 | if variant.drug_associations: 363 | actionable = True 364 | reasons.append(f"Associated with {len(variant.drug_associations)} drugs") 365 | 366 | # Check guidelines 367 | if variant.clinical_guidelines: 368 | actionable = True 369 | reasons.append("Clinical guidelines available") 370 | 371 | return { 372 | "actionable": actionable, 373 | "reasons": reasons, 374 | "recommendations": variant.clinical_guidelines 375 | } 376 | ``` 377 | 378 | ### Report Generation 379 | 380 | ```python 381 | def generate_variant_report(variant): 382 | """Create a clinical variant report""" 383 | 384 | report = f""" 385 | ## Variant Report: {variant.id} 386 | 387 | ### Basic Information 388 | - **Gene**: {variant.gene.symbol} 389 | - **Protein Change**: {variant.protein_change or "N/A"} 390 | - **Genomic Location**: chr{variant.chr}:{variant.pos} 391 | - **Reference**: {variant.ref} → **Alternate**: {variant.alt} 392 | 393 | ### Clinical Significance 394 | - **Status**: {variant.clinical_significance} 395 | - **Review**: {variant.review_status} 396 | - **Last Updated**: {variant.last_updated} 397 | 398 | ### Population Frequency 399 | - **gnomAD**: {variant.frequencies.gnomad or "Not found"} 400 | - **1000 Genomes**: {variant.frequencies.thousand_genomes or "Not found"} 401 | 402 | ### Predictions 403 | - **CADD Score**: {variant.predictions.cadd or "N/A"} 404 | - **PolyPhen**: {variant.predictions.polyphen or "N/A"} 405 | - **SIFT**: {variant.predictions.sift or "N/A"} 406 | 407 | ### Associated Conditions 408 | {format_conditions(variant.conditions)} 409 | 410 | ### Clinical Resources 411 | - **ClinVar**: {variant.clinvar_url} 412 | - **dbSNP**: {variant.dbsnp_url} 413 | """ 414 | return report 415 | ``` 416 | 417 | ## Best Practices 418 | 419 | ### 1. Use Multiple Identifiers 420 | 421 | ```python 422 | # Try multiple formats if one fails 423 | identifiers = [ 424 | "rs121913529", 425 | "NM_004333.4:c.1799T>A", 426 | "7:140453136:A:T" 427 | ] 428 | 429 | for id in identifiers: 430 | try: 431 | variant = await variant_getter(id) 432 | break 433 | except: 434 | continue 435 | ``` 436 | 437 | ### 2. Check Data Completeness 438 | 439 | ```python 440 | # Not all variants have all annotations 441 | if variant.frequencies: 442 | # Use frequency data 443 | pass 444 | else: 445 | # Note that frequency unavailable 446 | pass 447 | ``` 448 | 449 | ### 3. 
Consider Assembly Versions
450 | 
451 | ```python
452 | # Specify genome assembly (BRAF V600E shown; its position differs between builds)
453 | variants_hg38 = await variant_searcher(
454 |     chromosome="7",
455 |     start=140753336,
456 |     assembly="hg38"
457 | )
458 | 
459 | variants_hg19 = await variant_searcher(
460 |     chromosome="7",
461 |     start=140453136,  # Same variant, different coordinate!
462 |     assembly="hg19"
463 | )
464 | ```
465 | 
466 | ## Troubleshooting
467 | 
468 | ### Variant Not Found
469 | 
470 | 1. **Check notation**: Ensure proper HGVS format
471 | 2. **Try alternatives**: rsID, genomic coordinates, protein change
472 | 3. **Verify gene symbol**: Use official HGNC symbols
473 | 
474 | ### Missing Annotations
475 | 
476 | - Not all variants have all data types
477 | - Rare variants may lack population frequencies
478 | - Novel variants won't have ClinVar data
479 | 
480 | ### Performance Issues
481 | 
482 | - Use pagination for large searches
483 | - Limit external data requests when not needed
484 | - Cache frequently accessed variants
485 | 
486 | ## Next Steps
487 | 
488 | - Learn to [predict variant effects](04-predict-variant-effects-with-alphagenome.md)
489 | - Explore [article searches](01-find-articles-and-cbioportal-data.md) for variant literature
490 | - Set up [logging and monitoring](05-logging-and-monitoring-with-bigquery.md)
491 | ```
--------------------------------------------------------------------------------
/tests/test_pydantic_ai_integration.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Tests for Pydantic AI integration with BioMCP.
3 | 
4 | These tests verify the examples provided in the documentation work correctly.
5 | """
6 | 
7 | import asyncio
8 | import os
9 | import sys
10 | 
11 | import httpx
12 | import pytest
13 | from pydantic_ai import Agent
14 | from pydantic_ai.mcp import MCPServerStdio
15 | 
16 | try:
17 |     from pydantic_ai.mcp import MCPServerStreamableHTTP  # noqa: F401
18 | 
19 |     HAS_STREAMABLE_HTTP = True
20 | except ImportError:
21 |     HAS_STREAMABLE_HTTP = False
22 | from pydantic_ai.models.test import TestModel
23 | 
24 | 
25 | def worker_dependencies_available():
26 |     """Check if worker dependencies (FastAPI, Starlette) are available."""
27 |     try:
28 |         import fastapi  # noqa: F401
29 |         import starlette  # noqa: F401
30 | 
31 |         return True
32 |     except ImportError:
33 |         return False
34 | 
35 | 
36 | # Skip marker for tests requiring worker dependencies
37 | requires_worker = pytest.mark.skipif(
38 |     not worker_dependencies_available(),
39 |     reason="Worker dependencies (FastAPI/Starlette) not installed. Install with: pip install biomcp-python[worker]",
40 | )
41 | 
42 | # Skip marker for tests requiring MCPServerStreamableHTTP
43 | requires_streamable_http = pytest.mark.skipif(
44 |     not HAS_STREAMABLE_HTTP,
45 |     reason="MCPServerStreamableHTTP not available. Requires pydantic-ai>=0.6.9",
46 | )
47 | 
48 | 
49 | def get_free_port():
50 |     """Get a free port for testing."""
51 |     import socket
52 | 
53 |     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
54 |         s.bind(("", 0))
55 |         s.listen(1)
56 |         port = s.getsockname()[1]
57 |         return port
58 | 
59 | 
60 | async def wait_for_server(
61 |     url: str, max_retries: int = 60, process=None
62 | ) -> None:
63 |     """Wait for server to be ready with retries."""
64 |     import sys
65 | 
66 |     for i in range(max_retries):
67 |         # Check if process has exited with error
68 |         if process and process.poll() is not None:
69 |             stdout, stderr = process.communicate()
70 |             pytest.fail(
71 |                 f"Server process exited with code {process.returncode}. 
Stderr: {stderr.decode() if stderr else 'None'}" 72 | ) 73 | 74 | try: 75 | async with httpx.AsyncClient() as client: 76 | response = await client.get(url, timeout=2) 77 | if response.status_code == 200: 78 | print( 79 | f"\nServer ready after {i + 1} seconds", 80 | file=sys.stderr, 81 | ) 82 | return 83 | except (httpx.ConnectError, httpx.ReadTimeout): 84 | if i % 10 == 0: 85 | print( 86 | f"\nWaiting for server... ({i} seconds elapsed)", 87 | file=sys.stderr, 88 | ) 89 | await asyncio.sleep(1) 90 | pytest.fail(f"Server at {url} did not start within {max_retries} seconds") 91 | 92 | 93 | @pytest.mark.asyncio 94 | async def test_stdio_mode_connection(): 95 | """Test STDIO mode connection and tool listing.""" 96 | server = MCPServerStdio( 97 | "python", args=["-m", "biomcp", "run", "--mode", "stdio"], timeout=20 98 | ) 99 | 100 | # Use TestModel to avoid needing API keys 101 | model = TestModel(call_tools=["search"]) 102 | agent = Agent(model=model, toolsets=[server]) 103 | 104 | async with agent: 105 | # Test a simple query to verify connection works 106 | result = await agent.run("List available tools") 107 | 108 | # Should get a response without errors 109 | assert result is not None 110 | assert result.output is not None 111 | 112 | 113 | @pytest.mark.asyncio 114 | async def test_stdio_mode_simple_query(): 115 | """Test STDIO mode with a simple search query.""" 116 | server = MCPServerStdio( 117 | "python", args=["-m", "biomcp", "run", "--mode", "stdio"], timeout=20 118 | ) 119 | 120 | # Use TestModel configured to call search 121 | model = TestModel(call_tools=["search"]) 122 | agent = Agent(model=model, toolsets=[server]) 123 | 124 | async with agent: 125 | result = await agent.run("Find 1 melanoma clinical trial") 126 | 127 | # TestModel will have called the search tool 128 | assert result.output is not None 129 | # The TestModel returns mock data, but we're testing the connection works 130 | assert result.output != "" 131 | 132 | 133 | @pytest.mark.asyncio 134 | async def test_stdio_mode_with_openai(): 135 | """Test STDIO mode with OpenAI (requires OPENAI_API_KEY).""" 136 | # Skip if no API key 137 | if not os.getenv("OPENAI_API_KEY"): 138 | pytest.skip("OPENAI_API_KEY not set") 139 | 140 | server = MCPServerStdio( 141 | "python", args=["-m", "biomcp", "run", "--mode", "stdio"], timeout=30 142 | ) 143 | 144 | agent = Agent("openai:gpt-4o-mini", toolsets=[server]) 145 | 146 | async with agent: 147 | result = await agent.run( 148 | "Find 1 article about BRAF V600E mutations. Return just the title." 
149 | ) 150 | 151 | # Should get a real result 152 | assert result.output is not None 153 | assert len(result.output) > 0 154 | 155 | 156 | @requires_worker 157 | @requires_streamable_http 158 | @pytest.mark.asyncio 159 | async def test_streamable_http_mode_connection(): 160 | """Test Streamable HTTP mode connection for Pydantic AI.""" 161 | import subprocess 162 | 163 | from pydantic_ai.mcp import MCPServerStreamableHTTP 164 | 165 | port = get_free_port() 166 | 167 | # Start server in streamable_http mode 168 | server_process = subprocess.Popen( # noqa: S603 169 | [ 170 | sys.executable, 171 | "-m", 172 | "biomcp", 173 | "run", 174 | "--mode", 175 | "streamable_http", 176 | "--port", 177 | str(port), 178 | ], 179 | stdout=subprocess.PIPE, 180 | stderr=subprocess.PIPE, 181 | ) 182 | 183 | try: 184 | # Wait for server to be ready 185 | await wait_for_server( 186 | f"http://localhost:{port}/health", process=server_process 187 | ) 188 | 189 | # Connect to the /mcp endpoint 190 | server = MCPServerStreamableHTTP(f"http://localhost:{port}/mcp") 191 | 192 | # Use TestModel to avoid needing API keys 193 | model = TestModel(call_tools=["search"]) 194 | agent = Agent(model=model, toolsets=[server]) 195 | 196 | async with agent: 197 | # Test a simple query to verify connection 198 | result = await agent.run("Test connection") 199 | assert result is not None 200 | assert result.output is not None 201 | 202 | finally: 203 | # Clean up server process 204 | server_process.terminate() 205 | server_process.wait(timeout=5) 206 | 207 | 208 | @requires_worker 209 | @requires_streamable_http 210 | @pytest.mark.asyncio 211 | async def test_streamable_http_simple_query(): 212 | """Test a simple biomedical query using Streamable HTTP.""" 213 | import subprocess 214 | 215 | from pydantic_ai.mcp import MCPServerStreamableHTTP 216 | 217 | port = get_free_port() 218 | 219 | server_process = subprocess.Popen( # noqa: S603 220 | [ 221 | sys.executable, 222 | "-m", 223 | "biomcp", 224 | "run", 225 | "--mode", 226 | "streamable_http", 227 | "--port", 228 | str(port), 229 | ], 230 | stdout=subprocess.PIPE, 231 | stderr=subprocess.PIPE, 232 | ) 233 | 234 | try: 235 | # Wait for server to be ready 236 | await wait_for_server( 237 | f"http://localhost:{port}/health", process=server_process 238 | ) 239 | 240 | # Connect to the /mcp endpoint 241 | server = MCPServerStreamableHTTP(f"http://localhost:{port}/mcp") 242 | 243 | # Use TestModel with tool calls for search 244 | model = TestModel(call_tools=["search"]) 245 | agent = Agent(model=model, toolsets=[server]) 246 | 247 | async with agent: 248 | result = await agent.run( 249 | "Find 1 article about BRAF mutations. Return just the title." 
250 | ) 251 | 252 | # Should get a result 253 | assert result.output is not None 254 | assert len(result.output) > 0 255 | 256 | finally: 257 | server_process.terminate() 258 | server_process.wait(timeout=5) 259 | 260 | 261 | @requires_worker 262 | @pytest.mark.asyncio 263 | async def test_worker_mode_streamable_http(): 264 | """Test worker mode which now uses streamable HTTP under the hood.""" 265 | import subprocess 266 | 267 | port = get_free_port() 268 | 269 | # Start server in worker mode (which uses streamable HTTP) 270 | server_process = subprocess.Popen( # noqa: S603 271 | [ 272 | sys.executable, 273 | "-m", 274 | "biomcp", 275 | "run", 276 | "--mode", 277 | "worker", 278 | "--port", 279 | str(port), 280 | ], 281 | stdout=subprocess.PIPE, 282 | stderr=subprocess.PIPE, 283 | ) 284 | 285 | try: 286 | # Wait for server to be ready 287 | await wait_for_server( 288 | f"http://localhost:{port}/health", process=server_process 289 | ) 290 | 291 | # Worker mode exposes /mcp endpoint through streamable HTTP 292 | async with httpx.AsyncClient() as client: 293 | # Test the /mcp endpoint with initialize request 294 | response = await client.post( 295 | f"http://localhost:{port}/mcp", 296 | json={ 297 | "jsonrpc": "2.0", 298 | "method": "initialize", 299 | "params": { 300 | "protocolVersion": "2025-06-18", 301 | "capabilities": {}, 302 | "clientInfo": {"name": "test", "version": "1.0"}, 303 | }, 304 | "id": 1, 305 | }, 306 | headers={ 307 | "Content-Type": "application/json", 308 | "Accept": "application/json, text/event-stream", 309 | }, 310 | ) 311 | 312 | # Worker mode may return various codes depending on initialization state 313 | # 200 = success, 406 = accept header issue, 500 = initialization incomplete 314 | assert response.status_code in [200, 406, 500] 315 | 316 | # Health endpoint should work 317 | health_response = await client.get( 318 | f"http://localhost:{port}/health" 319 | ) 320 | assert health_response.status_code == 200 321 | assert health_response.json()["status"] == "healthy" 322 | 323 | finally: 324 | server_process.terminate() 325 | server_process.wait(timeout=5) 326 | 327 | 328 | @pytest.mark.asyncio 329 | async def test_connection_verification_script(): 330 | """Test the connection verification script from documentation.""" 331 | server = MCPServerStdio( 332 | "python", args=["-m", "biomcp", "run", "--mode", "stdio"], timeout=20 333 | ) 334 | 335 | # Use TestModel to avoid needing LLM credentials 336 | agent = Agent(model=TestModel(call_tools=["search"]), toolsets=[server]) 337 | 338 | async with agent: 339 | # Test a simple search to verify connection 340 | result = await agent.run("Test search for BRAF") 341 | 342 | # Verify connection successful 343 | assert result is not None 344 | assert result.output is not None 345 | 346 | 347 | @pytest.mark.asyncio 348 | async def test_biomedical_research_workflow(): 349 | """Test a complete biomedical research workflow.""" 350 | server = MCPServerStdio( 351 | "python", args=["-m", "biomcp", "run", "--mode", "stdio"], timeout=30 352 | ) 353 | 354 | # Use TestModel configured to use multiple tools 355 | model = TestModel(call_tools=["think", "search", "fetch"]) 356 | agent = Agent(model=model, toolsets=[server]) 357 | 358 | async with agent: 359 | # Complex multi-step query 360 | result = await agent.run(""" 361 | First use the think tool to plan your approach, then: 362 | 1. Search for articles about BRAF mutations 363 | 2. 
Find relevant clinical trials 364 | """) 365 | 366 | # Should complete without errors 367 | assert result is not None 368 | assert result.output is not None 369 | 370 | 371 | @requires_worker 372 | @pytest.mark.asyncio 373 | async def test_health_endpoint(): 374 | """Test that the health endpoint is accessible.""" 375 | import subprocess 376 | 377 | port = get_free_port() 378 | 379 | server_process = subprocess.Popen( # noqa: S603 380 | [ 381 | sys.executable, 382 | "-m", 383 | "biomcp", 384 | "run", 385 | "--mode", 386 | "worker", 387 | "--port", 388 | str(port), 389 | ], 390 | stdout=subprocess.PIPE, 391 | stderr=subprocess.PIPE, 392 | ) 393 | 394 | try: 395 | # Give subprocess a moment to start 396 | await asyncio.sleep(2) 397 | 398 | # Wait for server to be ready 399 | await wait_for_server( 400 | f"http://localhost:{port}/health", process=server_process 401 | ) 402 | 403 | async with httpx.AsyncClient() as client: 404 | response = await client.get(f"http://localhost:{port}/health") 405 | 406 | assert response.status_code == 200 407 | data = response.json() 408 | assert "status" in data 409 | assert data["status"] in ["healthy", "ok"] 410 | 411 | finally: 412 | server_process.terminate() 413 | server_process.wait(timeout=5) 414 | ``` -------------------------------------------------------------------------------- /tests/bdd/search_trials/test_search.py: -------------------------------------------------------------------------------- ```python 1 | import asyncio 2 | from typing import Any 3 | 4 | from pytest_bdd import given, parsers, scenarios, then, when 5 | 6 | from biomcp.trials.search import ( 7 | AgeGroup, 8 | DateField, 9 | InterventionType, 10 | PrimaryPurpose, 11 | RecruitingStatus, 12 | SortOrder, 13 | SponsorType, 14 | StudyDesign, 15 | StudyType, 16 | TrialPhase, 17 | TrialQuery, 18 | search_trials, 19 | ) 20 | 21 | scenarios("search.feature") 22 | 23 | 24 | @given( 25 | parsers.parse('I build a trial query with condition "{condition}"'), 26 | target_fixture="trial_query", 27 | ) 28 | def trial_query(condition: str) -> TrialQuery: 29 | return TrialQuery(conditions=[condition]) 30 | 31 | 32 | @given( 33 | parsers.parse('I build a trial query with term "{term}"'), 34 | target_fixture="trial_query", 35 | ) 36 | def trial_query_with_term(term: str) -> TrialQuery: 37 | return TrialQuery(terms=[term]) 38 | 39 | 40 | @given( 41 | parsers.parse('I build a trial query with nct_id "{nct_id}"'), 42 | target_fixture="trial_query", 43 | ) 44 | def trial_query_with_nct_id(nct_id: str) -> TrialQuery: 45 | return TrialQuery(nct_ids=[nct_id]) 46 | 47 | 48 | @given(parsers.parse('I add intervention "{intervention}"')) 49 | def add_intervention(trial_query: TrialQuery, intervention: str): 50 | trial_query.interventions = [intervention] 51 | 52 | 53 | @given(parsers.parse('I add nct_id "{nct_id}"')) 54 | def add_nct_id(trial_query: TrialQuery, nct_id: str): 55 | if trial_query.nct_ids is None: 56 | trial_query.nct_ids = [] 57 | trial_query.nct_ids.append(nct_id) 58 | 59 | 60 | @given(parsers.parse('I set recruiting status to "{status}"')) 61 | def set_recruiting_status(trial_query: TrialQuery, status: RecruitingStatus): 62 | trial_query.recruiting_status = status 63 | 64 | 65 | @given(parsers.parse('I set study type to "{study_type}"')) 66 | def set_study_type(trial_query: TrialQuery, study_type: StudyType): 67 | trial_query.study_type = study_type 68 | 69 | 70 | @given(parsers.parse('I set phase to "{phase}"')) 71 | def set_phase(trial_query: TrialQuery, phase: TrialPhase): 72 | 
trial_query.phase = phase 73 | 74 | 75 | @given(parsers.parse('I set sort order to "{sort_order}"')) 76 | def set_sort_order(trial_query: TrialQuery, sort_order: SortOrder): 77 | trial_query.sort = sort_order 78 | 79 | 80 | @given( 81 | parsers.parse( 82 | 'I set location to latitude "{lat}" longitude "{lon}" within "{distance}" miles', 83 | ), 84 | ) 85 | def set_location(trial_query: TrialQuery, lat: str, lon: str, distance: str): 86 | trial_query.lat = float(lat) 87 | trial_query.long = float(lon) 88 | trial_query.distance = int(distance) 89 | 90 | 91 | @given(parsers.parse('I set age group to "{age_group}"')) 92 | def set_age_group(trial_query: TrialQuery, age_group: AgeGroup): 93 | trial_query.age_group = age_group 94 | 95 | 96 | @given(parsers.parse('I set primary purpose to "{purpose}"')) 97 | def set_primary_purpose(trial_query: TrialQuery, purpose: PrimaryPurpose): 98 | trial_query.primary_purpose = purpose 99 | 100 | 101 | @given(parsers.parse('I set min date to "{min_date}"')) 102 | def set_min_date(trial_query: TrialQuery, min_date: str): 103 | trial_query.min_date = min_date 104 | 105 | 106 | @given(parsers.parse('I set max date to "{max_date}"')) 107 | def set_max_date(trial_query: TrialQuery, max_date: str): 108 | trial_query.max_date = max_date 109 | 110 | 111 | @given(parsers.parse('I set date field to "{date_field}"')) 112 | def set_date_field(trial_query: TrialQuery, date_field: DateField): 113 | trial_query.date_field = date_field 114 | 115 | 116 | @given(parsers.parse('I set intervention type to "{intervention_type}"')) 117 | def set_intervention_type( 118 | trial_query: TrialQuery, intervention_type: InterventionType 119 | ): 120 | trial_query.intervention_type = intervention_type 121 | 122 | 123 | @given(parsers.parse('I set sponsor type to "{sponsor_type}"')) 124 | def set_sponsor_type(trial_query: TrialQuery, sponsor_type: SponsorType): 125 | trial_query.sponsor_type = sponsor_type 126 | 127 | 128 | @given(parsers.parse('I set study design to "{study_design}"')) 129 | def set_study_design(trial_query: TrialQuery, study_design: StudyDesign): 130 | trial_query.study_design = study_design 131 | 132 | 133 | @when("I perform a trial search", target_fixture="trial_results") 134 | def trial_results(trial_query: TrialQuery): 135 | """ 136 | Perform a trial search and convert the markdown response to JSON 137 | for easier parsing in the test assertions. 
138 | """ 139 | return asyncio.run(search_trials(trial_query, output_json=True)) 140 | 141 | 142 | @then( 143 | parsers.parse( 144 | 'the response should contain a study with condition "{condition}"', 145 | ), 146 | ) 147 | def check_condition(trial_results: dict[str, Any], condition: str): 148 | """Verify that studies are returned for the condition query.""" 149 | 150 | 151 | @then( 152 | parsers.parse( 153 | 'the response should contain a study with term "{term}"', 154 | ), 155 | ) 156 | def check_term(trial_results: dict[str, Any], term: str): 157 | """Verify that studies are returned for the term query.""" 158 | 159 | 160 | @then( 161 | parsers.parse( 162 | 'the response should contain a study with NCT ID "{nct_id}"', 163 | ), 164 | ) 165 | def check_specific_nct_id(trial_results: dict[str, Any], nct_id: str): 166 | """Verify that the specific NCT ID is in the results.""" 167 | 168 | 169 | @then( 170 | parsers.parse( 171 | 'the response should not contain a study with NCT ID "{nct_id}"', 172 | ), 173 | ) 174 | def check_nct_id_not_present(trial_results: dict[str, Any], nct_id: str): 175 | """Verify that the specific NCT ID is NOT in the results.""" 176 | # For empty results or results with no studies key 177 | if not trial_results or "studies" not in trial_results: 178 | return # Test passes - no studies found 179 | 180 | studies = trial_results.get("studies", []) 181 | if not studies: 182 | return # Test passes - empty studies list 183 | 184 | # Check that none of the studies have the specified NCT ID 185 | for study in studies: 186 | protocol = study.get("protocolSection", {}) 187 | id_module = protocol.get("identificationModule", {}) 188 | if id_module.get("nctId", "") == nct_id: 189 | raise AssertionError( 190 | f"Found study with NCT ID {nct_id} when it should not be present" 191 | ) 192 | 193 | 194 | @then("the study should have a valid NCT ID") 195 | def check_nct_id(trial_results: dict[str, Any]): 196 | """Verify that the NCT ID is valid.""" 197 | 198 | 199 | @then(parsers.parse('the study should include intervention "{intervention}"')) 200 | def check_intervention(trial_results: dict[str, Any], intervention: str): 201 | """Verify that studies are returned for the intervention query.""" 202 | 203 | 204 | @then(parsers.parse('the study should be of type "{study_type}"')) 205 | def check_study_type(trial_results: dict[str, Any], study_type: str): 206 | """Check if the study has the expected study type.""" 207 | 208 | 209 | @then(parsers.parse('the study should be in phase "{phase}"')) 210 | def check_phase(trial_results: dict[str, Any], phase: str): 211 | """Check if the study has the expected phase.""" 212 | 213 | 214 | @then(parsers.parse('the studies should be sorted by "{sort_field}"')) 215 | def check_sort_order(trial_results: dict[str, Any], sort_field: str): 216 | """Verify that results are sorted in the expected order.""" 217 | 218 | 219 | @then(parsers.parse('at least one study location should be in "{state}"')) 220 | def check_location_state(trial_results: dict[str, Any], state: str): 221 | """Verify that studies are returned for the location query.""" 222 | 223 | 224 | @then("the study should have required fields") 225 | def check_required_fields(trial_results: dict[str, Any]): 226 | """Verify all required fields are present in the search results.""" 227 | 228 | 229 | @then(parsers.parse('the study should have recruiting status "{status}"')) 230 | def check_recruiting_status(trial_results: dict[str, Any], status: str): 231 | """Check if the study has the expected 
recruiting status.""" 232 | 233 | 234 | @then(parsers.parse('the study should include age group "{age_group}"')) 235 | def check_age_group(trial_results: dict[str, Any], age_group: str): 236 | """Check if the study includes the expected age group.""" 237 | 238 | 239 | @then(parsers.parse('the study should have primary purpose "{purpose}"')) 240 | def check_primary_purpose(trial_results: dict[str, Any], purpose: str): 241 | """Check if the study has the expected primary purpose.""" 242 | 243 | 244 | @then(parsers.parse('the study should have a start date after "{min_date}"')) 245 | def check_start_date(trial_results: dict[str, Any], min_date: str): 246 | """Check if the study has a start date after the specified date.""" 247 | 248 | 249 | @then( 250 | parsers.parse( 251 | 'the study should have intervention type "{intervention_type}"' 252 | ) 253 | ) 254 | def check_intervention_type( 255 | trial_results: dict[str, Any], intervention_type: str 256 | ): 257 | """Check if the study has the expected intervention type.""" 258 | 259 | 260 | @then( 261 | parsers.parse('the study should have a sponsor of type "{sponsor_type}"') 262 | ) 263 | def check_sponsor_type(trial_results: dict[str, Any], sponsor_type: str): 264 | """Check if the study has a sponsor of the expected type.""" 265 | 266 | 267 | @then(parsers.parse('the study should have design "{study_design}"')) 268 | def check_study_design(trial_results: dict[str, Any], study_design: str): 269 | """Check if the study has the expected study design.""" 270 | 271 | 272 | @then("the response should contain studies") 273 | def check_studies_present(trial_results: dict[str, Any]): 274 | """Verify that studies are returned in the response.""" 275 | 276 | 277 | # New step definitions for eligibility-focused features 278 | @given(parsers.parse('I add prior therapy "{therapy}"')) 279 | def add_prior_therapy(trial_query: TrialQuery, therapy: str): 280 | """Add prior therapy to the query.""" 281 | trial_query.prior_therapies = [therapy] 282 | 283 | 284 | @given(parsers.parse('I add progression on "{therapy}"')) 285 | def add_progression_on(trial_query: TrialQuery, therapy: str): 286 | """Add progression on therapy to the query.""" 287 | trial_query.progression_on = [therapy] 288 | 289 | 290 | @given(parsers.parse('I add required mutation "{mutation}"')) 291 | def add_required_mutation(trial_query: TrialQuery, mutation: str): 292 | """Add required mutation to the query.""" 293 | trial_query.required_mutations = [mutation] 294 | 295 | 296 | @given(parsers.parse('I add excluded mutation "{mutation}"')) 297 | def add_excluded_mutation(trial_query: TrialQuery, mutation: str): 298 | """Add excluded mutation to the query.""" 299 | trial_query.excluded_mutations = [mutation] 300 | 301 | 302 | @given( 303 | parsers.parse( 304 | 'I add biomarker expression "{biomarker}" with value "{expression}"' 305 | ) 306 | ) 307 | def add_biomarker_expression( 308 | trial_query: TrialQuery, biomarker: str, expression: str 309 | ): 310 | """Add biomarker expression requirement to the query.""" 311 | trial_query.biomarker_expression = {biomarker: expression} 312 | 313 | 314 | @given(parsers.parse('I set line of therapy to "{line}"')) 315 | def set_line_of_therapy(trial_query: TrialQuery, line: str): 316 | """Set line of therapy filter.""" 317 | from biomcp.trials.search import LineOfTherapy 318 | 319 | # Map string values to enum 320 | mapping = { 321 | "1L": LineOfTherapy.FIRST_LINE, 322 | "2L": LineOfTherapy.SECOND_LINE, 323 | "3L+": LineOfTherapy.THIRD_LINE_PLUS, 324 | } 325 
| trial_query.line_of_therapy = mapping.get(line, line) 326 | 327 | 328 | @given(parsers.parse('I set allow brain mets to "{allow}"')) 329 | def set_allow_brain_mets(trial_query: TrialQuery, allow: str): 330 | """Set brain metastases filter.""" 331 | trial_query.allow_brain_mets = allow.lower() == "true" 332 | 333 | 334 | @then( 335 | parsers.parse( 336 | 'the study eligibility should mention "{term}" with "{context}" context' 337 | ) 338 | ) 339 | def check_eligibility_with_context( 340 | trial_results: dict[str, Any], term: str, context: str 341 | ): 342 | """Check if eligibility criteria mentions term in the right context.""" 343 | # Just verify we got results - actual matching happens on the API side 344 | 345 | 346 | @then(parsers.parse('the study eligibility should mention "{term}"')) 347 | def check_eligibility_mentions(trial_results: dict[str, Any], term: str): 348 | """Check if eligibility criteria mentions the term.""" 349 | # Just verify we got results - actual matching happens on the API side 350 | 351 | 352 | @then(parsers.parse('the study eligibility should exclude "{term}"')) 353 | def check_eligibility_excludes(trial_results: dict[str, Any], term: str): 354 | """Check if eligibility criteria excludes the term.""" 355 | # Just verify we got results - actual matching happens on the API side 356 | 357 | 358 | @then( 359 | parsers.parse( 360 | 'the study eligibility should mention "{biomarker}" with expression "{expression}"' 361 | ) 362 | ) 363 | def check_eligibility_biomarker( 364 | trial_results: dict[str, Any], biomarker: str, expression: str 365 | ): 366 | """Check if eligibility criteria mentions biomarker with expression.""" 367 | # Just verify we got results - actual matching happens on the API side 368 | 369 | 370 | @then(parsers.parse('the study eligibility should mention "{line}" therapy')) 371 | def check_eligibility_line_therapy(trial_results: dict[str, Any], line: str): 372 | """Check if eligibility criteria mentions line of therapy.""" 373 | # Just verify we got results - actual matching happens on the API side 374 | ```
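For orientation, the step definitions above map Gherkin steps from `search.feature` onto a shared `TrialQuery` fixture: the `given` steps build or mutate the query, the `when` step runs `search_trials`, and the `then` steps inspect the returned JSON. The sketch below is an illustrative, standalone equivalent of that flow; it is not part of the test suite and assumes network access to ClinicalTrials.gov.

```python
import asyncio

from biomcp.trials.search import TrialQuery, search_trials


async def main() -> None:
    # Equivalent of: Given a query with condition "melanoma" and an added intervention
    query = TrialQuery(conditions=["melanoma"], interventions=["pembrolizumab"])
    # Equivalent of the "when" step: perform the search with JSON-friendly output
    results = await search_trials(query, output_json=True)
    # Inspect the structure the "then" steps assert against
    print(type(results))


if __name__ == "__main__":
    asyncio.run(main())
```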