#
tokens: 47258/50000 7/309 files (page 14/20)
lines: on (toggle) GitHub
raw markdown copy reset
This is page 14 of 20. To view the full context, open http://codebase.md/genomoncology/biomcp?lines=true&page={x}, replacing {x} with the page number you want (1 through 20).

# Directory Structure

```
├── .github
│   ├── actions
│   │   └── setup-python-env
│   │       └── action.yml
│   ├── dependabot.yml
│   └── workflows
│       ├── ci.yml
│       ├── deploy-docs.yml
│       ├── main.yml.disabled
│       ├── on-release-main.yml
│       └── validate-codecov-config.yml
├── .gitignore
├── .pre-commit-config.yaml
├── BIOMCP_DATA_FLOW.md
├── CHANGELOG.md
├── CNAME
├── codecov.yaml
├── docker-compose.yml
├── Dockerfile
├── docs
│   ├── apis
│   │   ├── error-codes.md
│   │   ├── overview.md
│   │   └── python-sdk.md
│   ├── assets
│   │   ├── biomcp-cursor-locations.png
│   │   ├── favicon.ico
│   │   ├── icon.png
│   │   ├── logo.png
│   │   ├── mcp_architecture.txt
│   │   └── remote-connection
│   │       ├── 00_connectors.png
│   │       ├── 01_add_custom_connector.png
│   │       ├── 02_connector_enabled.png
│   │       ├── 03_connect_to_biomcp.png
│   │       ├── 04_select_google_oauth.png
│   │       └── 05_success_connect.png
│   ├── backend-services-reference
│   │   ├── 01-overview.md
│   │   ├── 02-biothings-suite.md
│   │   ├── 03-cbioportal.md
│   │   ├── 04-clinicaltrials-gov.md
│   │   ├── 05-nci-cts-api.md
│   │   ├── 06-pubtator3.md
│   │   └── 07-alphagenome.md
│   ├── blog
│   │   ├── ai-assisted-clinical-trial-search-analysis.md
│   │   ├── images
│   │   │   ├── deep-researcher-video.png
│   │   │   ├── researcher-announce.png
│   │   │   ├── researcher-drop-down.png
│   │   │   ├── researcher-prompt.png
│   │   │   ├── trial-search-assistant.png
│   │   │   └── what_is_biomcp_thumbnail.png
│   │   └── researcher-persona-resource.md
│   ├── changelog.md
│   ├── CNAME
│   ├── concepts
│   │   ├── 01-what-is-biomcp.md
│   │   ├── 02-the-deep-researcher-persona.md
│   │   └── 03-sequential-thinking-with-the-think-tool.md
│   ├── developer-guides
│   │   ├── 01-server-deployment.md
│   │   ├── 02-contributing-and-testing.md
│   │   ├── 03-third-party-endpoints.md
│   │   ├── 04-transport-protocol.md
│   │   ├── 05-error-handling.md
│   │   ├── 06-http-client-and-caching.md
│   │   ├── 07-performance-optimizations.md
│   │   └── generate_endpoints.py
│   ├── faq-condensed.md
│   ├── FDA_SECURITY.md
│   ├── genomoncology.md
│   ├── getting-started
│   │   ├── 01-quickstart-cli.md
│   │   ├── 02-claude-desktop-integration.md
│   │   └── 03-authentication-and-api-keys.md
│   ├── how-to-guides
│   │   ├── 01-find-articles-and-cbioportal-data.md
│   │   ├── 02-find-trials-with-nci-and-biothings.md
│   │   ├── 03-get-comprehensive-variant-annotations.md
│   │   ├── 04-predict-variant-effects-with-alphagenome.md
│   │   ├── 05-logging-and-monitoring-with-bigquery.md
│   │   └── 06-search-nci-organizations-and-interventions.md
│   ├── index.md
│   ├── policies.md
│   ├── reference
│   │   ├── architecture-diagrams.md
│   │   ├── quick-architecture.md
│   │   ├── quick-reference.md
│   │   └── visual-architecture.md
│   ├── robots.txt
│   ├── stylesheets
│   │   ├── announcement.css
│   │   └── extra.css
│   ├── troubleshooting.md
│   ├── tutorials
│   │   ├── biothings-prompts.md
│   │   ├── claude-code-biomcp-alphagenome.md
│   │   ├── nci-prompts.md
│   │   ├── openfda-integration.md
│   │   ├── openfda-prompts.md
│   │   ├── pydantic-ai-integration.md
│   │   └── remote-connection.md
│   ├── user-guides
│   │   ├── 01-command-line-interface.md
│   │   ├── 02-mcp-tools-reference.md
│   │   └── 03-integrating-with-ides-and-clients.md
│   └── workflows
│       └── all-workflows.md
├── example_scripts
│   ├── mcp_integration.py
│   └── python_sdk.py
├── glama.json
├── LICENSE
├── lzyank.toml
├── Makefile
├── mkdocs.yml
├── package-lock.json
├── package.json
├── pyproject.toml
├── README.md
├── scripts
│   ├── check_docs_in_mkdocs.py
│   ├── check_http_imports.py
│   └── generate_endpoints_doc.py
├── smithery.yaml
├── src
│   └── biomcp
│       ├── __init__.py
│       ├── __main__.py
│       ├── articles
│       │   ├── __init__.py
│       │   ├── autocomplete.py
│       │   ├── fetch.py
│       │   ├── preprints.py
│       │   ├── search_optimized.py
│       │   ├── search.py
│       │   └── unified.py
│       ├── biomarkers
│       │   ├── __init__.py
│       │   └── search.py
│       ├── cbioportal_helper.py
│       ├── circuit_breaker.py
│       ├── cli
│       │   ├── __init__.py
│       │   ├── articles.py
│       │   ├── biomarkers.py
│       │   ├── diseases.py
│       │   ├── health.py
│       │   ├── interventions.py
│       │   ├── main.py
│       │   ├── openfda.py
│       │   ├── organizations.py
│       │   ├── server.py
│       │   ├── trials.py
│       │   └── variants.py
│       ├── connection_pool.py
│       ├── constants.py
│       ├── core.py
│       ├── diseases
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── domain_handlers.py
│       ├── drugs
│       │   ├── __init__.py
│       │   └── getter.py
│       ├── exceptions.py
│       ├── genes
│       │   ├── __init__.py
│       │   └── getter.py
│       ├── http_client_simple.py
│       ├── http_client.py
│       ├── individual_tools.py
│       ├── integrations
│       │   ├── __init__.py
│       │   ├── biothings_client.py
│       │   └── cts_api.py
│       ├── interventions
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── logging_filter.py
│       ├── metrics_handler.py
│       ├── metrics.py
│       ├── oncokb_helper.py
│       ├── openfda
│       │   ├── __init__.py
│       │   ├── adverse_events_helpers.py
│       │   ├── adverse_events.py
│       │   ├── cache.py
│       │   ├── constants.py
│       │   ├── device_events_helpers.py
│       │   ├── device_events.py
│       │   ├── drug_approvals.py
│       │   ├── drug_labels_helpers.py
│       │   ├── drug_labels.py
│       │   ├── drug_recalls_helpers.py
│       │   ├── drug_recalls.py
│       │   ├── drug_shortages_detail_helpers.py
│       │   ├── drug_shortages_helpers.py
│       │   ├── drug_shortages.py
│       │   ├── exceptions.py
│       │   ├── input_validation.py
│       │   ├── rate_limiter.py
│       │   ├── utils.py
│       │   └── validation.py
│       ├── organizations
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── parameter_parser.py
│       ├── query_parser.py
│       ├── query_router.py
│       ├── rate_limiter.py
│       ├── render.py
│       ├── request_batcher.py
│       ├── resources
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   ├── instructions.md
│       │   └── researcher.md
│       ├── retry.py
│       ├── router_handlers.py
│       ├── router.py
│       ├── shared_context.py
│       ├── thinking
│       │   ├── __init__.py
│       │   ├── sequential.py
│       │   └── session.py
│       ├── thinking_tool.py
│       ├── thinking_tracker.py
│       ├── trials
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   ├── nci_getter.py
│       │   ├── nci_search.py
│       │   └── search.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── cancer_types_api.py
│       │   ├── cbio_http_adapter.py
│       │   ├── endpoint_registry.py
│       │   ├── gene_validator.py
│       │   ├── metrics.py
│       │   ├── mutation_filter.py
│       │   ├── query_utils.py
│       │   ├── rate_limiter.py
│       │   └── request_cache.py
│       ├── variants
│       │   ├── __init__.py
│       │   ├── alphagenome.py
│       │   ├── cancer_types.py
│       │   ├── cbio_external_client.py
│       │   ├── cbioportal_mutations.py
│       │   ├── cbioportal_search_helpers.py
│       │   ├── cbioportal_search.py
│       │   ├── constants.py
│       │   ├── external.py
│       │   ├── filters.py
│       │   ├── getter.py
│       │   ├── links.py
│       │   ├── oncokb_client.py
│       │   ├── oncokb_models.py
│       │   └── search.py
│       └── workers
│           ├── __init__.py
│           ├── worker_entry_stytch.js
│           ├── worker_entry.js
│           └── worker.py
├── tests
│   ├── bdd
│   │   ├── cli_help
│   │   │   ├── help.feature
│   │   │   └── test_help.py
│   │   ├── conftest.py
│   │   ├── features
│   │   │   └── alphagenome_integration.feature
│   │   ├── fetch_articles
│   │   │   ├── fetch.feature
│   │   │   └── test_fetch.py
│   │   ├── get_trials
│   │   │   ├── get.feature
│   │   │   └── test_get.py
│   │   ├── get_variants
│   │   │   ├── get.feature
│   │   │   └── test_get.py
│   │   ├── search_articles
│   │   │   ├── autocomplete.feature
│   │   │   ├── search.feature
│   │   │   ├── test_autocomplete.py
│   │   │   └── test_search.py
│   │   ├── search_trials
│   │   │   ├── search.feature
│   │   │   └── test_search.py
│   │   ├── search_variants
│   │   │   ├── search.feature
│   │   │   └── test_search.py
│   │   └── steps
│   │       └── test_alphagenome_steps.py
│   ├── config
│   │   └── test_smithery_config.py
│   ├── conftest.py
│   ├── data
│   │   ├── ct_gov
│   │   │   ├── clinical_trials_api_v2.yaml
│   │   │   ├── trials_NCT04280705.json
│   │   │   └── trials_NCT04280705.txt
│   │   ├── myvariant
│   │   │   ├── myvariant_api.yaml
│   │   │   ├── myvariant_field_descriptions.csv
│   │   │   ├── variants_full_braf_v600e.json
│   │   │   ├── variants_full_braf_v600e.txt
│   │   │   └── variants_part_braf_v600_multiple.json
│   │   ├── oncokb_mock_responses.json
│   │   ├── openfda
│   │   │   ├── drugsfda_detail.json
│   │   │   ├── drugsfda_search.json
│   │   │   ├── enforcement_detail.json
│   │   │   └── enforcement_search.json
│   │   └── pubtator
│   │       ├── pubtator_autocomplete.json
│   │       └── pubtator3_paper.txt
│   ├── integration
│   │   ├── test_oncokb_integration.py
│   │   ├── test_openfda_integration.py
│   │   ├── test_preprints_integration.py
│   │   ├── test_simple.py
│   │   └── test_variants_integration.py
│   ├── tdd
│   │   ├── articles
│   │   │   ├── test_autocomplete.py
│   │   │   ├── test_cbioportal_integration.py
│   │   │   ├── test_fetch.py
│   │   │   ├── test_preprints.py
│   │   │   ├── test_search.py
│   │   │   └── test_unified.py
│   │   ├── conftest.py
│   │   ├── drugs
│   │   │   ├── __init__.py
│   │   │   └── test_drug_getter.py
│   │   ├── openfda
│   │   │   ├── __init__.py
│   │   │   ├── test_adverse_events.py
│   │   │   ├── test_device_events.py
│   │   │   ├── test_drug_approvals.py
│   │   │   ├── test_drug_labels.py
│   │   │   ├── test_drug_recalls.py
│   │   │   ├── test_drug_shortages.py
│   │   │   └── test_security.py
│   │   ├── test_biothings_integration_real.py
│   │   ├── test_biothings_integration.py
│   │   ├── test_circuit_breaker.py
│   │   ├── test_concurrent_requests.py
│   │   ├── test_connection_pool.py
│   │   ├── test_domain_handlers.py
│   │   ├── test_drug_approvals.py
│   │   ├── test_drug_recalls.py
│   │   ├── test_drug_shortages.py
│   │   ├── test_endpoint_documentation.py
│   │   ├── test_error_scenarios.py
│   │   ├── test_europe_pmc_fetch.py
│   │   ├── test_mcp_integration.py
│   │   ├── test_mcp_tools.py
│   │   ├── test_metrics.py
│   │   ├── test_nci_integration.py
│   │   ├── test_nci_mcp_tools.py
│   │   ├── test_network_policies.py
│   │   ├── test_offline_mode.py
│   │   ├── test_openfda_unified.py
│   │   ├── test_pten_r173_search.py
│   │   ├── test_render.py
│   │   ├── test_request_batcher.py.disabled
│   │   ├── test_retry.py
│   │   ├── test_router.py
│   │   ├── test_shared_context.py.disabled
│   │   ├── test_unified_biothings.py
│   │   ├── thinking
│   │   │   ├── __init__.py
│   │   │   └── test_sequential.py
│   │   ├── trials
│   │   │   ├── test_backward_compatibility.py
│   │   │   ├── test_getter.py
│   │   │   └── test_search.py
│   │   ├── utils
│   │   │   ├── test_gene_validator.py
│   │   │   ├── test_mutation_filter.py
│   │   │   ├── test_rate_limiter.py
│   │   │   └── test_request_cache.py
│   │   ├── variants
│   │   │   ├── constants.py
│   │   │   ├── test_alphagenome_api_key.py
│   │   │   ├── test_alphagenome_comprehensive.py
│   │   │   ├── test_alphagenome.py
│   │   │   ├── test_cbioportal_mutations.py
│   │   │   ├── test_cbioportal_search.py
│   │   │   ├── test_external_integration.py
│   │   │   ├── test_external.py
│   │   │   ├── test_extract_gene_aa_change.py
│   │   │   ├── test_filters.py
│   │   │   ├── test_getter.py
│   │   │   ├── test_links.py
│   │   │   ├── test_oncokb_client.py
│   │   │   ├── test_oncokb_helper.py
│   │   │   └── test_search.py
│   │   └── workers
│   │       └── test_worker_sanitization.js
│   └── test_pydantic_ai_integration.py
├── THIRD_PARTY_ENDPOINTS.md
├── tox.ini
├── uv.lock
└── wrangler.toml
```

# Files

--------------------------------------------------------------------------------
/tests/tdd/test_router.py:
--------------------------------------------------------------------------------

```python
  1 | """Comprehensive tests for the unified router module."""
  2 | 
  3 | import json
  4 | from unittest.mock import patch
  5 | 
  6 | import pytest
  7 | 
  8 | from biomcp.exceptions import (
  9 |     InvalidDomainError,
 10 |     InvalidParameterError,
 11 |     QueryParsingError,
 12 |     SearchExecutionError,
 13 | )
 14 | from biomcp.router import fetch, format_results, search
 15 | 
 16 | 
 17 | class TestFormatResults:
 18 |     """Tests for format_results: per-domain shaping and bad input."""
 19 | 
 20 |     def test_format_article_results(self):
 21 |         """Article rows get id, title, text and a PubMed URL."""
 22 |         results = [
 23 |             {
 24 |                 "pmid": "12345",
 25 |                 "title": "Test Article",
 26 |                 "abstract": "This is a test abstract",
 27 |                 # No url key is given; author notes an input url is ignored
 28 |             }
 29 |         ]
 30 | 
 31 |         # Stub get_thinking_reminder with "" so no reminder is produced
 32 |         with patch("biomcp.router.get_thinking_reminder", return_value=""):
 33 |             formatted = format_results(results, "article", 1, 10, 1)
 34 | 
 35 |         assert "results" in formatted
 36 |         assert len(formatted["results"]) == 1
 37 |         result = formatted["results"][0]
 38 |         assert result["id"] == "12345"
 39 |         assert result["title"] == "Test Article"
 40 |         assert "test abstract" in result["text"]
 41 |         assert result["url"] == "https://pubmed.ncbi.nlm.nih.gov/12345/"
 42 | 
 43 |     def test_format_trial_results_api_v2(self):
 44 |         """Trial rows in API v2 shape map to id, title, text and url."""
 45 |         results = [
 46 |             {
 47 |                 "protocolSection": {
 48 |                     "identificationModule": {
 49 |                         "nctId": "NCT12345",
 50 |                         "briefTitle": "Test Trial",
 51 |                     },
 52 |                     "descriptionModule": {
 53 |                         "briefSummary": "This is a test trial summary"
 54 |                     },
 55 |                     "statusModule": {"overallStatus": "RECRUITING"},
 56 |                     "designModule": {"phases": ["PHASE3"]},
 57 |                 }
 58 |             }
 59 |         ]
 60 | 
 61 |         # Stub get_thinking_reminder with "" so no reminder is produced
 62 |         with patch("biomcp.router.get_thinking_reminder", return_value=""):
 63 |             formatted = format_results(results, "trial", 1, 10, 1)
 64 | 
 65 |         assert "results" in formatted
 66 |         assert len(formatted["results"]) == 1
 67 |         result = formatted["results"][0]
 68 |         assert result["id"] == "NCT12345"
 69 |         assert result["title"] == "Test Trial"
 70 |         assert "test trial summary" in result["text"]
 71 |         assert "NCT12345" in result["url"]
 72 | 
 73 |     def test_format_trial_results_legacy(self):
 74 |         """Trial rows with legacy column names map to id/title/text."""
 75 |         results = [
 76 |             {
 77 |                 "NCT Number": "NCT67890",
 78 |                 "Study Title": "Legacy Trial",
 79 |                 "Brief Summary": "Legacy trial summary",
 80 |                 "Study Status": "COMPLETED",
 81 |                 "Phases": "Phase 2",
 82 |             }
 83 |         ]
 84 | 
 85 |         # Stub get_thinking_reminder with "" so no reminder is produced
 86 |         with patch("biomcp.router.get_thinking_reminder", return_value=""):
 87 |             formatted = format_results(results, "trial", 1, 10, 1)
 88 | 
 89 |         assert "results" in formatted
 90 |         assert len(formatted["results"]) == 1
 91 |         result = formatted["results"][0]
 92 |         assert result["id"] == "NCT67890"
 93 |         assert result["title"] == "Legacy Trial"
 94 |         assert "Legacy trial summary" in result["text"]
 95 | 
 96 |     def test_format_variant_results(self):
 97 |         """Variant rows expose _id, gene, significance and rsid."""
 98 |         results = [
 99 |             {
100 |                 "_id": "chr7:g.140453136A>T",
101 |                 "dbsnp": {"rsid": "rs121913529"},
102 |                 "dbnsfp": {"genename": "BRAF"},
103 |                 "clinvar": {"rcv": {"clinical_significance": "Pathogenic"}},
104 |             }
105 |         ]
106 | 
107 |         # Stub get_thinking_reminder with "" so no reminder is produced
108 |         with patch("biomcp.router.get_thinking_reminder", return_value=""):
109 |             formatted = format_results(results, "variant", 1, 10, 1)
110 | 
111 |         assert "results" in formatted
112 |         assert len(formatted["results"]) == 1
113 |         result = formatted["results"][0]
114 |         assert result["id"] == "chr7:g.140453136A>T"
115 |         assert "BRAF" in result["title"]
116 |         assert "Pathogenic" in result["text"]
117 |         assert "rs121913529" in result["url"]
118 | 
119 |     def test_format_results_invalid_domain(self):
120 |         """An unknown domain raises InvalidDomainError naming it."""
121 |         with pytest.raises(InvalidDomainError) as exc_info:
122 |             format_results([], "invalid_domain", 1, 10, 0)
123 | 
124 |         assert "Unknown domain: invalid_domain" in str(exc_info.value)
125 | 
126 |     def test_format_results_malformed_data(self):
127 |         """None rows are dropped; rows lacking fields get an empty id."""
128 |         results = [
129 |             {"title": "Good Article", "pmid": "123"},
130 |             None,  # Malformed - will be skipped
131 |             {
132 |                 "invalid": "data"
133 |             },  # Missing required fields but won't fail (treated as preprint)
134 |         ]
135 | 
136 |         # Stub get_thinking_reminder with "" so no reminder is produced
137 |         with patch("biomcp.router.get_thinking_reminder", return_value=""):
138 |             formatted = format_results(results, "article", 1, 10, 3)
139 | 
140 |         # Should skip None but include the third (treated as preprint with empty fields)
141 |         assert len(formatted["results"]) == 2
142 |         assert formatted["results"][0]["id"] == "123"
143 |         assert formatted["results"][1]["id"] == ""  # Empty ID for invalid data
144 | 
145 | 
146 | @pytest.mark.asyncio
147 | class TestSearchFunction:
148 |     """Tests for async search(): dispatch, validation and parsing."""
149 | 
150 |     async def test_search_article_domain(self):
151 |         """domain='article' returns the row from the patched search."""
152 |         mock_result = json.dumps([
153 |             {"pmid": "123", "title": "Test", "abstract": "Abstract"}
154 |         ])
155 | 
156 |         with patch(
157 |             "biomcp.articles.unified.search_articles_unified"
158 |         ) as mock_search:
159 |             mock_search.return_value = mock_result
160 | 
161 |             # Stub get_thinking_reminder with "" so no reminder is produced
162 |             with patch("biomcp.router.get_thinking_reminder", return_value=""):
163 |                 result = await search(
164 |                     query="",
165 |                     domain="article",
166 |                     genes="BRAF",
167 |                     diseases=["cancer"],
168 |                     page_size=10,
169 |                 )
170 | 
171 |             assert "results" in result
172 |             assert len(result["results"]) == 1
173 |             assert result["results"][0]["id"] == "123"
174 | 
175 |     async def test_search_trial_domain(self):
176 |         """domain='trial' calls the patched search_trials exactly once."""
177 |         mock_result = json.dumps({
178 |             "studies": [
179 |                 {
180 |                     "protocolSection": {
181 |                         "identificationModule": {"nctId": "NCT123"},
182 |                     }
183 |                 }
184 |             ]
185 |         })
186 | 
187 |         with patch("biomcp.trials.search.search_trials") as mock_search:
188 |             mock_search.return_value = mock_result
189 | 
190 |             # Stub get_thinking_reminder with "" so no reminder is produced
191 |             with patch("biomcp.router.get_thinking_reminder", return_value=""):
192 |                 result = await search(
193 |                     query="",
194 |                     domain="trial",
195 |                     conditions=["cancer"],
196 |                     phase="Phase 3",
197 |                     page_size=20,
198 |                 )
199 | 
200 |             assert "results" in result
201 |             mock_search.assert_called_once()
202 | 
203 |     async def test_search_variant_domain(self):
204 |         """domain='variant' returns one result from search_variants."""
205 |         mock_result = json.dumps([
206 |             {"_id": "rs123", "gene": {"symbol": "BRAF"}}
207 |         ])
208 | 
209 |         with patch("biomcp.variants.search.search_variants") as mock_search:
210 |             mock_search.return_value = mock_result
211 | 
212 |             # Stub get_thinking_reminder with "" so no reminder is produced
213 |             with patch("biomcp.router.get_thinking_reminder", return_value=""):
214 |                 result = await search(
215 |                     query="",
216 |                     domain="variant",
217 |                     genes="BRAF",
218 |                     significance="pathogenic",
219 |                     page_size=10,
220 |                 )
221 | 
222 |             assert "results" in result
223 |             assert len(result["results"]) == 1
224 | 
225 |     async def test_search_unified_query(self):
226 |         """A query without a domain is forwarded to _unified_search."""
227 |         with patch("biomcp.router._unified_search") as mock_unified:
228 |             mock_unified.return_value = {
229 |                 "results": [{"id": "1", "title": "Test"}]
230 |             }
231 | 
232 |             result = await search(
233 |                 query="gene:BRAF AND disease:cancer",
234 |                 max_results_per_domain=20,
235 |             )
236 | 
237 |             assert "results" in result
238 |             mock_unified.assert_called_once_with(
239 |                 query="gene:BRAF AND disease:cancer",
240 |                 max_results_per_domain=20,
241 |                 domains=None,
242 |                 explain_query=False,
243 |             )
244 | 
245 |     async def test_search_no_domain_or_query(self):
246 |         """An empty query with no domain raises InvalidParameterError."""
247 |         with pytest.raises(InvalidParameterError) as exc_info:
248 |             await search(query="")
249 | 
250 |         assert "query or domain" in str(exc_info.value)
251 | 
252 |     async def test_search_invalid_domain(self):
253 |         """An unknown domain raises InvalidDomainError."""
254 |         with pytest.raises(InvalidDomainError):
255 |             await search(query="", domain="invalid_domain")
256 | 
257 |     async def test_search_get_schema(self):
258 |         """get_schema=True returns domains and field listings."""
259 |         result = await search(query="", get_schema=True)
260 | 
261 |         assert "domains" in result
262 |         assert "cross_domain_fields" in result
263 |         assert "domain_fields" in result
264 |         assert isinstance(result["cross_domain_fields"], dict)
265 | 
266 |     async def test_search_pagination_validation(self):
267 |         """page=0 raises InvalidParameterError that mentions 'page'."""
268 |         with pytest.raises(InvalidParameterError) as exc_info:
269 |             await search(
270 |                 query="",
271 |                 domain="article",
272 |                 page=0,  # Invalid - must be >= 1
273 |                 page_size=10,
274 |             )
275 | 
276 |         assert "page" in str(exc_info.value)
277 | 
278 |     async def test_search_parameter_parsing(self):
279 |         """JSON-array and comma-separated strings both become lists."""
280 |         mock_result = json.dumps([])
281 | 
282 |         with patch(
283 |             "biomcp.articles.unified.search_articles_unified"
284 |         ) as mock_search:
285 |             mock_search.return_value = mock_result
286 | 
287 |             # Test with JSON array string
288 |             await search(
289 |                 query="",
290 |                 domain="article",
291 |                 genes='["BRAF", "KRAS"]',
292 |                 diseases="cancer,melanoma",  # Comma-separated
293 |             )
294 | 
295 |             # First positional arg of the patched call is the parsed request
296 |             call_args = mock_search.call_args[0][0]
297 |             assert call_args.genes == ["BRAF", "KRAS"]
298 |             assert call_args.diseases == ["cancer", "melanoma"]
299 | 
300 | 
301 | @pytest.mark.asyncio
302 | class TestFetchFunction:
303 |     """Tests for async fetch(): per-domain fetches and ID detection."""
304 | 
305 |     async def test_fetch_article(self):
306 |         """Fetching by PMID returns id, title, full text and metadata."""
307 |         mock_result = json.dumps([
308 |             {
309 |                 "pmid": 12345,
310 |                 "title": "Test Article",
311 |                 "abstract": "Full abstract",
312 |                 "full_text": "Full text content",
313 |             }
314 |         ])
315 | 
316 |         with patch("biomcp.articles.fetch.fetch_articles") as mock_fetch:
317 |             mock_fetch.return_value = mock_result
318 | 
319 |             result = await fetch(
320 |                 domain="article",
321 |                 id="12345",
322 |             )
323 | 
324 |             assert result["id"] == "12345"
325 |             assert result["title"] == "Test Article"
326 |             assert result["text"] == "Full text content"
327 |             assert "metadata" in result
328 | 
329 |     async def test_fetch_article_invalid_pmid(self):
330 |         """A non-PMID, non-DOI id yields an error dict, not a raise."""
331 |         result = await fetch(domain="article", id="not_a_number")
332 | 
333 |         # Should return an error since "not_a_number" is neither a valid PMID nor DOI
334 |         assert "error" in result
335 |         assert "Invalid identifier format" in result["error"]
336 |         assert "not_a_number" in result["error"]
337 | 
338 |     async def test_fetch_trial_all_sections(self):
339 |         """detail='all' puts locations/outcomes/references in metadata."""
340 |         mock_protocol = json.dumps({
341 |             "title": "Test Trial",
342 |             "nct_id": "NCT123",
343 |             "brief_summary": "Summary",
344 |         })
345 |         mock_locations = json.dumps({"locations": [{"city": "Boston"}]})
346 |         mock_outcomes = json.dumps({
347 |             "outcomes": {"primary_outcomes": ["Outcome1"]}
348 |         })
349 |         mock_references = json.dumps({"references": [{"pmid": "456"}]})
350 | 
351 |         with (
352 |             patch("biomcp.trials.getter._trial_protocol") as mock_p,
353 |             patch("biomcp.trials.getter._trial_locations") as mock_l,
354 |             patch("biomcp.trials.getter._trial_outcomes") as mock_o,
355 |             patch("biomcp.trials.getter._trial_references") as mock_r,
356 |         ):
357 |             mock_p.return_value = mock_protocol
358 |             mock_l.return_value = mock_locations
359 |             mock_o.return_value = mock_outcomes
360 |             mock_r.return_value = mock_references
361 | 
362 |             result = await fetch(domain="trial", id="NCT123", detail="all")
363 | 
364 |             assert result["id"] == "NCT123"
365 |             assert "metadata" in result
366 |             assert "locations" in result["metadata"]
367 |             assert "outcomes" in result["metadata"]
368 |             assert "references" in result["metadata"]
369 | 
370 |     async def test_fetch_trial_invalid_detail(self):
371 |         """An unsupported detail value raises InvalidParameterError."""
372 |         with pytest.raises(InvalidParameterError) as exc_info:
373 |             await fetch(
374 |                 domain="trial",
375 |                 id="NCT123",
376 |                 detail="invalid_section",
377 |             )
378 | 
379 |         assert "one of:" in str(exc_info.value)
380 | 
381 |     async def test_fetch_variant(self):
382 |         """A variant reports TCGA data in text and keeps external_links."""
383 |         mock_result = json.dumps([
384 |             {
385 |                 "_id": "rs123",
386 |                 "gene": {"symbol": "BRAF"},
387 |                 "clinvar": {"clinical_significance": "Pathogenic"},
388 |                 "tcga": {"cancer_types": {}},
389 |                 "external_links": {"dbSNP": "https://example.com"},
390 |             }
391 |         ])
392 | 
393 |         with patch("biomcp.variants.getter.get_variant") as mock_get:
394 |             mock_get.return_value = mock_result
395 | 
396 |             result = await fetch(domain="variant", id="rs123")
397 | 
398 |             assert result["id"] == "rs123"
399 |             assert "TCGA Data: Available" in result["text"]
400 |             assert "external_links" in result["metadata"]
401 | 
402 |     async def test_fetch_variant_list_response(self):
403 |         """A list payload from get_variant still yields the variant id."""
404 |         mock_result = json.dumps([
405 |             {"_id": "rs123", "gene": {"symbol": "BRAF"}}
406 |         ])
407 | 
408 |         with patch("biomcp.variants.getter.get_variant") as mock_get:
409 |             mock_get.return_value = mock_result
410 | 
411 |             result = await fetch(domain="variant", id="rs123")
412 | 
413 |             assert result["id"] == "rs123"
414 | 
415 |     async def test_fetch_invalid_domain(self):
416 |         """An unknown domain raises InvalidDomainError."""
417 |         with pytest.raises(InvalidDomainError):
418 |             await fetch(domain="invalid", id="123")
419 | 
420 |     async def test_fetch_error_handling(self):
421 |         """A fetch_articles exception becomes SearchExecutionError."""
422 |         with patch("biomcp.articles.fetch.fetch_articles") as mock_fetch:
423 |             mock_fetch.side_effect = Exception("API Error")
424 | 
425 |             with pytest.raises(SearchExecutionError) as exc_info:
426 |                 await fetch(domain="article", id="123")
427 | 
428 |             assert "Failed to execute search" in str(exc_info.value)
429 | 
430 |     async def test_fetch_domain_auto_detection_pmid(self):
431 |         """With no domain given, a numeric id is fetched as an article."""
432 |         with patch("biomcp.articles.fetch._article_details") as mock_fetch:
433 |             mock_fetch.return_value = json.dumps([
434 |                 {"pmid": "12345", "title": "Test"}
435 |             ])
436 | 
437 |             # Numeric ID should auto-detect as article
438 |             result = await fetch(id="12345")
439 |             assert result["id"] == "12345"
440 |             mock_fetch.assert_called_once()
441 | 
442 |     async def test_fetch_domain_auto_detection_nct(self):
443 |         """With no domain given, an NCT id is fetched as a trial."""
444 |         with patch("biomcp.trials.getter.get_trial") as mock_get:
445 |             mock_get.return_value = json.dumps({
446 |                 "protocolSection": {
447 |                     "identificationModule": {"briefTitle": "Test Trial"}
448 |                 }
449 |             })
450 | 
451 |             # NCT ID should auto-detect as trial
452 |             result = await fetch(id="NCT12345")
453 |             assert "NCT12345" in result["url"]
454 |             mock_get.assert_called()
455 | 
456 |     async def test_fetch_domain_auto_detection_doi(self):
457 |         """With no domain given, a DOI is fetched as an article."""
458 |         with patch("biomcp.articles.fetch._article_details") as mock_fetch:
459 |             mock_fetch.return_value = json.dumps([
460 |                 {"doi": "10.1038/nature12345", "title": "Test"}
461 |             ])
462 | 
463 |             # DOI should auto-detect as article
464 |             await fetch(id="10.1038/nature12345")
465 |             mock_fetch.assert_called_once()
466 | 
467 |     async def test_fetch_domain_auto_detection_variant(self):
468 |         """With no domain given, rsIDs and HGVS ids go to get_variant."""
469 |         with patch("biomcp.variants.getter.get_variant") as mock_get:
470 |             mock_get.return_value = json.dumps([{"_id": "rs12345"}])
471 | 
472 |             # rsID should auto-detect as variant
473 |             await fetch(id="rs12345")
474 |             mock_get.assert_called_once()
475 | 
476 |         # Repeat with HGVS notation, using a fresh mock so the count restarts
477 |         with patch("biomcp.variants.getter.get_variant") as mock_get:
478 |             mock_get.return_value = json.dumps([
479 |                 {"_id": "chr7:g.140453136A>T"}
480 |             ])
481 | 
482 |             await fetch(id="chr7:g.140453136A>T")
483 |             mock_get.assert_called_once()
484 | 
485 | 
@pytest.mark.asyncio
class TestUnifiedSearch:
    """Exercise the router's internal ``_unified_search`` coroutine."""

    async def test_unified_search_explain_query(self):
        """With explain_query=True the result describes the query itself."""
        from biomcp.router import _unified_search

        explanation = await _unified_search(
            query="gene:BRAF AND disease:cancer", explain_query=True
        )

        for expected_key in (
            "original_query",
            "parsed_structure",
            "routing_plan",
            "schema",
        ):
            assert expected_key in explanation

    async def test_unified_search_execution(self):
        """A normal search returns a dict holding a ``results`` list."""
        from biomcp.router import _unified_search

        routed_output = {
            "articles": json.dumps([{"pmid": "123", "title": "Article 1"}])
        }

        with patch(
            "biomcp.query_router.execute_routing_plan",
            return_value=routed_output,
        ):
            outcome = await _unified_search(
                query="gene:BRAF", max_results_per_domain=10
            )

            assert "results" in outcome
            assert isinstance(outcome["results"], list)

    async def test_unified_search_parse_error(self):
        """A parser failure is re-raised as QueryParsingError."""
        from biomcp.router import _unified_search

        with patch(
            "biomcp.query_parser.QueryParser.parse",
            side_effect=Exception("Parse error"),
        ):
            with pytest.raises(QueryParsingError):
                await _unified_search(
                    query="invalid::query", max_results_per_domain=10
                )
530 | 
```

--------------------------------------------------------------------------------
/src/biomcp/integrations/biothings_client.py:
--------------------------------------------------------------------------------

```python
  1 | """BioThings API client for unified access to the BioThings suite.
  2 | 
  3 | The BioThings suite (https://biothings.io) provides high-performance biomedical
  4 | data APIs including:
  5 | - MyGene.info - Gene annotations and information
  6 | - MyVariant.info - Genetic variant annotations (existing integration enhanced)
  7 | - MyDisease.info - Disease ontology and synonyms
  8 | - MyChem.info - Drug/chemical annotations and information
  9 | 
 10 | This module provides a centralized client for interacting with all BioThings APIs,
 11 | handling common concerns like error handling, rate limiting, and response parsing.
 12 | While MyVariant.info has specialized modules for complex variant operations, this
 13 | client provides the base layer for all BioThings API interactions.
 14 | """
 15 | 
 16 | import logging
 17 | from typing import Any
 18 | from urllib.parse import quote
 19 | 
 20 | from pydantic import BaseModel, Field
 21 | 
 22 | from .. import http_client
 23 | from ..constants import (
 24 |     MYVARIANT_GET_URL,
 25 | )
 26 | 
 27 | logger = logging.getLogger(__name__)
 28 | 
# BioThings API endpoints
# Each service gets three constants: the versioned base URL, the "/query"
# search endpoint, and the per-record endpoint that IDs are appended to.

# MyGene.info (API v3)
MYGENE_BASE_URL = "https://mygene.info/v3"
MYGENE_QUERY_URL = f"{MYGENE_BASE_URL}/query"
MYGENE_GET_URL = f"{MYGENE_BASE_URL}/gene"

# MyDisease.info (API v1)
MYDISEASE_BASE_URL = "https://mydisease.info/v1"
MYDISEASE_QUERY_URL = f"{MYDISEASE_BASE_URL}/query"
MYDISEASE_GET_URL = f"{MYDISEASE_BASE_URL}/disease"

# MyChem.info (API v1)
MYCHEM_BASE_URL = "https://mychem.info/v1"
MYCHEM_QUERY_URL = f"{MYCHEM_BASE_URL}/query"
MYCHEM_GET_URL = f"{MYCHEM_BASE_URL}/chem"
 41 | 
 42 | 
class GeneInfo(BaseModel):
    """Gene information from MyGene.info.

    Only ``gene_id`` is required; it is read from the ``_id`` key of the
    source document. Every other field is optional.
    """

    # Populated from the "_id" key of the response.
    gene_id: str = Field(alias="_id")
    symbol: str | None = None
    name: str | None = None
    summary: str | None = None
    # Defaults to an empty list when the key is absent.
    # NOTE(review): a scalar (single string) alias in the response would
    # presumably fail validation against list[str] — confirm what the API
    # returns for genes with exactly one alias.
    alias: list[str] | None = Field(default_factory=list)
    # Accepted as either an int or a string.
    entrezgene: int | str | None = None
    # Nested source sections are kept as raw dictionaries.
    ensembl: dict[str, Any] | None = None
    refseq: dict[str, Any] | None = None
    type_of_gene: str | None = None
    taxid: int | None = None
 56 | 
 57 | 
class DiseaseInfo(BaseModel):
    """Disease information from MyDisease.info.

    Only ``disease_id`` is required; it is read from the ``_id`` key of the
    source document. Every other field is optional.
    """

    # Populated from the "_id" key of the response.
    disease_id: str = Field(alias="_id")
    name: str | None = None
    # Raw "mondo" section of the document, kept as a dictionary.
    mondo: dict[str, Any] | None = None
    definition: str | None = None
    # Defaults to an empty list when the key is absent.
    synonyms: list[str] | None = Field(default_factory=list)
    xrefs: dict[str, Any] | None = None
    phenotypes: list[dict[str, Any]] | None = None
 68 | 
 69 | 
class DrugInfo(BaseModel):
    """Drug/chemical information from MyChem.info.

    Only ``drug_id`` is required; it is read from the ``_id`` key of the
    source document. Every other field is optional.
    """

    # Populated from the "_id" key of the response.
    drug_id: str = Field(alias="_id")
    name: str | None = None
    # Defaults to an empty list when the key is absent.
    tradename: list[str] | None = Field(default_factory=list)
    # Identifiers in external databases.
    drugbank_id: str | None = None
    chebi_id: str | None = None
    chembl_id: str | None = None
    pubchem_cid: str | None = None
    # Accepted either as a plain string or as a raw dictionary.
    unii: str | dict[str, Any] | None = None
    inchikey: str | None = None
    formula: str | None = None
    description: str | None = None
    indication: str | None = None
    pharmacology: dict[str, Any] | None = None
    mechanism_of_action: str | None = None
 87 | 
 88 | 
 89 | class BioThingsClient:
 90 |     """Unified client for BioThings APIs (MyGene, MyVariant, MyDisease, MyChem)."""
 91 | 
    def __init__(self):
        """Initialize the BioThings client.

        Stores the module-level logger on the instance as ``self.logger``;
        no other state is set up here.
        """
        self.logger = logger
 95 | 
 96 |     async def get_gene_info(
 97 |         self, gene_id_or_symbol: str, fields: list[str] | None = None
 98 |     ) -> GeneInfo | None:
 99 |         """Get gene information from MyGene.info.
100 | 
101 |         Args:
102 |             gene_id_or_symbol: Gene ID (Entrez, Ensembl) or symbol (e.g., "TP53")
103 |             fields: Optional list of fields to return
104 | 
105 |         Returns:
106 |             GeneInfo object or None if not found
107 |         """
108 |         try:
109 |             # First, try direct GET (works for Entrez IDs)
110 |             if gene_id_or_symbol.isdigit():
111 |                 return await self._get_gene_by_id(gene_id_or_symbol, fields)
112 | 
113 |             # For symbols, we need to query first
114 |             query_result = await self._query_gene(gene_id_or_symbol)
115 |             if not query_result:
116 |                 return None
117 | 
118 |             # Get the best match
119 |             gene_id = query_result[0].get("_id")
120 |             if not gene_id:
121 |                 return None
122 | 
123 |             # Now get full details
124 |             return await self._get_gene_by_id(gene_id, fields)
125 | 
126 |         except Exception as e:
127 |             self.logger.warning(
128 |                 f"Failed to get gene info for {gene_id_or_symbol}: {e}"
129 |             )
130 |             return None
131 | 
132 |     async def _query_gene(self, symbol: str) -> list[dict[str, Any]] | None:
133 |         """Query MyGene.info for a gene symbol."""
134 |         params = {
135 |             "q": f"symbol:{quote(symbol)}",
136 |             "species": "human",
137 |             "fields": "_id,symbol,name,taxid",
138 |             "size": 5,
139 |         }
140 | 
141 |         response, error = await http_client.request_api(
142 |             url=MYGENE_QUERY_URL,
143 |             request=params,
144 |             method="GET",
145 |             domain="mygene",
146 |         )
147 | 
148 |         if error or not response:
149 |             return None
150 | 
151 |         hits = response.get("hits", [])
152 |         # Filter for human genes (taxid 9606)
153 |         human_hits = [h for h in hits if h.get("taxid") == 9606]
154 |         return human_hits if human_hits else hits
155 | 
156 |     async def _get_gene_by_id(
157 |         self, gene_id: str, fields: list[str] | None = None
158 |     ) -> GeneInfo | None:
159 |         """Get gene details by ID from MyGene.info."""
160 |         if fields is None:
161 |             fields = [
162 |                 "symbol",
163 |                 "name",
164 |                 "summary",
165 |                 "alias",
166 |                 "type_of_gene",
167 |                 "ensembl",
168 |                 "refseq",
169 |                 "entrezgene",
170 |             ]
171 | 
172 |         params = {"fields": ",".join(fields)}
173 | 
174 |         response, error = await http_client.request_api(
175 |             url=f"{MYGENE_GET_URL}/{gene_id}",
176 |             request=params,
177 |             method="GET",
178 |             domain="mygene",
179 |         )
180 | 
181 |         if error or not response:
182 |             return None
183 | 
184 |         try:
185 |             return GeneInfo(**response)
186 |         except Exception as e:
187 |             self.logger.warning(f"Failed to parse gene response: {e}")
188 |             return None
189 | 
190 |     async def batch_get_genes(
191 |         self, gene_ids: list[str], fields: list[str] | None = None
192 |     ) -> list[GeneInfo]:
193 |         """Get multiple genes in a single request.
194 | 
195 |         Args:
196 |             gene_ids: List of gene IDs or symbols
197 |             fields: Optional list of fields to return
198 | 
199 |         Returns:
200 |             List of GeneInfo objects
201 |         """
202 |         if not gene_ids:
203 |             return []
204 | 
205 |         if fields is None:
206 |             fields = ["symbol", "name", "summary", "alias", "type_of_gene"]
207 | 
208 |         # MyGene supports POST for batch queries
209 |         data = {
210 |             "ids": ",".join(gene_ids),
211 |             "fields": ",".join(fields),
212 |             "species": "human",
213 |         }
214 | 
215 |         response, error = await http_client.request_api(
216 |             url=MYGENE_GET_URL,
217 |             request=data,
218 |             method="POST",
219 |             domain="mygene",
220 |         )
221 | 
222 |         if error or not response:
223 |             return []
224 | 
225 |         results = []
226 |         for item in response:
227 |             try:
228 |                 if "notfound" not in item:
229 |                     results.append(GeneInfo(**item))
230 |             except Exception as e:
231 |                 self.logger.warning(f"Failed to parse gene in batch: {e}")
232 |                 continue
233 | 
234 |         return results
235 | 
236 |     async def get_disease_info(
237 |         self, disease_id_or_name: str, fields: list[str] | None = None
238 |     ) -> DiseaseInfo | None:
239 |         """Get disease information from MyDisease.info.
240 | 
241 |         Args:
242 |             disease_id_or_name: Disease ID (MONDO, DOID) or name
243 |             fields: Optional list of fields to return
244 | 
245 |         Returns:
246 |             DiseaseInfo object or None if not found
247 |         """
248 |         try:
249 |             # Check if it's an ID (starts with known prefixes)
250 |             if any(
251 |                 disease_id_or_name.upper().startswith(prefix)
252 |                 for prefix in ["MONDO:", "DOID:", "OMIM:", "MESH:"]
253 |             ):
254 |                 return await self._get_disease_by_id(
255 |                     disease_id_or_name, fields
256 |                 )
257 | 
258 |             # Otherwise, query by name
259 |             query_result = await self._query_disease(disease_id_or_name)
260 |             if not query_result:
261 |                 return None
262 | 
263 |             # Get the best match
264 |             disease_id = query_result[0].get("_id")
265 |             if not disease_id:
266 |                 return None
267 | 
268 |             # Now get full details
269 |             return await self._get_disease_by_id(disease_id, fields)
270 | 
271 |         except Exception as e:
272 |             self.logger.warning(
273 |                 f"Failed to get disease info for {disease_id_or_name}: {e}"
274 |             )
275 |             return None
276 | 
277 |     async def _query_disease(self, name: str) -> list[dict[str, Any]] | None:
278 |         """Query MyDisease.info for a disease name."""
279 |         params = {
280 |             "q": quote(name),
281 |             "fields": "_id,name,mondo",
282 |             "size": 10,
283 |         }
284 | 
285 |         response, error = await http_client.request_api(
286 |             url=MYDISEASE_QUERY_URL,
287 |             request=params,
288 |             method="GET",
289 |             domain="mydisease",
290 |         )
291 | 
292 |         if error or not response:
293 |             return None
294 | 
295 |         return response.get("hits", [])
296 | 
297 |     async def _get_disease_by_id(
298 |         self, disease_id: str, fields: list[str] | None = None
299 |     ) -> DiseaseInfo | None:
300 |         """Get disease details by ID from MyDisease.info."""
301 |         if fields is None:
302 |             fields = [
303 |                 "name",
304 |                 "mondo",
305 |                 "definition",
306 |                 "synonyms",
307 |                 "xrefs",
308 |                 "phenotypes",
309 |             ]
310 | 
311 |         params = {"fields": ",".join(fields)}
312 | 
313 |         response, error = await http_client.request_api(
314 |             url=f"{MYDISEASE_GET_URL}/{quote(disease_id, safe='')}",
315 |             request=params,
316 |             method="GET",
317 |             domain="mydisease",
318 |         )
319 | 
320 |         if error or not response:
321 |             return None
322 | 
323 |         try:
324 |             # Extract definition from mondo if available
325 |             if "mondo" in response and isinstance(response["mondo"], dict):
326 |                 if (
327 |                     "definition" in response["mondo"]
328 |                     and "definition" not in response
329 |                 ):
330 |                     response["definition"] = response["mondo"]["definition"]
331 |                 # Extract synonyms from mondo
332 |                 if "synonym" in response["mondo"]:
333 |                     mondo_synonyms = response["mondo"]["synonym"]
334 |                     if isinstance(mondo_synonyms, dict):
335 |                         # Handle exact synonyms
336 |                         exact = mondo_synonyms.get("exact", [])
337 |                         if isinstance(exact, list):
338 |                             response["synonyms"] = exact
339 |                     elif isinstance(mondo_synonyms, list):
340 |                         response["synonyms"] = mondo_synonyms
341 | 
342 |             return DiseaseInfo(**response)
343 |         except Exception as e:
344 |             self.logger.warning(f"Failed to parse disease response: {e}")
345 |             return None
346 | 
347 |     async def get_disease_synonyms(self, disease_id_or_name: str) -> list[str]:
348 |         """Get disease synonyms for query expansion.
349 | 
350 |         Args:
351 |             disease_id_or_name: Disease ID or name
352 | 
353 |         Returns:
354 |             List of synonyms including the original term
355 |         """
356 |         disease_info = await self.get_disease_info(disease_id_or_name)
357 |         if not disease_info:
358 |             return [disease_id_or_name]
359 | 
360 |         synonyms = [disease_id_or_name]
361 |         if disease_info.name and disease_info.name != disease_id_or_name:
362 |             synonyms.append(disease_info.name)
363 | 
364 |         if disease_info.synonyms:
365 |             synonyms.extend(disease_info.synonyms)
366 | 
367 |         # Remove duplicates while preserving order
368 |         seen = set()
369 |         unique_synonyms = []
370 |         for syn in synonyms:
371 |             if syn.lower() not in seen:
372 |                 seen.add(syn.lower())
373 |                 unique_synonyms.append(syn)
374 | 
375 |         return unique_synonyms[
376 |             :5
377 |         ]  # Limit to top 5 to avoid overly broad searches
378 | 
379 |     async def get_drug_info(
380 |         self, drug_id_or_name: str, fields: list[str] | None = None
381 |     ) -> DrugInfo | None:
382 |         """Get drug/chemical information from MyChem.info.
383 | 
384 |         Args:
385 |             drug_id_or_name: Drug ID (DrugBank, ChEMBL, etc.) or name
386 |             fields: Optional list of fields to return
387 | 
388 |         Returns:
389 |             DrugInfo object or None if not found
390 |         """
391 |         try:
392 |             # Check if it's an ID (starts with known prefixes)
393 |             if any(
394 |                 drug_id_or_name.upper().startswith(prefix)
395 |                 for prefix in ["DRUGBANK:", "DB", "CHEMBL", "CHEBI:", "CID"]
396 |             ):
397 |                 return await self._get_drug_by_id(drug_id_or_name, fields)
398 | 
399 |             # Otherwise, query by name
400 |             query_result = await self._query_drug(drug_id_or_name)
401 |             if not query_result:
402 |                 return None
403 | 
404 |             # Get the best match
405 |             drug_id = query_result[0].get("_id")
406 |             if not drug_id:
407 |                 return None
408 | 
409 |             # Now get full details
410 |             return await self._get_drug_by_id(drug_id, fields)
411 | 
412 |         except Exception as e:
413 |             self.logger.warning(
414 |                 f"Failed to get drug info for {drug_id_or_name}: {e}"
415 |             )
416 |             return None
417 | 
418 |     async def _query_drug(self, name: str) -> list[dict[str, Any]] | None:
419 |         """Query MyChem.info for a drug name."""
420 |         params = {
421 |             "q": quote(name),
422 |             "fields": "_id,name,drugbank.name,chebi.name,chembl.pref_name,unii.display_name",
423 |             "size": 10,
424 |         }
425 | 
426 |         response, error = await http_client.request_api(
427 |             url=MYCHEM_QUERY_URL,
428 |             request=params,
429 |             method="GET",
430 |             domain="mychem",
431 |         )
432 | 
433 |         if error or not response:
434 |             return None
435 | 
436 |         hits = response.get("hits", [])
437 | 
438 |         # Sort hits to prioritize those with actual drug names
439 |         def score_hit(hit):
440 |             score = hit.get("_score", 0)
441 |             # Boost score if hit has drug name fields
442 |             if hit.get("drugbank", {}).get("name"):
443 |                 score += 10
444 |             if hit.get("chembl", {}).get("pref_name"):
445 |                 score += 5
446 |             if hit.get("unii", {}).get("display_name"):
447 |                 score += 3
448 |             return score
449 | 
450 |         hits.sort(key=score_hit, reverse=True)
451 |         return hits
452 | 
453 |     async def _get_drug_by_id(
454 |         self, drug_id: str, fields: list[str] | None = None
455 |     ) -> DrugInfo | None:
456 |         """Get drug details by ID from MyChem.info."""
457 |         if fields is None:
458 |             fields = [
459 |                 "name",
460 |                 "drugbank",
461 |                 "chebi",
462 |                 "chembl",
463 |                 "pubchem",
464 |                 "unii",
465 |                 "inchikey",
466 |                 "formula",
467 |                 "description",
468 |                 "indication",
469 |                 "pharmacology",
470 |                 "mechanism_of_action",
471 |             ]
472 | 
473 |         params = {"fields": ",".join(fields)}
474 | 
475 |         response, error = await http_client.request_api(
476 |             url=f"{MYCHEM_GET_URL}/{quote(drug_id, safe='')}",
477 |             request=params,
478 |             method="GET",
479 |             domain="mychem",
480 |         )
481 | 
482 |         if error or not response:
483 |             return None
484 | 
485 |         try:
486 |             # Handle array response (multiple results)
487 |             if isinstance(response, list):
488 |                 if not response:
489 |                     return None
490 |                 # Take the first result
491 |                 response = response[0]
492 | 
493 |             # Extract fields from nested structures
494 |             self._extract_drugbank_fields(response)
495 |             self._extract_chebi_fields(response)
496 |             self._extract_chembl_fields(response)
497 |             self._extract_pubchem_fields(response)
498 |             self._extract_unii_fields(response)
499 | 
500 |             return DrugInfo(**response)
501 |         except Exception as e:
502 |             self.logger.warning(f"Failed to parse drug response: {e}")
503 |             return None
504 | 
505 |     def _extract_drugbank_fields(self, response: dict[str, Any]) -> None:
506 |         """Extract DrugBank fields from response."""
507 |         if "drugbank" in response and isinstance(response["drugbank"], dict):
508 |             db = response["drugbank"]
509 |             response["drugbank_id"] = db.get("id")
510 |             response["name"] = response.get("name") or db.get("name")
511 |             response["tradename"] = db.get("products", {}).get("name", [])
512 |             if isinstance(response["tradename"], str):
513 |                 response["tradename"] = [response["tradename"]]
514 |             response["indication"] = db.get("indication")
515 |             response["mechanism_of_action"] = db.get("mechanism_of_action")
516 |             response["description"] = db.get("description")
517 | 
518 |     def _extract_chebi_fields(self, response: dict[str, Any]) -> None:
519 |         """Extract ChEBI fields from response."""
520 |         if "chebi" in response and isinstance(response["chebi"], dict):
521 |             response["chebi_id"] = response["chebi"].get("id")
522 |             if not response.get("name"):
523 |                 response["name"] = response["chebi"].get("name")
524 | 
525 |     def _extract_chembl_fields(self, response: dict[str, Any]) -> None:
526 |         """Extract ChEMBL fields from response."""
527 |         if "chembl" in response and isinstance(response["chembl"], dict):
528 |             response["chembl_id"] = response["chembl"].get(
529 |                 "molecule_chembl_id"
530 |             )
531 |             if not response.get("name"):
532 |                 response["name"] = response["chembl"].get("pref_name")
533 | 
534 |     def _extract_pubchem_fields(self, response: dict[str, Any]) -> None:
535 |         """Extract PubChem fields from response."""
536 |         if "pubchem" in response and isinstance(response["pubchem"], dict):
537 |             response["pubchem_cid"] = str(response["pubchem"].get("cid", ""))
538 | 
539 |     def _extract_unii_fields(self, response: dict[str, Any]) -> None:
540 |         """Extract UNII fields from response."""
541 |         if "unii" in response and isinstance(response["unii"], dict):
542 |             unii_data = response["unii"]
543 |             # Set UNII code
544 |             response["unii"] = unii_data.get("unii", "")
545 |             # Use display name as drug name if not already set
546 |             if not response.get("name") and unii_data.get("display_name"):
547 |                 response["name"] = unii_data["display_name"]
548 |             # Use NCIT description if no description
549 |             if not response.get("description") and unii_data.get(
550 |                 "ncit_description"
551 |             ):
552 |                 response["description"] = unii_data["ncit_description"]
553 | 
554 |     async def get_variant_info(
555 |         self, variant_id: str, fields: list[str] | None = None
556 |     ) -> dict[str, Any] | None:
557 |         """Get variant information from MyVariant.info.
558 | 
559 |         This is a wrapper around the existing MyVariant integration.
560 | 
561 |         Args:
562 |             variant_id: Variant ID (rsID, HGVS)
563 |             fields: Optional list of fields to return
564 | 
565 |         Returns:
566 |             Variant data dictionary or None if not found
567 |         """
568 |         params = {"fields": "all" if fields is None else ",".join(fields)}
569 | 
570 |         response, error = await http_client.request_api(
571 |             url=f"{MYVARIANT_GET_URL}/{variant_id}",
572 |             request=params,
573 |             method="GET",
574 |             domain="myvariant",
575 |         )
576 | 
577 |         if error or not response:
578 |             return None
579 | 
580 |         return response
581 | 
```

--------------------------------------------------------------------------------
/tests/tdd/variants/test_oncokb_client.py:
--------------------------------------------------------------------------------

```python
  1 | """Comprehensive unit tests for OncoKB client."""
  2 | 
  3 | import json
  4 | import os
  5 | from pathlib import Path
  6 | from unittest.mock import patch
  7 | 
  8 | import pytest
  9 | 
 10 | from biomcp.http_client import RequestError
 11 | from biomcp.variants.oncokb_client import (
 12 |     ONCOKB_DEMO_URL,
 13 |     ONCOKB_PROD_URL,
 14 |     OncoKBClient,
 15 | )
 16 | 
 17 | 
 18 | # Load mock responses from test data file
 19 | def load_mock_responses() -> dict:
 20 |     """Load mock OncoKB responses from JSON file."""
 21 |     test_data_dir = Path(__file__).parent.parent.parent / "data"
 22 |     mock_file = test_data_dir / "oncokb_mock_responses.json"
 23 |     with open(mock_file) as f:
 24 |         return json.load(f)
 25 | 
 26 | 
 27 | @pytest.fixture
 28 | def mock_responses():
 29 |     """Fixture providing mock OncoKB responses."""
 30 |     return load_mock_responses()
 31 | 
 32 | 
 33 | class TestOncoKBClient:
 34 |     """Test suite for OncoKBClient functionality."""
 35 | 
 36 |     def test_client_initialization_demo(self):
 37 |         """Test client initializes with demo URL when no token present."""
 38 |         with patch.dict(os.environ, {}, clear=True):
 39 |             client = OncoKBClient()
 40 |             assert client.base_url == ONCOKB_DEMO_URL
 41 |             assert client.is_demo is True
 42 |             assert "Accept" in client.headers
 43 |             assert "Authorization" not in client.headers
 44 | 
 45 |     def test_client_initialization_prod(self):
 46 |         """Test client switches to production URL when token is set."""
 47 |         with (
 48 |             patch.dict(os.environ, {"ONCOKB_TOKEN": "test-token"}, clear=True),
 49 |             patch("biomcp.variants.oncokb_client.ONCOKB_TOKEN", "test-token"),
 50 |         ):
 51 |             client = OncoKBClient()
 52 |             assert client.base_url == ONCOKB_PROD_URL
 53 |             assert client.is_demo is False
 54 |             assert "Authorization" in client.headers
 55 |             assert client.headers["Authorization"] == "Bearer test-token"
 56 | 
 57 |     def test_token_detection_with_bearer_prefix(self):
 58 |         """Test that Bearer prefix is not duplicated if already present."""
 59 |         with (
 60 |             patch.dict(
 61 |                 os.environ,
 62 |                 {"ONCOKB_TOKEN": "Bearer existing-token"},
 63 |                 clear=True,
 64 |             ),
 65 |             patch(
 66 |                 "biomcp.variants.oncokb_client.ONCOKB_TOKEN",
 67 |                 "Bearer existing-token",
 68 |             ),
 69 |         ):
 70 |             client = OncoKBClient()
 71 |             assert client.headers["Authorization"] == "Bearer existing-token"
 72 |             assert not client.headers["Authorization"].startswith(
 73 |                 "Bearer Bearer"
 74 |             )
 75 | 
 76 |     def test_server_selection_demo_mode(self):
 77 |         """Test demo server selection when no token is configured."""
 78 |         with patch.dict(os.environ, {}, clear=True):
 79 |             client = OncoKBClient()
 80 |             assert client.base_url == ONCOKB_DEMO_URL
 81 |             assert client.is_demo is True
 82 | 
 83 |     def test_server_selection_prod_mode(self):
 84 |         """Test production server selection when token is configured."""
 85 |         token = "my-oncokb-token"  # noqa: S105 - test token
 86 |         with (
 87 |             patch.dict(os.environ, {"ONCOKB_TOKEN": token}, clear=True),
 88 |             patch("biomcp.variants.oncokb_client.ONCOKB_TOKEN", token),
 89 |         ):
 90 |             client = OncoKBClient()
 91 |             assert client.base_url == ONCOKB_PROD_URL
 92 |             assert client.is_demo is False
 93 | 
 94 |     @pytest.mark.asyncio
 95 |     async def test_get_curated_genes_success(self, mock_responses):
 96 |         """Test successful retrieval of curated genes list."""
 97 |         with patch.dict(os.environ, {}, clear=True):
 98 |             client = OncoKBClient()
 99 | 
100 |             mock_genes = mock_responses["allCuratedGenes"]
101 | 
102 |             with patch(
103 |                 "biomcp.variants.oncokb_client.request_api"
104 |             ) as mock_request:
105 |                 mock_request.return_value = (mock_genes, None)
106 | 
107 |                 result, error = await client.get_curated_genes()
108 | 
109 |                 # Verify result
110 |                 assert error is None
111 |                 assert result is not None
112 |                 assert isinstance(result, list)
113 |                 assert len(result) == 3
114 | 
115 |                 # Check BRAF entry
116 |                 braf = next(
117 |                     (g for g in result if g["hugoSymbol"] == "BRAF"), None
118 |                 )
119 |                 assert braf is not None
120 |                 assert braf["entrezGeneId"] == 673
121 |                 assert braf["geneType"] == "ONCOGENE"
122 |                 assert "BRAF" in braf["summary"]
123 | 
124 |                 # Check TP53 entry
125 |                 tp53 = next(
126 |                     (g for g in result if g["hugoSymbol"] == "TP53"), None
127 |                 )
128 |                 assert tp53 is not None
129 |                 assert tp53["geneType"] == "TSG"
130 |                 assert tp53["entrezGeneId"] == 7157
131 | 
132 |                 # Verify API was called correctly
133 |                 mock_request.assert_called_once()
134 |                 call_kwargs = mock_request.call_args[1]
135 |                 assert call_kwargs["domain"] == "oncokb"
136 |                 assert call_kwargs["endpoint_key"] == "oncokb_curated_genes"
137 |                 assert call_kwargs["cache_ttl"] == 86400  # 24 hours
138 | 
139 |     @pytest.mark.asyncio
140 |     async def test_get_curated_genes_api_error(self):
141 |         """Test handling of API errors in get_curated_genes."""
142 |         with patch.dict(os.environ, {}, clear=True):
143 |             client = OncoKBClient()
144 | 
145 |             error_response = RequestError(
146 |                 code=500, message="Internal server error"
147 |             )
148 | 
149 |             with patch(
150 |                 "biomcp.variants.oncokb_client.request_api"
151 |             ) as mock_request:
152 |                 mock_request.return_value = (None, error_response)
153 | 
154 |                 result, error = await client.get_curated_genes()
155 | 
156 |                 assert result is None
157 |                 assert error is not None
158 |                 assert error.code == 500
159 |                 assert "Internal server error" in error.message
160 | 
161 |     @pytest.mark.asyncio
162 |     async def test_get_curated_genes_unexpected_format(self):
163 |         """Test handling of unexpected response format."""
164 |         with patch.dict(os.environ, {}, clear=True):
165 |             client = OncoKBClient()
166 | 
167 |             # Return dict instead of list
168 |             with patch(
169 |                 "biomcp.variants.oncokb_client.request_api"
170 |             ) as mock_request:
171 |                 mock_request.return_value = ({"error": "not a list"}, None)
172 | 
173 |                 result, error = await client.get_curated_genes()
174 | 
175 |                 assert result is None
176 |                 assert error is not None
177 |                 assert "Unexpected response format" in error.message
178 | 
179 |     @pytest.mark.asyncio
180 |     async def test_get_curated_genes_exception_handling(self):
181 |         """Test exception handling in get_curated_genes."""
182 |         with patch.dict(os.environ, {}, clear=True):
183 |             client = OncoKBClient()
184 | 
185 |             with patch(
186 |                 "biomcp.variants.oncokb_client.request_api"
187 |             ) as mock_request:
188 |                 mock_request.side_effect = ValueError("Unexpected error")
189 | 
190 |                 result, error = await client.get_curated_genes()
191 | 
192 |                 assert result is None
193 |                 assert error is not None
194 |                 assert "Failed to fetch curated genes" in error.message
195 | 
196 |     @pytest.mark.asyncio
197 |     async def test_get_gene_annotation_success(self, mock_responses):
198 |         """Test successful retrieval of BRAF gene annotation."""
199 |         with patch.dict(os.environ, {}, clear=True):
200 |             client = OncoKBClient()
201 | 
202 |             mock_annotation = mock_responses["genesByHugoSymbol"][0]
203 | 
204 |             with patch(
205 |                 "biomcp.variants.oncokb_client.request_api"
206 |             ) as mock_request:
207 |                 mock_request.return_value = (mock_annotation, None)
208 | 
209 |                 result, error = await client.get_gene_annotation("BRAF")
210 | 
211 |                 # Verify result
212 |                 assert error is None
213 |                 assert result is not None
214 |                 assert result["hugoSymbol"] == "BRAF"
215 |                 assert result["entrezGeneId"] == 673
216 |                 assert result["geneType"] == "ONCOGENE"
217 |                 assert "geneAliases" in result
218 |                 assert "BRAF1" in result["geneAliases"]
219 | 
220 |                 # Verify API was called correctly
221 |                 mock_request.assert_called_once()
222 |                 call_kwargs = mock_request.call_args[1]
223 |                 assert call_kwargs["domain"] == "oncokb"
224 |                 assert call_kwargs["endpoint_key"] == "oncokb_gene_annotation"
225 |                 assert call_kwargs["cache_ttl"] == 3600  # 1 hour
226 | 
227 |     @pytest.mark.asyncio
228 |     async def test_get_gene_annotation_multiple_genes(self, mock_responses):
229 |         """Test annotation retrieval for multiple different genes."""
230 |         with patch.dict(os.environ, {}, clear=True):
231 |             client = OncoKBClient()
232 | 
233 |             # Test BRAF
234 |             braf_annotation = mock_responses["genesByHugoSymbol"][0]
235 |             with patch(
236 |                 "biomcp.variants.oncokb_client.request_api"
237 |             ) as mock_request:
238 |                 mock_request.return_value = (braf_annotation, None)
239 |                 result, error = await client.get_gene_annotation("BRAF")
240 |                 assert error is None
241 |                 assert result["hugoSymbol"] == "BRAF"
242 | 
243 |             # Test ROS1
244 |             ros1_annotation = mock_responses["genesByHugoSymbol"][1]
245 |             with patch(
246 |                 "biomcp.variants.oncokb_client.request_api"
247 |             ) as mock_request:
248 |                 mock_request.return_value = (ros1_annotation, None)
249 |                 result, error = await client.get_gene_annotation("ROS1")
250 |                 assert error is None
251 |                 assert result["hugoSymbol"] == "ROS1"
252 |                 assert result["geneType"] == "ONCOGENE"
253 | 
254 |             # Test TP53
255 |             tp53_annotation = mock_responses["genesByHugoSymbol"][2]
256 |             with patch(
257 |                 "biomcp.variants.oncokb_client.request_api"
258 |             ) as mock_request:
259 |                 mock_request.return_value = (tp53_annotation, None)
260 |                 result, error = await client.get_gene_annotation("TP53")
261 |                 assert error is None
262 |                 assert result["hugoSymbol"] == "TP53"
263 |                 assert result["geneType"] == "TSG"
264 | 
265 |     @pytest.mark.asyncio
266 |     async def test_get_gene_annotation_api_error(self):
267 |         """Test handling of API errors in get_gene_annotation."""
268 |         with patch.dict(os.environ, {}, clear=True):
269 |             client = OncoKBClient()
270 | 
271 |             error_response = RequestError(code=404, message="Gene not found")
272 | 
273 |             with patch(
274 |                 "biomcp.variants.oncokb_client.request_api"
275 |             ) as mock_request:
276 |                 mock_request.return_value = (None, error_response)
277 | 
278 |                 result, error = await client.get_gene_annotation("INVALID")
279 | 
280 |                 assert result is None
281 |                 assert error is not None
282 |                 assert error.code == 404
283 | 
284 |     @pytest.mark.asyncio
285 |     async def test_get_gene_annotation_unexpected_format(self):
286 |         """Test handling of unexpected response format in gene annotation."""
287 |         with patch.dict(os.environ, {}, clear=True):
288 |             client = OncoKBClient()
289 | 
290 |             # Return list instead of dict
291 |             with patch(
292 |                 "biomcp.variants.oncokb_client.request_api"
293 |             ) as mock_request:
294 |                 mock_request.return_value = (["not", "a", "dict"], None)
295 | 
296 |                 result, error = await client.get_gene_annotation("BRAF")
297 | 
298 |                 assert result is None
299 |                 assert error is not None
300 |                 assert "Unexpected response format" in error.message
301 | 
302 |     @pytest.mark.asyncio
303 |     async def test_get_variant_annotation_success(self, mock_responses):
304 |         """Test successful retrieval of BRAF V600E variant annotation."""
305 |         with patch.dict(os.environ, {}, clear=True):
306 |             client = OncoKBClient()
307 | 
308 |             mock_annotation = mock_responses["variantAnnotation"][
309 |                 "BRAF_V600E_melanoma"
310 |             ]
311 | 
312 |             with patch(
313 |                 "biomcp.variants.oncokb_client.request_api"
314 |             ) as mock_request:
315 |                 mock_request.return_value = (mock_annotation, None)
316 | 
317 |                 result, error = await client.get_variant_annotation(
318 |                     "BRAF", "V600E"
319 |                 )
320 | 
321 |                 # Verify result
322 |                 assert error is None
323 |                 assert result is not None
324 | 
325 |                 # Check query details
326 |                 query = result["query"]
327 |                 assert query["hugoSymbol"] == "BRAF"
328 |                 assert query["alteration"] == "V600E"
329 |                 assert query["entrezGeneId"] == 673
330 | 
331 |                 # Check oncogenicity
332 |                 assert result["oncogenic"] == "Oncogenic"
333 |                 assert result["mutationEffect"]["knownEffect"] == (
334 |                     "Gain-of-function"
335 |                 )
336 | 
337 |                 # Check evidence levels
338 |                 assert result["highestSensitiveLevel"] == "LEVEL_1"
339 |                 assert result["highestFdaLevel"] == "LEVEL_Fda2"
340 |                 assert result["hotspot"] is True
341 | 
342 |                 # Check treatments
343 |                 treatments = result["treatments"]
344 |                 assert len(treatments) > 0
345 |                 dabrafenib_treatment = treatments[0]
346 |                 assert dabrafenib_treatment["level"] == "LEVEL_1"
347 |                 assert len(dabrafenib_treatment["drugs"]) > 0
348 |                 assert dabrafenib_treatment["drugs"][0]["drugName"] == (
349 |                     "Dabrafenib"
350 |                 )
351 | 
352 |                 # Verify API was called correctly
353 |                 mock_request.assert_called_once()
354 |                 call_kwargs = mock_request.call_args[1]
355 |                 assert call_kwargs["domain"] == "oncokb"
356 |                 assert (
357 |                     call_kwargs["endpoint_key"] == "oncokb_variant_annotation"
358 |                 )
359 |                 assert call_kwargs["cache_ttl"] == 3600  # 1 hour
360 | 
361 |     @pytest.mark.asyncio
362 |     async def test_get_variant_annotation_parameters(self):
363 |         """Test that variant annotation sends correct parameters."""
364 |         with patch.dict(os.environ, {}, clear=True):
365 |             client = OncoKBClient()
366 | 
367 |             with patch(
368 |                 "biomcp.variants.oncokb_client.request_api"
369 |             ) as mock_request:
370 |                 mock_request.return_value = (
371 |                     {"query": {}, "oncogenic": "Oncogenic"},
372 |                     None,
373 |                 )
374 | 
375 |                 await client.get_variant_annotation("BRAF", "V600E")
376 | 
377 |                 # Verify parameters
378 |                 call_kwargs = mock_request.call_args[1]
379 |                 request_params = call_kwargs["request"]
380 |                 assert request_params["hugoSymbol"] == "BRAF"
381 |                 assert request_params["alteration"] == "V600E"
382 |                 assert "_headers" in request_params
383 | 
384 |     @pytest.mark.asyncio
385 |     async def test_get_variant_annotation_api_error(self):
386 |         """Test handling of API errors in get_variant_annotation."""
387 |         with patch.dict(os.environ, {}, clear=True):
388 |             client = OncoKBClient()
389 | 
390 |             error_response = RequestError(
391 |                 code=404, message="Variant not found"
392 |             )
393 | 
394 |             with patch(
395 |                 "biomcp.variants.oncokb_client.request_api"
396 |             ) as mock_request:
397 |                 mock_request.return_value = (None, error_response)
398 | 
399 |                 result, error = await client.get_variant_annotation(
400 |                     "BRAF", "INVALID"
401 |                 )
402 | 
403 |                 assert result is None
404 |                 assert error is not None
405 |                 assert error.code == 404
406 | 
407 |     @pytest.mark.asyncio
408 |     async def test_get_variant_annotation_exception_handling(self):
409 |         """Test exception handling in get_variant_annotation."""
410 |         with patch.dict(os.environ, {}, clear=True):
411 |             client = OncoKBClient()
412 | 
413 |             with patch(
414 |                 "biomcp.variants.oncokb_client.request_api"
415 |             ) as mock_request:
416 |                 mock_request.side_effect = RuntimeError("Network error")
417 | 
418 |                 result, error = await client.get_variant_annotation(
419 |                     "BRAF", "V600E"
420 |                 )
421 | 
422 |                 assert result is None
423 |                 assert error is not None
424 |                 assert "Failed to fetch variant annotation" in error.message
425 | 
426 |     def test_headers_json_formatting(self):
427 |         """Test that headers are properly formatted as JSON."""
428 |         with (
429 |             patch.dict(os.environ, {"ONCOKB_TOKEN": "test-token"}, clear=True),
430 |             patch("biomcp.variants.oncokb_client.ONCOKB_TOKEN", "test-token"),
431 |         ):
432 |             client = OncoKBClient()
433 |             headers_json = client._headers_json()
434 | 
435 |             # Should be valid JSON
436 |             parsed = json.loads(headers_json)
437 |             assert "Accept" in parsed
438 |             assert "Authorization" in parsed
439 |             assert parsed["Authorization"] == "Bearer test-token"
440 | 
441 |     @pytest.mark.asyncio
442 |     async def test_error_handling_graceful_degradation(self):
443 |         """Test that all methods gracefully handle errors and return None."""
444 |         with patch.dict(os.environ, {}, clear=True):
445 |             client = OncoKBClient()
446 | 
447 |             # Simulate complete API failure
448 |             with patch(
449 |                 "biomcp.variants.oncokb_client.request_api"
450 |             ) as mock_request:
451 |                 mock_request.return_value = (
452 |                     None,
453 |                     RequestError(code=503, message="Service unavailable"),
454 |                 )
455 | 
456 |                 # Each method should return (None, error) without raising
457 |                 genes_result, genes_error = await client.get_curated_genes()
458 |                 assert genes_result is None
459 |                 assert genes_error is not None
460 | 
461 |                 gene_result, gene_error = await client.get_gene_annotation(
462 |                     "BRAF"
463 |                 )
464 |                 assert gene_result is None
465 |                 assert gene_error is not None
466 | 
467 |                 (
468 |                     variant_result,
469 |                     variant_error,
470 |                 ) = await client.get_variant_annotation("BRAF", "V600E")
471 |                 assert variant_result is None
472 |                 assert variant_error is not None
473 | 
474 |     @pytest.mark.asyncio
475 |     async def test_caching_behavior(self):
476 |         """Test that caching parameters are correctly set."""
477 |         with patch.dict(os.environ, {}, clear=True):
478 |             client = OncoKBClient()
479 | 
480 |             with patch(
481 |                 "biomcp.variants.oncokb_client.request_api"
482 |             ) as mock_request:
483 |                 mock_request.return_value = ([], None)
484 | 
485 |                 # Test curated genes - 24 hour cache
486 |                 await client.get_curated_genes()
487 |                 assert mock_request.call_args[1]["cache_ttl"] == 86400
488 | 
489 |                 # Test gene annotation - 1 hour cache
490 |                 mock_request.return_value = ({}, None)
491 |                 await client.get_gene_annotation("BRAF")
492 |                 assert mock_request.call_args[1]["cache_ttl"] == 3600
493 | 
494 |                 # Test variant annotation - 1 hour cache
495 |                 await client.get_variant_annotation("BRAF", "V600E")
496 |                 assert mock_request.call_args[1]["cache_ttl"] == 3600
497 | 
498 |     @pytest.mark.asyncio
499 |     async def test_retry_enabled_for_all_methods(self):
500 |         """Test that retry is enabled for all API methods."""
501 |         with patch.dict(os.environ, {}, clear=True):
502 |             client = OncoKBClient()
503 | 
504 |             with patch(
505 |                 "biomcp.variants.oncokb_client.request_api"
506 |             ) as mock_request:
507 |                 mock_request.return_value = ([], None)
508 | 
509 |                 await client.get_curated_genes()
510 |                 assert mock_request.call_args[1]["enable_retry"] is True
511 | 
512 |                 mock_request.return_value = ({}, None)
513 |                 await client.get_gene_annotation("BRAF")
514 |                 assert mock_request.call_args[1]["enable_retry"] is True
515 | 
516 |                 await client.get_variant_annotation("BRAF", "V600E")
517 |                 assert mock_request.call_args[1]["enable_retry"] is True
518 | 
```

--------------------------------------------------------------------------------
/docs/user-guides/02-mcp-tools-reference.md:
--------------------------------------------------------------------------------

```markdown
  1 | # MCP Tools Reference
  2 | 
  3 | BioMCP provides 35 specialized tools for biomedical research through the Model Context Protocol (MCP). This reference covers all available tools, their parameters, and usage patterns.
  4 | 
  5 | ## Related Guides
  6 | 
  7 | - **Conceptual Overview**: [Sequential Thinking with the Think Tool](../concepts/03-sequential-thinking-with-the-think-tool.md)
  8 | - **Practical Examples**: See the [How-to Guides](../how-to-guides/01-find-articles-and-cbioportal-data.md) for real-world usage patterns
  9 | - **Integration Setup**: [Claude Desktop Integration](../getting-started/02-claude-desktop-integration.md)
 10 | 
 11 | ## Tool Categories
 12 | 
 13 | | Category            | Count | Tools                                                          |
 14 | | ------------------- | ----- | -------------------------------------------------------------- |
 15 | | **Core Tools**      | 3     | `search`, `fetch`, `think`                                     |
 16 | | **Article Tools**   | 2     | `article_searcher`, `article_getter`                           |
 17 | | **Trial Tools**     | 6     | `trial_searcher`, `trial_getter`, + 4 detail getters           |
 18 | | **Variant Tools**   | 3     | `variant_searcher`, `variant_getter`, `alphagenome_predictor`  |
 19 | | **BioThings Tools** | 3     | `gene_getter`, `disease_getter`, `drug_getter`                 |
 20 | | **NCI Tools**       | 6     | Organization, intervention, biomarker, and disease tools       |
 21 | | **OpenFDA Tools**   | 12    | Adverse events, labels, devices, approvals, recalls, shortages |
 22 | 
 23 | ## Core Unified Tools
 24 | 
 25 | ### 1. search
 26 | 
 27 | **Universal search across all biomedical domains with unified query language.**
 28 | 
 29 | ```python
 30 | search(
 31 |     query: str = None,              # Unified query syntax
 32 |     domain: str = None,             # Target domain
 33 |     genes: list[str] = None,        # Gene symbols
 34 |     diseases: list[str] = None,     # Disease/condition terms
 35 |     variants: list[str] = None,     # Variant notations
 36 |     chemicals: list[str] = None,    # Drug/chemical names
 37 |     keywords: list[str] = None,     # Additional keywords
 38 |     conditions: list[str] = None,   # Trial conditions
 39 |     interventions: list[str] = None,# Trial interventions
 40 |     lat: float = None,              # Latitude for trials
 41 |     long: float = None,             # Longitude for trials
 42 |     page: int = 1,                  # Page number
 43 |     page_size: int = 10,            # Results per page
 44 |     api_key: str = None             # For NCI domains
 45 | ) -> dict
 46 | ```
 47 | 
 48 | **Domains:** `article`, `trial`, `variant`, `gene`, `drug`, `disease`, `nci_organization`, `nci_intervention`, `nci_biomarker`, `nci_disease`, `fda_adverse`, `fda_label`, `fda_device`, `fda_approval`, `fda_recall`, `fda_shortage`
 49 | 
 50 | **Query Language Examples:**
 51 | 
 52 | - `"gene:BRAF AND disease:melanoma"`
 53 | - `"drugs.tradename:gleevec"`
 54 | - `"gene:TP53 AND (mutation OR variant)"`
 55 | 
 56 | **Usage Examples:**
 57 | 
 58 | ```python
 59 | # Domain-specific search
 60 | search(domain="article", genes=["BRAF"], diseases=["melanoma"])
 61 | 
 62 | # Unified query language
 63 | search(query="gene:EGFR AND mutation:T790M")
 64 | 
 65 | # Clinical trials by location
 66 | search(domain="trial", conditions=["lung cancer"], lat=40.7128, long=-74.0060)
 67 | 
 68 | # FDA adverse events
 69 | search(domain="fda_adverse", chemicals=["aspirin"])
 70 | 
 71 | # FDA drug approvals
 72 | search(domain="fda_approval", chemicals=["keytruda"])
 73 | ```
 74 | 
 75 | ### 2. fetch
 76 | 
 77 | **Retrieve detailed information for any biomedical record.**
 78 | 
 79 | ```python
 80 | fetch(
 81 |     id: str,                    # Record identifier
 82 |     domain: str = None,         # Domain (auto-detected if not provided)
 83 |     detail: str = None,         # Specific section for trials
 84 |     api_key: str = None         # For NCI records
 85 | ) -> dict
 86 | ```
 87 | 
 88 | **Supported IDs:**
 89 | 
 90 | - Articles: PMID (e.g., "38768446"), DOI (e.g., "10.1101/2024.01.20.23288905")
 91 | - Trials: NCT ID (e.g., "NCT03006926")
 92 | - Variants: HGVS, rsID, genomic coordinates
 93 | - Genes/Drugs/Diseases: Names or database IDs
 94 | - FDA Records: Report IDs, Application Numbers (e.g., "BLA125514"), Recall Numbers, etc.
 95 | 
 96 | **Detail Options for Trials:** `protocol`, `locations`, `outcomes`, `references`, `all`
 97 | 
 98 | **Usage Examples:**
 99 | 
100 | ```python
101 | # Fetch article by PMID
102 | fetch(id="38768446", domain="article")
103 | 
104 | # Fetch trial with specific details
105 | fetch(id="NCT03006926", domain="trial", detail="locations")
106 | 
107 | # Auto-detect domain
108 | fetch(id="rs121913529")  # Variant
109 | fetch(id="BRAF")         # Gene
110 | 
111 | # Fetch FDA records
112 | fetch(id="BLA125514", domain="fda_approval")  # Drug approval
113 | fetch(id="D-0001-2023", domain="fda_recall")   # Drug recall
114 | ```
115 | 
116 | ### 3. think
117 | 
118 | **Sequential thinking tool for structured problem-solving.**
119 | 
120 | ```python
121 | think(
122 |     thought: str,               # Current reasoning step
123 |     thoughtNumber: int,         # Sequential number (1, 2, 3...)
124 |     totalThoughts: int = None,  # Estimated total thoughts
125 |     nextThoughtNeeded: bool = True  # Continue thinking?
126 | ) -> str
127 | ```
128 | 
129 | **CRITICAL:** Always use `think` BEFORE any other BioMCP operation!
130 | 
131 | **Usage Pattern:**
132 | 
133 | ```python
134 | # Step 1: Problem decomposition
135 | think(
136 |     thought="Breaking down query: need to find BRAF inhibitor trials...",
137 |     thoughtNumber=1,
138 |     nextThoughtNeeded=True
139 | )
140 | 
141 | # Step 2: Search strategy
142 | think(
143 |     thought="Will search trials for BRAF V600E melanoma, then articles...",
144 |     thoughtNumber=2,
145 |     nextThoughtNeeded=True
146 | )
147 | 
148 | # Final step: Synthesis
149 | think(
150 |     thought="Ready to synthesize findings from 5 trials and 12 articles...",
151 |     thoughtNumber=3,
152 |     nextThoughtNeeded=False  # Analysis complete
153 | )
154 | ```
155 | 
156 | ## Article Tools
157 | 
158 | ### 4. article_searcher
159 | 
160 | **Search PubMed/PubTator3 for biomedical literature.**
161 | 
162 | ```python
163 | article_searcher(
164 |     chemicals: list[str] = None,
165 |     diseases: list[str] = None,
166 |     genes: list[str] = None,
167 |     keywords: list[str] = None,    # Supports OR with "|"
168 |     variants: list[str] = None,
169 |     include_preprints: bool = True,
170 |     include_cbioportal: bool = True,
171 |     page: int = 1,
172 |     page_size: int = 10
173 | ) -> str
174 | ```
175 | 
176 | **Features:**
177 | 
178 | - Automatic cBioPortal integration for gene searches
179 | - Preprint inclusion from bioRxiv/medRxiv
180 | - OR logic in keywords: `"V600E|p.V600E|c.1799T>A"`
181 | 
182 | **Example:**
183 | 
184 | ```python
185 | # Search with multiple filters
186 | article_searcher(
187 |     genes=["BRAF"],
188 |     diseases=["melanoma"],
189 |     keywords=["resistance|resistant"],
190 |     include_cbioportal=True
191 | )
192 | ```
193 | 
194 | ### 5. article_getter
195 | 
196 | **Fetch detailed article information.**
197 | 
198 | ```python
199 | article_getter(
200 |     pmid: str  # PubMed ID, PMC ID, or DOI
201 | ) -> str
202 | ```
203 | 
204 | **Supports:**
205 | 
206 | - PubMed IDs: "38768446"
207 | - PMC IDs: "PMC7498215"
208 | - DOIs: "10.1101/2024.01.20.23288905"
209 | 
210 | ## Trial Tools
211 | 
212 | ### 6. trial_searcher
213 | 
214 | **Search ClinicalTrials.gov with comprehensive filters.**
215 | 
216 | ```python
217 | trial_searcher(
218 |     conditions: list[str] = None,
219 |     interventions: list[str] = None,
220 |     other_terms: list[str] = None,
221 |     recruiting_status: str = "ANY",  # "OPEN", "CLOSED", "ANY"
222 |     phase: str = None,               # "PHASE1", "PHASE2", etc.
223 |     lat: float = None,               # Location-based search
224 |     long: float = None,
225 |     distance: int = None,            # Miles from coordinates
226 |     age_group: str = None,           # "CHILD", "ADULT", "OLDER_ADULT"
227 |     sex: str = None,                 # "MALE", "FEMALE", "ALL"
228 |     study_type: str = None,          # "INTERVENTIONAL", "OBSERVATIONAL"
229 |     funder_type: str = None,         # "NIH", "INDUSTRY", etc.
230 |     page: int = 1,
231 |     page_size: int = 10
232 | ) -> str
233 | ```
234 | 
235 | **Location Search Example:**
236 | 
237 | ```python
238 | # Trials near Boston
239 | trial_searcher(
240 |     conditions=["breast cancer"],
241 |     lat=42.3601,
242 |     long=-71.0589,
243 |     distance=50,
244 |     recruiting_status="OPEN"
245 | )
246 | ```
247 | 
248 | ### 7-11. Trial Detail Getters
249 | 
250 | ```python
251 | # Get complete trial information
252 | trial_getter(nct_id: str) -> str
253 | 
254 | # Get specific sections
255 | trial_protocol_getter(nct_id: str) -> str     # Core protocol info
256 | trial_locations_getter(nct_id: str) -> str    # Sites and contacts
257 | trial_outcomes_getter(nct_id: str) -> str     # Outcome measures
258 | trial_references_getter(nct_id: str) -> str   # Publications
259 | ```
260 | 
261 | ## Variant Tools
262 | 
263 | ### 12. variant_searcher
264 | 
265 | **Search MyVariant.info for genetic variants.**
266 | 
267 | ```python
268 | variant_searcher(
269 |     gene: str = None,
270 |     hgvs: str = None,
271 |     hgvsp: str = None,              # Protein HGVS
272 |     hgvsc: str = None,              # Coding DNA HGVS
273 |     rsid: str = None,
274 |     region: str = None,             # "chr7:140753336-140753337"
275 |     significance: str = None,        # Clinical significance
276 |     frequency_min: float = None,
277 |     frequency_max: float = None,
278 |     cadd_score_min: float = None,
279 |     sift_prediction: str = None,
280 |     polyphen_prediction: str = None,
281 |     sources: list[str] = None,
282 |     include_cbioportal: bool = True,
283 |     page: int = 1,
284 |     page_size: int = 10
285 | ) -> str
286 | ```
287 | 
288 | **Significance Options:** `pathogenic`, `likely_pathogenic`, `uncertain_significance`, `likely_benign`, `benign`
289 | 
290 | **Example:**
291 | 
292 | ```python
293 | # Find rare pathogenic BRCA1 variants
294 | variant_searcher(
295 |     gene="BRCA1",
296 |     significance="pathogenic",
297 |     frequency_max=0.001,
298 |     cadd_score_min=20
299 | )
300 | ```
301 | 
302 | ### 13. variant_getter
303 | 
304 | **Fetch comprehensive variant details.**
305 | 
306 | ```python
307 | variant_getter(
308 |     variant_id: str,              # HGVS, rsID, or MyVariant ID
309 |     include_external: bool = True  # Include TCGA, 1000 Genomes
310 | ) -> str
311 | ```
312 | 
313 | ### 14. alphagenome_predictor
314 | 
315 | **Predict variant effects using Google DeepMind's AlphaGenome.**
316 | 
317 | ```python
318 | alphagenome_predictor(
319 |     chromosome: str,              # e.g., "chr7"
320 |     position: int,                # 1-based position
321 |     reference: str,               # Reference allele
322 |     alternate: str,               # Alternate allele
323 |     interval_size: int = 131072,  # Analysis window
324 |     tissue_types: list[str] = None,  # UBERON terms
325 |     significance_threshold: float = 0.5,
326 |     api_key: str = None          # AlphaGenome API key
327 | ) -> str
328 | ```
329 | 
330 | **Requires:** AlphaGenome API key (environment variable or per-request)
331 | 
332 | **Tissue Examples:**
333 | 
334 | - `UBERON:0002367` - prostate gland
335 | - `UBERON:0001155` - colon
336 | - `UBERON:0002048` - lung
337 | 
338 | **Example:**
339 | 
340 | ```python
341 | # Predict BRAF V600E effects
342 | alphagenome_predictor(
343 |     chromosome="chr7",
344 |     position=140753336,
345 |     reference="A",
346 |     alternate="T",
347 |     tissue_types=["UBERON:0002367"],  # prostate
348 |     api_key="your-key"
349 | )
350 | ```
351 | 
352 | ## BioThings Tools
353 | 
354 | ### 15. gene_getter
355 | 
356 | **Get gene information from MyGene.info.**
357 | 
358 | ```python
359 | gene_getter(
360 |     gene_id_or_symbol: str  # Gene symbol or Entrez ID
361 | ) -> str
362 | ```
363 | 
364 | **Returns:** Official name, aliases, summary, genomic location, database links
365 | 
366 | ### 16. disease_getter
367 | 
368 | **Get disease information from MyDisease.info.**
369 | 
370 | ```python
371 | disease_getter(
372 |     disease_id_or_name: str  # Disease name or ontology ID
373 | ) -> str
374 | ```
375 | 
376 | **Returns:** Definition, synonyms, MONDO/DOID IDs, associated phenotypes
377 | 
378 | ### 17. drug_getter
379 | 
380 | **Get drug/chemical information from MyChem.info.**
381 | 
382 | ```python
383 | drug_getter(
384 |     drug_id_or_name: str  # Drug name or database ID
385 | ) -> str
386 | ```
387 | 
388 | **Returns:** Chemical structure, mechanism, indications, trade names, identifiers
389 | 
390 | ## NCI-Specific Tools
391 | 
392 | All NCI tools require an API key from [clinicaltrialsapi.cancer.gov](https://clinicaltrialsapi.cancer.gov).
393 | 
394 | ### 18-19. Organization Tools
395 | 
396 | ```python
397 | # Search organizations
398 | nci_organization_searcher(
399 |     name: str = None,
400 |     organization_type: str = None,
401 |     city: str = None,              # Must use with state
402 |     state: str = None,             # Must use with city
403 |     api_key: str = None
404 | ) -> str
405 | 
406 | # Get organization details
407 | nci_organization_getter(
408 |     organization_id: str,
409 |     api_key: str = None
410 | ) -> str
411 | ```
412 | 
413 | ### 20-21. Intervention Tools
414 | 
415 | ```python
416 | # Search interventions
417 | nci_intervention_searcher(
418 |     name: str = None,
419 |     intervention_type: str = None,  # "Drug", "Device", etc.
420 |     synonyms: bool = True,
421 |     api_key: str = None
422 | ) -> str
423 | 
424 | # Get intervention details
425 | nci_intervention_getter(
426 |     intervention_id: str,
427 |     api_key: str = None
428 | ) -> str
429 | ```
430 | 
431 | ### 22. Biomarker Search
432 | 
433 | ```python
434 | nci_biomarker_searcher(
435 |     name: str = None,
436 |     biomarker_type: str = None,
437 |     api_key: str = None
438 | ) -> str
439 | ```
440 | 
441 | ### 23. Disease Search (NCI)
442 | 
443 | ```python
444 | nci_disease_searcher(
445 |     name: str = None,
446 |     include_synonyms: bool = True,
447 |     category: str = None,
448 |     api_key: str = None
449 | ) -> str
450 | ```
451 | 
452 | ## OpenFDA Tools
453 | 
454 | All OpenFDA tools support optional API keys for higher rate limits (240/min vs 40/min). Get a free key at [open.fda.gov/apis/authentication](https://open.fda.gov/apis/authentication/).
455 | 
456 | ### 24. openfda_adverse_searcher
457 | 
458 | **Search FDA Adverse Event Reporting System (FAERS).**
459 | 
460 | ```python
461 | openfda_adverse_searcher(
462 |     drug: str = None,
463 |     reaction: str = None,
464 |     serious: bool = None,        # Filter serious events only
465 |     limit: int = 25,
466 |     skip: int = 0,
467 |     api_key: str = None          # Optional OpenFDA API key
468 | ) -> str
469 | ```
470 | 
471 | **Example:**
472 | 
473 | ```python
474 | # Find serious bleeding events for warfarin
475 | openfda_adverse_searcher(
476 |     drug="warfarin",
477 |     reaction="bleeding",
478 |     serious=True,
479 |     api_key="your-key"  # Optional
480 | )
481 | ```
482 | 
483 | ### 25. openfda_adverse_getter
484 | 
485 | **Get detailed adverse event report.**
486 | 
487 | ```python
488 | openfda_adverse_getter(
489 |     report_id: str,              # Safety report ID
490 |     api_key: str = None
491 | ) -> str
492 | ```
493 | 
494 | ### 26. openfda_label_searcher
495 | 
496 | **Search FDA drug product labels.**
497 | 
498 | ```python
499 | openfda_label_searcher(
500 |     name: str = None,
501 |     indication: str = None,      # Search by indication
502 |     boxed_warning: bool = False, # Filter for boxed warnings
503 |     section: str = None,         # Specific label section
504 |     limit: int = 25,
505 |     skip: int = 0,
506 |     api_key: str = None
507 | ) -> str
508 | ```
509 | 
510 | ### 27. openfda_label_getter
511 | 
512 | **Get complete drug label information.**
513 | 
514 | ```python
515 | openfda_label_getter(
516 |     set_id: str,                 # Label set ID
517 |     sections: list[str] = None,  # Specific sections to retrieve
518 |     api_key: str = None
519 | ) -> str
520 | ```
521 | 
522 | **Label Sections:** `indications_and_usage`, `contraindications`, `warnings_and_precautions`, `dosage_and_administration`, `adverse_reactions`, `drug_interactions`, `pregnancy`, `pediatric_use`, `geriatric_use`
523 | 
524 | ### 28. openfda_device_searcher
525 | 
526 | **Search FDA device adverse event reports (MAUDE).**
527 | 
528 | ```python
529 | openfda_device_searcher(
530 |     device: str = None,
531 |     manufacturer: str = None,
532 |     problem: str = None,
533 |     product_code: str = None,    # FDA product code
534 |     genomics_only: bool = True,  # Restrict to genomic/diagnostic devices
535 |     limit: int = 25,
536 |     skip: int = 0,
537 |     api_key: str = None
538 | ) -> str
539 | ```
540 | 
541 | **Note:** FDA uses abbreviated device names (e.g., "F1CDX" for "FoundationOne CDx").
542 | 
543 | ### 29. openfda_device_getter
544 | 
545 | **Get detailed device event report.**
546 | 
547 | ```python
548 | openfda_device_getter(
549 |     mdr_report_key: str,         # MDR report key
550 |     api_key: str = None
551 | ) -> str
552 | ```
553 | 
554 | ### 30. openfda_approval_searcher
555 | 
556 | **Search FDA drug approval records (Drugs@FDA).**
557 | 
558 | ```python
559 | openfda_approval_searcher(
560 |     drug: str = None,
561 |     application_number: str = None,  # NDA/BLA number
562 |     approval_year: str = None,       # YYYY format
563 |     limit: int = 25,
564 |     skip: int = 0,
565 |     api_key: str = None
566 | ) -> str
567 | ```
568 | 
569 | ### 31. openfda_approval_getter
570 | 
571 | **Get drug approval details.**
572 | 
573 | ```python
574 | openfda_approval_getter(
575 |     application_number: str,     # NDA/BLA number
576 |     api_key: str = None
577 | ) -> str
578 | ```
579 | 
580 | ### 32. openfda_recall_searcher
581 | 
582 | **Search FDA drug recall records.**
583 | 
584 | ```python
585 | openfda_recall_searcher(
586 |     drug: str = None,
587 |     recall_class: str = None,    # "1", "2", or "3"
588 |     status: str = None,          # "ongoing" or "completed"
589 |     reason: str = None,
590 |     since_date: str = None,      # YYYYMMDD format
591 |     limit: int = 25,
592 |     skip: int = 0,
593 |     api_key: str = None
594 | ) -> str
595 | ```
596 | 
597 | **Recall Classes:**
598 | 
599 | - Class 1: Dangerous or defective products that could cause serious health problems or death
600 | - Class 2: Products that might cause temporary health problems or pose only a slight threat of a serious nature
601 | - Class 3: Products unlikely to cause adverse health consequences
602 | 
603 | ### 33. openfda_recall_getter
604 | 
605 | **Get drug recall details.**
606 | 
607 | ```python
608 | openfda_recall_getter(
609 |     recall_number: str,          # FDA recall number
610 |     api_key: str = None
611 | ) -> str
612 | ```
613 | 
614 | ### 34. openfda_shortage_searcher
615 | 
616 | **Search FDA drug shortage database.**
617 | 
618 | ```python
619 | openfda_shortage_searcher(
620 |     drug: str = None,
621 |     status: str = None,          # "current" or "resolved"
622 |     therapeutic_category: str = None,
623 |     limit: int = 25,
624 |     skip: int = 0,
625 |     api_key: str = None
626 | ) -> str
627 | ```
628 | 
629 | ### 35. openfda_shortage_getter
630 | 
631 | **Get drug shortage details.**
632 | 
633 | ```python
634 | openfda_shortage_getter(
635 |     drug_name: str,
636 |     api_key: str = None
637 | ) -> str
638 | ```
639 | 
640 | ## Best Practices
641 | 
642 | ### 1. Always Think First
643 | 
644 | ```python
645 | # ✅ CORRECT - Think before searching
646 | think(thought="Planning BRAF melanoma research...", thoughtNumber=1)
647 | results = article_searcher(genes=["BRAF"], diseases=["melanoma"])
648 | 
649 | # ❌ INCORRECT - Skipping think tool
650 | results = article_searcher(genes=["BRAF"])  # Poor results!
651 | ```
652 | 
653 | ### 2. Use Unified Tools for Flexibility
654 | 
655 | ```python
656 | # Unified search supports complex queries
657 | results = search(query="gene:EGFR AND (mutation:T790M OR mutation:C797S)")
658 | 
659 | # Unified fetch auto-detects domain
660 | details = fetch(id="NCT03006926")  # Knows it's a trial
661 | ```
662 | 
663 | ### 3. Leverage Domain-Specific Features
664 | 
665 | ```python
666 | # Article search with cBioPortal
667 | articles = article_searcher(
668 |     genes=["KRAS"],
669 |     include_cbioportal=True  # Adds cancer genomics context
670 | )
671 | 
672 | # Variant search with multiple filters
673 | variants = variant_searcher(
674 |     gene="TP53",
675 |     significance="pathogenic",
676 |     frequency_max=0.01,
677 |     cadd_score_min=25
678 | )
679 | ```
680 | 
681 | ### 4. Handle API Keys Properly
682 | 
683 | ```python
684 | # For personal use - environment variable
685 | # export NCI_API_KEY="your-key"
686 | nci_results = search(domain="nci_organization", name="Mayo Clinic")
687 | 
688 | # For shared environments - per-request
689 | nci_results = search(
690 |     domain="nci_organization",
691 |     name="Mayo Clinic",
692 |     api_key="user-provided-key"
693 | )
694 | ```
695 | 
696 | ### 5. Use Appropriate Page Sizes
697 | 
698 | ```python
699 | # Large result sets - increase page_size
700 | results = article_searcher(
701 |     genes=["TP53"],
702 |     page_size=50  # Get more results at once
703 | )
704 | 
705 | # Iterative exploration - use pagination
706 | page1 = trial_searcher(conditions=["cancer"], page=1, page_size=10)
707 | page2 = trial_searcher(conditions=["cancer"], page=2, page_size=10)
708 | ```
709 | 
710 | ## Error Handling
711 | 
712 | All tools include comprehensive error handling:
713 | 
714 | - **Invalid parameters**: Clear error messages with valid options
715 | - **API failures**: Graceful degradation with informative messages
716 | - **Rate limits**: Automatic retry with exponential backoff
717 | - **Missing API keys**: Helpful instructions for obtaining keys
718 | 
719 | ## Tool Selection Guide
720 | 
721 | | If you need to...              | Use this tool                                     |
722 | | ------------------------------ | ------------------------------------------------- |
723 | | Search across multiple domains | `search` with query language                      |
724 | | Get any record by ID           | `fetch` with auto-detection                       |
725 | | Plan your research approach    | `think` (always first!)                           |
726 | | Find recent papers             | `article_searcher`                                |
727 | | Locate clinical trials         | `trial_searcher`                                  |
728 | | Analyze genetic variants       | `variant_searcher` + `variant_getter`             |
729 | | Predict variant effects        | `alphagenome_predictor`                           |
730 | | Get gene/drug/disease info     | `gene_getter`, `drug_getter`, `disease_getter`    |
731 | | Access NCI databases           | `nci_*` tools with API key                        |
732 | | Check drug adverse events      | `openfda_adverse_searcher`                        |
733 | | Review FDA drug labels         | `openfda_label_searcher` + `openfda_label_getter` |
734 | | Investigate device issues      | `openfda_device_searcher`                         |
735 | | Find drug approvals            | `openfda_approval_searcher`                       |
736 | | Check drug recalls             | `openfda_recall_searcher`                         |
737 | | Monitor drug shortages         | `openfda_shortage_searcher`                       |
738 | 
739 | ## Next Steps
740 | 
741 | - Review [Sequential Thinking](../concepts/03-sequential-thinking-with-the-think-tool.md) methodology
742 | - Explore [How-to Guides](../how-to-guides/01-find-articles-and-cbioportal-data.md) for complex workflows
743 | - Set up [API Keys](../getting-started/03-authentication-and-api-keys.md) for enhanced features
744 | 
```

--------------------------------------------------------------------------------
/tests/integration/test_oncokb_integration.py:
--------------------------------------------------------------------------------

```python
  1 | """Integration tests for OncoKB API.
  2 | 
  3 | These tests make real API calls to verify OncoKB integration works correctly.
  4 | They use the demo server by default (demo.oncokb.org), which has limited data.
  5 | Tests are marked with pytest.mark.integration and gracefully skip if API is
  6 | unavailable.
  7 | 
  8 | Demo Server Limitations:
  9 | - Only has data for BRAF, ROS1, and TP53
 10 | - No authentication required
 11 | - Limited to basic annotations
 12 | 
 13 | Production Server:
 14 | - Requires ONCOKB_TOKEN environment variable
 15 | - Full cancer gene database
 16 | - Complete therapeutic/diagnostic annotations
 17 | """
 18 | 
 19 | import os
 20 | 
 21 | import pytest
 22 | 
 23 | from biomcp.variants.oncokb_client import OncoKBClient
 24 | 
 25 | 
 26 | @pytest.mark.integration
 27 | class TestOncoKBDemoServer:
 28 |     """Integration tests for OncoKB demo server (no auth required)."""
 29 | 
 30 |     @pytest.mark.asyncio
 31 |     async def test_demo_server_access(self):
 32 |         """Test basic access to demo server with curated genes list."""
 33 |         # Temporarily remove token to force demo server
 34 |         original_token = os.environ.get("ONCOKB_TOKEN")
 35 |         if original_token:
 36 |             del os.environ["ONCOKB_TOKEN"]
 37 | 
 38 |         try:
 39 |             client = OncoKBClient()
 40 | 
 41 |             # Verify demo server is being used
 42 |             assert client.is_demo is True
 43 |             assert "demo.oncokb.org" in client.base_url
 44 | 
 45 |             # Fetch curated genes list (this works on demo)
 46 |             result, error = await client.get_curated_genes()
 47 | 
 48 |             # Skip if demo server is unavailable
 49 |             if error and error.code in [500, 503, 504]:
 50 |                 pytest.skip(f"OncoKB demo server unavailable: {error.message}")
 51 | 
 52 |             # Should succeed with curated genes
 53 |             assert error is None, f"Expected success but got error: {error}"
 54 |             assert result is not None
 55 |             assert isinstance(result, list)
 56 |             assert len(result) > 0
 57 | 
 58 |             # Find BRAF in the results
 59 |             braf = next(
 60 |                 (g for g in result if g.get("hugoSymbol") == "BRAF"), None
 61 |             )
 62 |             assert braf is not None, "BRAF should be in demo curated genes"
 63 | 
 64 |             print("✓ Demo server access successful")
 65 |             print(f"  Total curated genes: {len(result)}")
 66 |             print(f"  BRAF gene: {braf.get('hugoSymbol')}")
 67 |             print(f"  BRAF Entrez ID: {braf.get('entrezGeneId')}")
 68 |             print(f"  BRAF gene type: {braf.get('geneType')}")
 69 | 
 70 |         finally:
 71 |             # Restore token if it was set
 72 |             if original_token:
 73 |                 os.environ["ONCOKB_TOKEN"] = original_token
 74 | 
 75 |     @pytest.mark.asyncio
 76 |     async def test_demo_gene_limits(self):
 77 |         """Test that demo server only has BRAF, ROS1, and TP53."""
 78 |         # Temporarily remove token to force demo server
 79 |         original_token = os.environ.get("ONCOKB_TOKEN")
 80 |         if original_token:
 81 |             del os.environ["ONCOKB_TOKEN"]
 82 | 
 83 |         try:
 84 |             client = OncoKBClient()
 85 |             assert client.is_demo is True
 86 | 
 87 |             # Get all curated genes from demo
 88 |             result, error = await client.get_curated_genes()
 89 | 
 90 |             # Skip if server unavailable
 91 |             if error and error.code in [500, 503, 504]:
 92 |                 pytest.skip(f"OncoKB demo server unavailable: {error.message}")
 93 | 
 94 |             assert error is None, f"Expected success but got error: {error}"
 95 |             assert result is not None
 96 |             assert isinstance(result, list)
 97 | 
 98 |             # Extract gene symbols
 99 |             gene_symbols = {g.get("hugoSymbol") for g in result}
100 | 
101 |             # Demo should have exactly BRAF, ROS1, and TP53
102 |             expected_demo_genes = {"BRAF", "ROS1", "TP53"}
103 |             assert gene_symbols == expected_demo_genes, (
104 |                 f"Expected demo genes {expected_demo_genes}, "
105 |                 f"got {gene_symbols}"
106 |             )
107 | 
108 |             print(
109 |                 f"✓ Demo server has exactly the expected genes: {gene_symbols}"
110 |             )
111 | 
112 |             # Verify KRAS is NOT in demo
113 |             assert "KRAS" not in gene_symbols, "KRAS should not be in demo"
114 |             print("✓ Demo correctly excludes non-demo genes like KRAS")
115 | 
116 |         finally:
117 |             # Restore token if it was set
118 |             if original_token:
119 |                 os.environ["ONCOKB_TOKEN"] = original_token
120 | 
121 |     @pytest.mark.asyncio
122 |     async def test_variant_annotation(self):
123 |         """Test variant annotation with BRAF V600E on demo server."""
124 |         # Temporarily remove token to force demo server
125 |         original_token = os.environ.get("ONCOKB_TOKEN")
126 |         if original_token:
127 |             del os.environ["ONCOKB_TOKEN"]
128 | 
129 |         try:
130 |             client = OncoKBClient()
131 |             assert client.is_demo is True
132 | 
133 |             # Request BRAF V600E annotation
134 |             result, error = await client.get_variant_annotation(
135 |                 gene="BRAF", protein_change="V600E"
136 |             )
137 | 
138 |             # Skip if server unavailable
139 |             if error and error.code in [500, 503, 504]:
140 |                 pytest.skip(f"OncoKB demo server unavailable: {error.message}")
141 | 
142 |             # Should get annotation for this well-known variant
143 |             if error:
144 |                 # Some demo servers may not support variant endpoints
145 |                 print(
146 |                     f"Note: Variant endpoint returned error: {error.message}"
147 |                 )
148 |                 pytest.skip("Demo variant endpoint not available")
149 | 
150 |             assert result is not None
151 |             assert isinstance(result, dict)
152 | 
153 |             # Check basic annotation fields
154 |             query = result.get("query", {})
155 |             assert query.get("hugoSymbol") == "BRAF"
156 |             assert query.get("alteration") == "V600E"
157 | 
158 |             # Check if variant is marked as oncogenic
159 |             oncogenic = result.get("oncogenic")
160 |             if oncogenic:
161 |                 print(f"✓ BRAF V600E oncogenicity: {oncogenic}")
162 |                 # V600E is a well-known oncogenic mutation
163 |                 assert "Oncogenic" in oncogenic or "Likely" in oncogenic
164 | 
165 |             # Check mutation effect
166 |             mutation_effect = result.get("mutationEffect")
167 |             if mutation_effect:
168 |                 known_effect = mutation_effect.get("knownEffect")
169 |                 print(f"✓ BRAF V600E effect: {known_effect}")
170 | 
171 |             # Check if it's a hotspot
172 |             hotspot = result.get("hotspot")
173 |             if hotspot is not None:
174 |                 print(f"✓ BRAF V600E hotspot: {hotspot}")
175 |                 # V600E is a known hotspot
176 |                 assert hotspot is True
177 | 
178 |             print("✓ Variant annotation successful for BRAF V600E")
179 | 
180 |         finally:
181 |             # Restore token if it was set
182 |             if original_token:
183 |                 os.environ["ONCOKB_TOKEN"] = original_token
184 | 
185 |     @pytest.mark.asyncio
186 |     async def test_curated_genes_demo(self):
187 |         """Test fetching curated genes list from demo server."""
188 |         # Temporarily remove token to force demo server
189 |         original_token = os.environ.get("ONCOKB_TOKEN")
190 |         if original_token:
191 |             del os.environ["ONCOKB_TOKEN"]
192 | 
193 |         try:
194 |             client = OncoKBClient()
195 |             assert client.is_demo is True
196 | 
197 |             # Fetch curated genes
198 |             result, error = await client.get_curated_genes()
199 | 
200 |             # Skip if server unavailable
201 |             if error and error.code in [500, 503, 504]:
202 |                 pytest.skip(f"OncoKB demo server unavailable: {error.message}")
203 | 
204 |             # Should get a list of genes
205 |             assert error is None, f"Expected success but got error: {error}"
206 |             assert result is not None
207 |             assert isinstance(result, list)
208 | 
209 |             # Demo should include BRAF, ROS1, TP53 (not asserted here)
210 |             if len(result) > 0:
211 |                 print(f"✓ Demo server has {len(result)} curated genes")
212 | 
213 |                 # Check structure of first gene
214 |                 first_gene = result[0]
215 |                 assert "hugoSymbol" in first_gene
216 |                 assert "entrezGeneId" in first_gene
217 | 
218 |                 # Verify demo genes are present
219 |                 gene_symbols = {g.get("hugoSymbol") for g in result}
220 |                 demo_expected = {"BRAF", "ROS1", "TP53"}
221 | 
222 |                 # At least some demo genes should be present
223 |                 found = gene_symbols & demo_expected
224 |                 if found:
225 |                     print(f"✓ Found expected demo genes: {found}")
226 | 
227 |                 # Print first few genes
228 |                 for gene in result[:5]:
229 |                     symbol = gene.get("hugoSymbol")
230 |                     oncogene = gene.get("oncogene")
231 |                     tsg = gene.get("tsg")
232 |                     print(f"  - {symbol}: oncogene={oncogene}, tsg={tsg}")
233 |             else:
234 |                 pytest.skip("Demo server returned empty gene list")
235 | 
236 |         finally:
237 |             # Restore token if it was set
238 |             if original_token:
239 |                 os.environ["ONCOKB_TOKEN"] = original_token
240 | 
241 | 
242 | @pytest.mark.integration
243 | class TestOncoKBProductionServer:
244 |     """Integration tests for OncoKB production server (requires auth)."""
245 | 
246 |     @pytest.mark.asyncio
247 |     async def test_production_requires_token(self):
248 |         """Test that demo/production server selection works correctly."""
249 |         # Get original token state
250 |         original_token = os.environ.get("ONCOKB_TOKEN")
251 | 
252 |         try:
253 |             # Test 1: Without token, should use demo
254 |             if original_token:
255 |                 del os.environ["ONCOKB_TOKEN"]
256 | 
257 |             # Need to reload module to pick up env var change
258 |             import importlib
259 | 
260 |             from biomcp.variants import oncokb_client
261 | 
262 |             importlib.reload(oncokb_client)
263 | 
264 |             client_no_token = oncokb_client.OncoKBClient()
265 |             assert client_no_token.is_demo is True
266 |             assert "demo.oncokb.org" in client_no_token.base_url
267 |             print("✓ Without token, client correctly uses demo server")
268 | 
269 |             # Test 2: With token (invalid), should try production
270 |             os.environ["ONCOKB_TOKEN"] = "invalid_token_for_testing"  # noqa: S105
271 |             importlib.reload(oncokb_client)
272 | 
273 |             client_with_token = oncokb_client.OncoKBClient()
274 |             assert client_with_token.is_demo is False
275 |             assert "www.oncokb.org" in client_with_token.base_url
276 |             print("✓ With token set, client correctly uses production server")
277 | 
278 |             # Try to fetch with invalid token - should get auth error
279 |             result, error = await client_with_token.get_curated_genes()
280 | 
281 |             if error:
282 |                 # Expected: auth error with invalid token
283 |                 assert error.code in [
284 |                     400,
285 |                     401,
286 |                     403,
287 |                 ], f"Expected auth error, got: {error.code}"
288 |                 print(
289 |                     f"✓ Production correctly rejects invalid token (HTTP {error.code})"
290 |                 )
291 |             else:
292 |                 # Unexpected but not a failure - maybe public endpoint
293 |                 print(
294 |                     "Note: Production endpoint accessible with invalid token"
295 |                 )
296 | 
297 |         finally:
298 |             # Restore original state
299 |             if original_token:
300 |                 os.environ["ONCOKB_TOKEN"] = original_token
301 |             elif "ONCOKB_TOKEN" in os.environ:
302 |                 del os.environ["ONCOKB_TOKEN"]
303 | 
304 |             # Reload one more time to restore original state
305 |             import importlib
306 | 
307 |             from biomcp.variants import oncokb_client
308 | 
309 |             importlib.reload(oncokb_client)
310 | 
311 |     @pytest.mark.asyncio
312 |     async def test_production_with_token(self):
313 |         """Test production server with valid token (if available)."""
314 |         # Only run if token is set
315 |         if not os.environ.get("ONCOKB_TOKEN"):
316 |             pytest.skip("ONCOKB_TOKEN not set - skipping production test")
317 | 
318 |         client = OncoKBClient()
319 | 
320 |         # Should be using production server
321 |         assert client.is_demo is False
322 |         assert "www.oncokb.org" in client.base_url
323 |         print("✓ Using production server with token")
324 | 
325 |         # Try to fetch curated genes (works on production with token)
326 |         result, error = await client.get_curated_genes()
327 | 
328 |         # Skip if server unavailable
329 |         if error and error.code in [500, 503, 504]:
330 |             pytest.skip(
331 |                 f"OncoKB production server unavailable: {error.message}"
332 |             )
333 | 
334 |         # Should succeed with valid token
335 |         if error:
336 |             if error.code in [401, 403]:
337 |                 pytest.skip(f"Token authentication failed: {error.message}")
338 |             else:
339 |                 pytest.fail(f"Unexpected error: {error}")
340 | 
341 |         assert result is not None
342 |         assert isinstance(result, list)
343 |         assert len(result) > 0
344 | 
345 |         # Find a common cancer gene like EGFR
346 |         egfr = next((g for g in result if g.get("hugoSymbol") == "EGFR"), None)
347 | 
348 |         print("✓ Production server access successful with token")
349 |         print(f"  Total genes: {len(result)}")
350 |         if egfr:
351 |             print(f"  Sample gene: {egfr.get('hugoSymbol')}")
352 |             print(f"  Entrez ID: {egfr.get('entrezGeneId')}")
353 | 
354 |     @pytest.mark.asyncio
355 |     async def test_production_curated_genes(self):
356 |         """Test production server has full gene database."""
357 |         # Only run if token is set
358 |         if not os.environ.get("ONCOKB_TOKEN"):
359 |             pytest.skip("ONCOKB_TOKEN not set - skipping production test")
360 | 
361 |         client = OncoKBClient()
362 |         assert client.is_demo is False
363 | 
364 |         # Fetch all curated genes
365 |         result, error = await client.get_curated_genes()
366 | 
367 |         # Skip if server unavailable or auth fails
368 |         if error:
369 |             if error.code in [401, 403]:
370 |                 pytest.skip(f"Token authentication failed: {error.message}")
371 |             elif error.code in [500, 503, 504]:
372 |                 pytest.skip(f"OncoKB production unavailable: {error.message}")
373 |             else:
374 |                 pytest.fail(f"Unexpected error: {error}")
375 | 
376 |         assert result is not None
377 |         assert isinstance(result, list)
378 | 
379 |         # Production has many genes (>700 expected); >100 is a safe floor
380 |         assert (
381 |             len(result) > 100
382 |         ), f"Expected >100 genes in production, got {len(result)}"
383 | 
384 |         print(f"✓ Production server has {len(result)} curated genes")
385 | 
386 |         # Check for well-known cancer genes
387 |         gene_symbols = {g.get("hugoSymbol") for g in result}
388 |         expected_genes = {"BRAF", "TP53", "EGFR", "KRAS", "PIK3CA"}
389 | 
390 |         found = gene_symbols & expected_genes
391 |         assert len(found) == len(
392 |             expected_genes
393 |         ), f"Expected all cancer genes, found: {found}"
394 | 
395 |         print(f"✓ Found expected cancer genes: {found}")
396 | 
397 | 
398 | @pytest.mark.integration
399 | class TestOncoKBErrorHandling:
400 |     """Integration tests for error handling and edge cases."""
401 | 
402 |     @pytest.mark.asyncio
403 |     async def test_invalid_gene_symbol(self):
404 |         """Test handling of genes not in curated list."""
405 |         # Use demo server for this test
406 |         original_token = os.environ.get("ONCOKB_TOKEN")
407 |         if original_token:
408 |             del os.environ["ONCOKB_TOKEN"]
409 | 
410 |         try:
411 |             client = OncoKBClient()
412 | 
413 |             # Get curated genes list
414 |             result, error = await client.get_curated_genes()
415 | 
416 |             # Skip if server unavailable
417 |             if error and error.code in [500, 503, 504]:
418 |                 pytest.skip(f"OncoKB server unavailable: {error.message}")
419 | 
420 |             assert error is None, f"Expected success but got error: {error}"
421 |             assert result is not None
422 | 
423 |             # Verify an invalid gene like "NOTAREALGENE" is not in the list
424 |             gene_symbols = {g.get("hugoSymbol") for g in result}
425 |             assert "NOTAREALGENE" not in gene_symbols
426 |             print("✓ Invalid gene correctly not in curated genes list")
427 | 
428 |         finally:
429 |             if original_token:
430 |                 os.environ["ONCOKB_TOKEN"] = original_token
431 | 
432 |     @pytest.mark.asyncio
433 |     async def test_empty_query_handling(self):
434 |         """Test that the curated-genes query works with no parameters."""
435 |         original_token = os.environ.get("ONCOKB_TOKEN")
436 |         if original_token:
437 |             del os.environ["ONCOKB_TOKEN"]
438 | 
439 |         try:
440 |             client = OncoKBClient()
441 | 
442 |             # Fetching curated genes requires no parameters
443 |             # This should always work
444 |             result, error = await client.get_curated_genes()
445 | 
446 |             # Skip if server unavailable
447 |             if error and error.code in [500, 503, 504]:
448 |                 pytest.skip(f"OncoKB server unavailable: {error.message}")
449 | 
450 |             # Should succeed
451 |             assert error is None, f"Expected success but got error: {error}"
452 |             assert result is not None
453 |             assert isinstance(result, list)
454 |             print(
455 |                 f"✓ Curated genes query works without parameters ({len(result)} genes)"
456 |             )
457 | 
458 |         finally:
459 |             if original_token:
460 |                 os.environ["ONCOKB_TOKEN"] = original_token
461 | 
462 |     @pytest.mark.asyncio
463 |     async def test_invalid_variant_format(self):
464 |         """Test handling of invalid variant formats."""
465 |         original_token = os.environ.get("ONCOKB_TOKEN")
466 |         if original_token:
467 |             del os.environ["ONCOKB_TOKEN"]
468 | 
469 |         try:
470 |             client = OncoKBClient()
471 | 
472 |             # Try with invalid protein change format
473 |             result, error = await client.get_variant_annotation(
474 |                 gene="BRAF", protein_change="invalid_format_123"
475 |             )
476 | 
477 |             # Skip if server unavailable
478 |             if error and error.code in [500, 503, 504]:
479 |                 pytest.skip(f"OncoKB server unavailable: {error.message}")
480 | 
481 |             # Should handle gracefully (may return error or empty result)
482 |             if error:
483 |                 print(
484 |                     f"✓ Invalid variant format returns error (HTTP {error.code})"
485 |                 )
486 |             else:
487 |                 # Some servers may return result with warnings
488 |                 assert result is not None
489 |                 print("✓ Invalid variant format handled gracefully")
490 | 
491 |         finally:
492 |             if original_token:
493 |                 os.environ["ONCOKB_TOKEN"] = original_token
494 | 
495 |     @pytest.mark.asyncio
496 |     async def test_concurrent_requests(self):
497 |         """Test handling of concurrent API requests."""
498 |         import asyncio
499 | 
500 |         original_token = os.environ.get("ONCOKB_TOKEN")
501 |         if original_token:
502 |             del os.environ["ONCOKB_TOKEN"]
503 | 
504 |         try:
505 |             client = OncoKBClient()
506 | 
507 |             # Make multiple concurrent requests
508 |             genes = ["BRAF", "ROS1", "TP53"]
509 |             tasks = [client.get_gene_annotation(gene) for gene in genes]
510 | 
511 |             results = await asyncio.gather(*tasks, return_exceptions=True)
512 | 
513 |             # Check results
514 |             for gene, result in zip(genes, results, strict=False):
515 |                 if isinstance(result, Exception):
516 |                     pytest.skip(
517 |                         f"Server error during concurrent test: {result}"
518 |                     )
519 | 
520 |                 data, error = result
521 | 
522 |                 # Skip if server unavailable
523 |                 if error and error.code in [500, 503, 504]:
524 |                     pytest.skip(f"OncoKB server unavailable: {error.message}")
525 | 
526 |                 # Should handle concurrent requests
527 |                 if data:
528 |                     assert data.get("hugoSymbol") == gene
529 |                     print(f"✓ Concurrent request successful for {gene}")
530 | 
531 |         finally:
532 |             if original_token:
533 |                 os.environ["ONCOKB_TOKEN"] = original_token
534 | 
535 | 
if __name__ == "__main__":
    # Run a selection of the integration tests directly for debugging.
    #
    # Usage:
    #     python tests/integration/test_oncokb_integration.py
    import asyncio

    async def run_tests():
        """Run the selected tests in order and print a short report.

        The test methods call ``pytest.skip`` when the server is unavailable.
        Outside of a pytest session that raises ``pytest.skip.Exception``
        (a ``BaseException`` subclass), which would otherwise abort the whole
        script with a traceback, so each step reports a skip and continues.
        Assertion failures and other errors still propagate.
        """
        banner = "=" * 70
        print(banner)
        print("OncoKB Integration Tests")
        print(banner)

        # Each entry builds a fresh test instance only when its step runs.
        steps = [
            (
                "Testing Demo Server Access...",
                lambda: TestOncoKBDemoServer().test_demo_server_access(),
            ),
            (
                "Testing Demo Gene Limits...",
                lambda: TestOncoKBDemoServer().test_demo_gene_limits(),
            ),
            (
                "Testing Variant Annotation...",
                lambda: TestOncoKBDemoServer().test_variant_annotation(),
            ),
            (
                "Testing Production Auth Requirement...",
                lambda: TestOncoKBProductionServer().test_production_requires_token(),
            ),
        ]

        skipped = 0
        for position, (label, start) in enumerate(steps, start=1):
            print(f"\n[{position}/{len(steps)}] {label}")
            try:
                await start()
            except pytest.skip.Exception as skip:
                skipped += 1
                print(f"⚠ Skipped: {skip}")

        print("\n" + banner)
        print("✓ All integration tests completed")
        if skipped:
            print(f"  ({skipped} skipped)")
        print(banner)

    asyncio.run(run_tests())
569 | 
```

--------------------------------------------------------------------------------
/src/biomcp/domain_handlers.py:
--------------------------------------------------------------------------------

```python
  1 | """Domain-specific result handlers for BioMCP.
  2 | 
  3 | This module contains formatting functions for converting raw API responses
  4 | from different biomedical data sources into a standardized format.
  5 | """
  6 | 
  7 | import logging
  8 | from typing import Any
  9 | 
 10 | from biomcp.constants import (
 11 |     DEFAULT_SIGNIFICANCE,
 12 |     DEFAULT_TITLE,
 13 |     METADATA_AUTHORS,
 14 |     METADATA_COMPLETION_DATE,
 15 |     METADATA_CONSEQUENCE,
 16 |     METADATA_GENE,
 17 |     METADATA_JOURNAL,
 18 |     METADATA_PHASE,
 19 |     METADATA_RSID,
 20 |     METADATA_SIGNIFICANCE,
 21 |     METADATA_SOURCE,
 22 |     METADATA_START_DATE,
 23 |     METADATA_STATUS,
 24 |     METADATA_YEAR,
 25 |     RESULT_ID,
 26 |     RESULT_METADATA,
 27 |     RESULT_SNIPPET,
 28 |     RESULT_TITLE,
 29 |     RESULT_URL,
 30 |     SNIPPET_LENGTH,
 31 | )
 32 | 
 33 | logger = logging.getLogger(__name__)
 34 | 
 35 | 
 36 | class ArticleHandler:
 37 |     """Handles formatting for article/publication results."""
 38 | 
 39 |     @staticmethod
 40 |     def format_result(result: dict[str, Any]) -> dict[str, Any]:
 41 |         """Format a single article result.
 42 | 
 43 |         Args:
 44 |             result: Raw article data from PubTator3 or preprint APIs
 45 | 
 46 |         Returns:
 47 |             Standardized article result with id, title, snippet, url, and metadata
 48 |         """
 49 |         if "pmid" in result:
 50 |             # PubMed article
 51 |             # Clean up title - remove extra spaces
 52 |             title = result.get("title", "").strip()
 53 |             title = " ".join(title.split())  # Normalize whitespace
 54 | 
 55 |             # Use default if empty
 56 |             if not title:
 57 |                 title = DEFAULT_TITLE
 58 | 
 59 |             return {
 60 |                 RESULT_ID: result["pmid"],
 61 |                 RESULT_TITLE: title,
 62 |                 RESULT_SNIPPET: result.get("abstract", "")[:SNIPPET_LENGTH]
 63 |                 + "..."
 64 |                 if result.get("abstract")
 65 |                 else "",
 66 |                 RESULT_URL: f"https://pubmed.ncbi.nlm.nih.gov/{result['pmid']}/",
 67 |                 RESULT_METADATA: {
 68 |                     METADATA_YEAR: result.get("pub_year")
 69 |                     or (
 70 |                         result.get("date", "")[:4]
 71 |                         if result.get("date")
 72 |                         else None
 73 |                     ),
 74 |                     METADATA_JOURNAL: result.get("journal", ""),
 75 |                     METADATA_AUTHORS: result.get("authors", [])[:3],
 76 |                 },
 77 |             }
 78 |         else:
 79 |             # Preprint result
 80 |             return {
 81 |                 RESULT_ID: result.get("doi", result.get("id", "")),
 82 |                 RESULT_TITLE: result.get("title", ""),
 83 |                 RESULT_SNIPPET: result.get("abstract", "")[:SNIPPET_LENGTH]
 84 |                 + "..."
 85 |                 if result.get("abstract")
 86 |                 else "",
 87 |                 RESULT_URL: result.get("url", ""),
 88 |                 RESULT_METADATA: {
 89 |                     METADATA_YEAR: result.get("pub_year"),
 90 |                     METADATA_SOURCE: result.get("source", ""),
 91 |                     METADATA_AUTHORS: result.get("authors", [])[:3],
 92 |                 },
 93 |             }
 94 | 
 95 | 
 96 | class TrialHandler:
 97 |     """Handles formatting for clinical trial results."""
 98 | 
 99 |     @staticmethod
100 |     def format_result(result: dict[str, Any]) -> dict[str, Any]:
101 |         """Format a single trial result.
102 | 
103 |         Handles both ClinicalTrials.gov API v2 nested structure and legacy formats.
104 | 
105 |         Args:
106 |             result: Raw trial data from ClinicalTrials.gov API
107 | 
108 |         Returns:
109 |             Standardized trial result with id, title, snippet, url, and metadata
110 |         """
111 |         # Handle ClinicalTrials.gov API v2 nested structure
112 |         if "protocolSection" in result:
113 |             # API v2 format - extract from nested modules
114 |             protocol = result.get("protocolSection", {})
115 |             identification = protocol.get("identificationModule", {})
116 |             status = protocol.get("statusModule", {})
117 |             description = protocol.get("descriptionModule", {})
118 | 
119 |             nct_id = identification.get("nctId", "")
120 |             brief_title = identification.get("briefTitle", "")
121 |             official_title = identification.get("officialTitle", "")
122 |             brief_summary = description.get("briefSummary", "")
123 |             overall_status = status.get("overallStatus", "")
124 |             start_date = status.get("startDateStruct", {}).get("date", "")
125 |             completion_date = status.get(
126 |                 "primaryCompletionDateStruct", {}
127 |             ).get("date", "")
128 | 
129 |             # Extract phase from designModule
130 |             design = protocol.get("designModule", {})
131 |             phases = design.get("phases", [])
132 |             phase = phases[0] if phases else ""
133 |         elif "NCT Number" in result:
134 |             # Legacy flat format from search results
135 |             nct_id = result.get("NCT Number", "")
136 |             brief_title = result.get("Study Title", "")
137 |             official_title = ""  # Not available in this format
138 |             brief_summary = result.get("Brief Summary", "")
139 |             overall_status = result.get("Study Status", "")
140 |             phase = result.get("Phases", "")
141 |             start_date = result.get("Start Date", "")
142 |             completion_date = result.get("Completion Date", "")
143 |         else:
144 |             # Original legacy format or simplified structure
145 |             nct_id = result.get("nct_id", "")
146 |             brief_title = result.get("brief_title", "")
147 |             official_title = result.get("official_title", "")
148 |             brief_summary = result.get("brief_summary", "")
149 |             overall_status = result.get("overall_status", "")
150 |             phase = result.get("phase", "")
151 |             start_date = result.get("start_date", "")
152 |             completion_date = result.get("primary_completion_date", "")
153 | 
154 |         return {
155 |             RESULT_ID: nct_id,
156 |             RESULT_TITLE: brief_title or official_title or DEFAULT_TITLE,
157 |             RESULT_SNIPPET: brief_summary[:SNIPPET_LENGTH] + "..."
158 |             if brief_summary
159 |             else "",
160 |             RESULT_URL: f"https://clinicaltrials.gov/study/{nct_id}",
161 |             RESULT_METADATA: {
162 |                 METADATA_STATUS: overall_status,
163 |                 METADATA_PHASE: phase,
164 |                 METADATA_START_DATE: start_date,
165 |                 METADATA_COMPLETION_DATE: completion_date,
166 |             },
167 |         }
168 | 
169 | 
class VariantHandler:
    """Handles formatting for genetic variant results."""

    @staticmethod
    def _as_dict(value: Any) -> dict[str, Any]:
        """Coerce an annotation sub-document to a dict.

        A list is reduced to its first element; anything that is still not a
        dict afterwards (``None``, strings, an empty list, ...) becomes ``{}``.
        """
        if isinstance(value, list):
            value = value[0] if value else None
        return value if isinstance(value, dict) else {}

    @staticmethod
    def format_result(result: dict[str, Any]) -> dict[str, Any]:
        """Format a single variant result.

        Sub-documents (``dbnsfp``, ``dbsnp``, ``dbsnp.gene``, ``clinvar``,
        ``clinvar.rcv``, ``cadd``) may arrive as a dict, a list of dicts, or
        be missing/``None``; the first dict is used and anything else is
        treated as empty rather than raising.

        Args:
            result: Raw variant data from MyVariant.info API

        Returns:
            Standardized variant result with id, title, snippet, url, and metadata
        """
        as_dict = VariantHandler._as_dict
        dbnsfp = as_dict(result.get("dbnsfp"))
        dbsnp = as_dict(result.get("dbsnp"))
        clinvar = as_dict(result.get("clinvar"))
        cadd = as_dict(result.get("cadd"))

        # Gene symbol: dbNSFP first, then dbSNP. ``dbsnp.gene`` can be a list
        # of gene objects, which the previous chained ``.get`` could not handle.
        gene = (
            dbnsfp.get("genename", "")
            or as_dict(dbsnp.get("gene")).get("symbol", "")
            or ""
        )
        if isinstance(gene, list):
            gene = gene[0] if gene else ""

        rsid = dbsnp.get("rsid", "") or ""

        # ``rcv`` is either one record or a list of records; use the first.
        significance = as_dict(clinvar.get("rcv")).get(
            "clinical_significance", ""
        )

        # Protein-level HGVS for the title; a null value must not be rendered
        # as the text "None".
        hgvs = dbnsfp.get("hgvsp")
        if isinstance(hgvs, list):
            hgvs = hgvs[0] if hgvs else ""
        hgvs = hgvs or ""

        title = f"{gene} {hgvs}".strip() or result.get("_id", DEFAULT_TITLE)

        return {
            RESULT_ID: result.get("_id", ""),
            RESULT_TITLE: title,
            RESULT_SNIPPET: f"Clinical significance: {significance or DEFAULT_SIGNIFICANCE}",
            RESULT_URL: f"https://www.ncbi.nlm.nih.gov/snp/{rsid}"
            if rsid
            else "",
            RESULT_METADATA: {
                METADATA_GENE: gene,
                METADATA_RSID: rsid,
                METADATA_SIGNIFICANCE: significance,
                METADATA_CONSEQUENCE: cadd.get("consequence", ""),
            },
        }
229 | 
230 | 
class GeneHandler:
    """Formats gene records from MyGene.info into the standardized result shape."""

    @staticmethod
    def format_result(result: dict[str, Any]) -> dict[str, Any]:
        """Format a single gene result.

        Args:
            result: Raw gene data from MyGene.info API

        Returns:
            Standardized gene result with id, title, snippet, url, and metadata
        """
        symbol = result.get("symbol", "")
        name = result.get("name", "")

        # "SYMBOL: name" when both are known, otherwise whichever exists.
        if symbol and name:
            title = f"{symbol}: {name}"
        else:
            title = symbol or name or DEFAULT_TITLE

        # The ellipsis is added only when the summary is actually cut.
        snippet = result.get("summary", "")
        if snippet and len(snippet) > SNIPPET_LENGTH:
            snippet = snippet[:SNIPPET_LENGTH] + "..."

        # The Ensembl gene id is only read when the entry is a single dict.
        ensembl = result.get("ensembl")
        ensembl_gene = ensembl.get("gene") if isinstance(ensembl, dict) else None

        if symbol:
            url = f"https://www.genenames.org/data/gene-symbol-report/#!/symbol/{symbol}"
        else:
            url = ""

        return {
            # ``_id`` wins over ``entrezgene``; always rendered as a string.
            RESULT_ID: str(result.get("_id", result.get("entrezgene", ""))),
            RESULT_TITLE: title,
            RESULT_SNIPPET: snippet or "No summary available",
            RESULT_URL: url,
            RESULT_METADATA: {
                "entrezgene": result.get("entrezgene"),
                "symbol": symbol,
                "name": name,
                "type_of_gene": result.get("type_of_gene", ""),
                "ensembl": ensembl_gene,
                "refseq": result.get("refseq", {}),
            },
        }
282 | 
283 | 
class DrugHandler:
    """Formats drug/chemical records from MyChem.info into the standardized result shape."""

    @staticmethod
    def format_result(result: dict[str, Any]) -> dict[str, Any]:
        """Format a single drug result.

        Args:
            result: Raw drug data from MyChem.info API

        Returns:
            Standardized drug result with id, title, snippet, url, and metadata
        """
        drug_id = result.get("_id", "")
        drugbank_id = result.get("drugbank_id", "")
        pubchem_cid = result.get("pubchem_cid", "")

        # The indication is preferred over the general description.
        text = result.get("indication", "") or result.get("description", "")
        # The ellipsis is added only when the text is actually cut.
        if text and len(text) > SNIPPET_LENGTH:
            text = text[:SNIPPET_LENGTH] + "..."

        # Link to DrugBank when possible, otherwise to PubChem.
        if drugbank_id:
            link = f"https://www.drugbank.ca/drugs/{drugbank_id}"
        elif pubchem_cid:
            link = f"https://pubchem.ncbi.nlm.nih.gov/compound/{pubchem_cid}"
        else:
            link = ""

        metadata = {
            "drugbank_id": drugbank_id,
            "chembl_id": result.get("chembl_id", ""),
            "pubchem_cid": pubchem_cid,
            "chebi_id": result.get("chebi_id", ""),
            "formula": result.get("formula", ""),
            "tradename": result.get("tradename", []),
        }
        return {
            RESULT_ID: drug_id,
            RESULT_TITLE: result.get("name", "") or drug_id or DEFAULT_TITLE,
            RESULT_SNIPPET: text or "No description available",
            RESULT_URL: link,
            RESULT_METADATA: metadata,
        }
336 | 
337 | 
class DiseaseHandler:
    """Formats disease records from MyDisease.info into the standardized result shape."""

    @staticmethod
    def format_result(result: dict[str, Any]) -> dict[str, Any]:
        """Format a single disease result.

        Args:
            result: Raw disease data from MyDisease.info API

        Returns:
            Standardized disease result with id, title, snippet, url, and metadata
        """
        disease_id = result.get("_id", "")
        definition = result.get("definition", "")

        # The ellipsis is added only when the definition is actually cut;
        # the metadata below keeps the full definition.
        if definition and len(definition) > SNIPPET_LENGTH:
            snippet = definition[:SNIPPET_LENGTH] + "..."
        else:
            snippet = definition

        # The MONDO id is only read when the ``mondo`` entry is a dict.
        mondo = result.get("mondo", {})
        if isinstance(mondo, dict):
            mondo_id = mondo.get("id")
        else:
            mondo_id = ""

        link = ""
        if mondo_id:
            link = f"https://monarchinitiative.org/disease/{mondo_id}"

        return {
            RESULT_ID: disease_id,
            RESULT_TITLE: result.get("name", "") or disease_id or DEFAULT_TITLE,
            RESULT_SNIPPET: snippet or "No definition available",
            RESULT_URL: link,
            RESULT_METADATA: {
                "mondo_id": mondo_id,
                "definition": definition,
                "synonyms": result.get("synonyms", []),
                "xrefs": result.get("xrefs", {}),
                # Only the number of phenotypes is reported.
                "phenotypes": len(result.get("phenotypes", [])),
            },
        }
388 | 
389 | 
class NCIOrganizationHandler:
    """Formats NCI organization records into the standardized result shape."""

    @staticmethod
    def format_result(result: dict[str, Any]) -> dict[str, Any]:
        """Format a single NCI organization result.

        Args:
            result: Raw organization data from NCI CTS API

        Returns:
            Standardized organization result with id, title, snippet, url, and metadata
        """
        # ``type`` is preferred, ``category`` is the fallback key.
        kind = result.get("type", result.get("category", ""))
        city = result.get("city", "")
        state = result.get("state", "")

        # "City, State" using whichever parts are present.
        place = ", ".join(filter(None, (city, state)))

        details = [
            text
            for present, text in (
                (kind, f"Type: {kind}"),
                (place, f"Location: {place}"),
            )
            if present
        ]

        return {
            RESULT_ID: result.get("id", result.get("org_id", "")),
            RESULT_TITLE: result.get("name", "Unknown Organization"),
            RESULT_SNIPPET: " | ".join(details) or "No details available",
            RESULT_URL: "",  # NCI doesn't provide direct URLs to organizations
            RESULT_METADATA: {
                "type": kind,
                "city": city,
                "state": state,
                "country": result.get("country", ""),
            },
        }
433 | 
434 | 
class NCIInterventionHandler:
    """Formats NCI intervention records into the standardized result shape."""

    @staticmethod
    def format_result(result: dict[str, Any]) -> dict[str, Any]:
        """Format a single NCI intervention result.

        Args:
            result: Raw intervention data from NCI CTS API

        Returns:
            Standardized intervention result with id, title, snippet, url, and metadata
        """
        # ``type`` is preferred, ``category`` is the fallback key.
        kind = result.get("type", result.get("category", ""))
        synonyms = result.get("synonyms", [])

        details = [f"Type: {kind}"] if kind else []
        # Synonyms may be a list (first three are shown) or a single string;
        # any other type is left out of the snippet.
        if isinstance(synonyms, list) and synonyms:
            details.append(f"Also known as: {', '.join(synonyms[:3])}")
        elif isinstance(synonyms, str) and synonyms:
            details.append(f"Also known as: {synonyms}")

        return {
            RESULT_ID: result.get("id", result.get("intervention_id", "")),
            RESULT_TITLE: result.get("name", "Unknown Intervention"),
            RESULT_SNIPPET: " | ".join(details) or "No details available",
            RESULT_URL: "",  # NCI doesn't provide direct URLs to interventions
            RESULT_METADATA: {
                "type": kind,
                "synonyms": synonyms,
                "description": result.get("description", ""),
            },
        }
477 | 
478 | 
class NCIBiomarkerHandler:
    """Formats NCI biomarker records into the standardized result shape."""

    @staticmethod
    def format_result(result: dict[str, Any]) -> dict[str, Any]:
        """Format a single NCI biomarker result.

        Args:
            result: Raw biomarker data from NCI CTS API

        Returns:
            Standardized biomarker result with id, title, snippet, url, and metadata
        """
        name = result.get("name", "Unknown Biomarker")
        gene = result.get("gene", result.get("gene_symbol", ""))
        # ``type`` is preferred, ``category`` is the fallback key.
        kind = result.get("type", result.get("category", ""))
        assay = result.get("assay_type", "")

        # Prefix the gene unless the name already mentions it.
        title = f"{gene} - {name}" if gene and gene not in name else name

        details = [
            text
            for present, text in (
                (kind, f"Type: {kind}"),
                (assay, f"Assay: {assay}"),
            )
            if present
        ]

        return {
            RESULT_ID: result.get("id", result.get("biomarker_id", "")),
            RESULT_TITLE: title,
            RESULT_SNIPPET: " | ".join(details)
            or "Biomarker for trial eligibility",
            RESULT_URL: "",  # NCI doesn't provide direct URLs to biomarkers
            RESULT_METADATA: {
                "gene": gene,
                "type": kind,
                "assay_type": assay,
                "trial_count": result.get("trial_count", 0),
            },
        }
525 | 
526 | 
class NCIDiseaseHandler:
    """Formats NCI disease vocabulary records into the standardized result shape."""

    @staticmethod
    def format_result(result: dict[str, Any]) -> dict[str, Any]:
        """Format a single NCI disease result.

        Args:
            result: Raw disease data from NCI CTS API

        Returns:
            Standardized disease result with id, title, snippet, url, and metadata
        """
        # ``category`` is preferred here, ``type`` is the fallback key.
        category = result.get("category", result.get("type", ""))
        synonyms = result.get("synonyms", [])

        details = [f"Category: {category}"] if category else []
        # Synonyms may be a list (first three shown, remainder counted) or a
        # single string; any other type is left out of the snippet.
        if isinstance(synonyms, list) and synonyms:
            details.append(f"Also known as: {', '.join(synonyms[:3])}")
            hidden = len(synonyms) - 3
            if hidden > 0:
                details.append(f"and {hidden} more")
        elif isinstance(synonyms, str) and synonyms:
            details.append(f"Also known as: {synonyms}")

        return {
            RESULT_ID: result.get("id", result.get("disease_id", "")),
            RESULT_TITLE: result.get(
                "name", result.get("preferred_name", "Unknown Disease")
            ),
            RESULT_SNIPPET: " | ".join(details) or "NCI cancer vocabulary term",
            RESULT_URL: "",  # NCI doesn't provide direct URLs to disease terms
            RESULT_METADATA: {
                "category": category,
                "synonyms": synonyms,
                "codes": result.get("codes", {}),
            },
        }
573 | 
574 | 
def get_domain_handler(
    domain: str,
) -> (
    type[ArticleHandler]
    | type[TrialHandler]
    | type[VariantHandler]
    | type[GeneHandler]
    | type[DrugHandler]
    | type[DiseaseHandler]
    | type[NCIOrganizationHandler]
    | type[NCIInterventionHandler]
    | type[NCIBiomarkerHandler]
    | type[NCIDiseaseHandler]
):
    """Look up the handler class registered for a domain name.

    Args:
        domain: One of 'article', 'trial', 'variant', 'gene', 'drug',
            'disease', 'nci_organization', 'nci_intervention',
            'nci_biomarker' or 'nci_disease'

    Returns:
        The handler class (not an instance) for the domain

    Raises:
        ValueError: If domain is not recognized
    """
    # Domain name -> handler class; the mapping is rebuilt on every call.
    registry: dict[
        str,
        type[ArticleHandler]
        | type[TrialHandler]
        | type[VariantHandler]
        | type[GeneHandler]
        | type[DrugHandler]
        | type[DiseaseHandler]
        | type[NCIOrganizationHandler]
        | type[NCIInterventionHandler]
        | type[NCIBiomarkerHandler]
        | type[NCIDiseaseHandler],
    ] = {
        "article": ArticleHandler,
        "trial": TrialHandler,
        "variant": VariantHandler,
        "gene": GeneHandler,
        "drug": DrugHandler,
        "disease": DiseaseHandler,
        "nci_organization": NCIOrganizationHandler,
        "nci_intervention": NCIInterventionHandler,
        "nci_biomarker": NCIBiomarkerHandler,
        "nci_disease": NCIDiseaseHandler,
    }

    if domain not in registry:
        raise ValueError(f"Unknown domain: {domain}")
    return registry[domain]
631 | 
```

--------------------------------------------------------------------------------
/tests/tdd/trials/test_search.py:
--------------------------------------------------------------------------------

```python
  1 | import pytest
  2 | 
  3 | from biomcp.trials.search import (
  4 |     CLOSED_STATUSES,
  5 |     AgeGroup,
  6 |     DateField,
  7 |     InterventionType,
  8 |     LineOfTherapy,
  9 |     PrimaryPurpose,
 10 |     RecruitingStatus,
 11 |     SortOrder,
 12 |     SponsorType,
 13 |     StudyDesign,
 14 |     StudyType,
 15 |     TrialPhase,
 16 |     TrialQuery,
 17 |     _build_biomarker_expression_essie,
 18 |     _build_brain_mets_essie,
 19 |     _build_excluded_mutations_essie,
 20 |     _build_line_of_therapy_essie,
 21 |     _build_prior_therapy_essie,
 22 |     _build_progression_essie,
 23 |     _build_required_mutations_essie,
 24 |     _inject_ids,
 25 |     convert_query,
 26 | )
 27 | 
 28 | 
 29 | @pytest.mark.asyncio
 30 | async def test_convert_query_basic_parameters():
 31 |     """Test basic parameter conversion from TrialQuery to API format."""
 32 |     query = TrialQuery(conditions=["lung cancer"])
 33 |     params = await convert_query(query)
 34 | 
 35 |     assert "markupFormat" in params
 36 |     assert params["markupFormat"] == ["markdown"]
 37 |     assert "query.cond" in params
 38 |     assert params["query.cond"] == ["lung cancer"]
 39 |     assert "filter.overallStatus" in params
 40 |     assert "RECRUITING" in params["filter.overallStatus"][0]
 41 | 
 42 | 
 43 | @pytest.mark.asyncio
 44 | async def test_convert_query_multiple_conditions():
 45 |     """Test conversion of multiple conditions to API format."""
 46 |     query = TrialQuery(conditions=["lung cancer", "metastatic"])
 47 |     params = await convert_query(query)
 48 | 
 49 |     assert "query.cond" in params
 50 |     # The query should contain the original terms, but may have expanded synonyms
 51 |     cond_value = params["query.cond"][0]
 52 |     assert "lung cancer" in cond_value
 53 |     assert "metastatic" in cond_value
 54 |     assert cond_value.startswith("(") and cond_value.endswith(")")
 55 | 
 56 | 
 57 | @pytest.mark.asyncio
 58 | async def test_convert_query_terms_parameter():
 59 |     """Test conversion of terms parameter to API format."""
 60 |     query = TrialQuery(terms=["immunotherapy"])
 61 |     params = await convert_query(query)
 62 | 
 63 |     assert "query.term" in params
 64 |     assert params["query.term"] == ["immunotherapy"]
 65 | 
 66 | 
 67 | @pytest.mark.asyncio
 68 | async def test_convert_query_interventions_parameter():
 69 |     """Test conversion of interventions parameter to API format."""
 70 |     query = TrialQuery(interventions=["pembrolizumab"])
 71 |     params = await convert_query(query)
 72 | 
 73 |     assert "query.intr" in params
 74 |     assert params["query.intr"] == ["pembrolizumab"]
 75 | 
 76 | 
 77 | @pytest.mark.asyncio
 78 | async def test_convert_query_lead_sponsor_parameter():
 79 |     """Test conversion of lead_sponsor parameter to API format."""
 80 |     query = TrialQuery(lead_sponsor=["Pfizer"])
 81 |     params = await convert_query(query)
 82 | 
 83 |     assert "query.lead" in params
 84 |     assert params["query.lead"] == ["Pfizer"]
 85 | 
 86 | 
 87 | @pytest.mark.asyncio
 88 | async def test_convert_query_multiple_lead_sponsors():
 89 |     """Test conversion of multiple lead sponsors to API format."""
 90 |     query = TrialQuery(lead_sponsor=["Pfizer", "National Cancer Institute"])
 91 |     params = await convert_query(query)
 92 | 
 93 |     assert "query.lead" in params
 94 |     # Multiple sponsors are combined with OR logic
 95 |     assert len(params["query.lead"]) == 1
 96 |     lead_value = params["query.lead"][0]
 97 |     assert "Pfizer" in lead_value
 98 |     assert "National Cancer Institute" in lead_value
 99 |     assert " OR " in lead_value or lead_value.startswith(
100 |         "("
101 |     )  # OR or parenthesized format
102 | 
103 | 
104 | @pytest.mark.asyncio
105 | async def test_convert_query_nct_ids():
106 |     """Test conversion of NCT IDs to API format."""
107 |     query = TrialQuery(nct_ids=["NCT04179552"])
108 |     params = await convert_query(query)
109 | 
110 |     assert "query.id" in params
111 |     assert params["query.id"] == ["NCT04179552"]
112 |     # Note: The implementation keeps filter.overallStatus when using nct_ids
113 |     # So we don't assert its absence
114 | 
115 | 
116 | @pytest.mark.asyncio
117 | async def test_convert_query_recruiting_status():
118 |     """Test conversion of recruiting status to API format."""
119 |     # Test open status
120 |     query = TrialQuery(recruiting_status=RecruitingStatus.OPEN)
121 |     params = await convert_query(query)
122 | 
123 |     assert "filter.overallStatus" in params
124 |     assert "RECRUITING" in params["filter.overallStatus"][0]
125 | 
126 |     # Test closed status
127 |     query = TrialQuery(recruiting_status=RecruitingStatus.CLOSED)
128 |     params = await convert_query(query)
129 | 
130 |     assert "filter.overallStatus" in params
131 |     assert all(
132 |         status in params["filter.overallStatus"][0]
133 |         for status in CLOSED_STATUSES
134 |     )
135 | 
136 |     # Test any status
137 |     query = TrialQuery(recruiting_status=RecruitingStatus.ANY)
138 |     params = await convert_query(query)
139 | 
140 |     assert "filter.overallStatus" not in params
141 | 
142 | 
143 | @pytest.mark.asyncio
144 | async def test_convert_query_location_parameters():
145 |     """Test conversion of location parameters to API format."""
146 |     query = TrialQuery(lat=40.7128, long=-74.0060, distance=10)
147 |     params = await convert_query(query)
148 | 
149 |     assert "filter.geo" in params
150 |     assert params["filter.geo"] == ["distance(40.7128,-74.006,10mi)"]
151 | 
152 | 
153 | @pytest.mark.asyncio
154 | async def test_convert_query_study_type():
155 |     """Test conversion of study type to API format."""
156 |     query = TrialQuery(study_type=StudyType.INTERVENTIONAL)
157 |     params = await convert_query(query)
158 | 
159 |     assert "filter.advanced" in params
160 |     assert "AREA[StudyType]Interventional" in params["filter.advanced"][0]
161 | 
162 | 
163 | @pytest.mark.asyncio
164 | async def test_convert_query_phase():
165 |     """Test conversion of phase to API format."""
166 |     query = TrialQuery(phase=TrialPhase.PHASE3)
167 |     params = await convert_query(query)
168 | 
169 |     assert "filter.advanced" in params
170 |     assert "AREA[Phase]PHASE3" in params["filter.advanced"][0]
171 | 
172 | 
173 | @pytest.mark.asyncio
174 | async def test_convert_query_date_range():
175 |     """Test conversion of date range to API format."""
176 |     query = TrialQuery(
177 |         min_date="2020-01-01",
178 |         max_date="2020-12-31",
179 |         date_field=DateField.LAST_UPDATE,
180 |     )
181 |     params = await convert_query(query)
182 | 
183 |     assert "filter.advanced" in params
184 |     assert (
185 |         "AREA[LastUpdatePostDate]RANGE[2020-01-01,2020-12-31]"
186 |         in params["filter.advanced"][0]
187 |     )
188 | 
189 |     # Test min date only
190 |     query = TrialQuery(
191 |         min_date="2021-01-01",
192 |         date_field=DateField.STUDY_START,
193 |     )
194 |     params = await convert_query(query)
195 | 
196 |     assert "filter.advanced" in params
197 |     assert (
198 |         "AREA[StartDate]RANGE[2021-01-01,MAX]" in params["filter.advanced"][0]
199 |     )
200 | 
201 | 
202 | @pytest.mark.asyncio
203 | async def test_convert_query_sort_order():
204 |     """Test conversion of sort order to API format."""
205 |     query = TrialQuery(sort=SortOrder.RELEVANCE)
206 |     params = await convert_query(query)
207 | 
208 |     assert "sort" in params
209 |     assert params["sort"] == ["@relevance"]
210 | 
211 |     query = TrialQuery(sort=SortOrder.LAST_UPDATE)
212 |     params = await convert_query(query)
213 | 
214 |     assert "sort" in params
215 |     assert params["sort"] == ["LastUpdatePostDate:desc"]
216 | 
217 | 
218 | @pytest.mark.asyncio
219 | async def test_convert_query_intervention_type():
220 |     """Test conversion of intervention type to API format."""
221 |     query = TrialQuery(intervention_type=InterventionType.DRUG)
222 |     params = await convert_query(query)
223 | 
224 |     assert "filter.advanced" in params
225 |     assert "AREA[InterventionType]Drug" in params["filter.advanced"][0]
226 | 
227 | 
228 | @pytest.mark.asyncio
229 | async def test_convert_query_sponsor_type():
230 |     """Test conversion of sponsor type to API format."""
231 |     query = TrialQuery(sponsor_type=SponsorType.ACADEMIC)
232 |     params = await convert_query(query)
233 | 
234 |     assert "filter.advanced" in params
235 |     assert "AREA[SponsorType]Academic" in params["filter.advanced"][0]
236 | 
237 | 
238 | @pytest.mark.asyncio
239 | async def test_convert_query_study_design():
240 |     """Test conversion of study design to API format."""
241 |     query = TrialQuery(study_design=StudyDesign.RANDOMIZED)
242 |     params = await convert_query(query)
243 | 
244 |     assert "filter.advanced" in params
245 |     assert "AREA[StudyDesign]Randomized" in params["filter.advanced"][0]
246 | 
247 | 
248 | @pytest.mark.asyncio
249 | async def test_convert_query_age_group():
250 |     """Test conversion of age group to API format."""
251 |     query = TrialQuery(age_group=AgeGroup.ADULT)
252 |     params = await convert_query(query)
253 | 
254 |     assert "filter.advanced" in params
255 |     assert "AREA[StdAge]Adult" in params["filter.advanced"][0]
256 | 
257 | 
258 | @pytest.mark.asyncio
259 | async def test_convert_query_primary_purpose():
260 |     """Test conversion of primary purpose to API format."""
261 |     query = TrialQuery(primary_purpose=PrimaryPurpose.TREATMENT)
262 |     params = await convert_query(query)
263 | 
264 |     assert "filter.advanced" in params
265 |     assert (
266 |         "AREA[DesignPrimaryPurpose]Treatment" in params["filter.advanced"][0]
267 |     )
268 | 
269 | 
270 | @pytest.mark.asyncio
271 | async def test_convert_query_next_page_hash():
272 |     """Test conversion of next_page_hash to API format."""
273 |     query = TrialQuery(next_page_hash="abc123")
274 |     params = await convert_query(query)
275 | 
276 |     assert "pageToken" in params
277 |     assert params["pageToken"] == ["abc123"]
278 | 
279 | 
280 | @pytest.mark.asyncio
281 | async def test_convert_query_complex_parameters():
282 |     """Test conversion of multiple parameters to API format."""
283 |     query = TrialQuery(
284 |         conditions=["diabetes"],
285 |         terms=["obesity"],
286 |         interventions=["metformin"],
287 |         primary_purpose=PrimaryPurpose.TREATMENT,
288 |         study_type=StudyType.INTERVENTIONAL,
289 |         intervention_type=InterventionType.DRUG,
290 |         recruiting_status=RecruitingStatus.OPEN,
291 |         phase=TrialPhase.PHASE3,
292 |         age_group=AgeGroup.ADULT,
293 |         sort=SortOrder.RELEVANCE,
294 |     )
295 |     params = await convert_query(query)
296 | 
297 |     assert "query.cond" in params
298 |     # Disease synonym expansion may add synonyms to diabetes
299 |     assert "diabetes" in params["query.cond"][0]
300 |     assert "query.term" in params
301 |     assert params["query.term"] == ["obesity"]
302 |     assert "query.intr" in params
303 |     assert params["query.intr"] == ["metformin"]
304 |     assert "filter.advanced" in params
305 |     assert (
306 |         "AREA[DesignPrimaryPurpose]Treatment" in params["filter.advanced"][0]
307 |     )
308 |     assert "AREA[StudyType]Interventional" in params["filter.advanced"][0]
309 |     assert "AREA[InterventionType]Drug" in params["filter.advanced"][0]
310 |     assert "AREA[Phase]PHASE3" in params["filter.advanced"][0]
311 |     assert "AREA[StdAge]Adult" in params["filter.advanced"][0]
312 |     assert "filter.overallStatus" in params
313 |     assert "RECRUITING" in params["filter.overallStatus"][0]
314 |     assert "sort" in params
315 |     assert params["sort"] == ["@relevance"]
316 | 
317 | 
318 | # Test TrialQuery field validation for CLI input processing
319 | # noinspection PyTypeChecker
320 | def test_trial_query_field_validation_basic():
321 |     """Test basic field validation for TrialQuery."""
322 |     # Test list fields conversion
323 |     query = TrialQuery(conditions="diabetes")
324 |     assert query.conditions == ["diabetes"]
325 | 
326 |     query = TrialQuery(interventions="metformin")
327 |     assert query.interventions == ["metformin"]
328 | 
329 |     query = TrialQuery(terms="blood glucose")
330 |     assert query.terms == ["blood glucose"]
331 | 
332 |     query = TrialQuery(nct_ids="NCT01234567")
333 |     assert query.nct_ids == ["NCT01234567"]
334 | 
335 | 
336 | # noinspection PyTypeChecker
337 | def test_trial_query_field_validation_recruiting_status():
338 |     """Test recruiting status field validation."""
339 |     # Exact match uppercase
340 |     query = TrialQuery(recruiting_status="OPEN")
341 |     assert query.recruiting_status == RecruitingStatus.OPEN
342 | 
343 |     # Exact match lowercase
344 |     query = TrialQuery(recruiting_status="closed")
345 |     assert query.recruiting_status == RecruitingStatus.CLOSED
346 | 
347 |     # Invalid value
348 |     with pytest.raises(ValueError) as excinfo:
349 |         TrialQuery(recruiting_status="invalid")
350 |     assert "validation error for TrialQuery" in str(excinfo.value)
351 | 
352 | 
353 | # noinspection PyTypeChecker
354 | @pytest.mark.asyncio
355 | async def test_trial_query_field_validation_combined():
356 |     """Test combined parameters validation."""
357 |     query = TrialQuery(
358 |         conditions=["diabetes", "obesity"],
359 |         interventions="metformin",
360 |         recruiting_status="open",
361 |         study_type="interventional",
362 |         lat=40.7128,
363 |         long=-74.0060,
364 |         distance=10,
365 |     )
366 | 
367 |     assert query.conditions == ["diabetes", "obesity"]
368 |     assert query.interventions == ["metformin"]
369 |     assert query.recruiting_status == RecruitingStatus.OPEN
370 |     assert query.study_type == StudyType.INTERVENTIONAL
371 |     assert query.lat == 40.7128
372 |     assert query.long == -74.0060
373 |     assert query.distance == 10
374 | 
375 |     # Check that the query can be converted to parameters properly
376 |     params = await convert_query(query)
377 |     assert "query.cond" in params
378 |     # The query should contain the original terms, but may have expanded synonyms
379 |     cond_value = params["query.cond"][0]
380 |     assert "diabetes" in cond_value
381 |     assert "obesity" in cond_value
382 |     assert cond_value.startswith("(") and cond_value.endswith(")")
383 |     assert "query.intr" in params
384 |     assert "metformin" in params["query.intr"][0]
385 |     assert "filter.geo" in params
386 |     assert "distance(40.7128,-74.006,10mi)" in params["filter.geo"][0]
387 | 
388 | 
389 | # noinspection PyTypeChecker
390 | @pytest.mark.asyncio
391 | async def test_trial_query_field_validation_terms():
392 |     """Test terms parameter validation."""
393 |     # Single term as string
394 |     query = TrialQuery(terms="cancer")
395 |     assert query.terms == ["cancer"]
396 | 
397 |     # Multiple terms as list
398 |     query = TrialQuery(terms=["cancer", "therapy"])
399 |     assert query.terms == ["cancer", "therapy"]
400 | 
401 |     # Check parameter generation
402 |     params = await convert_query(query)
403 |     assert "query.term" in params
404 |     assert "(cancer OR therapy)" in params["query.term"][0]
405 | 
406 | 
407 | # noinspection PyTypeChecker
408 | @pytest.mark.asyncio
409 | async def test_trial_query_field_validation_nct_ids():
410 |     """Test NCT IDs parameter validation."""
411 |     # Single NCT ID
412 |     query = TrialQuery(nct_ids="NCT01234567")
413 |     assert query.nct_ids == ["NCT01234567"]
414 | 
415 |     # Multiple NCT IDs
416 |     query = TrialQuery(nct_ids=["NCT01234567", "NCT89012345"])
417 |     assert query.nct_ids == ["NCT01234567", "NCT89012345"]
418 | 
419 |     # Check parameter generation
420 |     params = await convert_query(query)
421 |     assert "query.id" in params
422 |     assert "NCT01234567,NCT89012345" in params["query.id"][0]
423 | 
424 | 
425 | # noinspection PyTypeChecker
426 | @pytest.mark.asyncio
427 | async def test_trial_query_field_validation_date_range():
428 |     """Test date range parameters validation."""
429 |     # Min date only with date field
430 |     query = TrialQuery(min_date="2020-01-01", date_field=DateField.STUDY_START)
431 |     assert query.min_date == "2020-01-01"
432 |     assert query.date_field == DateField.STUDY_START
433 | 
434 |     # Min and max date with date field using lazy mapping
435 |     query = TrialQuery(
436 |         min_date="2020-01-01",
437 |         max_date="2021-12-31",
438 |         date_field="last update",  # space not underscore.
439 |     )
440 |     assert query.min_date == "2020-01-01"
441 |     assert query.max_date == "2021-12-31"
442 |     assert query.date_field == DateField.LAST_UPDATE
443 | 
444 |     # Check parameter generation
445 |     params = await convert_query(query)
446 |     assert "filter.advanced" in params
447 |     assert (
448 |         "AREA[LastUpdatePostDate]RANGE[2020-01-01,2021-12-31]"
449 |         in params["filter.advanced"][0]
450 |     )
451 | 
452 | 
453 | # noinspection PyTypeChecker
454 | def test_trial_query_field_validation_primary_purpose():
455 |     """Test primary purpose parameter validation."""
456 |     # Exact match uppercase
457 |     query = TrialQuery(primary_purpose=PrimaryPurpose.TREATMENT)
458 |     assert query.primary_purpose == PrimaryPurpose.TREATMENT
459 | 
460 |     # Exact match lowercase
461 |     query = TrialQuery(primary_purpose=PrimaryPurpose.PREVENTION)
462 |     assert query.primary_purpose == PrimaryPurpose.PREVENTION
463 | 
464 |     # Case-insensitive
465 |     query = TrialQuery(primary_purpose="ScReeNING")
466 |     assert query.primary_purpose == PrimaryPurpose.SCREENING
467 | 
468 |     # Invalid
469 |     with pytest.raises(ValueError):
470 |         TrialQuery(primary_purpose="invalid")
471 | 
472 | 
473 | def test_inject_ids_with_many_ids_and_condition():
474 |     """Test _inject_ids function with 300 IDs and a condition to ensure filter.ids is used."""
475 |     # Create a params dict with a condition (indicating other filters present)
476 |     params = {
477 |         "query.cond": ["melanoma"],
478 |         "format": ["json"],
479 |         "markupFormat": ["markdown"],
480 |     }
481 | 
482 |     # Generate 300 NCT IDs
483 |     nct_ids = [f"NCT{str(i).zfill(8)}" for i in range(1, 301)]
484 | 
485 |     # Call _inject_ids with has_other_filters=True
486 |     _inject_ids(params, nct_ids, has_other_filters=True)
487 | 
488 |     # Assert that filter.ids is used (not query.id)
489 |     assert "filter.ids" in params
490 |     assert "query.id" not in params
491 | 
492 |     # Verify the IDs are properly formatted
493 |     ids_param = params["filter.ids"][0]
494 |     assert ids_param.startswith("NCT")
495 |     assert "NCT00000001" in ids_param
496 |     assert "NCT00000300" in ids_param
497 | 
498 |     # Verify it's a comma-separated list
499 |     assert "," in ids_param
500 |     assert ids_param.count(",") == 299  # 300 IDs = 299 commas
501 | 
502 | 
503 | def test_inject_ids_without_other_filters():
504 |     """Test _inject_ids function with only NCT IDs (no other filters)."""
505 |     # Create a minimal params dict
506 |     params = {
507 |         "format": ["json"],
508 |         "markupFormat": ["markdown"],
509 |     }
510 | 
511 |     # Use a small number of NCT IDs
512 |     nct_ids = ["NCT00000001", "NCT00000002", "NCT00000003"]
513 | 
514 |     # Call _inject_ids with has_other_filters=False
515 |     _inject_ids(params, nct_ids, has_other_filters=False)
516 | 
517 |     # Assert that query.id is used (not filter.ids) for small lists
518 |     assert "query.id" in params
519 |     assert "filter.ids" not in params
520 | 
521 |     # Verify the format
522 |     assert params["query.id"][0] == "NCT00000001,NCT00000002,NCT00000003"
523 | 
524 | 
525 | def test_inject_ids_large_list_without_filters():
526 |     """Test _inject_ids with a large ID list but no other filters."""
527 |     params = {
528 |         "format": ["json"],
529 |         "markupFormat": ["markdown"],
530 |     }
531 | 
532 |     # Generate enough IDs to exceed 1800 character limit
533 |     nct_ids = [f"NCT{str(i).zfill(8)}" for i in range(1, 201)]  # ~2200 chars
534 | 
535 |     # Call _inject_ids with has_other_filters=False
536 |     _inject_ids(params, nct_ids, has_other_filters=False)
537 | 
538 |     # Assert that filter.ids is used for large lists even without other filters
539 |     assert "filter.ids" in params
540 |     assert "query.id" not in params
541 | 
542 | 
543 | # Tests for new Essie builder functions
544 | def test_build_prior_therapy_essie():
545 |     """Test building Essie fragments for prior therapies."""
546 |     # Single therapy
547 |     fragments = _build_prior_therapy_essie(["osimertinib"])
548 |     assert len(fragments) == 1
549 |     assert (
550 |         fragments[0]
551 |         == 'AREA[EligibilityCriteria]("osimertinib" AND (prior OR previous OR received))'
552 |     )
553 | 
554 |     # Multiple therapies
555 |     fragments = _build_prior_therapy_essie(["osimertinib", "erlotinib"])
556 |     assert len(fragments) == 2
557 |     assert (
558 |         fragments[0]
559 |         == 'AREA[EligibilityCriteria]("osimertinib" AND (prior OR previous OR received))'
560 |     )
561 |     assert (
562 |         fragments[1]
563 |         == 'AREA[EligibilityCriteria]("erlotinib" AND (prior OR previous OR received))'
564 |     )
565 | 
566 |     # Empty strings are filtered out
567 |     fragments = _build_prior_therapy_essie(["osimertinib", "", "erlotinib"])
568 |     assert len(fragments) == 2
569 | 
570 | 
571 | def test_build_progression_essie():
572 |     """Test building Essie fragments for progression on therapy."""
573 |     fragments = _build_progression_essie(["pembrolizumab"])
574 |     assert len(fragments) == 1
575 |     assert (
576 |         fragments[0]
577 |         == 'AREA[EligibilityCriteria]("pembrolizumab" AND (progression OR resistant OR refractory))'
578 |     )
579 | 
580 | 
581 | def test_build_required_mutations_essie():
582 |     """Test building Essie fragments for required mutations."""
583 |     fragments = _build_required_mutations_essie(["EGFR L858R", "T790M"])
584 |     assert len(fragments) == 2
585 |     assert fragments[0] == 'AREA[EligibilityCriteria]("EGFR L858R")'
586 |     assert fragments[1] == 'AREA[EligibilityCriteria]("T790M")'
587 | 
588 | 
589 | def test_build_excluded_mutations_essie():
590 |     """Test building Essie fragments for excluded mutations."""
591 |     fragments = _build_excluded_mutations_essie(["KRAS G12C"])
592 |     assert len(fragments) == 1
593 |     assert fragments[0] == 'AREA[EligibilityCriteria](NOT "KRAS G12C")'
594 | 
595 | 
596 | def test_build_biomarker_expression_essie():
597 |     """Test building Essie fragments for biomarker expression."""
598 |     biomarkers = {"PD-L1": "≥50%", "TMB": "≥10 mut/Mb"}
599 |     fragments = _build_biomarker_expression_essie(biomarkers)
600 |     assert len(fragments) == 2
601 |     assert 'AREA[EligibilityCriteria]("PD-L1" AND "≥50%")' in fragments
602 |     assert 'AREA[EligibilityCriteria]("TMB" AND "≥10 mut/Mb")' in fragments
603 | 
604 |     # Empty values are filtered out
605 |     biomarkers = {"PD-L1": "≥50%", "TMB": "", "HER2": "positive"}
606 |     fragments = _build_biomarker_expression_essie(biomarkers)
607 |     assert len(fragments) == 2
608 | 
609 | 
610 | def test_build_line_of_therapy_essie():
611 |     """Test building Essie fragment for line of therapy."""
612 |     # First line
613 |     fragment = _build_line_of_therapy_essie(LineOfTherapy.FIRST_LINE)
614 |     assert (
615 |         fragment
616 |         == 'AREA[EligibilityCriteria]("first line" OR "first-line" OR "1st line" OR "frontline" OR "treatment naive" OR "previously untreated")'
617 |     )
618 | 
619 |     # Second line
620 |     fragment = _build_line_of_therapy_essie(LineOfTherapy.SECOND_LINE)
621 |     assert (
622 |         fragment
623 |         == 'AREA[EligibilityCriteria]("second line" OR "second-line" OR "2nd line" OR "one prior line" OR "1 prior line")'
624 |     )
625 | 
626 |     # Third line plus
627 |     fragment = _build_line_of_therapy_essie(LineOfTherapy.THIRD_LINE_PLUS)
628 |     assert (
629 |         fragment
630 |         == 'AREA[EligibilityCriteria]("third line" OR "third-line" OR "3rd line" OR "≥2 prior" OR "at least 2 prior" OR "heavily pretreated")'
631 |     )
632 | 
633 | 
634 | def test_build_brain_mets_essie():
635 |     """Test building Essie fragment for brain metastases filter."""
636 |     # Allow brain mets (no filter)
637 |     fragment = _build_brain_mets_essie(True)
638 |     assert fragment == ""
639 | 
640 |     # Exclude brain mets
641 |     fragment = _build_brain_mets_essie(False)
642 |     assert fragment == 'AREA[EligibilityCriteria](NOT "brain metastases")'
643 | 
644 | 
645 | @pytest.mark.asyncio
646 | async def test_convert_query_with_eligibility_fields():
647 |     """Test conversion of query with new eligibility-focused fields."""
648 |     query = TrialQuery(
649 |         conditions=["lung cancer"],
650 |         prior_therapies=["osimertinib"],
651 |         progression_on=["erlotinib"],
652 |         required_mutations=["EGFR L858R"],
653 |         excluded_mutations=["T790M"],
654 |         biomarker_expression={"PD-L1": "≥50%"},
655 |         line_of_therapy=LineOfTherapy.SECOND_LINE,
656 |         allow_brain_mets=False,
657 |     )
658 |     params = await convert_query(query)
659 | 
660 |     # Check that query.term contains all the Essie fragments
661 |     assert "query.term" in params
662 |     term = params["query.term"][0]
663 | 
664 |     # Prior therapy
665 |     assert (
666 |         'AREA[EligibilityCriteria]("osimertinib" AND (prior OR previous OR received))'
667 |         in term
668 |     )
669 | 
670 |     # Progression
671 |     assert (
672 |         'AREA[EligibilityCriteria]("erlotinib" AND (progression OR resistant OR refractory))'
673 |         in term
674 |     )
675 | 
676 |     # Required mutation
677 |     assert 'AREA[EligibilityCriteria]("EGFR L858R")' in term
678 | 
679 |     # Excluded mutation
680 |     assert 'AREA[EligibilityCriteria](NOT "T790M")' in term
681 | 
682 |     # Biomarker expression
683 |     assert 'AREA[EligibilityCriteria]("PD-L1" AND "≥50%")' in term
684 | 
685 |     # Line of therapy
686 |     assert 'AREA[EligibilityCriteria]("second line" OR "second-line"' in term
687 | 
688 |     # Brain mets exclusion
689 |     assert 'AREA[EligibilityCriteria](NOT "brain metastases")' in term
690 | 
691 |     # All fragments should be combined with AND
692 |     assert " AND " in term
693 | 
694 | 
695 | @pytest.mark.asyncio
696 | async def test_convert_query_with_custom_fields_and_page_size():
697 |     """Test conversion of query with custom return fields and page size."""
698 |     query = TrialQuery(
699 |         conditions=["diabetes"],
700 |         return_fields=["NCTId", "BriefTitle", "OverallStatus"],
701 |         page_size=100,
702 |     )
703 |     params = await convert_query(query)
704 | 
705 |     assert "fields" in params
706 |     assert params["fields"] == ["NCTId,BriefTitle,OverallStatus"]
707 | 
708 |     assert "pageSize" in params
709 |     assert params["pageSize"] == ["100"]
710 | 
711 | 
712 | @pytest.mark.asyncio
713 | async def test_convert_query_eligibility_with_existing_terms():
714 |     """Test that eligibility Essie fragments are properly combined with existing terms."""
715 |     query = TrialQuery(
716 |         terms=["immunotherapy"],
717 |         prior_therapies=["chemotherapy"],
718 |     )
719 |     params = await convert_query(query)
720 | 
721 |     assert "query.term" in params
722 |     term = params["query.term"][0]
723 | 
724 |     # Should contain both the original term and the new Essie fragment
725 |     assert "immunotherapy" in term
726 |     assert (
727 |         'AREA[EligibilityCriteria]("chemotherapy" AND (prior OR previous OR received))'
728 |         in term
729 |     )
730 |     # Should be combined with AND
731 |     assert "immunotherapy AND AREA[EligibilityCriteria]" in term
732 | 
```
Page 14/20 · First · Prev · Next · Last