This is page 14 of 20. Use http://codebase.md/genomoncology/biomcp?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .github
│ ├── actions
│ │ └── setup-python-env
│ │ └── action.yml
│ ├── dependabot.yml
│ └── workflows
│ ├── ci.yml
│ ├── deploy-docs.yml
│ ├── main.yml.disabled
│ ├── on-release-main.yml
│ └── validate-codecov-config.yml
├── .gitignore
├── .pre-commit-config.yaml
├── BIOMCP_DATA_FLOW.md
├── CHANGELOG.md
├── CNAME
├── codecov.yaml
├── docker-compose.yml
├── Dockerfile
├── docs
│ ├── apis
│ │ ├── error-codes.md
│ │ ├── overview.md
│ │ └── python-sdk.md
│ ├── assets
│ │ ├── biomcp-cursor-locations.png
│ │ ├── favicon.ico
│ │ ├── icon.png
│ │ ├── logo.png
│ │ ├── mcp_architecture.txt
│ │ └── remote-connection
│ │ ├── 00_connectors.png
│ │ ├── 01_add_custom_connector.png
│ │ ├── 02_connector_enabled.png
│ │ ├── 03_connect_to_biomcp.png
│ │ ├── 04_select_google_oauth.png
│ │ └── 05_success_connect.png
│ ├── backend-services-reference
│ │ ├── 01-overview.md
│ │ ├── 02-biothings-suite.md
│ │ ├── 03-cbioportal.md
│ │ ├── 04-clinicaltrials-gov.md
│ │ ├── 05-nci-cts-api.md
│ │ ├── 06-pubtator3.md
│ │ └── 07-alphagenome.md
│ ├── blog
│ │ ├── ai-assisted-clinical-trial-search-analysis.md
│ │ ├── images
│ │ │ ├── deep-researcher-video.png
│ │ │ ├── researcher-announce.png
│ │ │ ├── researcher-drop-down.png
│ │ │ ├── researcher-prompt.png
│ │ │ ├── trial-search-assistant.png
│ │ │ └── what_is_biomcp_thumbnail.png
│ │ └── researcher-persona-resource.md
│ ├── changelog.md
│ ├── CNAME
│ ├── concepts
│ │ ├── 01-what-is-biomcp.md
│ │ ├── 02-the-deep-researcher-persona.md
│ │ └── 03-sequential-thinking-with-the-think-tool.md
│ ├── developer-guides
│ │ ├── 01-server-deployment.md
│ │ ├── 02-contributing-and-testing.md
│ │ ├── 03-third-party-endpoints.md
│ │ ├── 04-transport-protocol.md
│ │ ├── 05-error-handling.md
│ │ ├── 06-http-client-and-caching.md
│ │ ├── 07-performance-optimizations.md
│ │ └── generate_endpoints.py
│ ├── faq-condensed.md
│ ├── FDA_SECURITY.md
│ ├── genomoncology.md
│ ├── getting-started
│ │ ├── 01-quickstart-cli.md
│ │ ├── 02-claude-desktop-integration.md
│ │ └── 03-authentication-and-api-keys.md
│ ├── how-to-guides
│ │ ├── 01-find-articles-and-cbioportal-data.md
│ │ ├── 02-find-trials-with-nci-and-biothings.md
│ │ ├── 03-get-comprehensive-variant-annotations.md
│ │ ├── 04-predict-variant-effects-with-alphagenome.md
│ │ ├── 05-logging-and-monitoring-with-bigquery.md
│ │ └── 06-search-nci-organizations-and-interventions.md
│ ├── index.md
│ ├── policies.md
│ ├── reference
│ │ ├── architecture-diagrams.md
│ │ ├── quick-architecture.md
│ │ ├── quick-reference.md
│ │ └── visual-architecture.md
│ ├── robots.txt
│ ├── stylesheets
│ │ ├── announcement.css
│ │ └── extra.css
│ ├── troubleshooting.md
│ ├── tutorials
│ │ ├── biothings-prompts.md
│ │ ├── claude-code-biomcp-alphagenome.md
│ │ ├── nci-prompts.md
│ │ ├── openfda-integration.md
│ │ ├── openfda-prompts.md
│ │ ├── pydantic-ai-integration.md
│ │ └── remote-connection.md
│ ├── user-guides
│ │ ├── 01-command-line-interface.md
│ │ ├── 02-mcp-tools-reference.md
│ │ └── 03-integrating-with-ides-and-clients.md
│ └── workflows
│ └── all-workflows.md
├── example_scripts
│ ├── mcp_integration.py
│ └── python_sdk.py
├── glama.json
├── LICENSE
├── lzyank.toml
├── Makefile
├── mkdocs.yml
├── package-lock.json
├── package.json
├── pyproject.toml
├── README.md
├── scripts
│ ├── check_docs_in_mkdocs.py
│ ├── check_http_imports.py
│ └── generate_endpoints_doc.py
├── smithery.yaml
├── src
│ └── biomcp
│ ├── __init__.py
│ ├── __main__.py
│ ├── articles
│ │ ├── __init__.py
│ │ ├── autocomplete.py
│ │ ├── fetch.py
│ │ ├── preprints.py
│ │ ├── search_optimized.py
│ │ ├── search.py
│ │ └── unified.py
│ ├── biomarkers
│ │ ├── __init__.py
│ │ └── search.py
│ ├── cbioportal_helper.py
│ ├── circuit_breaker.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── articles.py
│ │ ├── biomarkers.py
│ │ ├── diseases.py
│ │ ├── health.py
│ │ ├── interventions.py
│ │ ├── main.py
│ │ ├── openfda.py
│ │ ├── organizations.py
│ │ ├── server.py
│ │ ├── trials.py
│ │ └── variants.py
│ ├── connection_pool.py
│ ├── constants.py
│ ├── core.py
│ ├── diseases
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ └── search.py
│ ├── domain_handlers.py
│ ├── drugs
│ │ ├── __init__.py
│ │ └── getter.py
│ ├── exceptions.py
│ ├── genes
│ │ ├── __init__.py
│ │ └── getter.py
│ ├── http_client_simple.py
│ ├── http_client.py
│ ├── individual_tools.py
│ ├── integrations
│ │ ├── __init__.py
│ │ ├── biothings_client.py
│ │ └── cts_api.py
│ ├── interventions
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ └── search.py
│ ├── logging_filter.py
│ ├── metrics_handler.py
│ ├── metrics.py
│ ├── oncokb_helper.py
│ ├── openfda
│ │ ├── __init__.py
│ │ ├── adverse_events_helpers.py
│ │ ├── adverse_events.py
│ │ ├── cache.py
│ │ ├── constants.py
│ │ ├── device_events_helpers.py
│ │ ├── device_events.py
│ │ ├── drug_approvals.py
│ │ ├── drug_labels_helpers.py
│ │ ├── drug_labels.py
│ │ ├── drug_recalls_helpers.py
│ │ ├── drug_recalls.py
│ │ ├── drug_shortages_detail_helpers.py
│ │ ├── drug_shortages_helpers.py
│ │ ├── drug_shortages.py
│ │ ├── exceptions.py
│ │ ├── input_validation.py
│ │ ├── rate_limiter.py
│ │ ├── utils.py
│ │ └── validation.py
│ ├── organizations
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ └── search.py
│ ├── parameter_parser.py
│ ├── query_parser.py
│ ├── query_router.py
│ ├── rate_limiter.py
│ ├── render.py
│ ├── request_batcher.py
│ ├── resources
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ ├── instructions.md
│ │ └── researcher.md
│ ├── retry.py
│ ├── router_handlers.py
│ ├── router.py
│ ├── shared_context.py
│ ├── thinking
│ │ ├── __init__.py
│ │ ├── sequential.py
│ │ └── session.py
│ ├── thinking_tool.py
│ ├── thinking_tracker.py
│ ├── trials
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ ├── nci_getter.py
│ │ ├── nci_search.py
│ │ └── search.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── cancer_types_api.py
│ │ ├── cbio_http_adapter.py
│ │ ├── endpoint_registry.py
│ │ ├── gene_validator.py
│ │ ├── metrics.py
│ │ ├── mutation_filter.py
│ │ ├── query_utils.py
│ │ ├── rate_limiter.py
│ │ └── request_cache.py
│ ├── variants
│ │ ├── __init__.py
│ │ ├── alphagenome.py
│ │ ├── cancer_types.py
│ │ ├── cbio_external_client.py
│ │ ├── cbioportal_mutations.py
│ │ ├── cbioportal_search_helpers.py
│ │ ├── cbioportal_search.py
│ │ ├── constants.py
│ │ ├── external.py
│ │ ├── filters.py
│ │ ├── getter.py
│ │ ├── links.py
│ │ ├── oncokb_client.py
│ │ ├── oncokb_models.py
│ │ └── search.py
│ └── workers
│ ├── __init__.py
│ ├── worker_entry_stytch.js
│ ├── worker_entry.js
│ └── worker.py
├── tests
│ ├── bdd
│ │ ├── cli_help
│ │ │ ├── help.feature
│ │ │ └── test_help.py
│ │ ├── conftest.py
│ │ ├── features
│ │ │ └── alphagenome_integration.feature
│ │ ├── fetch_articles
│ │ │ ├── fetch.feature
│ │ │ └── test_fetch.py
│ │ ├── get_trials
│ │ │ ├── get.feature
│ │ │ └── test_get.py
│ │ ├── get_variants
│ │ │ ├── get.feature
│ │ │ └── test_get.py
│ │ ├── search_articles
│ │ │ ├── autocomplete.feature
│ │ │ ├── search.feature
│ │ │ ├── test_autocomplete.py
│ │ │ └── test_search.py
│ │ ├── search_trials
│ │ │ ├── search.feature
│ │ │ └── test_search.py
│ │ ├── search_variants
│ │ │ ├── search.feature
│ │ │ └── test_search.py
│ │ └── steps
│ │ └── test_alphagenome_steps.py
│ ├── config
│ │ └── test_smithery_config.py
│ ├── conftest.py
│ ├── data
│ │ ├── ct_gov
│ │ │ ├── clinical_trials_api_v2.yaml
│ │ │ ├── trials_NCT04280705.json
│ │ │ └── trials_NCT04280705.txt
│ │ ├── myvariant
│ │ │ ├── myvariant_api.yaml
│ │ │ ├── myvariant_field_descriptions.csv
│ │ │ ├── variants_full_braf_v600e.json
│ │ │ ├── variants_full_braf_v600e.txt
│ │ │ └── variants_part_braf_v600_multiple.json
│ │ ├── oncokb_mock_responses.json
│ │ ├── openfda
│ │ │ ├── drugsfda_detail.json
│ │ │ ├── drugsfda_search.json
│ │ │ ├── enforcement_detail.json
│ │ │ └── enforcement_search.json
│ │ └── pubtator
│ │ ├── pubtator_autocomplete.json
│ │ └── pubtator3_paper.txt
│ ├── integration
│ │ ├── test_oncokb_integration.py
│ │ ├── test_openfda_integration.py
│ │ ├── test_preprints_integration.py
│ │ ├── test_simple.py
│ │ └── test_variants_integration.py
│ ├── tdd
│ │ ├── articles
│ │ │ ├── test_autocomplete.py
│ │ │ ├── test_cbioportal_integration.py
│ │ │ ├── test_fetch.py
│ │ │ ├── test_preprints.py
│ │ │ ├── test_search.py
│ │ │ └── test_unified.py
│ │ ├── conftest.py
│ │ ├── drugs
│ │ │ ├── __init__.py
│ │ │ └── test_drug_getter.py
│ │ ├── openfda
│ │ │ ├── __init__.py
│ │ │ ├── test_adverse_events.py
│ │ │ ├── test_device_events.py
│ │ │ ├── test_drug_approvals.py
│ │ │ ├── test_drug_labels.py
│ │ │ ├── test_drug_recalls.py
│ │ │ ├── test_drug_shortages.py
│ │ │ └── test_security.py
│ │ ├── test_biothings_integration_real.py
│ │ ├── test_biothings_integration.py
│ │ ├── test_circuit_breaker.py
│ │ ├── test_concurrent_requests.py
│ │ ├── test_connection_pool.py
│ │ ├── test_domain_handlers.py
│ │ ├── test_drug_approvals.py
│ │ ├── test_drug_recalls.py
│ │ ├── test_drug_shortages.py
│ │ ├── test_endpoint_documentation.py
│ │ ├── test_error_scenarios.py
│ │ ├── test_europe_pmc_fetch.py
│ │ ├── test_mcp_integration.py
│ │ ├── test_mcp_tools.py
│ │ ├── test_metrics.py
│ │ ├── test_nci_integration.py
│ │ ├── test_nci_mcp_tools.py
│ │ ├── test_network_policies.py
│ │ ├── test_offline_mode.py
│ │ ├── test_openfda_unified.py
│ │ ├── test_pten_r173_search.py
│ │ ├── test_render.py
│ │ ├── test_request_batcher.py.disabled
│ │ ├── test_retry.py
│ │ ├── test_router.py
│ │ ├── test_shared_context.py.disabled
│ │ ├── test_unified_biothings.py
│ │ ├── thinking
│ │ │ ├── __init__.py
│ │ │ └── test_sequential.py
│ │ ├── trials
│ │ │ ├── test_backward_compatibility.py
│ │ │ ├── test_getter.py
│ │ │ └── test_search.py
│ │ ├── utils
│ │ │ ├── test_gene_validator.py
│ │ │ ├── test_mutation_filter.py
│ │ │ ├── test_rate_limiter.py
│ │ │ └── test_request_cache.py
│ │ ├── variants
│ │ │ ├── constants.py
│ │ │ ├── test_alphagenome_api_key.py
│ │ │ ├── test_alphagenome_comprehensive.py
│ │ │ ├── test_alphagenome.py
│ │ │ ├── test_cbioportal_mutations.py
│ │ │ ├── test_cbioportal_search.py
│ │ │ ├── test_external_integration.py
│ │ │ ├── test_external.py
│ │ │ ├── test_extract_gene_aa_change.py
│ │ │ ├── test_filters.py
│ │ │ ├── test_getter.py
│ │ │ ├── test_links.py
│ │ │ ├── test_oncokb_client.py
│ │ │ ├── test_oncokb_helper.py
│ │ │ └── test_search.py
│ │ └── workers
│ │ └── test_worker_sanitization.js
│ └── test_pydantic_ai_integration.py
├── THIRD_PARTY_ENDPOINTS.md
├── tox.ini
├── uv.lock
└── wrangler.toml
```
# Files
--------------------------------------------------------------------------------
/tests/tdd/test_router.py:
--------------------------------------------------------------------------------
```python
1 | """Comprehensive tests for the unified router module."""
2 |
3 | import json
4 | from unittest.mock import patch
5 |
6 | import pytest
7 |
8 | from biomcp.exceptions import (
9 | InvalidDomainError,
10 | InvalidParameterError,
11 | QueryParsingError,
12 | SearchExecutionError,
13 | )
14 | from biomcp.router import fetch, format_results, search
15 |
16 |
class TestFormatResults:
    """Check how ``format_results`` shapes raw records for each domain."""

    @staticmethod
    def _format_without_reminder(records, domain, total):
        """Call ``format_results`` with the thinking reminder patched to "".

        The positional arguments ``1, 10`` are the ones every test in this
        class passes ahead of the total.
        """
        with patch("biomcp.router.get_thinking_reminder", return_value=""):
            return format_results(records, domain, 1, 10, total)

    def test_format_article_results(self):
        """An article record is exposed as id/title/text plus a PubMed URL."""
        article = {
            "pmid": "12345",
            "title": "Test Article",
            "abstract": "This is a test abstract",
            # Note: url in input is ignored, always generates PubMed URL
        }

        formatted = self._format_without_reminder([article], "article", 1)

        assert "results" in formatted
        assert len(formatted["results"]) == 1
        entry = formatted["results"][0]
        assert entry["id"] == "12345"
        assert entry["title"] == "Test Article"
        assert "test abstract" in entry["text"]
        assert entry["url"] == "https://pubmed.ncbi.nlm.nih.gov/12345/"

    def test_format_trial_results_api_v2(self):
        """A trial in the nested ``protocolSection`` layout is flattened."""
        protocol_section = {
            "identificationModule": {
                "nctId": "NCT12345",
                "briefTitle": "Test Trial",
            },
            "descriptionModule": {
                "briefSummary": "This is a test trial summary"
            },
            "statusModule": {"overallStatus": "RECRUITING"},
            "designModule": {"phases": ["PHASE3"]},
        }

        formatted = self._format_without_reminder(
            [{"protocolSection": protocol_section}], "trial", 1
        )

        assert "results" in formatted
        assert len(formatted["results"]) == 1
        entry = formatted["results"][0]
        assert entry["id"] == "NCT12345"
        assert entry["title"] == "Test Trial"
        assert "test trial summary" in entry["text"]
        assert "NCT12345" in entry["url"]

    def test_format_trial_results_legacy(self):
        """A trial in the flat, human-readable-key layout is still accepted."""
        legacy_trial = {
            "NCT Number": "NCT67890",
            "Study Title": "Legacy Trial",
            "Brief Summary": "Legacy trial summary",
            "Study Status": "COMPLETED",
            "Phases": "Phase 2",
        }

        formatted = self._format_without_reminder([legacy_trial], "trial", 1)

        assert "results" in formatted
        assert len(formatted["results"]) == 1
        entry = formatted["results"][0]
        assert entry["id"] == "NCT67890"
        assert entry["title"] == "Legacy Trial"
        assert "Legacy trial summary" in entry["text"]

    def test_format_variant_results(self):
        """A variant record surfaces gene, significance and rsID."""
        variant = {
            "_id": "chr7:g.140453136A>T",
            "dbsnp": {"rsid": "rs121913529"},
            "dbnsfp": {"genename": "BRAF"},
            "clinvar": {"rcv": {"clinical_significance": "Pathogenic"}},
        }

        formatted = self._format_without_reminder([variant], "variant", 1)

        assert "results" in formatted
        assert len(formatted["results"]) == 1
        entry = formatted["results"][0]
        assert entry["id"] == "chr7:g.140453136A>T"
        assert "BRAF" in entry["title"]
        assert "Pathogenic" in entry["text"]
        assert "rs121913529" in entry["url"]

    def test_format_results_invalid_domain(self):
        """An unrecognised domain raises ``InvalidDomainError``."""
        with pytest.raises(InvalidDomainError) as exc_info:
            format_results([], "invalid_domain", 1, 10, 0)

        assert "Unknown domain: invalid_domain" in str(exc_info.value)

    def test_format_results_malformed_data(self):
        """``None`` entries are dropped; field-less dicts are kept."""
        well_formed = {"title": "Good Article", "pmid": "123"}
        # Missing required fields but won't fail (treated as preprint)
        field_less = {"invalid": "data"}
        records = [
            well_formed,
            None,  # Malformed - will be skipped
            field_less,
        ]

        formatted = self._format_without_reminder(records, "article", 3)

        # Should skip None but include the third (treated as preprint with
        # empty fields)
        assert len(formatted["results"]) == 2
        assert formatted["results"][0]["id"] == "123"
        # Empty ID for invalid data
        assert formatted["results"][1]["id"] == ""
144 |
145 |
@pytest.mark.asyncio
class TestSearchFunction:
    """Cover the unified ``search`` entry point of the router."""

    async def test_search_article_domain(self):
        """A domain-specific article search returns formatted results."""
        payload = json.dumps([
            {"pmid": "123", "title": "Test", "abstract": "Abstract"}
        ])

        with (
            patch(
                "biomcp.articles.unified.search_articles_unified",
                return_value=payload,
            ),
            # Mock thinking tracker to prevent reminder
            patch("biomcp.router.get_thinking_reminder", return_value=""),
        ):
            outcome = await search(
                query="",
                domain="article",
                genes="BRAF",
                diseases=["cancer"],
                page_size=10,
            )

        assert "results" in outcome
        assert len(outcome["results"]) == 1
        assert outcome["results"][0]["id"] == "123"

    async def test_search_trial_domain(self):
        """A trial search delegates to the trial backend exactly once."""
        study = {
            "protocolSection": {
                "identificationModule": {"nctId": "NCT123"},
            }
        }
        payload = json.dumps({"studies": [study]})

        with (
            patch(
                "biomcp.trials.search.search_trials", return_value=payload
            ) as mock_search,
            # Mock thinking tracker to prevent reminder
            patch("biomcp.router.get_thinking_reminder", return_value=""),
        ):
            outcome = await search(
                query="",
                domain="trial",
                conditions=["cancer"],
                phase="Phase 3",
                page_size=20,
            )

        assert "results" in outcome
        mock_search.assert_called_once()

    async def test_search_variant_domain(self):
        """A variant search returns one formatted result per backend hit."""
        payload = json.dumps([{"_id": "rs123", "gene": {"symbol": "BRAF"}}])

        with (
            patch(
                "biomcp.variants.search.search_variants", return_value=payload
            ),
            # Mock thinking tracker to prevent reminder
            patch("biomcp.router.get_thinking_reminder", return_value=""),
        ):
            outcome = await search(
                query="",
                domain="variant",
                genes="BRAF",
                significance="pathogenic",
                page_size=10,
            )

        assert "results" in outcome
        assert len(outcome["results"]) == 1

    async def test_search_unified_query(self):
        """A query without a domain is forwarded to ``_unified_search``."""
        canned = {"results": [{"id": "1", "title": "Test"}]}

        with patch(
            "biomcp.router._unified_search", return_value=canned
        ) as mock_unified:
            outcome = await search(
                query="gene:BRAF AND disease:cancer",
                max_results_per_domain=20,
            )

        assert "results" in outcome
        mock_unified.assert_called_once_with(
            query="gene:BRAF AND disease:cancer",
            max_results_per_domain=20,
            domains=None,
            explain_query=False,
        )

    async def test_search_no_domain_or_query(self):
        """Supplying neither a query nor a domain is rejected."""
        with pytest.raises(InvalidParameterError) as exc_info:
            await search(query="")

        assert "query or domain" in str(exc_info.value)

    async def test_search_invalid_domain(self):
        """An unknown domain raises ``InvalidDomainError``."""
        with pytest.raises(InvalidDomainError):
            await search(query="", domain="invalid_domain")

    async def test_search_get_schema(self):
        """``get_schema=True`` returns the schema description."""
        schema = await search(query="", get_schema=True)

        for expected_key in ("domains", "cross_domain_fields", "domain_fields"):
            assert expected_key in schema
        assert isinstance(schema["cross_domain_fields"], dict)

    async def test_search_pagination_validation(self):
        """A page number of 0 is rejected."""
        with pytest.raises(InvalidParameterError) as exc_info:
            await search(
                query="",
                domain="article",
                page=0,  # Invalid - must be >= 1
                page_size=10,
            )

        assert "page" in str(exc_info.value)

    async def test_search_parameter_parsing(self):
        """JSON-array and comma-separated strings are both parsed to lists."""
        with patch(
            "biomcp.articles.unified.search_articles_unified",
            return_value=json.dumps([]),
        ) as mock_search:
            await search(
                query="",
                domain="article",
                genes='["BRAF", "KRAS"]',  # JSON array string
                diseases="cancer,melanoma",  # Comma-separated
            )

        # The first positional argument is the parsed request object.
        request = mock_search.call_args.args[0]
        assert request.genes == ["BRAF", "KRAS"]
        assert request.diseases == ["cancer", "melanoma"]
299 |
300 |
@pytest.mark.asyncio
class TestFetchFunction:
    """Cover the unified ``fetch`` entry point of the router."""

    async def test_fetch_article(self):
        """Fetching an article returns id, title, full text and metadata."""
        article = {
            "pmid": 12345,
            "title": "Test Article",
            "abstract": "Full abstract",
            "full_text": "Full text content",
        }

        with patch(
            "biomcp.articles.fetch.fetch_articles",
            return_value=json.dumps([article]),
        ):
            fetched = await fetch(domain="article", id="12345")

        assert fetched["id"] == "12345"
        assert fetched["title"] == "Test Article"
        assert fetched["text"] == "Full text content"
        assert "metadata" in fetched

    async def test_fetch_article_invalid_pmid(self):
        """An identifier that is neither PMID nor DOI yields an error dict."""
        fetched = await fetch(domain="article", id="not_a_number")

        # Should return an error since "not_a_number" is neither a valid
        # PMID nor DOI
        assert "error" in fetched
        assert "Invalid identifier format" in fetched["error"]
        assert "not_a_number" in fetched["error"]

    async def test_fetch_trial_all_sections(self):
        """``detail="all"`` merges every trial section into the metadata."""
        protocol = json.dumps({
            "title": "Test Trial",
            "nct_id": "NCT123",
            "brief_summary": "Summary",
        })
        locations = json.dumps({"locations": [{"city": "Boston"}]})
        outcomes = json.dumps({
            "outcomes": {"primary_outcomes": ["Outcome1"]}
        })
        references = json.dumps({"references": [{"pmid": "456"}]})

        with (
            patch(
                "biomcp.trials.getter._trial_protocol", return_value=protocol
            ),
            patch(
                "biomcp.trials.getter._trial_locations",
                return_value=locations,
            ),
            patch(
                "biomcp.trials.getter._trial_outcomes", return_value=outcomes
            ),
            patch(
                "biomcp.trials.getter._trial_references",
                return_value=references,
            ),
        ):
            fetched = await fetch(domain="trial", id="NCT123", detail="all")

        assert fetched["id"] == "NCT123"
        assert "metadata" in fetched
        for section in ("locations", "outcomes", "references"):
            assert section in fetched["metadata"]

    async def test_fetch_trial_invalid_detail(self):
        """An unknown ``detail`` value is rejected."""
        with pytest.raises(InvalidParameterError) as exc_info:
            await fetch(
                domain="trial",
                id="NCT123",
                detail="invalid_section",
            )

        assert "one of:" in str(exc_info.value)

    async def test_fetch_variant(self):
        """Fetching a variant reports TCGA availability and external links."""
        variant = {
            "_id": "rs123",
            "gene": {"symbol": "BRAF"},
            "clinvar": {"clinical_significance": "Pathogenic"},
            "tcga": {"cancer_types": {}},
            "external_links": {"dbSNP": "https://example.com"},
        }

        with patch(
            "biomcp.variants.getter.get_variant",
            return_value=json.dumps([variant]),
        ):
            fetched = await fetch(domain="variant", id="rs123")

        assert fetched["id"] == "rs123"
        assert "TCGA Data: Available" in fetched["text"]
        assert "external_links" in fetched["metadata"]

    async def test_fetch_variant_list_response(self):
        """A list-shaped backend response is unwrapped to a single variant."""
        payload = json.dumps([{"_id": "rs123", "gene": {"symbol": "BRAF"}}])

        with patch("biomcp.variants.getter.get_variant", return_value=payload):
            fetched = await fetch(domain="variant", id="rs123")

        assert fetched["id"] == "rs123"

    async def test_fetch_invalid_domain(self):
        """An unknown domain raises ``InvalidDomainError``."""
        with pytest.raises(InvalidDomainError):
            await fetch(domain="invalid", id="123")

    async def test_fetch_error_handling(self):
        """A backend failure is surfaced as ``SearchExecutionError``."""
        with (
            patch(
                "biomcp.articles.fetch.fetch_articles",
                side_effect=Exception("API Error"),
            ),
            pytest.raises(SearchExecutionError) as exc_info,
        ):
            await fetch(domain="article", id="123")

        assert "Failed to execute search" in str(exc_info.value)

    async def test_fetch_domain_auto_detection_pmid(self):
        """A purely numeric id is routed to the article backend."""
        payload = json.dumps([{"pmid": "12345", "title": "Test"}])

        with patch(
            "biomcp.articles.fetch._article_details", return_value=payload
        ) as mock_fetch:
            # Numeric ID should auto-detect as article
            fetched = await fetch(id="12345")

        assert fetched["id"] == "12345"
        mock_fetch.assert_called_once()

    async def test_fetch_domain_auto_detection_nct(self):
        """An ``NCT``-prefixed id is routed to the trial backend."""
        payload = json.dumps({
            "protocolSection": {
                "identificationModule": {"briefTitle": "Test Trial"}
            }
        })

        with patch(
            "biomcp.trials.getter.get_trial", return_value=payload
        ) as mock_get:
            # NCT ID should auto-detect as trial
            fetched = await fetch(id="NCT12345")

        assert "NCT12345" in fetched["url"]
        mock_get.assert_called()

    async def test_fetch_domain_auto_detection_doi(self):
        """A DOI is routed to the article backend."""
        payload = json.dumps([{"doi": "10.1038/nature12345", "title": "Test"}])

        with patch(
            "biomcp.articles.fetch._article_details", return_value=payload
        ) as mock_fetch:
            # DOI should auto-detect as article
            await fetch(id="10.1038/nature12345")

        mock_fetch.assert_called_once()

    async def test_fetch_domain_auto_detection_variant(self):
        """rsIDs and HGVS notation are both routed to the variant backend."""
        # First an rsID, then HGVS notation; each gets a fresh mock.
        for variant_id in ("rs12345", "chr7:g.140453136A>T"):
            with patch(
                "biomcp.variants.getter.get_variant",
                return_value=json.dumps([{"_id": variant_id}]),
            ) as mock_get:
                await fetch(id=variant_id)

            mock_get.assert_called_once()
484 |
485 |
@pytest.mark.asyncio
class TestUnifiedSearch:
    """Cover the router's internal ``_unified_search`` coroutine."""

    async def test_unified_search_explain_query(self):
        """``explain_query=True`` returns the parse/routing explanation."""
        from biomcp.router import _unified_search

        explanation = await _unified_search(
            query="gene:BRAF AND disease:cancer", explain_query=True
        )

        for expected_key in (
            "original_query",
            "parsed_structure",
            "routing_plan",
            "schema",
        ):
            assert expected_key in explanation

    async def test_unified_search_execution(self):
        """A normal run returns a ``results`` list."""
        from biomcp.router import _unified_search

        plan_output = {
            "articles": json.dumps([{"pmid": "123", "title": "Article 1"}])
        }

        with patch(
            "biomcp.query_router.execute_routing_plan",
            return_value=plan_output,
        ):
            outcome = await _unified_search(
                query="gene:BRAF", max_results_per_domain=10
            )

        assert "results" in outcome
        assert isinstance(outcome["results"], list)

    async def test_unified_search_parse_error(self):
        """A parser failure is re-raised as ``QueryParsingError``."""
        from biomcp.router import _unified_search

        with (
            patch(
                "biomcp.query_parser.QueryParser.parse",
                side_effect=Exception("Parse error"),
            ),
            pytest.raises(QueryParsingError),
        ):
            await _unified_search(
                query="invalid::query", max_results_per_domain=10
            )
530 |
```
--------------------------------------------------------------------------------
/src/biomcp/integrations/biothings_client.py:
--------------------------------------------------------------------------------
```python
1 | """BioThings API client for unified access to the BioThings suite.
2 |
3 | The BioThings suite (https://biothings.io) provides high-performance biomedical
4 | data APIs including:
5 | - MyGene.info - Gene annotations and information
6 | - MyVariant.info - Genetic variant annotations (existing integration enhanced)
7 | - MyDisease.info - Disease ontology and synonyms
8 | - MyChem.info - Drug/chemical annotations and information
9 |
10 | This module provides a centralized client for interacting with all BioThings APIs,
11 | handling common concerns like error handling, rate limiting, and response parsing.
12 | While MyVariant.info has specialized modules for complex variant operations, this
13 | client provides the base layer for all BioThings API interactions.
14 | """
15 |
16 | import logging
17 | from typing import Any
18 | from urllib.parse import quote
19 |
20 | from pydantic import BaseModel, Field
21 |
22 | from .. import http_client
23 | from ..constants import (
24 | MYVARIANT_GET_URL,
25 | )
26 |
logger = logging.getLogger(__name__)

# BioThings API endpoints.
# Each service has a versioned base URL, a search path ("/query") and a
# single-record path named after the entity type.
MYGENE_BASE_URL = "https://mygene.info/v3"
MYGENE_QUERY_URL = MYGENE_BASE_URL + "/query"
MYGENE_GET_URL = MYGENE_BASE_URL + "/gene"

MYDISEASE_BASE_URL = "https://mydisease.info/v1"
MYDISEASE_QUERY_URL = MYDISEASE_BASE_URL + "/query"
MYDISEASE_GET_URL = MYDISEASE_BASE_URL + "/disease"

MYCHEM_BASE_URL = "https://mychem.info/v1"
MYCHEM_QUERY_URL = MYCHEM_BASE_URL + "/query"
MYCHEM_GET_URL = MYCHEM_BASE_URL + "/chem"
41 |
42 |
class GeneInfo(BaseModel):
    """Typed view of a gene record returned by MyGene.info."""

    # Required identifier; read from the "_id" key of the payload.
    gene_id: str = Field(..., alias="_id")

    # Descriptive fields; each is None when absent from the payload.
    symbol: str | None = None
    name: str | None = None
    summary: str | None = None

    # Defaults to an empty list when the payload has no "alias" key.
    alias: list[str] | None = Field(default_factory=list)

    # Cross-references and classification; each is None when absent.
    entrezgene: int | str | None = None
    ensembl: dict[str, Any] | None = None
    refseq: dict[str, Any] | None = None
    type_of_gene: str | None = None
    taxid: int | None = None
56 |
57 |
class DiseaseInfo(BaseModel):
    """Typed view of a disease record returned by MyDisease.info."""

    # Required identifier; read from the "_id" key of the payload.
    disease_id: str = Field(..., alias="_id")

    # Optional fields; each is None when absent from the payload.
    name: str | None = None
    mondo: dict[str, Any] | None = None
    definition: str | None = None

    # Defaults to an empty list when the payload has no "synonyms" key.
    synonyms: list[str] | None = Field(default_factory=list)

    xrefs: dict[str, Any] | None = None
    phenotypes: list[dict[str, Any]] | None = None
68 |
69 |
class DrugInfo(BaseModel):
    """Typed view of a drug/chemical record returned by MyChem.info."""

    # Required identifier; read from the "_id" key of the payload.
    drug_id: str = Field(..., alias="_id")
    name: str | None = None

    # Defaults to an empty list when the payload has no "tradename" key.
    tradename: list[str] | None = Field(default_factory=list)

    # Identifier fields; each is None when absent.
    drugbank_id: str | None = None
    chebi_id: str | None = None
    chembl_id: str | None = None
    pubchem_cid: str | None = None
    # Accepted either as a plain string or as a nested mapping.
    unii: str | dict[str, Any] | None = None
    inchikey: str | None = None

    # Chemistry and free-text fields; each is None when absent.
    formula: str | None = None
    description: str | None = None
    indication: str | None = None
    pharmacology: dict[str, Any] | None = None
    mechanism_of_action: str | None = None
87 |
88 |
89 | class BioThingsClient:
90 | """Unified client for BioThings APIs (MyGene, MyVariant, MyDisease, MyChem)."""
91 |
def __init__(self):
    """Create a client that logs through this module's logger."""
    # Instances share the module-level logger.
    self.logger = logger
95 |
async def get_gene_info(
    self, gene_id_or_symbol: str, fields: list[str] | None = None
) -> GeneInfo | None:
    """Get gene information from MyGene.info.

    Lookup strategy:

    1. All-digit input is treated as an Entrez ID and fetched directly.
    2. Input shaped like an Ensembl gene ID (``ENSG`` followed by digits)
       is also fetched directly; if that yields nothing, the lookup falls
       through to step 3.
    3. Anything else is treated as a symbol: the symbol is searched, and
       the first hit's ``_id`` is used to fetch the full record.

    Surrounding whitespace is ignored. This is a best-effort call: any
    exception is logged as a warning and reported as ``None``.

    Args:
        gene_id_or_symbol: Gene ID (Entrez, Ensembl) or symbol (e.g., "TP53")
        fields: Optional list of fields to return

    Returns:
        GeneInfo object or None if not found
    """
    try:
        identifier = gene_id_or_symbol.strip()
        if not identifier:
            # Nothing to look up; avoid issuing an empty query.
            return None

        # Entrez IDs are purely numeric and can be fetched directly.
        if identifier.isdigit():
            return await self._get_gene_by_id(identifier, fields)

        # Ensembl gene IDs are not symbols, so a "symbol:" search would
        # miss them; try the direct lookup first and only fall back to
        # the symbol search when it finds nothing.
        if identifier.startswith("ENSG") and identifier[4:].isdigit():
            gene = await self._get_gene_by_id(identifier, fields)
            if gene is not None:
                return gene

        # For symbols, we need to query first
        hits = await self._query_gene(identifier)
        if not hits:
            return None

        # Get the best match
        gene_id = hits[0].get("_id")
        if not gene_id:
            return None

        # Now get full details
        return await self._get_gene_by_id(gene_id, fields)

    except Exception as e:
        # Deliberate catch-all: callers get None rather than an exception.
        self.logger.warning(
            "Failed to get gene info for %s: %s", gene_id_or_symbol, e
        )
        return None
131 |
132 | async def _query_gene(self, symbol: str) -> list[dict[str, Any]] | None:
133 | """Query MyGene.info for a gene symbol."""
134 | params = {
135 | "q": f"symbol:{quote(symbol)}",
136 | "species": "human",
137 | "fields": "_id,symbol,name,taxid",
138 | "size": 5,
139 | }
140 |
141 | response, error = await http_client.request_api(
142 | url=MYGENE_QUERY_URL,
143 | request=params,
144 | method="GET",
145 | domain="mygene",
146 | )
147 |
148 | if error or not response:
149 | return None
150 |
151 | hits = response.get("hits", [])
152 | # Filter for human genes (taxid 9606)
153 | human_hits = [h for h in hits if h.get("taxid") == 9606]
154 | return human_hits if human_hits else hits
155 |
156 | async def _get_gene_by_id(
157 | self, gene_id: str, fields: list[str] | None = None
158 | ) -> GeneInfo | None:
159 | """Get gene details by ID from MyGene.info."""
160 | if fields is None:
161 | fields = [
162 | "symbol",
163 | "name",
164 | "summary",
165 | "alias",
166 | "type_of_gene",
167 | "ensembl",
168 | "refseq",
169 | "entrezgene",
170 | ]
171 |
172 | params = {"fields": ",".join(fields)}
173 |
174 | response, error = await http_client.request_api(
175 | url=f"{MYGENE_GET_URL}/{gene_id}",
176 | request=params,
177 | method="GET",
178 | domain="mygene",
179 | )
180 |
181 | if error or not response:
182 | return None
183 |
184 | try:
185 | return GeneInfo(**response)
186 | except Exception as e:
187 | self.logger.warning(f"Failed to parse gene response: {e}")
188 | return None
189 |
190 | async def batch_get_genes(
191 | self, gene_ids: list[str], fields: list[str] | None = None
192 | ) -> list[GeneInfo]:
193 | """Get multiple genes in a single request.
194 |
195 | Args:
196 | gene_ids: List of gene IDs or symbols
197 | fields: Optional list of fields to return
198 |
199 | Returns:
200 | List of GeneInfo objects
201 | """
202 | if not gene_ids:
203 | return []
204 |
205 | if fields is None:
206 | fields = ["symbol", "name", "summary", "alias", "type_of_gene"]
207 |
208 | # MyGene supports POST for batch queries
209 | data = {
210 | "ids": ",".join(gene_ids),
211 | "fields": ",".join(fields),
212 | "species": "human",
213 | }
214 |
215 | response, error = await http_client.request_api(
216 | url=MYGENE_GET_URL,
217 | request=data,
218 | method="POST",
219 | domain="mygene",
220 | )
221 |
222 | if error or not response:
223 | return []
224 |
225 | results = []
226 | for item in response:
227 | try:
228 | if "notfound" not in item:
229 | results.append(GeneInfo(**item))
230 | except Exception as e:
231 | self.logger.warning(f"Failed to parse gene in batch: {e}")
232 | continue
233 |
234 | return results
235 |
236 | async def get_disease_info(
237 | self, disease_id_or_name: str, fields: list[str] | None = None
238 | ) -> DiseaseInfo | None:
239 | """Get disease information from MyDisease.info.
240 |
241 | Args:
242 | disease_id_or_name: Disease ID (MONDO, DOID) or name
243 | fields: Optional list of fields to return
244 |
245 | Returns:
246 | DiseaseInfo object or None if not found
247 | """
248 | try:
249 | # Check if it's an ID (starts with known prefixes)
250 | if any(
251 | disease_id_or_name.upper().startswith(prefix)
252 | for prefix in ["MONDO:", "DOID:", "OMIM:", "MESH:"]
253 | ):
254 | return await self._get_disease_by_id(
255 | disease_id_or_name, fields
256 | )
257 |
258 | # Otherwise, query by name
259 | query_result = await self._query_disease(disease_id_or_name)
260 | if not query_result:
261 | return None
262 |
263 | # Get the best match
264 | disease_id = query_result[0].get("_id")
265 | if not disease_id:
266 | return None
267 |
268 | # Now get full details
269 | return await self._get_disease_by_id(disease_id, fields)
270 |
271 | except Exception as e:
272 | self.logger.warning(
273 | f"Failed to get disease info for {disease_id_or_name}: {e}"
274 | )
275 | return None
276 |
277 | async def _query_disease(self, name: str) -> list[dict[str, Any]] | None:
278 | """Query MyDisease.info for a disease name."""
279 | params = {
280 | "q": quote(name),
281 | "fields": "_id,name,mondo",
282 | "size": 10,
283 | }
284 |
285 | response, error = await http_client.request_api(
286 | url=MYDISEASE_QUERY_URL,
287 | request=params,
288 | method="GET",
289 | domain="mydisease",
290 | )
291 |
292 | if error or not response:
293 | return None
294 |
295 | return response.get("hits", [])
296 |
297 | async def _get_disease_by_id(
298 | self, disease_id: str, fields: list[str] | None = None
299 | ) -> DiseaseInfo | None:
300 | """Get disease details by ID from MyDisease.info."""
301 | if fields is None:
302 | fields = [
303 | "name",
304 | "mondo",
305 | "definition",
306 | "synonyms",
307 | "xrefs",
308 | "phenotypes",
309 | ]
310 |
311 | params = {"fields": ",".join(fields)}
312 |
313 | response, error = await http_client.request_api(
314 | url=f"{MYDISEASE_GET_URL}/{quote(disease_id, safe='')}",
315 | request=params,
316 | method="GET",
317 | domain="mydisease",
318 | )
319 |
320 | if error or not response:
321 | return None
322 |
323 | try:
324 | # Extract definition from mondo if available
325 | if "mondo" in response and isinstance(response["mondo"], dict):
326 | if (
327 | "definition" in response["mondo"]
328 | and "definition" not in response
329 | ):
330 | response["definition"] = response["mondo"]["definition"]
331 | # Extract synonyms from mondo
332 | if "synonym" in response["mondo"]:
333 | mondo_synonyms = response["mondo"]["synonym"]
334 | if isinstance(mondo_synonyms, dict):
335 | # Handle exact synonyms
336 | exact = mondo_synonyms.get("exact", [])
337 | if isinstance(exact, list):
338 | response["synonyms"] = exact
339 | elif isinstance(mondo_synonyms, list):
340 | response["synonyms"] = mondo_synonyms
341 |
342 | return DiseaseInfo(**response)
343 | except Exception as e:
344 | self.logger.warning(f"Failed to parse disease response: {e}")
345 | return None
346 |
347 | async def get_disease_synonyms(self, disease_id_or_name: str) -> list[str]:
348 | """Get disease synonyms for query expansion.
349 |
350 | Args:
351 | disease_id_or_name: Disease ID or name
352 |
353 | Returns:
354 | List of synonyms including the original term
355 | """
356 | disease_info = await self.get_disease_info(disease_id_or_name)
357 | if not disease_info:
358 | return [disease_id_or_name]
359 |
360 | synonyms = [disease_id_or_name]
361 | if disease_info.name and disease_info.name != disease_id_or_name:
362 | synonyms.append(disease_info.name)
363 |
364 | if disease_info.synonyms:
365 | synonyms.extend(disease_info.synonyms)
366 |
367 | # Remove duplicates while preserving order
368 | seen = set()
369 | unique_synonyms = []
370 | for syn in synonyms:
371 | if syn.lower() not in seen:
372 | seen.add(syn.lower())
373 | unique_synonyms.append(syn)
374 |
375 | return unique_synonyms[
376 | :5
377 | ] # Limit to top 5 to avoid overly broad searches
378 |
379 | async def get_drug_info(
380 | self, drug_id_or_name: str, fields: list[str] | None = None
381 | ) -> DrugInfo | None:
382 | """Get drug/chemical information from MyChem.info.
383 |
384 | Args:
385 | drug_id_or_name: Drug ID (DrugBank, ChEMBL, etc.) or name
386 | fields: Optional list of fields to return
387 |
388 | Returns:
389 | DrugInfo object or None if not found
390 | """
391 | try:
392 | # Check if it's an ID (starts with known prefixes)
393 | if any(
394 | drug_id_or_name.upper().startswith(prefix)
395 | for prefix in ["DRUGBANK:", "DB", "CHEMBL", "CHEBI:", "CID"]
396 | ):
397 | return await self._get_drug_by_id(drug_id_or_name, fields)
398 |
399 | # Otherwise, query by name
400 | query_result = await self._query_drug(drug_id_or_name)
401 | if not query_result:
402 | return None
403 |
404 | # Get the best match
405 | drug_id = query_result[0].get("_id")
406 | if not drug_id:
407 | return None
408 |
409 | # Now get full details
410 | return await self._get_drug_by_id(drug_id, fields)
411 |
412 | except Exception as e:
413 | self.logger.warning(
414 | f"Failed to get drug info for {drug_id_or_name}: {e}"
415 | )
416 | return None
417 |
418 | async def _query_drug(self, name: str) -> list[dict[str, Any]] | None:
419 | """Query MyChem.info for a drug name."""
420 | params = {
421 | "q": quote(name),
422 | "fields": "_id,name,drugbank.name,chebi.name,chembl.pref_name,unii.display_name",
423 | "size": 10,
424 | }
425 |
426 | response, error = await http_client.request_api(
427 | url=MYCHEM_QUERY_URL,
428 | request=params,
429 | method="GET",
430 | domain="mychem",
431 | )
432 |
433 | if error or not response:
434 | return None
435 |
436 | hits = response.get("hits", [])
437 |
438 | # Sort hits to prioritize those with actual drug names
439 | def score_hit(hit):
440 | score = hit.get("_score", 0)
441 | # Boost score if hit has drug name fields
442 | if hit.get("drugbank", {}).get("name"):
443 | score += 10
444 | if hit.get("chembl", {}).get("pref_name"):
445 | score += 5
446 | if hit.get("unii", {}).get("display_name"):
447 | score += 3
448 | return score
449 |
450 | hits.sort(key=score_hit, reverse=True)
451 | return hits
452 |
453 | async def _get_drug_by_id(
454 | self, drug_id: str, fields: list[str] | None = None
455 | ) -> DrugInfo | None:
456 | """Get drug details by ID from MyChem.info."""
457 | if fields is None:
458 | fields = [
459 | "name",
460 | "drugbank",
461 | "chebi",
462 | "chembl",
463 | "pubchem",
464 | "unii",
465 | "inchikey",
466 | "formula",
467 | "description",
468 | "indication",
469 | "pharmacology",
470 | "mechanism_of_action",
471 | ]
472 |
473 | params = {"fields": ",".join(fields)}
474 |
475 | response, error = await http_client.request_api(
476 | url=f"{MYCHEM_GET_URL}/{quote(drug_id, safe='')}",
477 | request=params,
478 | method="GET",
479 | domain="mychem",
480 | )
481 |
482 | if error or not response:
483 | return None
484 |
485 | try:
486 | # Handle array response (multiple results)
487 | if isinstance(response, list):
488 | if not response:
489 | return None
490 | # Take the first result
491 | response = response[0]
492 |
493 | # Extract fields from nested structures
494 | self._extract_drugbank_fields(response)
495 | self._extract_chebi_fields(response)
496 | self._extract_chembl_fields(response)
497 | self._extract_pubchem_fields(response)
498 | self._extract_unii_fields(response)
499 |
500 | return DrugInfo(**response)
501 | except Exception as e:
502 | self.logger.warning(f"Failed to parse drug response: {e}")
503 | return None
504 |
505 | def _extract_drugbank_fields(self, response: dict[str, Any]) -> None:
506 | """Extract DrugBank fields from response."""
507 | if "drugbank" in response and isinstance(response["drugbank"], dict):
508 | db = response["drugbank"]
509 | response["drugbank_id"] = db.get("id")
510 | response["name"] = response.get("name") or db.get("name")
511 | response["tradename"] = db.get("products", {}).get("name", [])
512 | if isinstance(response["tradename"], str):
513 | response["tradename"] = [response["tradename"]]
514 | response["indication"] = db.get("indication")
515 | response["mechanism_of_action"] = db.get("mechanism_of_action")
516 | response["description"] = db.get("description")
517 |
518 | def _extract_chebi_fields(self, response: dict[str, Any]) -> None:
519 | """Extract ChEBI fields from response."""
520 | if "chebi" in response and isinstance(response["chebi"], dict):
521 | response["chebi_id"] = response["chebi"].get("id")
522 | if not response.get("name"):
523 | response["name"] = response["chebi"].get("name")
524 |
525 | def _extract_chembl_fields(self, response: dict[str, Any]) -> None:
526 | """Extract ChEMBL fields from response."""
527 | if "chembl" in response and isinstance(response["chembl"], dict):
528 | response["chembl_id"] = response["chembl"].get(
529 | "molecule_chembl_id"
530 | )
531 | if not response.get("name"):
532 | response["name"] = response["chembl"].get("pref_name")
533 |
534 | def _extract_pubchem_fields(self, response: dict[str, Any]) -> None:
535 | """Extract PubChem fields from response."""
536 | if "pubchem" in response and isinstance(response["pubchem"], dict):
537 | response["pubchem_cid"] = str(response["pubchem"].get("cid", ""))
538 |
539 | def _extract_unii_fields(self, response: dict[str, Any]) -> None:
540 | """Extract UNII fields from response."""
541 | if "unii" in response and isinstance(response["unii"], dict):
542 | unii_data = response["unii"]
543 | # Set UNII code
544 | response["unii"] = unii_data.get("unii", "")
545 | # Use display name as drug name if not already set
546 | if not response.get("name") and unii_data.get("display_name"):
547 | response["name"] = unii_data["display_name"]
548 | # Use NCIT description if no description
549 | if not response.get("description") and unii_data.get(
550 | "ncit_description"
551 | ):
552 | response["description"] = unii_data["ncit_description"]
553 |
554 | async def get_variant_info(
555 | self, variant_id: str, fields: list[str] | None = None
556 | ) -> dict[str, Any] | None:
557 | """Get variant information from MyVariant.info.
558 |
559 | This is a wrapper around the existing MyVariant integration.
560 |
561 | Args:
562 | variant_id: Variant ID (rsID, HGVS)
563 | fields: Optional list of fields to return
564 |
565 | Returns:
566 | Variant data dictionary or None if not found
567 | """
568 | params = {"fields": "all" if fields is None else ",".join(fields)}
569 |
570 | response, error = await http_client.request_api(
571 | url=f"{MYVARIANT_GET_URL}/{variant_id}",
572 | request=params,
573 | method="GET",
574 | domain="myvariant",
575 | )
576 |
577 | if error or not response:
578 | return None
579 |
580 | return response
581 |
```
--------------------------------------------------------------------------------
/tests/tdd/variants/test_oncokb_client.py:
--------------------------------------------------------------------------------
```python
1 | """Comprehensive unit tests for OncoKB client."""
2 |
3 | import json
4 | import os
5 | from pathlib import Path
6 | from unittest.mock import patch
7 |
8 | import pytest
9 |
10 | from biomcp.http_client import RequestError
11 | from biomcp.variants.oncokb_client import (
12 | ONCOKB_DEMO_URL,
13 | ONCOKB_PROD_URL,
14 | OncoKBClient,
15 | )
16 |
17 |
18 | # Load mock responses from test data file
def load_mock_responses() -> dict:
    """Load mock OncoKB responses from JSON file.

    The file is ``data/oncokb_mock_responses.json``, located three directory
    levels above this module. It is decoded explicitly as UTF-8 so the
    result does not depend on the platform's default encoding.

    Returns:
        The parsed JSON document
    """
    test_data_dir = Path(__file__).parent.parent.parent / "data"
    mock_file = test_data_dir / "oncokb_mock_responses.json"
    return json.loads(mock_file.read_text(encoding="utf-8"))
25 |
26 |
@pytest.fixture
def mock_responses():
    """Provide the parsed mock OncoKB payloads, loaded afresh for each test."""
    payloads = load_mock_responses()
    return payloads
31 |
32 |
33 | class TestOncoKBClient:
34 | """Test suite for OncoKBClient functionality."""
35 |
36 | def test_client_initialization_demo(self):
37 | """Test client initializes with demo URL when no token present."""
38 | with patch.dict(os.environ, {}, clear=True):
39 | client = OncoKBClient()
40 | assert client.base_url == ONCOKB_DEMO_URL
41 | assert client.is_demo is True
42 | assert "Accept" in client.headers
43 | assert "Authorization" not in client.headers
44 |
45 | def test_client_initialization_prod(self):
46 | """Test client switches to production URL when token is set."""
47 | with (
48 | patch.dict(os.environ, {"ONCOKB_TOKEN": "test-token"}, clear=True),
49 | patch("biomcp.variants.oncokb_client.ONCOKB_TOKEN", "test-token"),
50 | ):
51 | client = OncoKBClient()
52 | assert client.base_url == ONCOKB_PROD_URL
53 | assert client.is_demo is False
54 | assert "Authorization" in client.headers
55 | assert client.headers["Authorization"] == "Bearer test-token"
56 |
57 | def test_token_detection_with_bearer_prefix(self):
58 | """Test that Bearer prefix is not duplicated if already present."""
59 | with (
60 | patch.dict(
61 | os.environ,
62 | {"ONCOKB_TOKEN": "Bearer existing-token"},
63 | clear=True,
64 | ),
65 | patch(
66 | "biomcp.variants.oncokb_client.ONCOKB_TOKEN",
67 | "Bearer existing-token",
68 | ),
69 | ):
70 | client = OncoKBClient()
71 | assert client.headers["Authorization"] == "Bearer existing-token"
72 | assert not client.headers["Authorization"].startswith(
73 | "Bearer Bearer"
74 | )
75 |
76 | def test_server_selection_demo_mode(self):
77 | """Test demo server selection when no token is configured."""
78 | with patch.dict(os.environ, {}, clear=True):
79 | client = OncoKBClient()
80 | assert client.base_url == ONCOKB_DEMO_URL
81 | assert client.is_demo is True
82 |
83 | def test_server_selection_prod_mode(self):
84 | """Test production server selection when token is configured."""
85 | token = "my-oncokb-token" # noqa: S105 - test token
86 | with (
87 | patch.dict(os.environ, {"ONCOKB_TOKEN": token}, clear=True),
88 | patch("biomcp.variants.oncokb_client.ONCOKB_TOKEN", token),
89 | ):
90 | client = OncoKBClient()
91 | assert client.base_url == ONCOKB_PROD_URL
92 | assert client.is_demo is False
93 |
94 | @pytest.mark.asyncio
95 | async def test_get_curated_genes_success(self, mock_responses):
96 | """Test successful retrieval of curated genes list."""
97 | with patch.dict(os.environ, {}, clear=True):
98 | client = OncoKBClient()
99 |
100 | mock_genes = mock_responses["allCuratedGenes"]
101 |
102 | with patch(
103 | "biomcp.variants.oncokb_client.request_api"
104 | ) as mock_request:
105 | mock_request.return_value = (mock_genes, None)
106 |
107 | result, error = await client.get_curated_genes()
108 |
109 | # Verify result
110 | assert error is None
111 | assert result is not None
112 | assert isinstance(result, list)
113 | assert len(result) == 3
114 |
115 | # Check BRAF entry
116 | braf = next(
117 | (g for g in result if g["hugoSymbol"] == "BRAF"), None
118 | )
119 | assert braf is not None
120 | assert braf["entrezGeneId"] == 673
121 | assert braf["geneType"] == "ONCOGENE"
122 | assert "BRAF" in braf["summary"]
123 |
124 | # Check TP53 entry
125 | tp53 = next(
126 | (g for g in result if g["hugoSymbol"] == "TP53"), None
127 | )
128 | assert tp53 is not None
129 | assert tp53["geneType"] == "TSG"
130 | assert tp53["entrezGeneId"] == 7157
131 |
132 | # Verify API was called correctly
133 | mock_request.assert_called_once()
134 | call_kwargs = mock_request.call_args[1]
135 | assert call_kwargs["domain"] == "oncokb"
136 | assert call_kwargs["endpoint_key"] == "oncokb_curated_genes"
137 | assert call_kwargs["cache_ttl"] == 86400 # 24 hours
138 |
139 | @pytest.mark.asyncio
140 | async def test_get_curated_genes_api_error(self):
141 | """Test handling of API errors in get_curated_genes."""
142 | with patch.dict(os.environ, {}, clear=True):
143 | client = OncoKBClient()
144 |
145 | error_response = RequestError(
146 | code=500, message="Internal server error"
147 | )
148 |
149 | with patch(
150 | "biomcp.variants.oncokb_client.request_api"
151 | ) as mock_request:
152 | mock_request.return_value = (None, error_response)
153 |
154 | result, error = await client.get_curated_genes()
155 |
156 | assert result is None
157 | assert error is not None
158 | assert error.code == 500
159 | assert "Internal server error" in error.message
160 |
161 | @pytest.mark.asyncio
162 | async def test_get_curated_genes_unexpected_format(self):
163 | """Test handling of unexpected response format."""
164 | with patch.dict(os.environ, {}, clear=True):
165 | client = OncoKBClient()
166 |
167 | # Return dict instead of list
168 | with patch(
169 | "biomcp.variants.oncokb_client.request_api"
170 | ) as mock_request:
171 | mock_request.return_value = ({"error": "not a list"}, None)
172 |
173 | result, error = await client.get_curated_genes()
174 |
175 | assert result is None
176 | assert error is not None
177 | assert "Unexpected response format" in error.message
178 |
179 | @pytest.mark.asyncio
180 | async def test_get_curated_genes_exception_handling(self):
181 | """Test exception handling in get_curated_genes."""
182 | with patch.dict(os.environ, {}, clear=True):
183 | client = OncoKBClient()
184 |
185 | with patch(
186 | "biomcp.variants.oncokb_client.request_api"
187 | ) as mock_request:
188 | mock_request.side_effect = ValueError("Unexpected error")
189 |
190 | result, error = await client.get_curated_genes()
191 |
192 | assert result is None
193 | assert error is not None
194 | assert "Failed to fetch curated genes" in error.message
195 |
196 | @pytest.mark.asyncio
197 | async def test_get_gene_annotation_success(self, mock_responses):
198 | """Test successful retrieval of BRAF gene annotation."""
199 | with patch.dict(os.environ, {}, clear=True):
200 | client = OncoKBClient()
201 |
202 | mock_annotation = mock_responses["genesByHugoSymbol"][0]
203 |
204 | with patch(
205 | "biomcp.variants.oncokb_client.request_api"
206 | ) as mock_request:
207 | mock_request.return_value = (mock_annotation, None)
208 |
209 | result, error = await client.get_gene_annotation("BRAF")
210 |
211 | # Verify result
212 | assert error is None
213 | assert result is not None
214 | assert result["hugoSymbol"] == "BRAF"
215 | assert result["entrezGeneId"] == 673
216 | assert result["geneType"] == "ONCOGENE"
217 | assert "geneAliases" in result
218 | assert "BRAF1" in result["geneAliases"]
219 |
220 | # Verify API was called correctly
221 | mock_request.assert_called_once()
222 | call_kwargs = mock_request.call_args[1]
223 | assert call_kwargs["domain"] == "oncokb"
224 | assert call_kwargs["endpoint_key"] == "oncokb_gene_annotation"
225 | assert call_kwargs["cache_ttl"] == 3600 # 1 hour
226 |
227 | @pytest.mark.asyncio
228 | async def test_get_gene_annotation_multiple_genes(self, mock_responses):
229 | """Test annotation retrieval for multiple different genes."""
230 | with patch.dict(os.environ, {}, clear=True):
231 | client = OncoKBClient()
232 |
233 | # Test BRAF
234 | braf_annotation = mock_responses["genesByHugoSymbol"][0]
235 | with patch(
236 | "biomcp.variants.oncokb_client.request_api"
237 | ) as mock_request:
238 | mock_request.return_value = (braf_annotation, None)
239 | result, error = await client.get_gene_annotation("BRAF")
240 | assert error is None
241 | assert result["hugoSymbol"] == "BRAF"
242 |
243 | # Test ROS1
244 | ros1_annotation = mock_responses["genesByHugoSymbol"][1]
245 | with patch(
246 | "biomcp.variants.oncokb_client.request_api"
247 | ) as mock_request:
248 | mock_request.return_value = (ros1_annotation, None)
249 | result, error = await client.get_gene_annotation("ROS1")
250 | assert error is None
251 | assert result["hugoSymbol"] == "ROS1"
252 | assert result["geneType"] == "ONCOGENE"
253 |
254 | # Test TP53
255 | tp53_annotation = mock_responses["genesByHugoSymbol"][2]
256 | with patch(
257 | "biomcp.variants.oncokb_client.request_api"
258 | ) as mock_request:
259 | mock_request.return_value = (tp53_annotation, None)
260 | result, error = await client.get_gene_annotation("TP53")
261 | assert error is None
262 | assert result["hugoSymbol"] == "TP53"
263 | assert result["geneType"] == "TSG"
264 |
265 | @pytest.mark.asyncio
266 | async def test_get_gene_annotation_api_error(self):
267 | """Test handling of API errors in get_gene_annotation."""
268 | with patch.dict(os.environ, {}, clear=True):
269 | client = OncoKBClient()
270 |
271 | error_response = RequestError(code=404, message="Gene not found")
272 |
273 | with patch(
274 | "biomcp.variants.oncokb_client.request_api"
275 | ) as mock_request:
276 | mock_request.return_value = (None, error_response)
277 |
278 | result, error = await client.get_gene_annotation("INVALID")
279 |
280 | assert result is None
281 | assert error is not None
282 | assert error.code == 404
283 |
284 | @pytest.mark.asyncio
285 | async def test_get_gene_annotation_unexpected_format(self):
286 | """Test handling of unexpected response format in gene annotation."""
287 | with patch.dict(os.environ, {}, clear=True):
288 | client = OncoKBClient()
289 |
290 | # Return list instead of dict
291 | with patch(
292 | "biomcp.variants.oncokb_client.request_api"
293 | ) as mock_request:
294 | mock_request.return_value = (["not", "a", "dict"], None)
295 |
296 | result, error = await client.get_gene_annotation("BRAF")
297 |
298 | assert result is None
299 | assert error is not None
300 | assert "Unexpected response format" in error.message
301 |
302 | @pytest.mark.asyncio
303 | async def test_get_variant_annotation_success(self, mock_responses):
304 | """Test successful retrieval of BRAF V600E variant annotation."""
305 | with patch.dict(os.environ, {}, clear=True):
306 | client = OncoKBClient()
307 |
308 | mock_annotation = mock_responses["variantAnnotation"][
309 | "BRAF_V600E_melanoma"
310 | ]
311 |
312 | with patch(
313 | "biomcp.variants.oncokb_client.request_api"
314 | ) as mock_request:
315 | mock_request.return_value = (mock_annotation, None)
316 |
317 | result, error = await client.get_variant_annotation(
318 | "BRAF", "V600E"
319 | )
320 |
321 | # Verify result
322 | assert error is None
323 | assert result is not None
324 |
325 | # Check query details
326 | query = result["query"]
327 | assert query["hugoSymbol"] == "BRAF"
328 | assert query["alteration"] == "V600E"
329 | assert query["entrezGeneId"] == 673
330 |
331 | # Check oncogenicity
332 | assert result["oncogenic"] == "Oncogenic"
333 | assert result["mutationEffect"]["knownEffect"] == (
334 | "Gain-of-function"
335 | )
336 |
337 | # Check evidence levels
338 | assert result["highestSensitiveLevel"] == "LEVEL_1"
339 | assert result["highestFdaLevel"] == "LEVEL_Fda2"
340 | assert result["hotspot"] is True
341 |
342 | # Check treatments
343 | treatments = result["treatments"]
344 | assert len(treatments) > 0
345 | dabrafenib_treatment = treatments[0]
346 | assert dabrafenib_treatment["level"] == "LEVEL_1"
347 | assert len(dabrafenib_treatment["drugs"]) > 0
348 | assert dabrafenib_treatment["drugs"][0]["drugName"] == (
349 | "Dabrafenib"
350 | )
351 |
352 | # Verify API was called correctly
353 | mock_request.assert_called_once()
354 | call_kwargs = mock_request.call_args[1]
355 | assert call_kwargs["domain"] == "oncokb"
356 | assert (
357 | call_kwargs["endpoint_key"] == "oncokb_variant_annotation"
358 | )
359 | assert call_kwargs["cache_ttl"] == 3600 # 1 hour
360 |
361 | @pytest.mark.asyncio
362 | async def test_get_variant_annotation_parameters(self):
363 | """Test that variant annotation sends correct parameters."""
364 | with patch.dict(os.environ, {}, clear=True):
365 | client = OncoKBClient()
366 |
367 | with patch(
368 | "biomcp.variants.oncokb_client.request_api"
369 | ) as mock_request:
370 | mock_request.return_value = (
371 | {"query": {}, "oncogenic": "Oncogenic"},
372 | None,
373 | )
374 |
375 | await client.get_variant_annotation("BRAF", "V600E")
376 |
377 | # Verify parameters
378 | call_kwargs = mock_request.call_args[1]
379 | request_params = call_kwargs["request"]
380 | assert request_params["hugoSymbol"] == "BRAF"
381 | assert request_params["alteration"] == "V600E"
382 | assert "_headers" in request_params
383 |
384 | @pytest.mark.asyncio
385 | async def test_get_variant_annotation_api_error(self):
386 | """Test handling of API errors in get_variant_annotation."""
387 | with patch.dict(os.environ, {}, clear=True):
388 | client = OncoKBClient()
389 |
390 | error_response = RequestError(
391 | code=404, message="Variant not found"
392 | )
393 |
394 | with patch(
395 | "biomcp.variants.oncokb_client.request_api"
396 | ) as mock_request:
397 | mock_request.return_value = (None, error_response)
398 |
399 | result, error = await client.get_variant_annotation(
400 | "BRAF", "INVALID"
401 | )
402 |
403 | assert result is None
404 | assert error is not None
405 | assert error.code == 404
406 |
407 | @pytest.mark.asyncio
408 | async def test_get_variant_annotation_exception_handling(self):
409 | """Test exception handling in get_variant_annotation."""
410 | with patch.dict(os.environ, {}, clear=True):
411 | client = OncoKBClient()
412 |
413 | with patch(
414 | "biomcp.variants.oncokb_client.request_api"
415 | ) as mock_request:
416 | mock_request.side_effect = RuntimeError("Network error")
417 |
418 | result, error = await client.get_variant_annotation(
419 | "BRAF", "V600E"
420 | )
421 |
422 | assert result is None
423 | assert error is not None
424 | assert "Failed to fetch variant annotation" in error.message
425 |
426 | def test_headers_json_formatting(self):
427 | """Test that headers are properly formatted as JSON."""
428 | with (
429 | patch.dict(os.environ, {"ONCOKB_TOKEN": "test-token"}, clear=True),
430 | patch("biomcp.variants.oncokb_client.ONCOKB_TOKEN", "test-token"),
431 | ):
432 | client = OncoKBClient()
433 | headers_json = client._headers_json()
434 |
435 | # Should be valid JSON
436 | parsed = json.loads(headers_json)
437 | assert "Accept" in parsed
438 | assert "Authorization" in parsed
439 | assert parsed["Authorization"] == "Bearer test-token"
440 |
441 | @pytest.mark.asyncio
442 | async def test_error_handling_graceful_degradation(self):
443 | """Test that all methods gracefully handle errors and return None."""
444 | with patch.dict(os.environ, {}, clear=True):
445 | client = OncoKBClient()
446 |
447 | # Simulate complete API failure
448 | with patch(
449 | "biomcp.variants.oncokb_client.request_api"
450 | ) as mock_request:
451 | mock_request.return_value = (
452 | None,
453 | RequestError(code=503, message="Service unavailable"),
454 | )
455 |
456 | # All methods should return None without raising exceptions
457 | genes_result, genes_error = await client.get_curated_genes()
458 | assert genes_result is None
459 | assert genes_error is not None
460 |
461 | gene_result, gene_error = await client.get_gene_annotation(
462 | "BRAF"
463 | )
464 | assert gene_result is None
465 | assert gene_error is not None
466 |
467 | (
468 | variant_result,
469 | variant_error,
470 | ) = await client.get_variant_annotation("BRAF", "V600E")
471 | assert variant_result is None
472 | assert variant_error is not None
473 |
474 | @pytest.mark.asyncio
475 | async def test_caching_behavior(self):
476 | """Test that caching parameters are correctly set."""
477 | with patch.dict(os.environ, {}, clear=True):
478 | client = OncoKBClient()
479 |
480 | with patch(
481 | "biomcp.variants.oncokb_client.request_api"
482 | ) as mock_request:
483 | mock_request.return_value = ([], None)
484 |
485 | # Test curated genes - 24 hour cache
486 | await client.get_curated_genes()
487 | assert mock_request.call_args[1]["cache_ttl"] == 86400
488 |
489 | # Test gene annotation - 1 hour cache
490 | mock_request.return_value = ({}, None)
491 | await client.get_gene_annotation("BRAF")
492 | assert mock_request.call_args[1]["cache_ttl"] == 3600
493 |
494 | # Test variant annotation - 1 hour cache
495 | await client.get_variant_annotation("BRAF", "V600E")
496 | assert mock_request.call_args[1]["cache_ttl"] == 3600
497 |
498 | @pytest.mark.asyncio
499 | async def test_retry_enabled_for_all_methods(self):
500 | """Test that retry is enabled for all API methods."""
501 | with patch.dict(os.environ, {}, clear=True):
502 | client = OncoKBClient()
503 |
504 | with patch(
505 | "biomcp.variants.oncokb_client.request_api"
506 | ) as mock_request:
507 | mock_request.return_value = ([], None)
508 |
509 | await client.get_curated_genes()
510 | assert mock_request.call_args[1]["enable_retry"] is True
511 |
512 | mock_request.return_value = ({}, None)
513 | await client.get_gene_annotation("BRAF")
514 | assert mock_request.call_args[1]["enable_retry"] is True
515 |
516 | await client.get_variant_annotation("BRAF", "V600E")
517 | assert mock_request.call_args[1]["enable_retry"] is True
518 |
```
--------------------------------------------------------------------------------
/docs/user-guides/02-mcp-tools-reference.md:
--------------------------------------------------------------------------------
```markdown
1 | # MCP Tools Reference
2 |
3 | BioMCP provides 35 specialized tools for biomedical research through the Model Context Protocol (MCP). This reference covers all available tools, their parameters, and usage patterns.
4 |
5 | ## Related Guides
6 |
7 | - **Conceptual Overview**: [Sequential Thinking with the Think Tool](../concepts/03-sequential-thinking-with-the-think-tool.md)
8 | - **Practical Examples**: See the [How-to Guides](../how-to-guides/01-find-articles-and-cbioportal-data.md) for real-world usage patterns
9 | - **Integration Setup**: [Claude Desktop Integration](../getting-started/02-claude-desktop-integration.md)
10 |
11 | ## Tool Categories
12 |
13 | | Category | Count | Tools |
14 | | ------------------- | ----- | -------------------------------------------------------------- |
15 | | **Core Tools** | 3 | `search`, `fetch`, `think` |
16 | | **Article Tools** | 2 | `article_searcher`, `article_getter` |
17 | | **Trial Tools** | 6 | `trial_searcher`, `trial_getter`, + 4 detail getters |
18 | | **Variant Tools** | 3 | `variant_searcher`, `variant_getter`, `alphagenome_predictor` |
19 | | **BioThings Tools** | 3 | `gene_getter`, `disease_getter`, `drug_getter` |
20 | | **NCI Tools** | 6 | Organization, intervention, biomarker, and disease tools |
21 | | **OpenFDA Tools** | 12 | Adverse events, labels, devices, approvals, recalls, shortages |
22 |
23 | ## Core Unified Tools
24 |
25 | ### 1. search
26 |
27 | **Universal search across all biomedical domains with unified query language.**
28 |
29 | ```python
30 | search(
31 | query: str = None, # Unified query syntax
32 | domain: str = None, # Target domain
33 | genes: list[str] = None, # Gene symbols
34 | diseases: list[str] = None, # Disease/condition terms
35 | variants: list[str] = None, # Variant notations
36 | chemicals: list[str] = None, # Drug/chemical names
37 | keywords: list[str] = None, # Additional keywords
38 | conditions: list[str] = None, # Trial conditions
39 | interventions: list[str] = None,# Trial interventions
40 | lat: float = None, # Latitude for trials
41 | long: float = None, # Longitude for trials
42 | page: int = 1, # Page number
43 | page_size: int = 10, # Results per page
44 | api_key: str = None # For NCI domains
45 | ) -> dict
46 | ```
47 |
48 | **Domains:** `article`, `trial`, `variant`, `gene`, `drug`, `disease`, `nci_organization`, `nci_intervention`, `nci_biomarker`, `nci_disease`, `fda_adverse`, `fda_label`, `fda_device`, `fda_approval`, `fda_recall`, `fda_shortage`
49 |
50 | **Query Language Examples:**
51 |
52 | - `"gene:BRAF AND disease:melanoma"`
53 | - `"drugs.tradename:gleevec"`
54 | - `"gene:TP53 AND (mutation OR variant)"`
55 |
56 | **Usage Examples:**
57 |
58 | ```python
59 | # Domain-specific search
60 | search(domain="article", genes=["BRAF"], diseases=["melanoma"])
61 |
62 | # Unified query language
63 | search(query="gene:EGFR AND mutation:T790M")
64 |
65 | # Clinical trials by location
66 | search(domain="trial", conditions=["lung cancer"], lat=40.7128, long=-74.0060)
67 |
68 | # FDA adverse events
69 | search(domain="fda_adverse", chemicals=["aspirin"])
70 |
71 | # FDA drug approvals
72 | search(domain="fda_approval", chemicals=["keytruda"])
73 | ```
74 |
75 | ### 2. fetch
76 |
77 | **Retrieve detailed information for any biomedical record.**
78 |
79 | ```python
80 | fetch(
81 | id: str, # Record identifier
82 | domain: str = None, # Domain (auto-detected if not provided)
83 | detail: str = None, # Specific section for trials
84 | api_key: str = None # For NCI records
85 | ) -> dict
86 | ```
87 |
88 | **Supported IDs:**
89 |
90 | - Articles: PMID (e.g., "38768446"), DOI (e.g., "10.1101/2024.01.20.23288905")
91 | - Trials: NCT ID (e.g., "NCT03006926")
92 | - Variants: HGVS, rsID, genomic coordinates
93 | - Genes/Drugs/Diseases: Names or database IDs
94 | - FDA Records: Report IDs, Application Numbers (e.g., "BLA125514"), Recall Numbers, etc.
95 |
96 | **Detail Options for Trials:** `protocol`, `locations`, `outcomes`, `references`, `all`
97 |
98 | **Usage Examples:**
99 |
100 | ```python
101 | # Fetch article by PMID
102 | fetch(id="38768446", domain="article")
103 |
104 | # Fetch trial with specific details
105 | fetch(id="NCT03006926", domain="trial", detail="locations")
106 |
107 | # Auto-detect domain
108 | fetch(id="rs121913529") # Variant
109 | fetch(id="BRAF") # Gene
110 |
111 | # Fetch FDA records
112 | fetch(id="BLA125514", domain="fda_approval") # Drug approval
113 | fetch(id="D-0001-2023", domain="fda_recall") # Drug recall
114 | ```
115 |
116 | ### 3. think
117 |
118 | **Sequential thinking tool for structured problem-solving.**
119 |
120 | ```python
121 | think(
122 | thought: str, # Current reasoning step
123 | thoughtNumber: int, # Sequential number (1, 2, 3...)
124 | totalThoughts: int = None, # Estimated total thoughts
125 | nextThoughtNeeded: bool = True # Continue thinking?
126 | ) -> str
127 | ```
128 |
129 | **CRITICAL:** Always use `think` BEFORE any other BioMCP operation!
130 |
131 | **Usage Pattern:**
132 |
133 | ```python
134 | # Step 1: Problem decomposition
135 | think(
136 | thought="Breaking down query: need to find BRAF inhibitor trials...",
137 | thoughtNumber=1,
138 | nextThoughtNeeded=True
139 | )
140 |
141 | # Step 2: Search strategy
142 | think(
143 | thought="Will search trials for BRAF V600E melanoma, then articles...",
144 | thoughtNumber=2,
145 | nextThoughtNeeded=True
146 | )
147 |
148 | # Final step: Synthesis
149 | think(
150 | thought="Ready to synthesize findings from 5 trials and 12 articles...",
151 | thoughtNumber=3,
152 | nextThoughtNeeded=False # Analysis complete
153 | )
154 | ```
155 |
156 | ## Article Tools
157 |
158 | ### 4. article_searcher
159 |
160 | **Search PubMed/PubTator3 for biomedical literature.**
161 |
162 | ```python
163 | article_searcher(
164 | chemicals: list[str] = None,
165 | diseases: list[str] = None,
166 | genes: list[str] = None,
167 | keywords: list[str] = None, # Supports OR with "|"
168 | variants: list[str] = None,
169 | include_preprints: bool = True,
170 | include_cbioportal: bool = True,
171 | page: int = 1,
172 | page_size: int = 10
173 | ) -> str
174 | ```
175 |
176 | **Features:**
177 |
178 | - Automatic cBioPortal integration for gene searches
179 | - Preprint inclusion from bioRxiv/medRxiv
180 | - OR logic in keywords: `"V600E|p.V600E|c.1799T>A"`
181 |
182 | **Example:**
183 |
184 | ```python
185 | # Search with multiple filters
186 | article_searcher(
187 | genes=["BRAF"],
188 | diseases=["melanoma"],
189 | keywords=["resistance|resistant"],
190 | include_cbioportal=True
191 | )
192 | ```
193 |
194 | ### 5. article_getter
195 |
196 | **Fetch detailed article information.**
197 |
198 | ```python
199 | article_getter(
200 | pmid: str # PubMed ID, PMC ID, or DOI
201 | ) -> str
202 | ```
203 |
204 | **Supports:**
205 |
206 | - PubMed IDs: "38768446"
207 | - PMC IDs: "PMC7498215"
208 | - DOIs: "10.1101/2024.01.20.23288905"
209 |
210 | ## Trial Tools
211 |
212 | ### 6. trial_searcher
213 |
214 | **Search ClinicalTrials.gov with comprehensive filters.**
215 |
216 | ```python
217 | trial_searcher(
218 | conditions: list[str] = None,
219 | interventions: list[str] = None,
220 | other_terms: list[str] = None,
221 | recruiting_status: str = "ANY", # "OPEN", "CLOSED", "ANY"
222 | phase: str = None, # "PHASE1", "PHASE2", etc.
223 | lat: float = None, # Location-based search
224 | long: float = None,
225 | distance: int = None, # Miles from coordinates
226 | age_group: str = None, # "CHILD", "ADULT", "OLDER_ADULT"
227 | sex: str = None, # "MALE", "FEMALE", "ALL"
228 | study_type: str = None, # "INTERVENTIONAL", "OBSERVATIONAL"
229 | funder_type: str = None, # "NIH", "INDUSTRY", etc.
230 | page: int = 1,
231 | page_size: int = 10
232 | ) -> str
233 | ```
234 |
235 | **Location Search Example:**
236 |
237 | ```python
238 | # Trials near Boston
239 | trial_searcher(
240 | conditions=["breast cancer"],
241 | lat=42.3601,
242 | long=-71.0589,
243 | distance=50,
244 | recruiting_status="OPEN"
245 | )
246 | ```
247 |
248 | ### 7-11. Trial Detail Getters
249 |
250 | ```python
251 | # Get complete trial information
252 | trial_getter(nct_id: str) -> str
253 |
254 | # Get specific sections
255 | trial_protocol_getter(nct_id: str) -> str # Core protocol info
256 | trial_locations_getter(nct_id: str) -> str # Sites and contacts
257 | trial_outcomes_getter(nct_id: str) -> str # Outcome measures
258 | trial_references_getter(nct_id: str) -> str # Publications
259 | ```
260 |
261 | ## Variant Tools
262 |
263 | ### 12. variant_searcher
264 |
265 | **Search MyVariant.info for genetic variants.**
266 |
267 | ```python
268 | variant_searcher(
269 | gene: str = None,
270 | hgvs: str = None,
271 | hgvsp: str = None, # Protein HGVS
272 | hgvsc: str = None, # Coding DNA HGVS
273 | rsid: str = None,
274 | region: str = None, # "chr7:140753336-140753337"
275 | significance: str = None, # Clinical significance
276 | frequency_min: float = None,
277 | frequency_max: float = None,
278 | cadd_score_min: float = None,
279 | sift_prediction: str = None,
280 | polyphen_prediction: str = None,
281 | sources: list[str] = None,
282 | include_cbioportal: bool = True,
283 | page: int = 1,
284 | page_size: int = 10
285 | ) -> str
286 | ```
287 |
288 | **Significance Options:** `pathogenic`, `likely_pathogenic`, `uncertain_significance`, `likely_benign`, `benign`
289 |
290 | **Example:**
291 |
292 | ```python
293 | # Find rare pathogenic BRCA1 variants
294 | variant_searcher(
295 | gene="BRCA1",
296 | significance="pathogenic",
297 | frequency_max=0.001,
298 | cadd_score_min=20
299 | )
300 | ```
301 |
302 | ### 13. variant_getter
303 |
304 | **Fetch comprehensive variant details.**
305 |
306 | ```python
307 | variant_getter(
308 | variant_id: str, # HGVS, rsID, or MyVariant ID
309 | include_external: bool = True # Include TCGA, 1000 Genomes
310 | ) -> str
311 | ```
312 |
313 | ### 14. alphagenome_predictor
314 |
315 | **Predict variant effects using Google DeepMind's AlphaGenome.**
316 |
317 | ```python
318 | alphagenome_predictor(
319 | chromosome: str, # e.g., "chr7"
320 | position: int, # 1-based position
321 | reference: str, # Reference allele
322 | alternate: str, # Alternate allele
323 | interval_size: int = 131072, # Analysis window
324 | tissue_types: list[str] = None, # UBERON terms
325 | significance_threshold: float = 0.5,
326 | api_key: str = None # AlphaGenome API key
327 | ) -> str
328 | ```
329 |
330 | **Requires:** AlphaGenome API key (environment variable or per-request)
331 |
332 | **Tissue Examples:**
333 |
334 | - `UBERON:0002367` - prostate gland
335 | - `UBERON:0001155` - colon
336 | - `UBERON:0002048` - lung
337 |
338 | **Example:**
339 |
340 | ```python
341 | # Predict BRAF V600E effects
342 | alphagenome_predictor(
343 | chromosome="chr7",
344 | position=140753336,
345 | reference="A",
346 | alternate="T",
347 | tissue_types=["UBERON:0002367"], # prostate
348 | api_key="your-key"
349 | )
350 | ```
351 |
352 | ## BioThings Tools
353 |
354 | ### 15. gene_getter
355 |
356 | **Get gene information from MyGene.info.**
357 |
358 | ```python
359 | gene_getter(
360 | gene_id_or_symbol: str # Gene symbol or Entrez ID
361 | ) -> str
362 | ```
363 |
364 | **Returns:** Official name, aliases, summary, genomic location, database links
365 |
366 | ### 16. disease_getter
367 |
368 | **Get disease information from MyDisease.info.**
369 |
370 | ```python
371 | disease_getter(
372 | disease_id_or_name: str # Disease name or ontology ID
373 | ) -> str
374 | ```
375 |
376 | **Returns:** Definition, synonyms, MONDO/DOID IDs, associated phenotypes
377 |
378 | ### 17. drug_getter
379 |
380 | **Get drug/chemical information from MyChem.info.**
381 |
382 | ```python
383 | drug_getter(
384 | drug_id_or_name: str # Drug name or database ID
385 | ) -> str
386 | ```
387 |
388 | **Returns:** Chemical structure, mechanism, indications, trade names, identifiers
389 |
390 | ## NCI-Specific Tools
391 |
392 | All NCI tools require an API key from [clinicaltrialsapi.cancer.gov](https://clinicaltrialsapi.cancer.gov).
393 |
394 | ### 18-19. Organization Tools
395 |
396 | ```python
397 | # Search organizations
398 | nci_organization_searcher(
399 | name: str = None,
400 | organization_type: str = None,
401 | city: str = None, # Must use with state
402 | state: str = None, # Must use with city
403 | api_key: str = None
404 | ) -> str
405 |
406 | # Get organization details
407 | nci_organization_getter(
408 | organization_id: str,
409 | api_key: str = None
410 | ) -> str
411 | ```
412 |
413 | ### 20-21. Intervention Tools
414 |
415 | ```python
416 | # Search interventions
417 | nci_intervention_searcher(
418 | name: str = None,
419 | intervention_type: str = None, # "Drug", "Device", etc.
420 | synonyms: bool = True,
421 | api_key: str = None
422 | ) -> str
423 |
424 | # Get intervention details
425 | nci_intervention_getter(
426 | intervention_id: str,
427 | api_key: str = None
428 | ) -> str
429 | ```
430 |
431 | ### 22. Biomarker Search
432 |
433 | ```python
434 | nci_biomarker_searcher(
435 | name: str = None,
436 | biomarker_type: str = None,
437 | api_key: str = None
438 | ) -> str
439 | ```
440 |
441 | ### 23. Disease Search (NCI)
442 |
443 | ```python
444 | nci_disease_searcher(
445 | name: str = None,
446 | include_synonyms: bool = True,
447 | category: str = None,
448 | api_key: str = None
449 | ) -> str
450 | ```
451 |
452 | ## OpenFDA Tools
453 |
454 | All OpenFDA tools support optional API keys for higher rate limits (240/min vs 40/min). Get a free key at [open.fda.gov/apis/authentication](https://open.fda.gov/apis/authentication/).
455 |
456 | ### 24. openfda_adverse_searcher
457 |
458 | **Search FDA Adverse Event Reporting System (FAERS).**
459 |
460 | ```python
461 | openfda_adverse_searcher(
462 | drug: str = None,
463 | reaction: str = None,
464 | serious: bool = None, # Filter serious events only
465 | limit: int = 25,
466 | skip: int = 0,
467 | api_key: str = None # Optional OpenFDA API key
468 | ) -> str
469 | ```
470 |
471 | **Example:**
472 |
473 | ```python
474 | # Find serious bleeding events for warfarin
475 | openfda_adverse_searcher(
476 | drug="warfarin",
477 | reaction="bleeding",
478 | serious=True,
479 | api_key="your-key" # Optional
480 | )
481 | ```
482 |
483 | ### 25. openfda_adverse_getter
484 |
485 | **Get detailed adverse event report.**
486 |
487 | ```python
488 | openfda_adverse_getter(
489 | report_id: str, # Safety report ID
490 | api_key: str = None
491 | ) -> str
492 | ```
493 |
494 | ### 26. openfda_label_searcher
495 |
496 | **Search FDA drug product labels.**
497 |
498 | ```python
499 | openfda_label_searcher(
500 | name: str = None,
501 | indication: str = None, # Search by indication
502 | boxed_warning: bool = False, # Filter for boxed warnings
503 | section: str = None, # Specific label section
504 | limit: int = 25,
505 | skip: int = 0,
506 | api_key: str = None
507 | ) -> str
508 | ```
509 |
510 | ### 27. openfda_label_getter
511 |
512 | **Get complete drug label information.**
513 |
514 | ```python
515 | openfda_label_getter(
516 | set_id: str, # Label set ID
517 | sections: list[str] = None, # Specific sections to retrieve
518 | api_key: str = None
519 | ) -> str
520 | ```
521 |
522 | **Label Sections:** `indications_and_usage`, `contraindications`, `warnings_and_precautions`, `dosage_and_administration`, `adverse_reactions`, `drug_interactions`, `pregnancy`, `pediatric_use`, `geriatric_use`
523 |
524 | ### 28. openfda_device_searcher
525 |
526 | **Search FDA device adverse event reports (MAUDE).**
527 |
528 | ```python
529 | openfda_device_searcher(
530 | device: str = None,
531 | manufacturer: str = None,
532 | problem: str = None,
533 | product_code: str = None, # FDA product code
534 | genomics_only: bool = True, # Filter genomic/diagnostic devices
535 | limit: int = 25,
536 | skip: int = 0,
537 | api_key: str = None
538 | ) -> str
539 | ```
540 |
541 | **Note:** FDA uses abbreviated device names (e.g., "F1CDX" for "FoundationOne CDx").
542 |
543 | ### 29. openfda_device_getter
544 |
545 | **Get detailed device event report.**
546 |
547 | ```python
548 | openfda_device_getter(
549 | mdr_report_key: str, # MDR report key
550 | api_key: str = None
551 | ) -> str
552 | ```
553 |
554 | ### 30. openfda_approval_searcher
555 |
556 | **Search FDA drug approval records (Drugs@FDA).**
557 |
558 | ```python
559 | openfda_approval_searcher(
560 | drug: str = None,
561 | application_number: str = None, # NDA/BLA number
562 | approval_year: str = None, # YYYY format
563 | limit: int = 25,
564 | skip: int = 0,
565 | api_key: str = None
566 | ) -> str
567 | ```
568 |
569 | ### 31. openfda_approval_getter
570 |
571 | **Get drug approval details.**
572 |
573 | ```python
574 | openfda_approval_getter(
575 | application_number: str, # NDA/BLA number
576 | api_key: str = None
577 | ) -> str
578 | ```
579 |
580 | ### 32. openfda_recall_searcher
581 |
582 | **Search FDA drug recall records.**
583 |
584 | ```python
585 | openfda_recall_searcher(
586 | drug: str = None,
587 | recall_class: str = None, # "1", "2", or "3"
588 | status: str = None, # "ongoing" or "completed"
589 | reason: str = None,
590 | since_date: str = None, # YYYYMMDD format
591 | limit: int = 25,
592 | skip: int = 0,
593 | api_key: str = None
594 | ) -> str
595 | ```
596 |
597 | **Recall Classes:**
598 |
599 | - Class 1: Dangerous or defective products that could cause serious health problems or death
600 | - Class 2: Products that might cause temporary health problems or pose a slight threat of a serious nature
601 | - Class 3: Products unlikely to cause adverse health consequences
602 |
603 | ### 33. openfda_recall_getter
604 |
605 | **Get drug recall details.**
606 |
607 | ```python
608 | openfda_recall_getter(
609 | recall_number: str, # FDA recall number
610 | api_key: str = None
611 | ) -> str
612 | ```
613 |
614 | ### 34. openfda_shortage_searcher
615 |
616 | **Search FDA drug shortage database.**
617 |
618 | ```python
619 | openfda_shortage_searcher(
620 | drug: str = None,
621 | status: str = None, # "current" or "resolved"
622 | therapeutic_category: str = None,
623 | limit: int = 25,
624 | skip: int = 0,
625 | api_key: str = None
626 | ) -> str
627 | ```
628 |
629 | ### 35. openfda_shortage_getter
630 |
631 | **Get drug shortage details.**
632 |
633 | ```python
634 | openfda_shortage_getter(
635 | drug_name: str,
636 | api_key: str = None
637 | ) -> str
638 | ```
639 |
640 | ## Best Practices
641 |
642 | ### 1. Always Think First
643 |
644 | ```python
645 | # ✅ CORRECT - Think before searching
646 | think(thought="Planning BRAF melanoma research...", thoughtNumber=1)
647 | results = article_searcher(genes=["BRAF"], diseases=["melanoma"])
648 |
649 | # ❌ INCORRECT - Skipping think tool
650 | results = article_searcher(genes=["BRAF"]) # Poor results!
651 | ```
652 |
653 | ### 2. Use Unified Tools for Flexibility
654 |
655 | ```python
656 | # Unified search supports complex queries
657 | results = search(query="gene:EGFR AND (mutation:T790M OR mutation:C797S)")
658 |
659 | # Unified fetch auto-detects domain
660 | details = fetch(id="NCT03006926") # Knows it's a trial
661 | ```
662 |
663 | ### 3. Leverage Domain-Specific Features
664 |
665 | ```python
666 | # Article search with cBioPortal
667 | articles = article_searcher(
668 | genes=["KRAS"],
669 | include_cbioportal=True # Adds cancer genomics context
670 | )
671 |
672 | # Variant search with multiple filters
673 | variants = variant_searcher(
674 | gene="TP53",
675 | significance="pathogenic",
676 | frequency_max=0.01,
677 | cadd_score_min=25
678 | )
679 | ```
680 |
681 | ### 4. Handle API Keys Properly
682 |
683 | ```python
684 | # For personal use - environment variable
685 | # export NCI_API_KEY="your-key"
686 | nci_results = nci_organization_searcher(name="Mayo Clinic")
687 |
688 | # For shared environments - per-request
689 | nci_results = nci_organization_searcher(
690 | name="Mayo Clinic",
691 | api_key="user-provided-key"
692 | )
694 | ```
695 |
696 | ### 5. Use Appropriate Page Sizes
697 |
698 | ```python
699 | # Large result sets - increase page_size
700 | results = article_searcher(
701 | genes=["TP53"],
702 | page_size=50 # Get more results at once
703 | )
704 |
705 | # Iterative exploration - use pagination
706 | page1 = trial_searcher(conditions=["cancer"], page=1, page_size=10)
707 | page2 = trial_searcher(conditions=["cancer"], page=2, page_size=10)
708 | ```
709 |
710 | ## Error Handling
711 |
712 | All tools include comprehensive error handling:
713 |
714 | - **Invalid parameters**: Clear error messages with valid options
715 | - **API failures**: Graceful degradation with informative messages
716 | - **Rate limits**: Automatic retry with exponential backoff
717 | - **Missing API keys**: Helpful instructions for obtaining keys
718 |
719 | ## Tool Selection Guide
720 |
721 | | If you need to... | Use this tool |
722 | | ------------------------------ | ------------------------------------------------- |
723 | | Search across multiple domains | `search` with query language |
724 | | Get any record by ID | `fetch` with auto-detection |
725 | | Plan your research approach | `think` (always first!) |
726 | | Find recent papers | `article_searcher` |
727 | | Locate clinical trials | `trial_searcher` |
728 | | Analyze genetic variants | `variant_searcher` + `variant_getter` |
729 | | Predict variant effects | `alphagenome_predictor` |
730 | | Get gene/drug/disease info | `gene_getter`, `drug_getter`, `disease_getter` |
731 | | Access NCI databases | `nci_*` tools with API key |
732 | | Check drug adverse events | `openfda_adverse_searcher` |
733 | | Review FDA drug labels | `openfda_label_searcher` + `openfda_label_getter` |
734 | | Investigate device issues | `openfda_device_searcher` |
735 | | Find drug approvals | `openfda_approval_searcher` |
736 | | Check drug recalls | `openfda_recall_searcher` |
737 | | Monitor drug shortages | `openfda_shortage_searcher` |
738 |
739 | ## Next Steps
740 |
741 | - Review [Sequential Thinking](../concepts/03-sequential-thinking-with-the-think-tool.md) methodology
742 | - Explore [How-to Guides](../how-to-guides/01-find-articles-and-cbioportal-data.md) for complex workflows
743 | - Set up [API Keys](../getting-started/03-authentication-and-api-keys.md) for enhanced features
744 |
```
--------------------------------------------------------------------------------
/tests/integration/test_oncokb_integration.py:
--------------------------------------------------------------------------------
```python
1 | """Integration tests for OncoKB API.
2 |
3 | These tests make real API calls to verify OncoKB integration works correctly.
4 | They use the demo server by default (demo.oncokb.org) which has limited data.
5 | Tests are marked with pytest.mark.integration and gracefully skip if API is
6 | unavailable.
7 |
8 | Demo Server Limitations:
9 | - Only has data for BRAF, ROS1, and TP53
10 | - No authentication required
11 | - Limited to basic annotations
12 |
13 | Production Server:
14 | - Requires ONCOKB_TOKEN environment variable
15 | - Full cancer gene database
16 | - Complete therapeutic/diagnostic annotations
17 | """
18 |
19 | import os
20 |
21 | import pytest
22 |
23 | from biomcp.variants.oncokb_client import OncoKBClient
24 |
25 |
@pytest.mark.integration
class TestOncoKBDemoServer:
    """Integration tests for OncoKB demo server (no auth required).

    Each test removes ``ONCOKB_TOKEN`` from the environment before creating
    the client (and then asserts ``client.is_demo``), restoring the previous
    value in a ``finally`` block.  A test is skipped rather than failed when
    the server answers with HTTP 500, 503 or 504.
    """

    # Status codes interpreted as "demo server is down": skip, do not fail.
    _UNAVAILABLE_CODES = (500, 503, 504)

    @staticmethod
    def _remove_token():
        """Drop ONCOKB_TOKEN from the environment and return its old value.

        Returns None when the variable was not set.  An empty-string value is
        removed too, so it cannot linger in the environment during the test.
        """
        return os.environ.pop("ONCOKB_TOKEN", None)

    @staticmethod
    def _restore_token(original_token):
        """Put back the value returned by ``_remove_token`` (no-op for None)."""
        if original_token is not None:
            os.environ["ONCOKB_TOKEN"] = original_token

    @classmethod
    def _skip_if_unavailable(cls, error):
        """Skip the running test when ``error`` carries an outage status code."""
        if error and error.code in cls._UNAVAILABLE_CODES:
            pytest.skip(f"OncoKB demo server unavailable: {error.message}")

    @pytest.mark.asyncio
    async def test_demo_server_access(self):
        """Test basic access to demo server with curated genes list."""
        # Temporarily remove token to force demo server
        original_token = self._remove_token()

        try:
            client = OncoKBClient()

            # Verify demo server is being used
            assert client.is_demo is True
            assert "demo.oncokb.org" in client.base_url

            # Fetch curated genes list (this works on demo)
            result, error = await client.get_curated_genes()
            self._skip_if_unavailable(error)

            # Should succeed with curated genes
            assert error is None, f"Expected success but got error: {error}"
            assert result is not None
            assert isinstance(result, list)
            assert len(result) > 0

            # Find BRAF in the results
            braf = next(
                (g for g in result if g.get("hugoSymbol") == "BRAF"), None
            )
            assert braf is not None, "BRAF should be in demo curated genes"

            print("✓ Demo server access successful")
            print(f" Total curated genes: {len(result)}")
            print(f" BRAF gene: {braf.get('hugoSymbol')}")
            print(f" BRAF Entrez ID: {braf.get('entrezGeneId')}")
            print(f" BRAF gene type: {braf.get('geneType')}")

        finally:
            self._restore_token(original_token)

    @pytest.mark.asyncio
    async def test_demo_gene_limits(self):
        """Test that demo server only has BRAF, ROS1, and TP53."""
        # Temporarily remove token to force demo server
        original_token = self._remove_token()

        try:
            client = OncoKBClient()
            assert client.is_demo is True

            # Get all curated genes from demo
            result, error = await client.get_curated_genes()
            self._skip_if_unavailable(error)

            assert error is None, f"Expected success but got error: {error}"
            assert result is not None
            assert isinstance(result, list)

            # Extract gene symbols
            gene_symbols = {g.get("hugoSymbol") for g in result}

            # Demo should have exactly BRAF, ROS1, and TP53
            expected_demo_genes = {"BRAF", "ROS1", "TP53"}
            assert gene_symbols == expected_demo_genes, (
                f"Expected demo genes {expected_demo_genes}, "
                f"got {gene_symbols}"
            )

            print(
                f"✓ Demo server has exactly the expected genes: {gene_symbols}"
            )

            # Verify KRAS is NOT in demo
            assert "KRAS" not in gene_symbols, "KRAS should not be in demo"
            print("✓ Demo correctly excludes non-demo genes like KRAS")

        finally:
            self._restore_token(original_token)

    @pytest.mark.asyncio
    async def test_variant_annotation(self):
        """Test variant annotation with BRAF V600E on demo server."""
        # Temporarily remove token to force demo server
        original_token = self._remove_token()

        try:
            client = OncoKBClient()
            assert client.is_demo is True

            # Request BRAF V600E annotation
            result, error = await client.get_variant_annotation(
                gene="BRAF", protein_change="V600E"
            )
            self._skip_if_unavailable(error)

            # Any other error is treated as "variant endpoint unsupported"
            if error:
                # Some demo servers may not support variant endpoints
                print(
                    f"Note: Variant endpoint returned error: {error.message}"
                )
                pytest.skip("Demo variant endpoint not available")

            assert result is not None
            assert isinstance(result, dict)

            # Check basic annotation fields
            query = result.get("query", {})
            assert query.get("hugoSymbol") == "BRAF"
            assert query.get("alteration") == "V600E"

            # Check if variant is marked as oncogenic
            oncogenic = result.get("oncogenic")
            if oncogenic:
                print(f"✓ BRAF V600E oncogenicity: {oncogenic}")
                # V600E is a well-known oncogenic mutation.  Matching on
                # "Oncogenic" alone still accepts "Likely Oncogenic" but no
                # longer lets a label such as "Likely Neutral" pass.
                assert "Oncogenic" in oncogenic

            # Check mutation effect
            mutation_effect = result.get("mutationEffect")
            if mutation_effect:
                known_effect = mutation_effect.get("knownEffect")
                print(f"✓ BRAF V600E effect: {known_effect}")

            # Check if it's a hotspot
            hotspot = result.get("hotspot")
            if hotspot is not None:
                print(f"✓ BRAF V600E hotspot: {hotspot}")
                # V600E is a known hotspot
                assert hotspot is True

            print("✓ Variant annotation successful for BRAF V600E")

        finally:
            self._restore_token(original_token)

    @pytest.mark.asyncio
    async def test_curated_genes_demo(self):
        """Test fetching curated genes list from demo server."""
        # Temporarily remove token to force demo server
        original_token = self._remove_token()

        try:
            client = OncoKBClient()
            assert client.is_demo is True

            # Fetch curated genes
            result, error = await client.get_curated_genes()
            self._skip_if_unavailable(error)

            # Should get a list of genes
            assert error is None, f"Expected success but got error: {error}"
            assert result is not None
            assert isinstance(result, list)

            # Nothing to inspect when the list is empty
            if not result:
                pytest.skip("Demo server returned empty gene list")

            print(f"✓ Demo server has {len(result)} curated genes")

            # Check structure of first gene
            first_gene = result[0]
            assert "hugoSymbol" in first_gene
            assert "entrezGeneId" in first_gene

            # Report (without asserting) which of the demo genes are present
            gene_symbols = {g.get("hugoSymbol") for g in result}
            demo_expected = {"BRAF", "ROS1", "TP53"}
            found = gene_symbols & demo_expected
            if found:
                print(f"✓ Found expected demo genes: {found}")

            # Print first few genes
            for gene in result[:5]:
                symbol = gene.get("hugoSymbol")
                oncogene = gene.get("oncogene")
                tsg = gene.get("tsg")
                print(f" - {symbol}: oncogene={oncogene}, tsg={tsg}")

        finally:
            self._restore_token(original_token)
240 |
241 |
@pytest.mark.integration
class TestOncoKBProductionServer:
    """Integration tests for the OncoKB production server (requires auth).

    The first test checks demo/production selection by toggling the
    ``ONCOKB_TOKEN`` environment variable; the remaining tests only run
    when a token is already present in the environment.
    """

    @pytest.mark.asyncio
    async def test_production_requires_token(self):
        """Test that demo/production server selection works correctly.

        Runs in two phases: first without ``ONCOKB_TOKEN`` (client must
        report the demo server), then with a deliberately invalid token
        (client must report the production server). The environment and
        the reloaded module are restored in ``finally``.
        """
        # Get original token state
        original_token = os.environ.get("ONCOKB_TOKEN")

        try:
            # Test 1: Without token, should use demo
            if original_token:
                del os.environ["ONCOKB_TOKEN"]

            # Need to reload module to pick up env var change
            # NOTE(review): presumably the module reads the token at import
            # time - confirm against oncokb_client.
            import importlib

            from biomcp.variants import oncokb_client

            importlib.reload(oncokb_client)

            # Use the class from the freshly reloaded module, not the one
            # imported at the top of this test file.
            client_no_token = oncokb_client.OncoKBClient()
            assert client_no_token.is_demo is True
            assert "demo.oncokb.org" in client_no_token.base_url
            print("✓ Without token, client correctly uses demo server")

            # Test 2: With token (invalid), should try production
            os.environ["ONCOKB_TOKEN"] = "invalid_token_for_testing"  # noqa: S105
            importlib.reload(oncokb_client)

            client_with_token = oncokb_client.OncoKBClient()
            assert client_with_token.is_demo is False
            assert "www.oncokb.org" in client_with_token.base_url
            print("✓ With token set, client correctly uses production server")

            # Try to fetch with invalid token - should get auth error
            result, error = await client_with_token.get_curated_genes()

            if error:
                # Expected: auth error with invalid token
                assert error.code in [
                    400,
                    401,
                    403,
                ], f"Expected auth error, got: {error.code}"
                print(
                    f"✓ Production correctly rejects invalid token (HTTP {error.code})"
                )
            else:
                # Unexpected but not a failure - maybe public endpoint
                print(
                    "Note: Production endpoint accessible with invalid token"
                )

        finally:
            # Restore original state: put the real token back, or remove
            # the fake one this test installed.
            if original_token:
                os.environ["ONCOKB_TOKEN"] = original_token
            elif "ONCOKB_TOKEN" in os.environ:
                del os.environ["ONCOKB_TOKEN"]

            # Reload one more time to restore original state
            # (imports are repeated because the try block may have failed
            # before reaching its own import statements)
            import importlib

            from biomcp.variants import oncokb_client

            importlib.reload(oncokb_client)

    @pytest.mark.asyncio
    async def test_production_with_token(self):
        """Test production server with valid token (if available).

        Skipped when ``ONCOKB_TOKEN`` is unset, when the server answers
        with 500/503/504, or when the token is rejected with 401/403.
        """
        # Only run if token is set
        if not os.environ.get("ONCOKB_TOKEN"):
            pytest.skip("ONCOKB_TOKEN not set - skipping production test")

        client = OncoKBClient()

        # Should be using production server
        assert client.is_demo is False
        assert "www.oncokb.org" in client.base_url
        print("✓ Using production server with token")

        # Try to fetch curated genes (works on production with token)
        result, error = await client.get_curated_genes()

        # Skip if server unavailable
        if error and error.code in [500, 503, 504]:
            pytest.skip(
                f"OncoKB production server unavailable: {error.message}"
            )

        # Should succeed with valid token; a rejected token is treated as
        # an environment problem (skip), anything else as a real failure.
        if error:
            if error.code in [401, 403]:
                pytest.skip(f"Token authentication failed: {error.message}")
            else:
                pytest.fail(f"Unexpected error: {error}")

        assert result is not None
        assert isinstance(result, list)
        assert len(result) > 0

        # Find a common cancer gene like EGFR (informational only; its
        # absence does not fail the test)
        egfr = next((g for g in result if g.get("hugoSymbol") == "EGFR"), None)

        print("✓ Production server access successful with token")
        print(f" Total genes: {len(result)}")
        if egfr:
            print(f" Sample gene: {egfr.get('hugoSymbol')}")
            print(f" Entrez ID: {egfr.get('entrezGeneId')}")

    @pytest.mark.asyncio
    async def test_production_curated_genes(self):
        """Test production server has full gene database.

        Requires more than 100 curated genes and the presence of BRAF,
        TP53, EGFR, KRAS and PIK3CA. Skipped without a token, on auth
        failure (401/403) or when the server is unavailable (500/503/504).
        """
        # Only run if token is set
        if not os.environ.get("ONCOKB_TOKEN"):
            pytest.skip("ONCOKB_TOKEN not set - skipping production test")

        client = OncoKBClient()
        assert client.is_demo is False

        # Fetch all curated genes
        result, error = await client.get_curated_genes()

        # Skip if server unavailable or auth fails
        if error:
            if error.code in [401, 403]:
                pytest.skip(f"Token authentication failed: {error.message}")
            elif error.code in [500, 503, 504]:
                pytest.skip(f"OncoKB production unavailable: {error.message}")
            else:
                pytest.fail(f"Unexpected error: {error}")

        assert result is not None
        assert isinstance(result, list)

        # Production should have many genes; the assertion uses a
        # conservative floor of 100.
        assert (
            len(result) > 100
        ), f"Expected >100 genes in production, got {len(result)}"

        print(f"✓ Production server has {len(result)} curated genes")

        # Check for well-known cancer genes - every one must be present
        gene_symbols = {g.get("hugoSymbol") for g in result}
        expected_genes = {"BRAF", "TP53", "EGFR", "KRAS", "PIK3CA"}

        found = gene_symbols & expected_genes
        assert len(found) == len(
            expected_genes
        ), f"Expected all cancer genes, found: {found}"

        print(f"✓ Found expected cancer genes: {found}")
396 |
397 |
@pytest.mark.integration
class TestOncoKBErrorHandling:
    """Integration checks for error paths and edge cases.

    Every test removes ``ONCOKB_TOKEN`` from the environment for its
    duration and puts it back afterwards.
    """

    @pytest.mark.asyncio
    async def test_invalid_gene_symbol(self):
        """A made-up gene symbol must not appear in the curated gene list."""
        saved_token = os.environ.get("ONCOKB_TOKEN")
        if saved_token:
            del os.environ["ONCOKB_TOKEN"]

        try:
            api = OncoKBClient()
            genes, err = await api.get_curated_genes()

            # Server-side outages are not test failures
            if err and err.code in (500, 503, 504):
                pytest.skip(f"OncoKB server unavailable: {err.message}")

            assert err is None, f"Expected success but got error: {err}"
            assert genes is not None

            known_symbols = {entry.get("hugoSymbol") for entry in genes}
            assert "NOTAREALGENE" not in known_symbols
            print("✓ Invalid gene correctly not in curated genes list")

        finally:
            if saved_token:
                os.environ["ONCOKB_TOKEN"] = saved_token

    @pytest.mark.asyncio
    async def test_empty_query_handling(self):
        """The curated-genes call takes no parameters and must return a list."""
        saved_token = os.environ.get("ONCOKB_TOKEN")
        if saved_token:
            del os.environ["ONCOKB_TOKEN"]

        try:
            api = OncoKBClient()

            # No arguments are needed for this endpoint
            genes, err = await api.get_curated_genes()

            # Server-side outages are not test failures
            if err and err.code in (500, 503, 504):
                pytest.skip(f"OncoKB server unavailable: {err.message}")

            assert err is None, f"Expected success but got error: {err}"
            assert genes is not None
            assert isinstance(genes, list)
            print(
                f"✓ Curated genes query works without parameters ({len(genes)} genes)"
            )

        finally:
            if saved_token:
                os.environ["ONCOKB_TOKEN"] = saved_token

    @pytest.mark.asyncio
    async def test_invalid_variant_format(self):
        """A malformed protein change must yield an error or a non-None result."""
        saved_token = os.environ.get("ONCOKB_TOKEN")
        if saved_token:
            del os.environ["ONCOKB_TOKEN"]

        try:
            api = OncoKBClient()

            annotation, err = await api.get_variant_annotation(
                gene="BRAF", protein_change="invalid_format_123"
            )

            # Server-side outages are not test failures
            if err and err.code in (500, 503, 504):
                pytest.skip(f"OncoKB server unavailable: {err.message}")

            # Either outcome is acceptable: an error, or some result
            if not err:
                assert annotation is not None
                print("✓ Invalid variant format handled gracefully")
            else:
                print(
                    f"✓ Invalid variant format returns error (HTTP {err.code})"
                )

        finally:
            if saved_token:
                os.environ["ONCOKB_TOKEN"] = saved_token

    @pytest.mark.asyncio
    async def test_concurrent_requests(self):
        """Several gene annotation requests issued at once must each resolve."""
        import asyncio

        saved_token = os.environ.get("ONCOKB_TOKEN")
        if saved_token:
            del os.environ["ONCOKB_TOKEN"]

        try:
            api = OncoKBClient()

            symbols = ["BRAF", "ROS1", "TP53"]
            pending = [api.get_gene_annotation(symbol) for symbol in symbols]

            # Exceptions are returned in place rather than raised
            outcomes = await asyncio.gather(*pending, return_exceptions=True)

            for symbol, outcome in zip(symbols, outcomes, strict=False):
                if isinstance(outcome, Exception):
                    pytest.skip(
                        f"Server error during concurrent test: {outcome}"
                    )

                payload, err = outcome

                # Server-side outages are not test failures
                if err and err.code in (500, 503, 504):
                    pytest.skip(f"OncoKB server unavailable: {err.message}")

                # Only verify the symbol when data actually came back
                if payload:
                    assert payload.get("hugoSymbol") == symbol
                    print(f"✓ Concurrent request successful for {symbol}")

        finally:
            if saved_token:
                os.environ["ONCOKB_TOKEN"] = saved_token
534 |
535 |
if __name__ == "__main__":
    # Run integration tests directly for debugging.
    #
    # Usage:
    #     python tests/integration/test_oncokb_integration.py
    import asyncio

    async def run_tests():
        """Run the selected tests one after another, printing progress."""
        rule = "=" * 70
        print(rule)
        print("OncoKB Integration Tests")
        print(rule)

        # Each entry builds a fresh test-class instance when it is invoked,
        # so no state is shared between steps.
        steps = [
            (
                "Testing Demo Server Access...",
                lambda: TestOncoKBDemoServer().test_demo_server_access(),
            ),
            (
                "Testing Demo Gene Limits...",
                lambda: TestOncoKBDemoServer().test_demo_gene_limits(),
            ),
            (
                "Testing Variant Annotation...",
                lambda: TestOncoKBDemoServer().test_variant_annotation(),
            ),
            (
                "Testing Production Auth Requirement...",
                lambda: TestOncoKBProductionServer().test_production_requires_token(),
            ),
        ]

        for position, (label, start) in enumerate(steps, start=1):
            print(f"\n[{position}/{len(steps)}] {label}")
            await start()

        print("\n" + rule)
        print("✓ All integration tests completed")
        print(rule)

    asyncio.run(run_tests())
569 |
```
--------------------------------------------------------------------------------
/src/biomcp/domain_handlers.py:
--------------------------------------------------------------------------------
```python
1 | """Domain-specific result handlers for BioMCP.
2 |
3 | This module contains formatting functions for converting raw API responses
4 | from different biomedical data sources into a standardized format.
5 | """
6 |
7 | import logging
8 | from typing import Any
9 |
10 | from biomcp.constants import (
11 | DEFAULT_SIGNIFICANCE,
12 | DEFAULT_TITLE,
13 | METADATA_AUTHORS,
14 | METADATA_COMPLETION_DATE,
15 | METADATA_CONSEQUENCE,
16 | METADATA_GENE,
17 | METADATA_JOURNAL,
18 | METADATA_PHASE,
19 | METADATA_RSID,
20 | METADATA_SIGNIFICANCE,
21 | METADATA_SOURCE,
22 | METADATA_START_DATE,
23 | METADATA_STATUS,
24 | METADATA_YEAR,
25 | RESULT_ID,
26 | RESULT_METADATA,
27 | RESULT_SNIPPET,
28 | RESULT_TITLE,
29 | RESULT_URL,
30 | SNIPPET_LENGTH,
31 | )
32 |
33 | logger = logging.getLogger(__name__)
34 |
35 |
class ArticleHandler:
    """Handles formatting for article/publication results."""

    @staticmethod
    def format_result(result: dict[str, Any]) -> dict[str, Any]:
        """Format a single article result.

        A result containing a ``pmid`` key is formatted as a PubMed
        article; anything else is formatted as a preprint.

        Args:
            result: Raw article data from PubTator3 or preprint APIs

        Returns:
            Standardized article result with id, title, snippet, url, and metadata
        """
        abstract = result.get("abstract")
        snippet = abstract[:SNIPPET_LENGTH] + "..." if abstract else ""

        # A key that is present with a None value bypasses the default of
        # dict.get(), so None is normalised explicitly before slicing.
        authors = result.get("authors")
        leading_authors = [] if authors is None else authors[:3]

        if "pmid" in result:
            # PubMed article
            # Normalize whitespace in the title (split() also strips both
            # ends) and fall back to the default when nothing is left.
            # `or ""` guards against an explicit None title.
            title = " ".join((result.get("title") or "").split())
            if not title:
                title = DEFAULT_TITLE

            # Prefer the explicit publication year; otherwise use the
            # first four characters of the date, if a date is present.
            date = result.get("date")
            year = result.get("pub_year") or (date[:4] if date else None)

            return {
                RESULT_ID: result["pmid"],
                RESULT_TITLE: title,
                RESULT_SNIPPET: snippet,
                RESULT_URL: f"https://pubmed.ncbi.nlm.nih.gov/{result['pmid']}/",
                RESULT_METADATA: {
                    METADATA_YEAR: year,
                    METADATA_JOURNAL: result.get("journal", ""),
                    METADATA_AUTHORS: leading_authors,
                },
            }

        # Preprint result
        return {
            RESULT_ID: result.get("doi", result.get("id", "")),
            RESULT_TITLE: result.get("title", ""),
            RESULT_SNIPPET: snippet,
            RESULT_URL: result.get("url", ""),
            RESULT_METADATA: {
                METADATA_YEAR: result.get("pub_year"),
                METADATA_SOURCE: result.get("source", ""),
                METADATA_AUTHORS: leading_authors,
            },
        }
94 |
95 |
class TrialHandler:
    """Formatter that maps clinical trial records onto the standard result shape."""

    @staticmethod
    def format_result(result: dict[str, Any]) -> dict[str, Any]:
        """Format a single trial result.

        Three input layouts are recognised, checked in this order: the
        nested layout keyed by ``protocolSection``, the flat layout keyed
        by ``NCT Number``, and a snake_case layout used as the fallback.

        Args:
            result: Raw trial data from ClinicalTrials.gov API

        Returns:
            Standardized trial result with id, title, snippet, url, and metadata
        """
        if "protocolSection" in result:
            # Nested layout: values live inside per-topic modules
            section = result.get("protocolSection", {})
            ident_mod = section.get("identificationModule", {})
            status_mod = section.get("statusModule", {})
            descr_mod = section.get("descriptionModule", {})

            trial_id = ident_mod.get("nctId", "")
            short_title = ident_mod.get("briefTitle", "")
            long_title = ident_mod.get("officialTitle", "")
            summary = descr_mod.get("briefSummary", "")
            status_text = status_mod.get("overallStatus", "")
            started = status_mod.get("startDateStruct", {}).get("date", "")
            completed = status_mod.get(
                "primaryCompletionDateStruct", {}
            ).get("date", "")

            # Only the first listed phase is reported
            phase_list = section.get("designModule", {}).get("phases", [])
            trial_phase = phase_list[0] if phase_list else ""
        elif "NCT Number" in result:
            # Flat layout with human-readable column names
            trial_id = result.get("NCT Number", "")
            short_title = result.get("Study Title", "")
            long_title = ""  # this layout carries no official title
            summary = result.get("Brief Summary", "")
            status_text = result.get("Study Status", "")
            trial_phase = result.get("Phases", "")
            started = result.get("Start Date", "")
            completed = result.get("Completion Date", "")
        else:
            # Fallback: snake_case keys
            trial_id = result.get("nct_id", "")
            short_title = result.get("brief_title", "")
            long_title = result.get("official_title", "")
            summary = result.get("brief_summary", "")
            status_text = result.get("overall_status", "")
            trial_phase = result.get("phase", "")
            started = result.get("start_date", "")
            completed = result.get("primary_completion_date", "")

        if summary:
            snippet = summary[:SNIPPET_LENGTH] + "..."
        else:
            snippet = ""

        metadata = {
            METADATA_STATUS: status_text,
            METADATA_PHASE: trial_phase,
            METADATA_START_DATE: started,
            METADATA_COMPLETION_DATE: completed,
        }

        return {
            RESULT_ID: trial_id,
            RESULT_TITLE: short_title or long_title or DEFAULT_TITLE,
            RESULT_SNIPPET: snippet,
            RESULT_URL: f"https://clinicaltrials.gov/study/{trial_id}",
            RESULT_METADATA: metadata,
        }
168 |
169 |
class VariantHandler:
    """Handles formatting for genetic variant results."""

    @staticmethod
    def format_result(result: dict[str, Any]) -> dict[str, Any]:
        """Format a single variant result.

        Args:
            result: Raw variant data from MyVariant.info API

        Returns:
            Standardized variant result with id, title, snippet, url, and metadata
        """
        # Extract gene symbol - MyVariant.info stores this in multiple locations
        gene = result.get("dbnsfp", {}).get("genename", "")
        if not gene:
            dbsnp_gene = result.get("dbsnp", {}).get("gene", {})
            # dbsnp.gene may be a list of gene records rather than a single
            # record; use the first entry instead of failing on .get().
            if isinstance(dbsnp_gene, list):
                dbsnp_gene = dbsnp_gene[0] if dbsnp_gene else {}
            gene = (
                dbsnp_gene.get("symbol", "")
                if isinstance(dbsnp_gene, dict)
                else ""
            )
        gene = gene or ""
        # Handle case where gene is a list
        if isinstance(gene, list):
            gene = gene[0] if gene else ""

        # Extract rsid
        rsid = result.get("dbsnp", {}).get("rsid", "") or ""

        # Extract clinical significance; "rcv" may be a single record or a
        # list of records, in which case the first one is used.
        clinvar = result.get("clinvar", {})
        rcv = clinvar.get("rcv")
        significance = ""
        if isinstance(rcv, dict):
            significance = rcv.get("clinical_significance", "")
        elif isinstance(rcv, list) and rcv:
            significance = rcv[0].get("clinical_significance", "")

        # Build a meaningful title from gene + protein-level HGVS
        hgvs = ""
        if "dbnsfp" in result and "hgvsp" in result["dbnsfp"]:
            hgvs = result["dbnsfp"]["hgvsp"]
            if isinstance(hgvs, list):
                hgvs = hgvs[0] if hgvs else ""

        # Fall back to the record id when neither gene nor hgvs is known
        title = f"{gene} {hgvs}".strip() or result.get("_id", DEFAULT_TITLE)

        return {
            RESULT_ID: result.get("_id", ""),
            RESULT_TITLE: title,
            RESULT_SNIPPET: f"Clinical significance: {significance or DEFAULT_SIGNIFICANCE}",
            RESULT_URL: f"https://www.ncbi.nlm.nih.gov/snp/{rsid}"
            if rsid
            else "",
            RESULT_METADATA: {
                METADATA_GENE: gene,
                METADATA_RSID: rsid,
                METADATA_SIGNIFICANCE: significance,
                METADATA_CONSEQUENCE: result.get("cadd", {}).get(
                    "consequence", ""
                ),
            },
        }
229 |
230 |
class GeneHandler:
    """Formatter for gene information results from MyGene.info."""

    @staticmethod
    def format_result(result: dict[str, Any]) -> dict[str, Any]:
        """Format a single gene result.

        Args:
            result: Raw gene data from MyGene.info API

        Returns:
            Standardized gene result with id, title, snippet, url, and metadata
        """
        symbol = result.get("symbol", "")
        name = result.get("name", "")
        summary = result.get("summary", "")
        # "_id" wins; "entrezgene" is only the fallback identifier
        identifier = result.get("_id", result.get("entrezgene", ""))

        # "SYMBOL: name" when both exist, otherwise whichever is available
        if symbol and name:
            title = f"{symbol}: {name}"
        else:
            title = symbol or name or DEFAULT_TITLE

        # Truncate only summaries that exceed the snippet length
        snippet = summary
        if summary and len(summary) > SNIPPET_LENGTH:
            snippet = summary[:SNIPPET_LENGTH] + "..."

        url = ""
        if symbol:
            url = f"https://www.genenames.org/data/gene-symbol-report/#!/symbol/{symbol}"

        # The ensembl gene id is only read when "ensembl" is a mapping
        ensembl = result.get("ensembl")
        ensembl_gene = ensembl.get("gene") if isinstance(ensembl, dict) else None

        return {
            RESULT_ID: str(identifier),
            RESULT_TITLE: title,
            RESULT_SNIPPET: snippet or "No summary available",
            RESULT_URL: url,
            RESULT_METADATA: {
                "entrezgene": result.get("entrezgene"),
                "symbol": symbol,
                "name": name,
                "type_of_gene": result.get("type_of_gene", ""),
                "ensembl": ensembl_gene,
                "refseq": result.get("refseq", {}),
            },
        }
282 |
283 |
class DrugHandler:
    """Formatter for drug/chemical information results from MyChem.info."""

    @staticmethod
    def format_result(result: dict[str, Any]) -> dict[str, Any]:
        """Format a single drug result.

        Args:
            result: Raw drug data from MyChem.info API

        Returns:
            Standardized drug result with id, title, snippet, url, and metadata
        """
        identifier = result.get("_id", "")
        name = result.get("name", "")
        drugbank_id = result.get("drugbank_id", "")

        # The indication takes precedence over the description
        text = result.get("indication", "") or result.get("description", "")
        if text and len(text) > SNIPPET_LENGTH:
            snippet = text[:SNIPPET_LENGTH] + "..."
        else:
            snippet = text

        # Link to DrugBank when possible, otherwise to PubChem
        if drugbank_id:
            url = f"https://www.drugbank.ca/drugs/{drugbank_id}"
        elif result.get("pubchem_cid"):
            url = f"https://pubchem.ncbi.nlm.nih.gov/compound/{result['pubchem_cid']}"
        else:
            url = ""

        metadata = {
            "drugbank_id": drugbank_id,
            "chembl_id": result.get("chembl_id", ""),
            "pubchem_cid": result.get("pubchem_cid", ""),
            "chebi_id": result.get("chebi_id", ""),
            "formula": result.get("formula", ""),
            "tradename": result.get("tradename", []),
        }

        return {
            RESULT_ID: identifier,
            RESULT_TITLE: name or identifier or DEFAULT_TITLE,
            RESULT_SNIPPET: snippet or "No description available",
            RESULT_URL: url,
            RESULT_METADATA: metadata,
        }
336 |
337 |
class DiseaseHandler:
    """Formatter for disease information results from MyDisease.info."""

    @staticmethod
    def format_result(result: dict[str, Any]) -> dict[str, Any]:
        """Format a single disease result.

        Args:
            result: Raw disease data from MyDisease.info API

        Returns:
            Standardized disease result with id, title, snippet, url, and metadata
        """
        identifier = result.get("_id", "")
        name = result.get("name", "")
        definition = result.get("definition", "")
        mondo = result.get("mondo", {})

        # Truncate only definitions that exceed the snippet length
        snippet = definition
        if definition and len(definition) > SNIPPET_LENGTH:
            snippet = definition[:SNIPPET_LENGTH] + "..."

        # The MONDO id is only read when "mondo" is a mapping
        if isinstance(mondo, dict):
            mondo_id = mondo.get("id")
        else:
            mondo_id = ""

        url = ""
        if mondo_id:
            url = f"https://monarchinitiative.org/disease/{mondo_id}"

        return {
            RESULT_ID: identifier,
            RESULT_TITLE: name or identifier or DEFAULT_TITLE,
            RESULT_SNIPPET: snippet or "No definition available",
            RESULT_URL: url,
            RESULT_METADATA: {
                "mondo_id": mondo_id,
                "definition": definition,
                "synonyms": result.get("synonyms", []),
                "xrefs": result.get("xrefs", {}),
                # Only the number of phenotypes is reported, not the entries
                "phenotypes": len(result.get("phenotypes", [])),
            },
        }
388 |
389 |
class NCIOrganizationHandler:
    """Formatter for NCI organization results."""

    @staticmethod
    def format_result(result: dict[str, Any]) -> dict[str, Any]:
        """Format a single NCI organization result.

        Args:
            result: Raw organization data from NCI CTS API

        Returns:
            Standardized organization result with id, title, snippet, url, and metadata
        """
        identifier = result.get("id", result.get("org_id", ""))
        kind = result.get("type", result.get("category", ""))
        city = result.get("city", "")
        state = result.get("state", "")

        # "City, State" using whichever parts are present
        place = ", ".join(part for part in (city, state) if part)

        details = []
        if kind:
            details.append(f"Type: {kind}")
        if place:
            details.append(f"Location: {place}")

        return {
            RESULT_ID: identifier,
            RESULT_TITLE: result.get("name", "Unknown Organization"),
            RESULT_SNIPPET: " | ".join(details) or "No details available",
            RESULT_URL: "",  # NCI doesn't provide direct URLs to organizations
            RESULT_METADATA: {
                "type": kind,
                "city": city,
                "state": state,
                "country": result.get("country", ""),
            },
        }
433 |
434 |
class NCIInterventionHandler:
    """Formatter for NCI intervention results."""

    @staticmethod
    def format_result(result: dict[str, Any]) -> dict[str, Any]:
        """Format a single NCI intervention result.

        Args:
            result: Raw intervention data from NCI CTS API

        Returns:
            Standardized intervention result with id, title, snippet, url, and metadata
        """
        identifier = result.get("id", result.get("intervention_id", ""))
        kind = result.get("type", result.get("category", ""))
        synonyms = result.get("synonyms", [])

        details = []
        if kind:
            details.append(f"Type: {kind}")
        # Synonyms may be a list (first three are shown) or a single string
        if synonyms and isinstance(synonyms, list):
            details.append(f"Also known as: {', '.join(synonyms[:3])}")
        elif synonyms and isinstance(synonyms, str):
            details.append(f"Also known as: {synonyms}")

        return {
            RESULT_ID: identifier,
            RESULT_TITLE: result.get("name", "Unknown Intervention"),
            RESULT_SNIPPET: " | ".join(details) or "No details available",
            RESULT_URL: "",  # NCI doesn't provide direct URLs to interventions
            RESULT_METADATA: {
                "type": kind,
                "synonyms": synonyms,
                "description": result.get("description", ""),
            },
        }
477 |
478 |
class NCIBiomarkerHandler:
    """Formatter for NCI biomarker results."""

    @staticmethod
    def format_result(result: dict[str, Any]) -> dict[str, Any]:
        """Format a single NCI biomarker result.

        Args:
            result: Raw biomarker data from NCI CTS API

        Returns:
            Standardized biomarker result with id, title, snippet, url, and metadata
        """
        identifier = result.get("id", result.get("biomarker_id", ""))
        name = result.get("name", "Unknown Biomarker")
        gene = result.get("gene", result.get("gene_symbol", ""))
        kind = result.get("type", result.get("category", ""))
        assay = result.get("assay_type", "")

        # Prefix the gene unless the name already mentions it
        if gene and gene not in name:
            title = f"{gene} - {name}"
        else:
            title = name

        details = []
        if kind:
            details.append(f"Type: {kind}")
        if assay:
            details.append(f"Assay: {assay}")

        return {
            RESULT_ID: identifier,
            RESULT_TITLE: title,
            RESULT_SNIPPET: " | ".join(details)
            or "Biomarker for trial eligibility",
            RESULT_URL: "",  # NCI doesn't provide direct URLs to biomarkers
            RESULT_METADATA: {
                "gene": gene,
                "type": kind,
                "assay_type": assay,
                "trial_count": result.get("trial_count", 0),
            },
        }
525 |
526 |
class NCIDiseaseHandler:
    """Formatter for NCI disease vocabulary results."""

    @staticmethod
    def format_result(result: dict[str, Any]) -> dict[str, Any]:
        """Format a single NCI disease result.

        Args:
            result: Raw disease data from NCI CTS API

        Returns:
            Standardized disease result with id, title, snippet, url, and metadata
        """
        identifier = result.get("id", result.get("disease_id", ""))
        title = result.get(
            "name", result.get("preferred_name", "Unknown Disease")
        )
        category = result.get("category", result.get("type", ""))
        synonyms = result.get("synonyms", [])

        details = []
        if category:
            details.append(f"Category: {category}")
        # Synonyms may be a list or a single string; for lists the first
        # three are shown and the remainder is summarised as a count.
        if synonyms and isinstance(synonyms, list):
            details.append(f"Also known as: {', '.join(synonyms[:3])}")
            extra = len(synonyms) - 3
            if extra > 0:
                details.append(f"and {extra} more")
        elif synonyms and isinstance(synonyms, str):
            details.append(f"Also known as: {synonyms}")

        return {
            RESULT_ID: identifier,
            RESULT_TITLE: title,
            RESULT_SNIPPET: " | ".join(details) or "NCI cancer vocabulary term",
            RESULT_URL: "",  # NCI doesn't provide direct URLs to disease terms
            RESULT_METADATA: {
                "category": category,
                "synonyms": synonyms,
                "codes": result.get("codes", {}),
            },
        }
573 |
574 |
def get_domain_handler(
    domain: str,
) -> (
    type[ArticleHandler]
    | type[TrialHandler]
    | type[VariantHandler]
    | type[GeneHandler]
    | type[DrugHandler]
    | type[DiseaseHandler]
    | type[NCIOrganizationHandler]
    | type[NCIInterventionHandler]
    | type[NCIBiomarkerHandler]
    | type[NCIDiseaseHandler]
):
    """Look up the handler class registered for a domain name.

    Args:
        domain: The domain name ('article', 'trial', 'variant', 'gene', 'drug', 'disease',
            'nci_organization', 'nci_intervention', 'nci_biomarker', 'nci_disease')

    Returns:
        The handler class for the domain (the class itself, not an instance)

    Raises:
        ValueError: If domain is not recognized
    """
    registry: dict[
        str,
        type[ArticleHandler]
        | type[TrialHandler]
        | type[VariantHandler]
        | type[GeneHandler]
        | type[DrugHandler]
        | type[DiseaseHandler]
        | type[NCIOrganizationHandler]
        | type[NCIInterventionHandler]
        | type[NCIBiomarkerHandler]
        | type[NCIDiseaseHandler],
    ] = {
        "article": ArticleHandler,
        "trial": TrialHandler,
        "variant": VariantHandler,
        "gene": GeneHandler,
        "drug": DrugHandler,
        "disease": DiseaseHandler,
        "nci_organization": NCIOrganizationHandler,
        "nci_intervention": NCIInterventionHandler,
        "nci_biomarker": NCIBiomarkerHandler,
        "nci_disease": NCIDiseaseHandler,
    }

    # Lookup is an exact, case-sensitive match on the domain string
    if domain not in registry:
        raise ValueError(f"Unknown domain: {domain}")

    return registry[domain]
631 |
```
--------------------------------------------------------------------------------
/tests/tdd/trials/test_search.py:
--------------------------------------------------------------------------------
```python
1 | import pytest
2 |
3 | from biomcp.trials.search import (
4 | CLOSED_STATUSES,
5 | AgeGroup,
6 | DateField,
7 | InterventionType,
8 | LineOfTherapy,
9 | PrimaryPurpose,
10 | RecruitingStatus,
11 | SortOrder,
12 | SponsorType,
13 | StudyDesign,
14 | StudyType,
15 | TrialPhase,
16 | TrialQuery,
17 | _build_biomarker_expression_essie,
18 | _build_brain_mets_essie,
19 | _build_excluded_mutations_essie,
20 | _build_line_of_therapy_essie,
21 | _build_prior_therapy_essie,
22 | _build_progression_essie,
23 | _build_required_mutations_essie,
24 | _inject_ids,
25 | convert_query,
26 | )
27 |
28 |
@pytest.mark.asyncio
async def test_convert_query_basic_parameters():
    """A single condition yields markdown markup, the condition and a status filter."""
    api_params = await convert_query(TrialQuery(conditions=["lung cancer"]))

    assert "markupFormat" in api_params
    assert api_params["markupFormat"] == ["markdown"]
    assert "query.cond" in api_params
    assert api_params["query.cond"] == ["lung cancer"]
    assert "filter.overallStatus" in api_params
    # The first status filter entry must mention RECRUITING
    first_status_filter = api_params["filter.overallStatus"][0]
    assert "RECRUITING" in first_status_filter
41 |
42 |
@pytest.mark.asyncio
async def test_convert_query_multiple_conditions():
    """Several conditions are merged into one parenthesized expression."""
    api_params = await convert_query(
        TrialQuery(conditions=["lung cancer", "metastatic"])
    )

    assert "query.cond" in api_params
    # Both original terms must survive; extra expanded synonyms are allowed
    expression = api_params["query.cond"][0]
    assert "lung cancer" in expression
    assert "metastatic" in expression
    assert expression.startswith("(") and expression.endswith(")")
55 |
56 |
@pytest.mark.asyncio
async def test_convert_query_terms_parameter():
    """A single search term is emitted verbatim under query.term."""
    api_params = await convert_query(TrialQuery(terms=["immunotherapy"]))

    assert "query.term" in api_params
    assert api_params["query.term"] == ["immunotherapy"]
65 |
66 |
@pytest.mark.asyncio
async def test_convert_query_interventions_parameter():
    """A single intervention is emitted verbatim under query.intr."""
    api_params = await convert_query(TrialQuery(interventions=["pembrolizumab"]))

    assert "query.intr" in api_params
    assert api_params["query.intr"] == ["pembrolizumab"]
75 |
76 |
@pytest.mark.asyncio
async def test_convert_query_lead_sponsor_parameter():
    """A single lead sponsor is emitted verbatim under query.lead."""
    api_params = await convert_query(TrialQuery(lead_sponsor=["Pfizer"]))

    assert "query.lead" in api_params
    assert api_params["query.lead"] == ["Pfizer"]
85 |
86 |
@pytest.mark.asyncio
async def test_convert_query_multiple_lead_sponsors():
    """Two lead sponsors are merged into one query.lead entry."""
    sponsors = ["Pfizer", "National Cancer Institute"]
    api_params = await convert_query(TrialQuery(lead_sponsor=sponsors))

    assert "query.lead" in api_params

    # Both sponsors must live in a single combined value, not two entries.
    assert len(api_params["query.lead"]) == 1
    combined = api_params["query.lead"][0]
    assert "Pfizer" in combined
    assert "National Cancer Institute" in combined

    # Accept either an explicit OR join or a parenthesized expression.
    joined_with_or = " OR " in combined
    parenthesized = combined.startswith("(")
    assert joined_with_or or parenthesized
102 |
103 |
@pytest.mark.asyncio
async def test_convert_query_nct_ids():
    """A lone NCT ID is emitted verbatim under query.id."""
    api_params = await convert_query(TrialQuery(nct_ids=["NCT04179552"]))

    assert "query.id" in api_params
    assert api_params["query.id"] == ["NCT04179552"]
    # filter.overallStatus is intentionally left unchecked here: reportedly the
    # implementation still emits it when nct_ids are given (confirm in
    # convert_query), so its absence is not asserted.
114 |
115 |
@pytest.mark.asyncio
async def test_convert_query_recruiting_status():
    """OPEN, CLOSED and ANY recruiting statuses map to the expected status filter."""
    # OPEN: the status filter mentions RECRUITING.
    open_params = await convert_query(
        TrialQuery(recruiting_status=RecruitingStatus.OPEN)
    )
    assert "filter.overallStatus" in open_params
    assert "RECRUITING" in open_params["filter.overallStatus"][0]

    # CLOSED: every status listed in CLOSED_STATUSES appears in the filter.
    closed_params = await convert_query(
        TrialQuery(recruiting_status=RecruitingStatus.CLOSED)
    )
    assert "filter.overallStatus" in closed_params
    for closed_status in CLOSED_STATUSES:
        assert closed_status in closed_params["filter.overallStatus"][0]

    # ANY: no status filter is emitted at all.
    any_params = await convert_query(
        TrialQuery(recruiting_status=RecruitingStatus.ANY)
    )
    assert "filter.overallStatus" not in any_params
141 |
142 |
@pytest.mark.asyncio
async def test_convert_query_location_parameters():
    """Latitude, longitude and distance collapse into one filter.geo expression."""
    geo_query = TrialQuery(lat=40.7128, long=-74.0060, distance=10)
    api_params = await convert_query(geo_query)

    assert "filter.geo" in api_params
    # The longitude renders as -74.006 (trailing zero dropped) and the
    # distance carries a "mi" suffix.
    assert api_params["filter.geo"] == ["distance(40.7128,-74.006,10mi)"]
151 |
152 |
@pytest.mark.asyncio
async def test_convert_query_study_type():
    """An interventional study type appears as a StudyType area in filter.advanced."""
    api_params = await convert_query(
        TrialQuery(study_type=StudyType.INTERVENTIONAL)
    )

    assert "filter.advanced" in api_params
    advanced = api_params["filter.advanced"][0]
    assert "AREA[StudyType]Interventional" in advanced
161 |
162 |
@pytest.mark.asyncio
async def test_convert_query_phase():
    """A phase 3 query appears as a Phase area in filter.advanced."""
    api_params = await convert_query(TrialQuery(phase=TrialPhase.PHASE3))

    assert "filter.advanced" in api_params
    advanced = api_params["filter.advanced"][0]
    assert "AREA[Phase]PHASE3" in advanced
171 |
172 |
@pytest.mark.asyncio
async def test_convert_query_date_range():
    """Date bounds become a RANGE expression on the selected date field."""
    # Both bounds supplied, filtering on the last-update date.
    bounded_query = TrialQuery(
        min_date="2020-01-01",
        max_date="2020-12-31",
        date_field=DateField.LAST_UPDATE,
    )
    bounded_params = await convert_query(bounded_query)

    assert "filter.advanced" in bounded_params
    bounded_filter = bounded_params["filter.advanced"][0]
    assert "AREA[LastUpdatePostDate]RANGE[2020-01-01,2020-12-31]" in bounded_filter

    # Only the lower bound supplied: the upper end of the range is MAX.
    open_ended_query = TrialQuery(
        min_date="2021-01-01",
        date_field=DateField.STUDY_START,
    )
    open_ended_params = await convert_query(open_ended_query)

    assert "filter.advanced" in open_ended_params
    open_ended_filter = open_ended_params["filter.advanced"][0]
    assert "AREA[StartDate]RANGE[2021-01-01,MAX]" in open_ended_filter
200 |
201 |
@pytest.mark.asyncio
async def test_convert_query_sort_order():
    """Each SortOrder checked here maps to its expected sort parameter value."""
    # Relevance ordering.
    relevance_params = await convert_query(TrialQuery(sort=SortOrder.RELEVANCE))
    assert "sort" in relevance_params
    assert relevance_params["sort"] == ["@relevance"]

    # Last-update ordering sorts descending on the post date.
    last_update_params = await convert_query(
        TrialQuery(sort=SortOrder.LAST_UPDATE)
    )
    assert "sort" in last_update_params
    assert last_update_params["sort"] == ["LastUpdatePostDate:desc"]
216 |
217 |
@pytest.mark.asyncio
async def test_convert_query_intervention_type():
    """A drug intervention type appears as an InterventionType area filter."""
    api_params = await convert_query(
        TrialQuery(intervention_type=InterventionType.DRUG)
    )

    assert "filter.advanced" in api_params
    advanced = api_params["filter.advanced"][0]
    assert "AREA[InterventionType]Drug" in advanced
226 |
227 |
@pytest.mark.asyncio
async def test_convert_query_sponsor_type():
    """An academic sponsor type appears as a SponsorType area filter."""
    api_params = await convert_query(
        TrialQuery(sponsor_type=SponsorType.ACADEMIC)
    )

    assert "filter.advanced" in api_params
    advanced = api_params["filter.advanced"][0]
    assert "AREA[SponsorType]Academic" in advanced
236 |
237 |
@pytest.mark.asyncio
async def test_convert_query_study_design():
    """A randomized study design appears as a StudyDesign area filter."""
    api_params = await convert_query(
        TrialQuery(study_design=StudyDesign.RANDOMIZED)
    )

    assert "filter.advanced" in api_params
    advanced = api_params["filter.advanced"][0]
    assert "AREA[StudyDesign]Randomized" in advanced
246 |
247 |
@pytest.mark.asyncio
async def test_convert_query_age_group():
    """An adult age group appears as a StdAge area filter."""
    api_params = await convert_query(TrialQuery(age_group=AgeGroup.ADULT))

    assert "filter.advanced" in api_params
    advanced = api_params["filter.advanced"][0]
    assert "AREA[StdAge]Adult" in advanced
256 |
257 |
@pytest.mark.asyncio
async def test_convert_query_primary_purpose():
    """A treatment primary purpose appears as a DesignPrimaryPurpose area filter."""
    api_params = await convert_query(
        TrialQuery(primary_purpose=PrimaryPurpose.TREATMENT)
    )

    assert "filter.advanced" in api_params
    advanced = api_params["filter.advanced"][0]
    assert "AREA[DesignPrimaryPurpose]Treatment" in advanced
268 |
269 |
@pytest.mark.asyncio
async def test_convert_query_next_page_hash():
    """The next_page_hash value is forwarded as the pageToken parameter."""
    api_params = await convert_query(TrialQuery(next_page_hash="abc123"))

    assert "pageToken" in api_params
    assert api_params["pageToken"] == ["abc123"]
278 |
279 |
@pytest.mark.asyncio
async def test_convert_query_complex_parameters():
    """Many fields set at once each land in their expected API parameter."""
    full_query = TrialQuery(
        conditions=["diabetes"],
        terms=["obesity"],
        interventions=["metformin"],
        primary_purpose=PrimaryPurpose.TREATMENT,
        study_type=StudyType.INTERVENTIONAL,
        intervention_type=InterventionType.DRUG,
        recruiting_status=RecruitingStatus.OPEN,
        phase=TrialPhase.PHASE3,
        age_group=AgeGroup.ADULT,
        sort=SortOrder.RELEVANCE,
    )
    api_params = await convert_query(full_query)

    # Condition: containment only, since synonym expansion may add to "diabetes".
    assert "query.cond" in api_params
    assert "diabetes" in api_params["query.cond"][0]

    # Free-text term and intervention pass through unchanged.
    assert "query.term" in api_params
    assert api_params["query.term"] == ["obesity"]
    assert "query.intr" in api_params
    assert api_params["query.intr"] == ["metformin"]

    # All area-based filters share the first filter.advanced entry.
    assert "filter.advanced" in api_params
    advanced = api_params["filter.advanced"][0]
    expected_areas = (
        "AREA[DesignPrimaryPurpose]Treatment",
        "AREA[StudyType]Interventional",
        "AREA[InterventionType]Drug",
        "AREA[Phase]PHASE3",
        "AREA[StdAge]Adult",
    )
    for area_fragment in expected_areas:
        assert area_fragment in advanced

    # Recruiting status and sort order.
    assert "filter.overallStatus" in api_params
    assert "RECRUITING" in api_params["filter.overallStatus"][0]
    assert "sort" in api_params
    assert api_params["sort"] == ["@relevance"]
316 |
317 |
# Test TrialQuery field validation for CLI input processing
# noinspection PyTypeChecker
def test_trial_query_field_validation_basic():
    """A bare string given to a list field is wrapped into a one-element list."""
    by_condition = TrialQuery(conditions="diabetes")
    assert by_condition.conditions == ["diabetes"]

    by_intervention = TrialQuery(interventions="metformin")
    assert by_intervention.interventions == ["metformin"]

    # A term containing a space stays a single list element.
    by_term = TrialQuery(terms="blood glucose")
    assert by_term.terms == ["blood glucose"]

    by_nct_id = TrialQuery(nct_ids="NCT01234567")
    assert by_nct_id.nct_ids == ["NCT01234567"]
334 |
335 |
# noinspection PyTypeChecker
def test_trial_query_field_validation_recruiting_status():
    """String recruiting statuses are coerced to the enum; unknown ones are rejected."""
    # Uppercase string.
    upper = TrialQuery(recruiting_status="OPEN")
    assert upper.recruiting_status == RecruitingStatus.OPEN

    # Lowercase string.
    lower = TrialQuery(recruiting_status="closed")
    assert lower.recruiting_status == RecruitingStatus.CLOSED

    # An unrecognized value raises a ValueError naming the model.
    with pytest.raises(ValueError) as raised:
        TrialQuery(recruiting_status="invalid")
    # Checked after the `with` block so the assertion actually executes.
    message = str(raised.value)
    assert "validation error for TrialQuery" in message
351 |
352 |
# noinspection PyTypeChecker
@pytest.mark.asyncio
async def test_trial_query_field_validation_combined():
    """Mixed list, string and numeric inputs validate together and still convert."""
    combined = TrialQuery(
        conditions=["diabetes", "obesity"],
        interventions="metformin",
        recruiting_status="open",
        study_type="interventional",
        lat=40.7128,
        long=-74.0060,
        distance=10,
    )

    # Field-level validation: strings are coerced, numbers are kept as given.
    assert combined.conditions == ["diabetes", "obesity"]
    assert combined.interventions == ["metformin"]
    assert combined.recruiting_status == RecruitingStatus.OPEN
    assert combined.study_type == StudyType.INTERVENTIONAL
    assert combined.lat == 40.7128
    assert combined.long == -74.0060
    assert combined.distance == 10

    # The validated query must also convert cleanly into API parameters.
    api_params = await convert_query(combined)

    assert "query.cond" in api_params
    # Containment only: synonym expansion may add terms to the expression.
    condition_expr = api_params["query.cond"][0]
    assert "diabetes" in condition_expr
    assert "obesity" in condition_expr
    assert condition_expr.startswith("(")
    assert condition_expr.endswith(")")

    assert "query.intr" in api_params
    assert "metformin" in api_params["query.intr"][0]

    assert "filter.geo" in api_params
    assert "distance(40.7128,-74.006,10mi)" in api_params["filter.geo"][0]
387 |
388 |
# noinspection PyTypeChecker
@pytest.mark.asyncio
async def test_trial_query_field_validation_terms():
    """Terms accept a string or a list; multiple terms are OR-joined on conversion."""
    # A bare string becomes a one-element list.
    single = TrialQuery(terms="cancer")
    assert single.terms == ["cancer"]

    # A list is kept as given.
    several = TrialQuery(terms=["cancer", "therapy"])
    assert several.terms == ["cancer", "therapy"]

    # Converting the two-term query yields a parenthesized OR expression.
    api_params = await convert_query(several)
    assert "query.term" in api_params
    term_expr = api_params["query.term"][0]
    assert "(cancer OR therapy)" in term_expr
405 |
406 |
# noinspection PyTypeChecker
@pytest.mark.asyncio
async def test_trial_query_field_validation_nct_ids():
    """NCT IDs accept a string or a list; multiple IDs are comma-joined on conversion."""
    # A bare string becomes a one-element list.
    single = TrialQuery(nct_ids="NCT01234567")
    assert single.nct_ids == ["NCT01234567"]

    # A list is kept as given.
    several = TrialQuery(nct_ids=["NCT01234567", "NCT89012345"])
    assert several.nct_ids == ["NCT01234567", "NCT89012345"]

    # Converting the two-ID query yields one comma-separated query.id value.
    api_params = await convert_query(several)
    assert "query.id" in api_params
    id_expr = api_params["query.id"][0]
    assert "NCT01234567,NCT89012345" in id_expr
423 |
424 |
# noinspection PyTypeChecker
@pytest.mark.asyncio
async def test_trial_query_field_validation_date_range():
    """Date bounds validate, and date_field accepts a loosely spelled string."""
    # Lower bound only, with the date field given as an enum member.
    lower_only = TrialQuery(
        min_date="2020-01-01", date_field=DateField.STUDY_START
    )
    assert lower_only.min_date == "2020-01-01"
    assert lower_only.date_field == DateField.STUDY_START

    # Both bounds, with the date field spelled with a space instead of an
    # underscore; it must still resolve to LAST_UPDATE.
    both_bounds = TrialQuery(
        min_date="2020-01-01",
        max_date="2021-12-31",
        date_field="last update",  # space not underscore.
    )
    assert both_bounds.min_date == "2020-01-01"
    assert both_bounds.max_date == "2021-12-31"
    assert both_bounds.date_field == DateField.LAST_UPDATE

    # The two-bound query converts to a RANGE filter on the last-update date.
    api_params = await convert_query(both_bounds)
    assert "filter.advanced" in api_params
    advanced = api_params["filter.advanced"][0]
    assert "AREA[LastUpdatePostDate]RANGE[2020-01-01,2021-12-31]" in advanced
451 |
452 |
# noinspection PyTypeChecker
def test_trial_query_field_validation_primary_purpose():
    """Primary purpose accepts enum members and case-insensitive strings."""
    # Enum members are passed directly here (no string coercion involved)
    # and must be stored unchanged.
    treatment = TrialQuery(primary_purpose=PrimaryPurpose.TREATMENT)
    assert treatment.primary_purpose == PrimaryPurpose.TREATMENT

    prevention = TrialQuery(primary_purpose=PrimaryPurpose.PREVENTION)
    assert prevention.primary_purpose == PrimaryPurpose.PREVENTION

    # A mixed-case string resolves to the matching member.
    screening = TrialQuery(primary_purpose="ScReeNING")
    assert screening.primary_purpose == PrimaryPurpose.SCREENING

    # An unrecognized string is rejected.
    with pytest.raises(ValueError):
        TrialQuery(primary_purpose="invalid")
471 |
472 |
def test_inject_ids_with_many_ids_and_condition():
    """With 300 IDs plus another filter, _inject_ids writes filter.ids, not query.id."""
    # 300 sequential IDs: NCT00000001 .. NCT00000300.
    nct_ids = [f"NCT{str(i).zfill(8)}" for i in range(1, 301)]

    # The condition entry stands in for "other filters are present".
    request_params = {
        "query.cond": ["melanoma"],
        "format": ["json"],
        "markupFormat": ["markdown"],
    }

    # _inject_ids mutates request_params in place.
    _inject_ids(request_params, nct_ids, has_other_filters=True)

    # The IDs must go to filter.ids rather than query.id.
    assert "filter.ids" in request_params
    assert "query.id" not in request_params

    joined_ids = request_params["filter.ids"][0]

    # First and last IDs are present and the value starts with an ID.
    assert joined_ids.startswith("NCT")
    assert "NCT00000001" in joined_ids
    assert "NCT00000300" in joined_ids

    # Comma-separated: 300 IDs are joined by exactly 299 commas.
    assert "," in joined_ids
    assert joined_ids.count(",") == 299
501 |
502 |
def test_inject_ids_without_other_filters():
    """A short ID list with no other filters is written to query.id."""
    nct_ids = ["NCT00000001", "NCT00000002", "NCT00000003"]

    # Only format-related entries: nothing that counts as a search filter.
    request_params = {
        "format": ["json"],
        "markupFormat": ["markdown"],
    }

    _inject_ids(request_params, nct_ids, has_other_filters=False)

    # A small list goes to query.id, and filter.ids stays unset.
    assert "query.id" in request_params
    assert "filter.ids" not in request_params

    # The IDs are joined with commas in their original order.
    assert request_params["query.id"][0] == "NCT00000001,NCT00000002,NCT00000003"
523 |
524 |
def test_inject_ids_large_list_without_filters():
    """A long ID list is written to filter.ids even when no other filters exist."""
    # 200 IDs of 11 characters each plus 199 commas is 2399 characters when
    # joined.
    # NOTE(review): this is meant to exceed a length threshold of roughly
    # 1800 characters - confirm the exact limit in _inject_ids.
    nct_ids = [f"NCT{str(i).zfill(8)}" for i in range(1, 201)]

    request_params = {
        "format": ["json"],
        "markupFormat": ["markdown"],
    }

    _inject_ids(request_params, nct_ids, has_other_filters=False)

    # Length alone forces filter.ids, despite has_other_filters=False.
    assert "filter.ids" in request_params
    assert "query.id" not in request_params
541 |
542 |
# Tests for new Essie builder functions
def test_build_prior_therapy_essie():
    """Each non-empty prior therapy yields one EligibilityCriteria fragment."""
    # One therapy gives one fragment.
    single = _build_prior_therapy_essie(["osimertinib"])
    assert len(single) == 1
    assert (
        single[0]
        == 'AREA[EligibilityCriteria]("osimertinib" AND (prior OR previous OR received))'
    )

    # Two therapies give two fragments, in input order.
    pair = _build_prior_therapy_essie(["osimertinib", "erlotinib"])
    assert len(pair) == 2
    first_fragment, second_fragment = pair[0], pair[1]
    assert (
        first_fragment
        == 'AREA[EligibilityCriteria]("osimertinib" AND (prior OR previous OR received))'
    )
    assert (
        second_fragment
        == 'AREA[EligibilityCriteria]("erlotinib" AND (prior OR previous OR received))'
    )

    # An empty string in the input produces no fragment.
    with_blank = _build_prior_therapy_essie(["osimertinib", "", "erlotinib"])
    assert len(with_blank) == 2
569 |
570 |
def test_build_progression_essie():
    """A progression-on therapy yields one fragment with the progression keywords."""
    built = _build_progression_essie(["pembrolizumab"])

    assert len(built) == 1
    only_fragment = built[0]
    assert (
        only_fragment
        == 'AREA[EligibilityCriteria]("pembrolizumab" AND (progression OR resistant OR refractory))'
    )
579 |
580 |
def test_build_required_mutations_essie():
    """Each required mutation becomes a quoted EligibilityCriteria fragment, in order."""
    built = _build_required_mutations_essie(["EGFR L858R", "T790M"])

    assert len(built) == 2
    first_fragment, second_fragment = built[0], built[1]
    assert first_fragment == 'AREA[EligibilityCriteria]("EGFR L858R")'
    assert second_fragment == 'AREA[EligibilityCriteria]("T790M")'
587 |
588 |
def test_build_excluded_mutations_essie():
    """An excluded mutation becomes a NOT-prefixed EligibilityCriteria fragment."""
    built = _build_excluded_mutations_essie(["KRAS G12C"])

    assert len(built) == 1
    only_fragment = built[0]
    assert only_fragment == 'AREA[EligibilityCriteria](NOT "KRAS G12C")'
594 |
595 |
def test_build_biomarker_expression_essie():
    """Each biomarker with a non-empty expression yields one name-AND-value fragment."""
    # Two populated biomarkers give two fragments (order is not asserted).
    populated = {"PD-L1": "≥50%", "TMB": "≥10 mut/Mb"}
    built = _build_biomarker_expression_essie(populated)
    assert len(built) == 2
    assert 'AREA[EligibilityCriteria]("PD-L1" AND "≥50%")' in built
    assert 'AREA[EligibilityCriteria]("TMB" AND "≥10 mut/Mb")' in built

    # A biomarker whose expression is an empty string is dropped.
    partly_blank = {"PD-L1": "≥50%", "TMB": "", "HER2": "positive"}
    built = _build_biomarker_expression_essie(partly_blank)
    assert len(built) == 2
608 |
609 |
def test_build_line_of_therapy_essie():
    """Each LineOfTherapy value maps to its exact OR-list of eligibility phrases."""
    # (line of therapy, exact fragment expected), checked in this order.
    expectations = [
        (
            LineOfTherapy.FIRST_LINE,
            'AREA[EligibilityCriteria]("first line" OR "first-line" OR "1st line" OR "frontline" OR "treatment naive" OR "previously untreated")',
        ),
        (
            LineOfTherapy.SECOND_LINE,
            'AREA[EligibilityCriteria]("second line" OR "second-line" OR "2nd line" OR "one prior line" OR "1 prior line")',
        ),
        (
            LineOfTherapy.THIRD_LINE_PLUS,
            'AREA[EligibilityCriteria]("third line" OR "third-line" OR "3rd line" OR "≥2 prior" OR "at least 2 prior" OR "heavily pretreated")',
        ),
    ]

    for line, expected_fragment in expectations:
        built = _build_line_of_therapy_essie(line)
        assert built == expected_fragment
632 |
633 |
def test_build_brain_mets_essie():
    """Allowing brain metastases adds no filter; disallowing adds a NOT fragment."""
    # Allowed: an empty string, i.e. nothing to add to the query.
    when_allowed = _build_brain_mets_essie(True)
    assert when_allowed == ""

    # Not allowed: an explicit exclusion fragment.
    when_excluded = _build_brain_mets_essie(False)
    assert when_excluded == 'AREA[EligibilityCriteria](NOT "brain metastases")'
643 |
644 |
@pytest.mark.asyncio
async def test_convert_query_with_eligibility_fields():
    """Every eligibility-focused field contributes its Essie fragment to query.term."""
    eligibility_query = TrialQuery(
        conditions=["lung cancer"],
        prior_therapies=["osimertinib"],
        progression_on=["erlotinib"],
        required_mutations=["EGFR L858R"],
        excluded_mutations=["T790M"],
        biomarker_expression={"PD-L1": "≥50%"},
        line_of_therapy=LineOfTherapy.SECOND_LINE,
        allow_brain_mets=False,
    )
    api_params = await convert_query(eligibility_query)

    # All fragments are expected inside the first query.term entry.
    assert "query.term" in api_params
    term_expr = api_params["query.term"][0]

    expected_fragments = (
        # Prior therapy
        'AREA[EligibilityCriteria]("osimertinib" AND (prior OR previous OR received))',
        # Progression
        'AREA[EligibilityCriteria]("erlotinib" AND (progression OR resistant OR refractory))',
        # Required mutation
        'AREA[EligibilityCriteria]("EGFR L858R")',
        # Excluded mutation
        'AREA[EligibilityCriteria](NOT "T790M")',
        # Biomarker expression
        'AREA[EligibilityCriteria]("PD-L1" AND "≥50%")',
        # Line of therapy (only the start of the OR-list is checked)
        'AREA[EligibilityCriteria]("second line" OR "second-line"',
        # Brain mets exclusion
        'AREA[EligibilityCriteria](NOT "brain metastases")',
    )
    for fragment in expected_fragments:
        assert fragment in term_expr

    # The fragments are expected to be AND-joined; this only checks that an
    # " AND " separator occurs somewhere in the expression.
    assert " AND " in term_expr
693 |
694 |
@pytest.mark.asyncio
async def test_convert_query_with_custom_fields_and_page_size():
    """Custom return fields are comma-joined and page size is sent as a string."""
    custom_query = TrialQuery(
        conditions=["diabetes"],
        return_fields=["NCTId", "BriefTitle", "OverallStatus"],
        page_size=100,
    )
    api_params = await convert_query(custom_query)

    # Field names are collapsed into a single comma-separated entry.
    assert "fields" in api_params
    assert api_params["fields"] == ["NCTId,BriefTitle,OverallStatus"]

    # The integer page size is emitted as the string "100".
    assert "pageSize" in api_params
    assert api_params["pageSize"] == ["100"]
710 |
711 |
@pytest.mark.asyncio
async def test_convert_query_eligibility_with_existing_terms():
    """Eligibility fragments are AND-appended after user-supplied terms."""
    mixed_query = TrialQuery(
        terms=["immunotherapy"],
        prior_therapies=["chemotherapy"],
    )
    api_params = await convert_query(mixed_query)

    assert "query.term" in api_params
    term_expr = api_params["query.term"][0]

    # The user's own term and the generated fragment are both present.
    assert "immunotherapy" in term_expr
    assert (
        'AREA[EligibilityCriteria]("chemotherapy" AND (prior OR previous OR received))'
        in term_expr
    )

    # The user's term comes first, joined to the fragment with AND.
    assert "immunotherapy AND AREA[EligibilityCriteria]" in term_expr
732 |
```