This is page 3 of 20. Use http://codebase.md/genomoncology/biomcp?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .github
│ ├── actions
│ │ └── setup-python-env
│ │ └── action.yml
│ ├── dependabot.yml
│ └── workflows
│ ├── ci.yml
│ ├── deploy-docs.yml
│ ├── main.yml.disabled
│ ├── on-release-main.yml
│ └── validate-codecov-config.yml
├── .gitignore
├── .pre-commit-config.yaml
├── BIOMCP_DATA_FLOW.md
├── CHANGELOG.md
├── CNAME
├── codecov.yaml
├── docker-compose.yml
├── Dockerfile
├── docs
│ ├── apis
│ │ ├── error-codes.md
│ │ ├── overview.md
│ │ └── python-sdk.md
│ ├── assets
│ │ ├── biomcp-cursor-locations.png
│ │ ├── favicon.ico
│ │ ├── icon.png
│ │ ├── logo.png
│ │ ├── mcp_architecture.txt
│ │ └── remote-connection
│ │ ├── 00_connectors.png
│ │ ├── 01_add_custom_connector.png
│ │ ├── 02_connector_enabled.png
│ │ ├── 03_connect_to_biomcp.png
│ │ ├── 04_select_google_oauth.png
│ │ └── 05_success_connect.png
│ ├── backend-services-reference
│ │ ├── 01-overview.md
│ │ ├── 02-biothings-suite.md
│ │ ├── 03-cbioportal.md
│ │ ├── 04-clinicaltrials-gov.md
│ │ ├── 05-nci-cts-api.md
│ │ ├── 06-pubtator3.md
│ │ └── 07-alphagenome.md
│ ├── blog
│ │ ├── ai-assisted-clinical-trial-search-analysis.md
│ │ ├── images
│ │ │ ├── deep-researcher-video.png
│ │ │ ├── researcher-announce.png
│ │ │ ├── researcher-drop-down.png
│ │ │ ├── researcher-prompt.png
│ │ │ ├── trial-search-assistant.png
│ │ │ └── what_is_biomcp_thumbnail.png
│ │ └── researcher-persona-resource.md
│ ├── changelog.md
│ ├── CNAME
│ ├── concepts
│ │ ├── 01-what-is-biomcp.md
│ │ ├── 02-the-deep-researcher-persona.md
│ │ └── 03-sequential-thinking-with-the-think-tool.md
│ ├── developer-guides
│ │ ├── 01-server-deployment.md
│ │ ├── 02-contributing-and-testing.md
│ │ ├── 03-third-party-endpoints.md
│ │ ├── 04-transport-protocol.md
│ │ ├── 05-error-handling.md
│ │ ├── 06-http-client-and-caching.md
│ │ ├── 07-performance-optimizations.md
│ │ └── generate_endpoints.py
│ ├── faq-condensed.md
│ ├── FDA_SECURITY.md
│ ├── genomoncology.md
│ ├── getting-started
│ │ ├── 01-quickstart-cli.md
│ │ ├── 02-claude-desktop-integration.md
│ │ └── 03-authentication-and-api-keys.md
│ ├── how-to-guides
│ │ ├── 01-find-articles-and-cbioportal-data.md
│ │ ├── 02-find-trials-with-nci-and-biothings.md
│ │ ├── 03-get-comprehensive-variant-annotations.md
│ │ ├── 04-predict-variant-effects-with-alphagenome.md
│ │ ├── 05-logging-and-monitoring-with-bigquery.md
│ │ └── 06-search-nci-organizations-and-interventions.md
│ ├── index.md
│ ├── policies.md
│ ├── reference
│ │ ├── architecture-diagrams.md
│ │ ├── quick-architecture.md
│ │ ├── quick-reference.md
│ │ └── visual-architecture.md
│ ├── robots.txt
│ ├── stylesheets
│ │ ├── announcement.css
│ │ └── extra.css
│ ├── troubleshooting.md
│ ├── tutorials
│ │ ├── biothings-prompts.md
│ │ ├── claude-code-biomcp-alphagenome.md
│ │ ├── nci-prompts.md
│ │ ├── openfda-integration.md
│ │ ├── openfda-prompts.md
│ │ ├── pydantic-ai-integration.md
│ │ └── remote-connection.md
│ ├── user-guides
│ │ ├── 01-command-line-interface.md
│ │ ├── 02-mcp-tools-reference.md
│ │ └── 03-integrating-with-ides-and-clients.md
│ └── workflows
│ └── all-workflows.md
├── example_scripts
│ ├── mcp_integration.py
│ └── python_sdk.py
├── glama.json
├── LICENSE
├── lzyank.toml
├── Makefile
├── mkdocs.yml
├── package-lock.json
├── package.json
├── pyproject.toml
├── README.md
├── scripts
│ ├── check_docs_in_mkdocs.py
│ ├── check_http_imports.py
│ └── generate_endpoints_doc.py
├── smithery.yaml
├── src
│ └── biomcp
│ ├── __init__.py
│ ├── __main__.py
│ ├── articles
│ │ ├── __init__.py
│ │ ├── autocomplete.py
│ │ ├── fetch.py
│ │ ├── preprints.py
│ │ ├── search_optimized.py
│ │ ├── search.py
│ │ └── unified.py
│ ├── biomarkers
│ │ ├── __init__.py
│ │ └── search.py
│ ├── cbioportal_helper.py
│ ├── circuit_breaker.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── articles.py
│ │ ├── biomarkers.py
│ │ ├── diseases.py
│ │ ├── health.py
│ │ ├── interventions.py
│ │ ├── main.py
│ │ ├── openfda.py
│ │ ├── organizations.py
│ │ ├── server.py
│ │ ├── trials.py
│ │ └── variants.py
│ ├── connection_pool.py
│ ├── constants.py
│ ├── core.py
│ ├── diseases
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ └── search.py
│ ├── domain_handlers.py
│ ├── drugs
│ │ ├── __init__.py
│ │ └── getter.py
│ ├── exceptions.py
│ ├── genes
│ │ ├── __init__.py
│ │ └── getter.py
│ ├── http_client_simple.py
│ ├── http_client.py
│ ├── individual_tools.py
│ ├── integrations
│ │ ├── __init__.py
│ │ ├── biothings_client.py
│ │ └── cts_api.py
│ ├── interventions
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ └── search.py
│ ├── logging_filter.py
│ ├── metrics_handler.py
│ ├── metrics.py
│ ├── oncokb_helper.py
│ ├── openfda
│ │ ├── __init__.py
│ │ ├── adverse_events_helpers.py
│ │ ├── adverse_events.py
│ │ ├── cache.py
│ │ ├── constants.py
│ │ ├── device_events_helpers.py
│ │ ├── device_events.py
│ │ ├── drug_approvals.py
│ │ ├── drug_labels_helpers.py
│ │ ├── drug_labels.py
│ │ ├── drug_recalls_helpers.py
│ │ ├── drug_recalls.py
│ │ ├── drug_shortages_detail_helpers.py
│ │ ├── drug_shortages_helpers.py
│ │ ├── drug_shortages.py
│ │ ├── exceptions.py
│ │ ├── input_validation.py
│ │ ├── rate_limiter.py
│ │ ├── utils.py
│ │ └── validation.py
│ ├── organizations
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ └── search.py
│ ├── parameter_parser.py
│ ├── query_parser.py
│ ├── query_router.py
│ ├── rate_limiter.py
│ ├── render.py
│ ├── request_batcher.py
│ ├── resources
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ ├── instructions.md
│ │ └── researcher.md
│ ├── retry.py
│ ├── router_handlers.py
│ ├── router.py
│ ├── shared_context.py
│ ├── thinking
│ │ ├── __init__.py
│ │ ├── sequential.py
│ │ └── session.py
│ ├── thinking_tool.py
│ ├── thinking_tracker.py
│ ├── trials
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ ├── nci_getter.py
│ │ ├── nci_search.py
│ │ └── search.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── cancer_types_api.py
│ │ ├── cbio_http_adapter.py
│ │ ├── endpoint_registry.py
│ │ ├── gene_validator.py
│ │ ├── metrics.py
│ │ ├── mutation_filter.py
│ │ ├── query_utils.py
│ │ ├── rate_limiter.py
│ │ └── request_cache.py
│ ├── variants
│ │ ├── __init__.py
│ │ ├── alphagenome.py
│ │ ├── cancer_types.py
│ │ ├── cbio_external_client.py
│ │ ├── cbioportal_mutations.py
│ │ ├── cbioportal_search_helpers.py
│ │ ├── cbioportal_search.py
│ │ ├── constants.py
│ │ ├── external.py
│ │ ├── filters.py
│ │ ├── getter.py
│ │ ├── links.py
│ │ ├── oncokb_client.py
│ │ ├── oncokb_models.py
│ │ └── search.py
│ └── workers
│ ├── __init__.py
│ ├── worker_entry_stytch.js
│ ├── worker_entry.js
│ └── worker.py
├── tests
│ ├── bdd
│ │ ├── cli_help
│ │ │ ├── help.feature
│ │ │ └── test_help.py
│ │ ├── conftest.py
│ │ ├── features
│ │ │ └── alphagenome_integration.feature
│ │ ├── fetch_articles
│ │ │ ├── fetch.feature
│ │ │ └── test_fetch.py
│ │ ├── get_trials
│ │ │ ├── get.feature
│ │ │ └── test_get.py
│ │ ├── get_variants
│ │ │ ├── get.feature
│ │ │ └── test_get.py
│ │ ├── search_articles
│ │ │ ├── autocomplete.feature
│ │ │ ├── search.feature
│ │ │ ├── test_autocomplete.py
│ │ │ └── test_search.py
│ │ ├── search_trials
│ │ │ ├── search.feature
│ │ │ └── test_search.py
│ │ ├── search_variants
│ │ │ ├── search.feature
│ │ │ └── test_search.py
│ │ └── steps
│ │ └── test_alphagenome_steps.py
│ ├── config
│ │ └── test_smithery_config.py
│ ├── conftest.py
│ ├── data
│ │ ├── ct_gov
│ │ │ ├── clinical_trials_api_v2.yaml
│ │ │ ├── trials_NCT04280705.json
│ │ │ └── trials_NCT04280705.txt
│ │ ├── myvariant
│ │ │ ├── myvariant_api.yaml
│ │ │ ├── myvariant_field_descriptions.csv
│ │ │ ├── variants_full_braf_v600e.json
│ │ │ ├── variants_full_braf_v600e.txt
│ │ │ └── variants_part_braf_v600_multiple.json
│ │ ├── oncokb_mock_responses.json
│ │ ├── openfda
│ │ │ ├── drugsfda_detail.json
│ │ │ ├── drugsfda_search.json
│ │ │ ├── enforcement_detail.json
│ │ │ └── enforcement_search.json
│ │ └── pubtator
│ │ ├── pubtator_autocomplete.json
│ │ └── pubtator3_paper.txt
│ ├── integration
│ │ ├── test_oncokb_integration.py
│ │ ├── test_openfda_integration.py
│ │ ├── test_preprints_integration.py
│ │ ├── test_simple.py
│ │ └── test_variants_integration.py
│ ├── tdd
│ │ ├── articles
│ │ │ ├── test_autocomplete.py
│ │ │ ├── test_cbioportal_integration.py
│ │ │ ├── test_fetch.py
│ │ │ ├── test_preprints.py
│ │ │ ├── test_search.py
│ │ │ └── test_unified.py
│ │ ├── conftest.py
│ │ ├── drugs
│ │ │ ├── __init__.py
│ │ │ └── test_drug_getter.py
│ │ ├── openfda
│ │ │ ├── __init__.py
│ │ │ ├── test_adverse_events.py
│ │ │ ├── test_device_events.py
│ │ │ ├── test_drug_approvals.py
│ │ │ ├── test_drug_labels.py
│ │ │ ├── test_drug_recalls.py
│ │ │ ├── test_drug_shortages.py
│ │ │ └── test_security.py
│ │ ├── test_biothings_integration_real.py
│ │ ├── test_biothings_integration.py
│ │ ├── test_circuit_breaker.py
│ │ ├── test_concurrent_requests.py
│ │ ├── test_connection_pool.py
│ │ ├── test_domain_handlers.py
│ │ ├── test_drug_approvals.py
│ │ ├── test_drug_recalls.py
│ │ ├── test_drug_shortages.py
│ │ ├── test_endpoint_documentation.py
│ │ ├── test_error_scenarios.py
│ │ ├── test_europe_pmc_fetch.py
│ │ ├── test_mcp_integration.py
│ │ ├── test_mcp_tools.py
│ │ ├── test_metrics.py
│ │ ├── test_nci_integration.py
│ │ ├── test_nci_mcp_tools.py
│ │ ├── test_network_policies.py
│ │ ├── test_offline_mode.py
│ │ ├── test_openfda_unified.py
│ │ ├── test_pten_r173_search.py
│ │ ├── test_render.py
│ │ ├── test_request_batcher.py.disabled
│ │ ├── test_retry.py
│ │ ├── test_router.py
│ │ ├── test_shared_context.py.disabled
│ │ ├── test_unified_biothings.py
│ │ ├── thinking
│ │ │ ├── __init__.py
│ │ │ └── test_sequential.py
│ │ ├── trials
│ │ │ ├── test_backward_compatibility.py
│ │ │ ├── test_getter.py
│ │ │ └── test_search.py
│ │ ├── utils
│ │ │ ├── test_gene_validator.py
│ │ │ ├── test_mutation_filter.py
│ │ │ ├── test_rate_limiter.py
│ │ │ └── test_request_cache.py
│ │ ├── variants
│ │ │ ├── constants.py
│ │ │ ├── test_alphagenome_api_key.py
│ │ │ ├── test_alphagenome_comprehensive.py
│ │ │ ├── test_alphagenome.py
│ │ │ ├── test_cbioportal_mutations.py
│ │ │ ├── test_cbioportal_search.py
│ │ │ ├── test_external_integration.py
│ │ │ ├── test_external.py
│ │ │ ├── test_extract_gene_aa_change.py
│ │ │ ├── test_filters.py
│ │ │ ├── test_getter.py
│ │ │ ├── test_links.py
│ │ │ ├── test_oncokb_client.py
│ │ │ ├── test_oncokb_helper.py
│ │ │ └── test_search.py
│ │ └── workers
│ │ └── test_worker_sanitization.js
│ └── test_pydantic_ai_integration.py
├── THIRD_PARTY_ENDPOINTS.md
├── tox.ini
├── uv.lock
└── wrangler.toml
```
# Files
--------------------------------------------------------------------------------
/tests/tdd/test_pten_r173_search.py:
--------------------------------------------------------------------------------
```python
1 | """Test case demonstrating PTEN R173 search limitations."""
2 |
3 | import asyncio
4 | import json
5 |
6 | import pytest
7 |
8 | from biomcp.articles.search import PubmedRequest, search_articles
9 |
10 |
@pytest.mark.asyncio
async def test_pten_r173_search_limitations():
    """Demonstrate that current AND logic is too restrictive for finding PTEN R173 papers.

    Runs three searches of decreasing restrictiveness and prints the result
    counts so the narrowing effect of AND logic is visible in test output.
    """
    # Test 1: Current approach with multiple keywords (AND across all terms)
    request_restrictive = PubmedRequest(
        genes=["PTEN"], keywords=["R173", "Arg173"]
    )
    result_restrictive = await search_articles(
        request_restrictive, output_json=True
    )
    data_restrictive = json.loads(result_restrictive)

    # Test 2: Less restrictive approach - a single variant keyword
    request_less_restrictive = PubmedRequest(genes=["PTEN"], keywords=["R173"])
    result_less_restrictive = await search_articles(
        request_less_restrictive, output_json=True
    )
    data_less_restrictive = json.loads(result_less_restrictive)

    # Test 3: Alternative variant notation (HGVS-style protein change)
    request_notation = PubmedRequest(genes=["PTEN"], keywords=["p.R173C"])
    result_notation = await search_articles(request_notation, output_json=True)
    data_notation = json.loads(result_notation)

    print("\nPTEN R173 Search Results:")
    print(
        f"1. PTEN + R173 + Arg173 (AND logic): {len(data_restrictive)} articles"
    )
    print(f"2. PTEN + R173 only: {len(data_less_restrictive)} articles")
    print(f"3. PTEN + p.R173C: {len(data_notation)} articles")

    # Adding keywords to an AND query can only narrow it, never widen it
    assert len(data_restrictive) <= len(data_less_restrictive)

    # Show some example articles found
    if data_less_restrictive:
        print("\nExample articles found with 'PTEN + R173':")
        for i, article in enumerate(data_less_restrictive[:5]):
            # `or` fallback also covers a present-but-None title, which
            # would otherwise crash the [:80] slice below.
            title = article.get("title") or "No title"
            pmid = article.get("pmid", "N/A")
            year = article.get("pub_year", article.get("date", "N/A"))
            # str() guards against non-string years (e.g. an integer
            # pub_year), which would raise TypeError on slicing.
            print(
                f"{i + 1}. {title[:80]}... (PMID: {pmid}, Year: {str(year)[:4]})"
            )
54 |
55 |
@pytest.mark.asyncio
async def test_specific_pten_papers_not_found():
    """Test that specific PTEN R173 papers mentioned by user are not found."""
    # Papers mentioned by user that should be found
    expected_papers = [
        "Mester et al 2018 Human Mutation",
        "Mighell et al 2020 AJHG",
        "Smith et al 2016 Proteins",
        "Smith et al 2019 AJHG",
        "Smith et al 2023 JPCB",
    ]

    # Search for Smith IN papers on PTEN
    request = PubmedRequest(keywords=["Smith IN", "PTEN"])
    payload = json.loads(await search_articles(request, output_json=True))

    print(f"\nSmith IN + PTEN search found {len(payload)} articles")

    # Keep only articles whose title or abstract mentions the variant
    # in either notation.
    variant_terms = ("R173", "Arg173")
    r173_papers = [
        article
        for article in payload
        if any(
            term in article.get(text_field, "")
            for term in variant_terms
            for text_field in ("title", "abstract")
        )
    ]

    print(f"Papers mentioning R173/Arg173: {len(r173_papers)}")

    # The issue: R173 might only be in full text, not abstract
    assert len(r173_papers) < len(
        expected_papers
    ), "Not all expected R173 papers are found"
95 |
96 |
def test_and_logic_explanation():
    """Document why AND logic causes issues for variant searches."""
    doc = """
    Current search behavior:
    - Query: genes=['PTEN'], keywords=['R173', 'Arg173']
    - Translates to: "@GENE_PTEN AND R173 AND Arg173"
    - This requires ALL terms to be present

    Issues:
    1. Papers may use either "R173" OR "Arg173", not both
    2. Variant notations vary: "R173C", "p.R173C", "c.517C>T", etc.
    3. Specific mutation details may only be in full text, not abstract
    4. AND logic is too restrictive for synonym/variant searches

    Potential solutions:
    1. Implement OR logic within variant/keyword groups
    2. Add variant notation normalization
    3. Support multiple search strategies (AND vs OR)
    4. Consider full-text search capabilities
    """
    # Emit the explanation so it appears in captured test output.
    print(doc)
    # Documentation-only test: always passes.
    assert True
121 |
122 |
123 | if __name__ == "__main__":
124 | # Run the tests to demonstrate the issue
125 | asyncio.run(test_pten_r173_search_limitations())
126 | asyncio.run(test_specific_pten_papers_not_found())
127 | test_and_logic_explanation()
128 |
```
--------------------------------------------------------------------------------
/src/biomcp/interventions/getter.py:
--------------------------------------------------------------------------------
```python
1 | """Get specific intervention details via NCI CTS API."""
2 |
3 | import logging
4 | from typing import Any
5 |
6 | from ..constants import NCI_INTERVENTIONS_URL
7 | from ..integrations.cts_api import CTSAPIError, make_cts_request
8 |
9 | logger = logging.getLogger(__name__)
10 |
11 |
async def get_intervention(
    intervention_id: str,
    api_key: str | None = None,
) -> dict[str, Any]:
    """
    Fetch the details of a single intervention from the NCI CTS API.

    Args:
        intervention_id: Intervention ID
        api_key: Optional API key (if not provided, uses NCI_API_KEY env var)

    Returns:
        Dictionary with intervention details

    Raises:
        CTSAPIError: If the API request fails or intervention not found
    """
    try:
        response = await make_cts_request(
            url=f"{NCI_INTERVENTIONS_URL}/{intervention_id}",
            api_key=api_key,
        )

        # The API may wrap the payload under "data" or "intervention";
        # fall back to the raw response when neither key is present.
        for wrapper in ("data", "intervention"):
            if wrapper in response:
                return response[wrapper]
        return response

    except CTSAPIError:
        # Already the domain error type; propagate unchanged.
        raise
    except Exception as e:
        logger.error(f"Failed to get intervention {intervention_id}: {e}")
        raise CTSAPIError(f"Failed to retrieve intervention: {e!s}") from e
50 |
51 |
52 | def _format_intervention_header(intervention: dict[str, Any]) -> list[str]:
53 | """Format intervention header and basic info."""
54 | int_id = intervention.get(
55 | "id", intervention.get("intervention_id", "Unknown")
56 | )
57 | name = intervention.get("name", "Unknown Intervention")
58 | int_type = intervention.get(
59 | "type", intervention.get("category", "Unknown")
60 | )
61 |
62 | return [
63 | f"## Intervention: {name}",
64 | "",
65 | "### Basic Information",
66 | f"- **ID**: {int_id}",
67 | f"- **Type**: {int_type}",
68 | ]
69 |
70 |
71 | def _format_intervention_synonyms(synonyms: Any) -> list[str]:
72 | """Format intervention synonyms section."""
73 | if not synonyms:
74 | return []
75 |
76 | lines = ["", "### Synonyms"]
77 | if isinstance(synonyms, list):
78 | for syn in synonyms:
79 | lines.append(f"- {syn}")
80 | else:
81 | lines.append(f"- {synonyms}")
82 |
83 | return lines
84 |
85 |
86 | def _format_intervention_regulatory(intervention: dict[str, Any]) -> list[str]:
87 | """Format regulatory information section."""
88 | if not intervention.get("fda_approved"):
89 | return []
90 |
91 | lines = [
92 | "",
93 | "### Regulatory Status",
94 | f"- **FDA Approved**: {'Yes' if intervention['fda_approved'] else 'No'}",
95 | ]
96 |
97 | if intervention.get("approval_date"):
98 | lines.append(f"- **Approval Date**: {intervention['approval_date']}")
99 |
100 | return lines
101 |
102 |
103 | def _format_intervention_indications(indications: Any) -> list[str]:
104 | """Format clinical indications section."""
105 | if not indications:
106 | return []
107 |
108 | lines = ["", "### Clinical Indications"]
109 | if isinstance(indications, list):
110 | for indication in indications:
111 | lines.append(f"- {indication}")
112 | else:
113 | lines.append(f"- {indications}")
114 |
115 | return lines
116 |
117 |
def format_intervention_details(intervention: dict[str, Any]) -> str:
    """
    Format intervention details as markdown.

    Args:
        intervention: Intervention data dictionary

    Returns:
        Formatted markdown string
    """
    # Assemble the document as a list of sections, then flatten once.
    sections: list[list[str]] = [_format_intervention_header(intervention)]

    sections.append(
        _format_intervention_synonyms(intervention.get("synonyms", []))
    )

    description = intervention.get("description")
    if description:
        sections.append(["", "### Description", description])

    # Mechanism of action is typically present for drug interventions.
    mechanism = intervention.get("mechanism_of_action")
    if mechanism:
        sections.append(["", "### Mechanism of Action", mechanism])

    sections.append(_format_intervention_regulatory(intervention))

    sections.append(
        _format_intervention_indications(intervention.get("indications"))
    )

    trial_count = intervention.get("trial_count")
    if trial_count:
        sections.append([
            "",
            "### Clinical Trial Activity",
            f"- **Number of Trials**: {trial_count}",
        ])

    return "\n".join(line for section in sections for line in section)
168 |
```
--------------------------------------------------------------------------------
/src/biomcp/thinking/session.py:
--------------------------------------------------------------------------------
```python
1 | """Session management for sequential thinking."""
2 |
3 | import uuid
4 | from collections import defaultdict
5 | from dataclasses import dataclass, field
6 | from datetime import datetime
7 | from typing import Any
8 |
9 |
@dataclass
class ThoughtEntry:
    """Represents a single thought in the thinking process.

    Entries are stored in a ThinkingSession and ordered across the main
    history and branches by their `timestamp`.
    """

    thought: str  # free-text content of this thinking step
    thought_number: int  # position of this thought in its sequence
    total_thoughts: int  # caller's current estimate of total steps
    next_thought_needed: bool  # False signals the thinking process is done
    # Local wall-clock creation time; used to order thoughts globally.
    timestamp: datetime = field(default_factory=datetime.now)
    is_revision: bool = False  # True when this entry replaces an earlier one
    revises_thought: int | None = None  # thought_number being revised, if any
    branch_from_thought: int | None = None  # origin thought of a branch
    branch_id: str | None = None  # non-None places this entry on a branch
    metadata: dict[str, Any] = field(default_factory=dict)  # extra context
24 |
25 |
@dataclass
class ThinkingSession:
    """Manages state for a thinking session.

    Main-line thoughts live in `thought_history`; branched thoughts are
    grouped by branch id in `thought_branches`.
    """

    session_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    created_at: datetime = field(default_factory=datetime.now)
    thought_history: list[ThoughtEntry] = field(default_factory=list)
    thought_branches: dict[str, list[ThoughtEntry]] = field(
        default_factory=lambda: defaultdict(list)
    )
    metadata: dict[str, Any] = field(default_factory=dict)

    def add_thought(self, entry: ThoughtEntry) -> None:
        """Add a thought to the session.

        A revision replaces the matching entry in the main history in
        place; any other entry is appended to its branch (when
        `branch_id` is set) or to the main history.
        """
        # Explicit None check so a revision of thought number 0 is not
        # silently treated as a plain append (0 is falsy).
        if entry.is_revision and entry.revises_thought is not None:
            for i, thought in enumerate(self.thought_history):
                if thought.thought_number == entry.revises_thought:
                    self.thought_history[i] = entry
                    return
            # NOTE(review): revisions only scan the main history; a
            # revision targeting a branch-only thought falls through and
            # is appended below — confirm this is intended.

        # Add to appropriate collection
        if entry.branch_id:
            self.thought_branches[entry.branch_id].append(entry)
        else:
            self.thought_history.append(entry)

    def get_thought(self, thought_number: int) -> ThoughtEntry | None:
        """Return the main-history thought with this number, or None."""
        for thought in self.thought_history:
            if thought.thought_number == thought_number:
                return thought
        return None

    def get_branch_thoughts(self, branch_id: str) -> list[ThoughtEntry]:
        """Get all thoughts in a specific branch (empty list if unknown)."""
        return self.thought_branches.get(branch_id, [])

    def get_all_thoughts(self) -> list[ThoughtEntry]:
        """Get all thoughts across main history and branches, by timestamp."""
        all_thoughts = list(self.thought_history)
        for branch_thoughts in self.thought_branches.values():
            all_thoughts.extend(branch_thoughts)
        return sorted(all_thoughts, key=lambda t: t.timestamp)
70 |
71 |
class SessionManager:
    """Manages multiple thinking sessions."""

    def __init__(self):
        # Maps session_id -> ThinkingSession for every live session.
        self.sessions: dict[str, ThinkingSession] = {}
        # Id of the most recently created/selected session, if any.
        self._current_session_id: str | None = None

    def create_session(self) -> ThinkingSession:
        """Create, register, and select a brand-new thinking session."""
        session = ThinkingSession()
        self.sessions[session.session_id] = session
        self._current_session_id = session.session_id
        return session

    def get_session(
        self, session_id: str | None = None
    ) -> ThinkingSession | None:
        """Look up a session by id, falling back to the current session."""
        key = session_id or self._current_session_id
        return self.sessions.get(key) if key else None

    def get_or_create_session(
        self, session_id: str | None = None
    ) -> ThinkingSession:
        """Return the named or current session, creating one when needed."""
        if session_id and session_id in self.sessions:
            self._current_session_id = session_id
            return self.sessions[session_id]
        # Fall back to the current session; create a fresh one if absent.
        return self.get_session() or self.create_session()

    def clear_session(self, session_id: str | None = None) -> None:
        """Drop one session: the named one, or else the current one."""
        target = session_id or self._current_session_id
        if target is None:
            return
        self.sessions.pop(target, None)
        if self._current_session_id == target:
            self._current_session_id = None

    def clear_all_sessions(self) -> None:
        """Forget every session and reset the current-session pointer."""
        self.sessions.clear()
        self._current_session_id = None
123 |
124 |
# Global session manager instance; module-private singleton so importers
# of this module share one set of thinking sessions.
_session_manager = SessionManager()
127 |
```
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
```yaml
name: CI

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main]
  workflow_dispatch:

env:
  PYTHON_VERSION: "3.12"
  UV_VERSION: "0.4.29"

jobs:
  # Quality check from main.yml - uses make check
  quality:
    runs-on: ubuntu-latest
    name: Quality
    steps:
      - name: Check out
        uses: actions/checkout@v6

      # Cache pre-commit's hook environments, keyed on the hook config so
      # the cache invalidates whenever .pre-commit-config.yaml changes.
      - uses: actions/cache@v5
        with:
          path: ~/.cache/pre-commit
          key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }}

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          version: ${{ env.UV_VERSION }}

      - name: Install dependencies
        run: |
          uv sync --group dev

      - name: Run checks
        run: make check

  # Tests and type check specifically on Python 3.11
  # (pinned instead of env.PYTHON_VERSION — presumably the minimum
  # supported version; confirm against pyproject.toml)
  tests-and-type-check:
    runs-on: ubuntu-latest
    name: Tests and Type Check (Python 3.11)
    steps:
      - name: Check out
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.11"

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          version: ${{ env.UV_VERSION }}

      - name: Install dependencies
        run: |
          uv sync --group dev

      # Integration tests are excluded here; they run in their own
      # (optional) job below.
      - name: Run tests
        run: uv run python -m pytest tests -m "not integration" --cov --cov-config=pyproject.toml --cov-report=xml

      - name: Check typing
        run: uv run mypy

      - name: Upload coverage reports to Codecov with GitHub Action on Python 3.11
        uses: codecov/codecov-action@v5

  # Documentation check from main.yml
  check-docs:
    runs-on: ubuntu-latest
    name: Check Docs
    steps:
      - name: Check out
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          version: ${{ env.UV_VERSION }}

      - name: Install dependencies
        run: |
          uv sync --group dev

      # -s (strict) turns mkdocs warnings into build failures.
      - name: Check if documentation can be built
        run: uv run mkdocs build -s

  # Build package check
  build-package:
    runs-on: ubuntu-latest
    name: Build Package
    steps:
      - uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          version: ${{ env.UV_VERSION }}

      - name: Build package
        run: |
          uvx --from build pyproject-build --installer uv

      # twine check validates the built sdist/wheel metadata.
      - name: Check package
        run: |
          uvx twine check dist/*

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          name: dist
          path: dist/

  # MCP integration test - quick check
  test-mcp:
    runs-on: ubuntu-latest
    name: Test MCP Integration
    steps:
      - uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          version: ${{ env.UV_VERSION }}

      - name: Install dependencies
        run: |
          uv sync --group dev

      # The server is expected to run indefinitely; `timeout` kills it
      # after 10s and exits 124. Treat 124 (and 0) as success, and
      # propagate any other exit code as a real startup failure.
      - name: Test MCP server startup
        run: |
          timeout 10s uv run biomcp run || code=$?; if [[ $code -ne 124 && $code -ne 0 ]]; then exit $code; fi

      - name: Run MCP integration tests
        run: |
          uv run python -m pytest tests/tdd/test_mcp_integration.py -v

  # Run integration tests separately - allowed to fail
  integration-tests:
    runs-on: ubuntu-latest
    name: Integration Tests (Optional)
    continue-on-error: true
    steps:
      - name: Check out
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.11"

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          version: ${{ env.UV_VERSION }}

      - name: Install dependencies
        run: |
          uv sync --group dev

      - name: Run integration tests
        run: |
          uv run python -m pytest tests -m "integration" -v --tb=short
        # Step-level continue-on-error duplicates the job-level setting
        # above; redundant but harmless.
        continue-on-error: true
188 |
```
--------------------------------------------------------------------------------
/docs/backend-services-reference/03-cbioportal.md:
--------------------------------------------------------------------------------
```markdown
1 | # cBioPortal Integration
2 |
3 | BioMCP integrates with [cBioPortal](https://www.cbioportal.org/), a comprehensive cancer genomics portal that provides visualization and analysis tools for large-scale cancer genomics datasets.
4 |
5 | ## Overview
6 |
7 | The cBioPortal integration enhances article searches by automatically including relevant cancer genomics data when searching for genes. This integration provides:
8 |
9 | 1. **Gene-level summaries** - Mutation frequency and distribution across cancer studies
10 | 2. **Mutation-specific searches** - Find studies containing specific mutations (e.g., BRAF V600E)
11 | 3. **Cancer type resolution** - Accurate cancer type categorization using cBioPortal's API
12 |
13 | ## How It Works
14 |
15 | ### Automatic Integration
16 |
17 | When you search for articles with a gene parameter, BioMCP automatically queries cBioPortal to provide additional context:
18 |
19 | ```python
20 | # Basic gene search includes cBioPortal summary
21 | search(domain="article", genes=["BRAF"], diseases=["melanoma"])
22 | ```
23 |
24 | This returns:
25 |
26 | - Standard PubMed/PubTator3 article results
27 | - cBioPortal summary showing mutation frequency across cancer studies
28 | - Top cancer types where the gene is mutated
29 |
30 | ### Mutation-Specific Searches
31 |
32 | To search for specific mutations, include the mutation notation in keywords:
33 |
34 | ```python
35 | # Search for BRAF V600E mutation
36 | search(domain="article", genes=["BRAF"], keywords=["V600E"])
37 |
38 | # Search for SRSF2 F57Y mutation
39 | search(domain="article", genes=["SRSF2"], keywords=["F57Y"])
40 |
41 | # Use wildcards for mutation patterns (e.g., any amino acid at position 57)
42 | search(domain="article", genes=["SRSF2"], keywords=["F57*"])
43 | ```
44 |
45 | Mutation-specific searches return:
46 |
47 | - Total number of studies in cBioPortal
48 | - Number of studies containing the mutation
49 | - Top studies ranked by mutation count
50 | - Cancer type distribution
51 |
52 | ## Example Output
53 |
54 | ### Gene-Level Summary
55 |
56 | ```
57 | ### cBioPortal Summary for BRAF
58 | - **Mutation Frequency**: 76.7% (368 mutations in 480 samples)
59 | - **Top Cancer Types**: Melanoma (45%), Thyroid (23%), Colorectal (18%)
60 | - **Top Mutations**: V600E (89%), V600K (7%), G469A (2%)
61 | ```
62 |
63 | ### Mutation-Specific Results
64 |
65 | ```
66 | ### cBioPortal Mutation Search: BRAF
67 | **Specific Mutation**: V600E
68 | - **Total Studies**: 2340
69 | - **Studies with Mutation**: 170
70 | - **Total Mutations Found**: 5780
71 |
72 | **Top Studies by Mutation Count:**
73 | | Count | Study ID | Cancer Type | Study Name |
74 | |-------|----------|-------------|------------|
75 | | 804 | msk_met_2021 | Mixed Cancer Types | MSK MetTropism (MSK, Cell 2021) |
76 | | 555 | msk_chord_2024 | Mixed Cancer Types | MSK-CHORD (MSK, Nature 2024) |
77 | | 295 | msk_impact_2017 | Mixed Cancer Types | MSK-IMPACT Clinical Sequencing Cohort |
78 | ```
79 |
80 | ## Supported Mutation Notations
81 |
82 | The integration recognizes standard protein change notation:
83 |
84 | - **Specific mutations**: `V600E`, `F57Y`, `T790M`
85 | - **Wildcard patterns**: `F57*` (matches F57Y, F57L, etc.)
86 | - **Multiple mutations**: Include multiple keywords for OR search
87 |
88 | ## API Details
89 |
90 | ### Endpoints Used
91 |
92 | 1. **Gene Information**: `/api/genes/{gene}`
93 | 2. **Cancer Types**: `/api/cancer-types`
94 | 3. **Mutation Data**: `/api/mutations/fetch`
95 | 4. **Study Information**: `/api/studies`
96 |
97 | ### Rate Limiting
98 |
99 | - Conservative rate limit of 5 requests/second
100 | - Results cached for 15-30 minutes (mutations) or 24 hours (cancer types)
101 |
102 | ### Authentication
103 |
104 | Optional authentication via environment variable:
105 |
106 | ```bash
107 | export CBIO_TOKEN="your-api-token"
108 | ```
109 |
110 | Public cBioPortal instance works without authentication but may have rate limits.
111 |
112 | ## CLI Usage
113 |
114 | For detailed command-line options for searching articles with cBioPortal integration, see the [CLI User Guide](../user-guides/01-command-line-interface.md#article-commands).
115 |
116 | ## Performance Considerations
117 |
118 | 1. **Caching**: Results are cached to minimize API calls
119 |
120 | - Gene summaries: 15 minutes
121 | - Mutation searches: 30 minutes
122 | - Cancer types: 24 hours
123 |
124 | 2. **Graceful Degradation**: If cBioPortal is unavailable, searches continue without the additional data
125 |
126 | 3. **Parallel Processing**: API calls are made in parallel with article searches for optimal performance
127 |
128 | ## Limitations
129 |
130 | 1. Only works with valid HUGO gene symbols
131 | 2. Mutation searches require exact protein change notation
132 | 3. Limited to mutations in cBioPortal's curated studies
133 | 4. Rate limits may apply for high-volume usage
134 |
135 | ## Error Handling
136 |
137 | The integration handles various error scenarios:
138 |
139 | - Invalid gene symbols are validated before API calls
140 | - Network timeouts fall back to article-only results
141 | - API errors are logged but don't block search results
142 |
```
--------------------------------------------------------------------------------
/src/biomcp/utils/cancer_types_api.py:
--------------------------------------------------------------------------------
```python
1 | """Cancer type utilities using cBioPortal API."""
2 |
3 | import logging
4 |
5 | from ..utils.cbio_http_adapter import CBioHTTPAdapter
6 | from ..utils.request_cache import request_cache
7 |
8 | logger = logging.getLogger(__name__)
9 |
10 |
class CancerTypeAPIClient:
    """Client for fetching cancer types from cBioPortal API."""

    def __init__(self):
        """Initialize the cancer type API client."""
        self.http_adapter = CBioHTTPAdapter()
        # Lazily-populated mapping of cancer type IDs to display names.
        self._cancer_types_cache: dict[str, str] | None = None

    @request_cache(ttl=86400)  # Cache for 24 hours
    async def get_all_cancer_types(self) -> dict[str, str]:
        """Fetch all cancer types from cBioPortal API.

        Returns:
            Dictionary mapping cancer type IDs to display names
        """
        cached = self._cancer_types_cache
        if cached is not None:
            return cached

        try:
            cancer_types, error = await self.http_adapter.get(
                "/cancer-types",
                endpoint_key="cbioportal_cancer_types",
                cache_ttl=86400,  # 24 hours
            )

            if error:
                logger.error(f"Failed to fetch cancer types: {error.message}")
                return {}

            if not cancer_types:
                return {}

            # Build the ID -> display-name mapping.
            mapping: dict[str, str] = {}
            for entry in cancer_types:
                type_id = entry.get("cancerTypeId", "")
                display_name = entry.get("name", "")
                if not (type_id and display_name):
                    continue
                mapping[type_id.lower()] = display_name

                # Also index common abbreviations (shortName) when distinct.
                abbrev = entry.get("shortName", "")
                if abbrev and abbrev != type_id:
                    mapping[abbrev.lower()] = display_name

            self._cancer_types_cache = mapping
            logger.info(f"Loaded {len(mapping)} cancer types from API")
            return mapping

        except Exception as e:
            logger.error(f"Error fetching cancer types: {e}")
            return {}

    async def get_cancer_type_name(self, cancer_type_id: str) -> str:
        """Get the display name for a cancer type ID.

        Args:
            cancer_type_id: The cancer type identifier

        Returns:
            Display name or the original ID if not found
        """
        if not cancer_type_id:
            return "Unknown"

        known_types = await self.get_all_cancer_types()

        # Case-insensitive exact lookup.
        lookup_key = cancer_type_id.lower()
        if lookup_key in known_types:
            return known_types[lookup_key]

        # Unknown ID: title-case all-lowercase IDs, otherwise pass through.
        if cancer_type_id == lookup_key:
            return cancer_type_id.title()
        return cancer_type_id

    @request_cache(ttl=3600)  # Cache for 1 hour
    async def get_study_cancer_type(self, study_id: str) -> str:
        """Get cancer type for a specific study.

        Args:
            study_id: The study identifier

        Returns:
            Cancer type name or "Unknown"
        """
        try:
            study_data, error = await self.http_adapter.get(
                f"/studies/{study_id}",
                endpoint_key="cbioportal_studies",
                cache_ttl=3600,  # 1 hour
            )

            if error or not study_data:
                logger.debug(f"Study {study_id} not found")
                return "Unknown"

            cancer_type_info = study_data.get("cancerType", {})
            type_id = cancer_type_info.get("cancerTypeId", "")
            if type_id and type_id != "unknown":
                return await self.get_cancer_type_name(type_id)

            # Fall back to the embedded display name, if present.
            fallback_name = cancer_type_info.get("name", "")
            return fallback_name if fallback_name else "Unknown"

        except Exception as e:
            logger.debug(f"Error fetching study {study_id}: {e}")
            return "Unknown"
128 |
129 |
# Global instance for reuse
_cancer_type_client: CancerTypeAPIClient | None = None


def get_cancer_type_client() -> CancerTypeAPIClient:
    """Return the process-wide CancerTypeAPIClient, creating it on first use."""
    global _cancer_type_client
    client = _cancer_type_client
    if client is None:
        client = CancerTypeAPIClient()
        _cancer_type_client = client
    return client
140 |
```
--------------------------------------------------------------------------------
/tests/tdd/utils/test_mutation_filter.py:
--------------------------------------------------------------------------------
```python
1 | """Tests for mutation filter utility."""
2 |
3 | from biomcp.utils.mutation_filter import MutationFilter
4 |
5 |
class MockMutation:
    """Minimal stand-in for a mutation record used by these tests.

    Only the ``protein_change`` attribute is consulted by MutationFilter.
    """

    def __init__(self, protein_change: str):
        # e.g. "V600E" -- HGVS-style protein change notation
        self.protein_change = protein_change
11 |
12 |
class TestMutationFilter:
    """Exercise MutationFilter matching, list filtering, and repr output."""

    def test_specific_mutation_filter(self):
        """An exact-mutation filter matches only that exact notation."""
        mf = MutationFilter(specific_mutation="V600E")

        assert mf.matches("V600E")
        assert not mf.matches("V600K")
        assert not mf.matches("V600")
        assert not mf.matches("")

    def test_wildcard_pattern_filter(self):
        """A trailing-* pattern matches any suffix at that position."""
        mf = MutationFilter(pattern="V600*")

        for hit in ("V600E", "V600K", "V600D"):
            assert mf.matches(hit)
        for miss in ("V601E", "K600E"):
            assert not mf.matches(miss)

    def test_pattern_without_wildcard(self):
        """A pattern with no * behaves as an exact match, not a prefix."""
        mf = MutationFilter(pattern="F57")

        # Exact match works
        assert mf.matches("F57")
        # No prefix matching without wildcard
        for miss in ("F57Y", "F57L", "F58Y"):
            assert not mf.matches(miss)

    def test_no_filter(self):
        """With no criteria configured, every non-empty change matches."""
        mf = MutationFilter()

        assert mf.matches("V600E")
        assert mf.matches("anything")
        # Empty protein change returns False even with no filter
        assert not mf.matches("")

    def test_filter_mutations_list(self):
        """filter_mutations keeps only entries whose protein_change matches."""
        pool = [
            MockMutation("V600E"),
            MockMutation("V600K"),
            MockMutation("V600D"),
            MockMutation("T790M"),
            MockMutation("L858R"),
        ]

        # Exact mutation keeps a single entry
        exact = MutationFilter(specific_mutation="V600E").filter_mutations(
            pool
        )
        assert len(exact) == 1
        assert exact[0].protein_change == "V600E"

        # Wildcard keeps the whole V600 family
        family = MutationFilter(pattern="V600*").filter_mutations(pool)
        assert len(family) == 3
        assert all(m.protein_change.startswith("V600") for m in family)

        # No criteria keeps everything
        everything = MutationFilter().filter_mutations(pool)
        assert len(everything) == 5

    def test_string_representations(self):
        """str() and repr() reflect the configured criteria."""
        exact = MutationFilter(specific_mutation="V600E")
        assert str(exact) == "MutationFilter(specific=V600E)"
        assert (
            repr(exact)
            == "MutationFilter(specific_mutation='V600E', pattern=None)"
        )

        wildcard = MutationFilter(pattern="V600*")
        assert str(wildcard) == "MutationFilter(pattern=V600*)"

        empty = MutationFilter()
        assert str(empty) == "MutationFilter(no_filter)"

    def test_edge_cases(self):
        """Empty/None inputs and regex metacharacters must not crash."""
        mf = MutationFilter(specific_mutation="V600E")
        assert not mf.matches("")
        assert not mf.matches(None)

        # Pattern metacharacters are escaped by the filter, so character
        # classes do not act as regex -- but they must not raise either.
        bracketed = MutationFilter(pattern="[VL]600*")
        assert not bracketed.matches("V600E")  # Because [ is escaped

    def test_filter_mutations_preserves_type(self):
        """filter_mutations always returns a plain list."""
        pool = [
            MockMutation("V600E"),
            MockMutation("V600K"),
        ]

        kept = MutationFilter(pattern="V600*").filter_mutations(pool)

        assert isinstance(kept, list)
        assert len(kept) == 2
124 |
```
--------------------------------------------------------------------------------
/src/biomcp/integrations/cts_api.py:
--------------------------------------------------------------------------------
```python
1 | """NCI Clinical Trials Search API integration helper."""
2 |
3 | import json
4 | import logging
5 | import os
6 | from typing import Any, Literal
7 |
8 | from ..constants import NCI_API_KEY_ENV
9 | from ..http_client import request_api
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 |
class CTSAPIError(Exception):
    """Raised when a request to the NCI CTS API fails."""
18 |
19 |
def _validate_api_key(api_key: str | None) -> str:
    """Resolve the API key, falling back to the environment variable.

    Args:
        api_key: Explicit key, or None/empty to read the environment.

    Returns:
        A non-empty API key.

    Raises:
        CTSAPIError: If no key is supplied and none is set in the environment.
    """
    key = api_key or os.getenv(NCI_API_KEY_ENV)
    if not key:
        raise CTSAPIError(
            f"NCI API key required. Please set {NCI_API_KEY_ENV} environment "
            "variable or provide api_key parameter.\n"
            "Get a free API key at: https://clinicaltrialsapi.cancer.gov"
        )
    return key
33 |
34 |
def _prepare_request_data(
    method: str,
    params: dict[str, Any] | None,
    json_data: dict[str, Any] | None,
    headers: dict[str, str],
) -> dict[str, Any]:
    """Build the payload for a CTS API call.

    Args:
        method: HTTP method ("GET" uses ``params``; anything else ``json_data``).
        params: Query parameters for GET requests.
        json_data: JSON body data for POST requests.
        headers: HTTP headers; serialized into the ``_headers`` key.

    Returns:
        A new dict containing the request data plus serialized headers.

    Note:
        The inputs are shallow-copied so the ``_headers`` injection below can
        never mutate the caller's ``params``/``json_data`` dict (the previous
        implementation wrote the key directly into the caller's object).
    """
    if method == "GET":
        # Copy so "_headers" does not leak into the caller's dict.
        request_data = dict(params) if params else {}
        logger.debug(f"CTS API GET request with params: {params}")
    else:
        request_data = dict(json_data) if json_data else {}
        if method == "POST":
            logger.debug(f"CTS API POST request with data: {json_data}")

    # Headers are tunneled through the request dict as a JSON string.
    if headers:
        request_data["_headers"] = json.dumps(headers)

    return request_data
55 |
56 |
def _handle_api_error(error: Any) -> None:
    """Translate an HTTP-level error into a CTSAPIError with a helpful message.

    Args:
        error: Error object exposing ``code`` and ``message`` attributes.

    Raises:
        CTSAPIError: Always; the message depends on the status code.
    """
    if error.code == 401:
        message = (
            f"Invalid API key. Please check your {NCI_API_KEY_ENV} "
            "environment variable or api_key parameter."
        )
    elif error.code == 403:
        message = (
            "Access forbidden. Your API key may not have permission "
            "to access this resource."
        )
    else:
        message = f"CTS API error: {error.message}"
    raise CTSAPIError(message)
71 |
72 |
async def make_cts_request(
    url: str,
    method: Literal["GET", "POST"] = "GET",
    params: dict[str, Any] | None = None,
    json_data: dict[str, Any] | None = None,
    api_key: str | None = None,
) -> dict[str, Any]:
    """
    Make a request to the NCI CTS API with proper authentication.

    Args:
        url: Full URL to the CTS API endpoint
        method: HTTP method (GET or POST)
        params: Query parameters
        json_data: JSON data for POST requests
        api_key: Optional API key (if not provided, uses NCI_API_KEY env var)

    Returns:
        JSON response from the API

    Raises:
        CTSAPIError: If the request fails or API key is missing
    """
    # Resolve the key up front so a missing key fails fast and unwrapped.
    resolved_key = _validate_api_key(api_key)

    # Authentication plus JSON content negotiation.
    headers = {"x-api-key": resolved_key, "Accept": "application/json"}

    try:
        request_data = _prepare_request_data(
            method, params, json_data, headers
        )

        # cache_ttl=0 disables caching so NCI results are always fresh.
        response, error = await request_api(
            url=url,
            request=request_data,
            method=method,
            cache_ttl=0,
        )

        if error:
            _handle_api_error(error)

        if response is None:
            raise CTSAPIError("No response received from NCI CTS API")

        return response

    except CTSAPIError:
        # Already a domain error -- propagate untouched.
        raise
    except Exception as e:
        # Wrap transport-level failures in the domain error type.
        logger.error(f"CTS API request failed: {e}")
        raise CTSAPIError(f"Failed to connect to NCI CTS API: {e!s}") from e
133 |
134 |
def get_api_key_instructions() -> str:
    """
    Get user-friendly instructions for obtaining and setting the API key.

    Returns:
        Formatted string with instructions
    """
    # Markdown-formatted so CLI/MCP clients can render it nicely.
    instructions = (
        "## NCI Clinical Trials API Key Required\n\n"
        "To use NCI's Clinical Trials Search API, you need an API key.\n\n"
        "**Option 1: Set environment variable (recommended)**\n"
        "```bash\n"
        f"export {NCI_API_KEY_ENV}='your-api-key'\n"
        "```\n\n"
        "**Option 2: Provide via CLI**\n"
        "```bash\n"
        "biomcp trial search --api-key YOUR_KEY --condition melanoma\n"
        "```\n\n"
        "**Get your free API key:**\n"
        "Visit https://clinicaltrialsapi.cancer.gov\n\n"
        "The API key provides access to NCI's comprehensive cancer clinical trials "
        "database with advanced search capabilities."
    )
    return instructions
158 |
```
--------------------------------------------------------------------------------
/tests/tdd/variants/test_alphagenome_api_key.py:
--------------------------------------------------------------------------------
```python
1 | """Test AlphaGenome per-request API key functionality."""
2 |
3 | import os
4 | from unittest.mock import MagicMock, patch
5 |
6 | import pandas as pd
7 | import pytest
8 |
9 | from biomcp.variants.alphagenome import predict_variant_effects
10 |
11 |
@pytest.mark.asyncio
async def test_api_key_parameter_overrides_env_var():
    """Test that api_key parameter takes precedence over environment variable."""
    # Set up environment variable (should be ignored when a param key is given)
    with patch.dict("os.environ", {"ALPHAGENOME_API_KEY": "env-key"}):
        # Mock AlphaGenome modules
        mock_genome = MagicMock()
        mock_client = MagicMock()
        mock_scorers = MagicMock()

        # Mock successful prediction
        test_scores_df = pd.DataFrame({
            "output_type": ["RNA_SEQ"],
            "raw_score": [1.5],
            "gene_name": ["BRAF"],
            "track_name": [None],
        })

        # Track which API key was used
        api_keys_used = []

        def track_create(api_key):
            # Record the key passed to dna_client.create so we can assert on it
            api_keys_used.append(api_key)
            mock_model = MagicMock()
            mock_model.score_variant.return_value = test_scores_df
            return mock_model

        mock_client.create.side_effect = track_create

        mock_scorers.tidy_scores.return_value = test_scores_df
        mock_scorers.get_recommended_scorers.return_value = []

        # Create a mock module with the correct attributes
        mock_models = MagicMock()
        mock_models.dna_client = mock_client
        mock_models.variant_scorers = mock_scorers

        mock_data = MagicMock()
        mock_data.genome = mock_genome

        # Patching sys.modules makes the lazy `from alphagenome...` imports
        # inside predict_variant_effects resolve to these mocks -- presumably
        # the import happens at call time; verify against the implementation.
        with patch.dict(
            "sys.modules",
            {
                "alphagenome.data": mock_data,
                "alphagenome.models": mock_models,
            },
        ):
            # Test with parameter API key
            result = await predict_variant_effects(
                "chr7", 140753336, "A", "T", api_key="param-key"
            )

            # Verify the parameter key was used, not the env var
            assert len(api_keys_used) == 1
            assert api_keys_used[0] == "param-key"
            assert "BRAF" in result
68 |
69 |
@pytest.mark.asyncio
async def test_no_api_key_shows_instructions():
    """Test that missing API key shows helpful instructions."""
    # Clear the environment entirely so no key can be picked up.
    with patch.dict("os.environ", {}, clear=True):
        # Remove ALPHAGENOME_API_KEY if it exists
        os.environ.pop("ALPHAGENOME_API_KEY", None)

        output = await predict_variant_effects(
            "chr7", 140753336, "A", "T", skip_cache=True
        )

        # The response should guide the user through key setup.
        expected_fragments = (
            "AlphaGenome API key required",
            "My AlphaGenome API key is",
            "ACTION REQUIRED",
            "https://deepmind.google.com/science/alphagenome",
        )
        for fragment in expected_fragments:
            assert fragment in output
87 |
88 |
@pytest.mark.asyncio
async def test_env_var_used_when_no_parameter():
    """Test that environment variable is used when no parameter is provided."""
    # Set up environment variable (the only key source in this test)
    with patch.dict("os.environ", {"ALPHAGENOME_API_KEY": "env-key"}):
        # Mock AlphaGenome modules
        mock_genome = MagicMock()
        mock_client = MagicMock()
        mock_scorers = MagicMock()

        # Mock successful prediction
        test_scores_df = pd.DataFrame({
            "output_type": ["RNA_SEQ"],
            "raw_score": [1.5],
            "gene_name": ["BRAF"],
            "track_name": [None],
        })

        # Track which API key was used
        api_keys_used = []

        def track_create(api_key):
            # Record the key passed to dna_client.create so we can assert on it
            api_keys_used.append(api_key)
            mock_model = MagicMock()
            mock_model.score_variant.return_value = test_scores_df
            return mock_model

        mock_client.create.side_effect = track_create

        mock_scorers.tidy_scores.return_value = test_scores_df
        mock_scorers.get_recommended_scorers.return_value = []

        # Create a mock module with the correct attributes
        mock_models = MagicMock()
        mock_models.dna_client = mock_client
        mock_models.variant_scorers = mock_scorers

        mock_data = MagicMock()
        mock_data.genome = mock_genome

        # Patching sys.modules makes the lazy `from alphagenome...` imports
        # inside predict_variant_effects resolve to these mocks -- presumably
        # the import happens at call time; verify against the implementation.
        with patch.dict(
            "sys.modules",
            {
                "alphagenome.data": mock_data,
                "alphagenome.models": mock_models,
            },
        ):
            # Test without parameter API key
            result = await predict_variant_effects("chr7", 140753336, "A", "T")

            # Verify the env var key was used
            assert len(api_keys_used) == 1
            assert api_keys_used[0] == "env-key"
            assert "BRAF" in result
143 |
```
--------------------------------------------------------------------------------
/src/biomcp/request_batcher.py:
--------------------------------------------------------------------------------
```python
1 | """Request batching utility for combining multiple small requests.
2 |
3 | This module provides a request batcher that accumulates multiple requests
4 | and processes them together in batches, reducing the number of API calls
5 | and improving performance for bulk operations.
6 |
7 | Key Features:
8 | - Automatic batching based on size or time threshold
9 | - Configurable batch size and timeout
10 | - Thread-safe request accumulation
11 | - Error propagation to individual requests
12 |
13 | Example:
14 | ```python
15 | async def batch_api_call(params_list):
16 | # Make a single API call with multiple parameters
17 | return await api.bulk_request(params_list)
18 |
19 | batcher = RequestBatcher(
20 | batch_func=batch_api_call,
21 | batch_size=10,
22 | batch_timeout=0.1
23 | )
24 |
25 | # Individual requests are automatically batched
26 | result1 = await batcher.request({"id": 1})
27 | result2 = await batcher.request({"id": 2})
28 | ```
29 | """
30 |
31 | import asyncio
32 | from collections.abc import Callable, Coroutine
33 | from typing import Any, TypeVar
34 |
35 | T = TypeVar("T")
36 |
37 |
class RequestBatcher:
    """Batches multiple requests together to reduce overhead.

    This is particularly useful for APIs that support batch operations
    or when network latency dominates over processing time.

    The batcher accumulates requests until either:
    1. The batch size threshold is reached
    2. The batch timeout expires

    At which point all accumulated requests are processed together.
    """

    def __init__(
        self,
        batch_func: Callable[[list[Any]], Coroutine[Any, Any, list[Any]]],
        batch_size: int = 10,
        batch_timeout: float = 0.05,  # 50ms
    ):
        """Initialize the batcher.

        Args:
            batch_func: Async function that processes a batch of requests
            batch_size: Maximum number of requests to batch together
            batch_timeout: Maximum time to wait for batch to fill (seconds)
        """
        self.batch_func = batch_func
        self.batch_size = batch_size
        self.batch_timeout = batch_timeout
        # Pairs of (request params, future resolved with that request's result).
        self.pending_requests: list[tuple[Any, asyncio.Future]] = []
        # Timer task that flushes a partially-filled batch after the timeout.
        self.batch_task: asyncio.Task | None = None
        # Guards pending_requests and batch_task.
        self._lock = asyncio.Lock()

    async def request(self, params: Any) -> Any:
        """Add a request to the batch and wait for its result.

        Returns:
            The element of ``batch_func``'s result list corresponding to
            this request.

        Raises:
            Exception: Whatever ``batch_func`` raised, or a generic Exception
                if ``batch_func`` returned fewer results than requests.
        """
        future: asyncio.Future[Any] = (
            asyncio.get_running_loop().create_future()
        )

        async with self._lock:
            self.pending_requests.append((params, future))

            if len(self.pending_requests) >= self.batch_size:
                # Batch is full: process it immediately.
                await self._flush_batch()
            elif not self.batch_task or self.batch_task.done():
                # First request of a new batch: arm the flush timer.
                self.batch_task = asyncio.create_task(self._batch_timer())

        return await future

    async def _batch_timer(self):
        """Flush whatever has accumulated once the timeout elapses."""
        await asyncio.sleep(self.batch_timeout)
        async with self._lock:
            await self._flush_batch()

    async def _flush_batch(self):
        """Process all pending requests as a batch.

        Must be called while holding ``self._lock``.
        """
        if not self.pending_requests:
            return

        # Extract the current batch so new requests start a fresh one.
        batch = self.pending_requests.copy()
        self.pending_requests.clear()

        # Cancel the pending flush timer -- but never the *current* task.
        # BUG FIX: when this flush runs inside the timer task itself,
        # cancelling that task would inject a CancelledError at the
        # ``await self.batch_func(...)`` below (CancelledError is not an
        # Exception on Python 3.8+), leaving every future unresolved and
        # all callers of request() hanging forever.
        timer = self.batch_task
        if (
            timer is not None
            and not timer.done()
            and timer is not asyncio.current_task()
        ):
            timer.cancel()

        try:
            params_list = [params for params, _ in batch]
            results = await self.batch_func(params_list)

            # Distribute results positionally to the waiting futures.
            for i, (_, future) in enumerate(batch):
                if not future.done():
                    if i < len(results):
                        future.set_result(results[i])
                    else:
                        future.set_exception(
                            Exception(f"No result for request at index {i}")
                        )
        except Exception as e:
            # Propagate the failure to every caller in the batch.
            for _, future in batch:
                if not future.done():
                    future.set_exception(e)
125 |
126 |
127 | # Example usage for autocomplete batching
async def batch_autocomplete_requests(requests: list[dict]) -> list[Any]:
    """Fan out multiple autocomplete requests concurrently.

    Example implementation showing how autocomplete lookups could be
    batched more efficiently; each dict becomes one EntityRequest.
    """
    from .articles.autocomplete import EntityRequest, autocomplete

    coros = [autocomplete(EntityRequest(**req)) for req in requests]
    return await asyncio.gather(*coros)
142 |
```
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
```toml
1 | [project]
2 | name = "biomcp-python"
3 | version = "0.4.6"
4 | description = "Biomedical Model Context Protocol Server"
5 | authors = [{ name = "Ian Maurer", email = "[email protected]" }]
6 | readme = "README.md"
7 | keywords = ['python']
8 | requires-python = ">=3.10,<4.0"
9 | classifiers = [
10 | "Intended Audience :: Developers",
11 | "Programming Language :: Python",
12 | "Programming Language :: Python :: 3",
13 | "Programming Language :: Python :: 3.10",
14 | "Programming Language :: Python :: 3.11",
15 | "Programming Language :: Python :: 3.12",
16 | "Programming Language :: Python :: 3.13",
17 | "Topic :: Software Development :: Libraries :: Python Modules",
18 | ]
19 | dependencies = [
20 | "certifi>=2025.1.31",
21 | "diskcache>=5.6.3",
22 | "httpx>=0.28.1",
23 | "mcp[cli]>=1.12.3,<2.0.0",
24 | "platformdirs>=4.3.6",
25 | "psutil>=7.0.0",
26 | "pydantic>=2.10.6",
27 | "python-dotenv>=1.0.0",
28 | "rich>=14.0.0",
29 | "typer>=0.15.2",
30 | "uvicorn>=0.34.2",
31 | "alphagenome>=0.1.0",
32 | ]
33 |
34 | [project.urls]
35 | Homepage = "https://genomoncology.com/biomcp/"
36 | Repository = "https://github.com/genomoncology/biomcp"
37 | Documentation = "https://genomoncology.com/biomcp/"
38 |
39 | [dependency-groups]
40 | dev = [
41 | "pytest>=7.2.0",
42 | "pytest-xdist>=3.5.0",
43 | "pre-commit>=2.20.0",
44 | "tox-uv>=1.11.3",
45 | "deptry>=0.22.0",
46 | "mypy>=0.991",
47 | "pytest-cov>=4.0.0",
48 | "pytest-asyncio>=0.24.0",
49 | "ruff>=0.9.2",
50 | "mkdocs>=1.4.2",
51 | "mkdocs-material>=8.5.10",
52 | "mkdocstrings[python]>=0.26.1",
53 | "anyio>=4.8.0",
54 | # "ipython>=9.0.2",
55 | "pytest-bdd>=8.1.0",
56 | "tomlkit>=0.13.2",
57 | "assertpy>=1.1",
58 | "twine>=4.0.0",
59 | "pandas>=2.0.0", # Used for mocking AlphaGenome responses in tests
60 | "PyYAML>=6.0.0", # Used for mkdocs.yml parsing in scripts
61 | "pydantic-ai>=0.0.14", # For testing Pydantic AI integration
62 | ]
63 |
64 | [project.optional-dependencies]
65 | api = [
66 | ]
67 |
68 | worker = [
69 | "fastapi>=0.110.0",
70 | "starlette>=0.36.0",
71 | "uvicorn>=0.28.0",
72 | ]
73 |
74 | [build-system]
75 | requires = ["setuptools >= 61.0"]
76 | build-backend = "setuptools.build_meta"
77 |
78 | [tool.setuptools.package-data]
79 | biomcp = ["resources/*.md"]
80 |
81 | [project.scripts]
82 | biomcp = "biomcp.__main__:main"
83 |
84 | [tool.mypy]
85 | files = ["src"]
86 | ignore_missing_imports = true
87 | disallow_untyped_defs = false
88 | disallow_any_unimported = false
89 | no_implicit_optional = true
90 | check_untyped_defs = false
91 | warn_return_any = false
92 | warn_unused_ignores = true
93 | show_error_codes = true
94 | plugins = [
95 | "pydantic.mypy"
96 | ]
97 | disable_error_code = [
98 | "union-attr",
99 | "prop-decorator",
100 | ]
101 |
102 | [tool.pytest.ini_options]
103 | testpaths = ["tests"]
104 | addopts = "--import-mode=importlib"
105 | asyncio_mode = "auto"
106 | asyncio_default_fixture_loop_scope = "function"
107 | markers = [
108 | "integration: marks tests as integration tests (deselect with '-m \"not integration\"')",
109 | ]
110 | filterwarnings = [
111 | # Ignore protobuf version warnings from AlphaGenome
112 | "ignore:Protobuf gencode version.*is exactly one major version older.*:UserWarning",
113 | # Ignore false positive warning from pytest-xdist about coroutines
114 | # This occurs during parallel test execution when mock objects are cleaned up
115 | "ignore:coroutine 'search_trials_unified' was never awaited:RuntimeWarning",
116 | ]
117 |
118 | [tool.ruff]
119 | target-version = "py310"
120 | line-length = 79
121 | fix = true
122 | unsafe-fixes = true
123 |
124 | [tool.ruff.lint]
125 | select = [
126 | # flake8-2020
127 | "YTT",
128 | # flake8-bandit
129 | "S",
130 | # flake8-bugbear
131 | "B",
132 | # flake8-builtins
133 | "A",
134 | # flake8-comprehensions
135 | "C4",
136 | # flake8-debugger
137 | "T10",
138 | # flake8-simplify
139 | "SIM",
140 | # isort
141 | "I",
142 | # mccabe
143 | "C90",
144 | # pycodestyle
145 | "E", "W",
146 | # pyflakes
147 | "F",
148 | # pygrep-hooks
149 | "PGH",
150 | # pyupgrade
151 | "UP",
152 | # ruff
153 | "RUF",
154 | ]
155 | ignore = [
156 | # LineTooLong
157 | "E501",
158 | # DoNotAssignLambda
159 | "E731",
160 | # Consider unpacking
161 | "RUF005",
162 | # Union for type annotations
163 | "UP007",
164 | # Asserts are ok when I say they are ok.
165 | "S101",
166 | ]
167 |
168 | [tool.ruff.lint.per-file-ignores]
169 | "tests/*" = ["S101"]
170 | "__init__.py" = ["I001"]
171 | "src/biomcp/variants/external.py" = ["C901"] # Complex API interactions are acceptable
172 |
173 | [tool.ruff.format]
174 | preview = true
175 |
176 | [tool.ruff.lint.flake8-bugbear]
177 | extend-immutable-calls = [
178 | "fastapi.Depends",
179 | "fastapi.Query",
180 | "typer.Argument",
181 | "typer.Option",
182 | ]
183 |
184 | [tool.coverage.report]
185 | skip_empty = true
186 |
187 | [tool.coverage.run]
188 | branch = true
189 | source = ["src"]
190 | omit = [
191 | "src/*/__main__.py",
192 | "src/*/server.py",
193 | "src/*/http_client.py",
194 | ]
195 |
196 | [tool.deptry]
197 | exclude = [
198 | "example_scripts/python_sdk.py",
199 | "venv",
200 | ".venv",
201 | ".direnv",
202 | "tests",
203 | ".git",
204 | "build",
205 | "dist",
206 | "scripts",
207 | "spike",
208 | ]
209 |
210 | [tool.deptry.per_rule_ignores]
211 | DEP001 = ["alphagenome"] # Optional dependency, must be installed manually
212 | DEP002 = ["uvicorn"]
213 | DEP003 = ["biomcp", "alphagenome"]
214 |
```
--------------------------------------------------------------------------------
/tests/integration/test_preprints_integration.py:
--------------------------------------------------------------------------------
```python
1 | """Integration tests for preprint search functionality."""
2 |
3 | import asyncio
4 |
5 | import pytest
6 |
7 | from biomcp.articles.preprints import (
8 | BiorxivClient,
9 | EuropePMCClient,
10 | PreprintSearcher,
11 | )
12 | from biomcp.articles.search import PubmedRequest
13 | from biomcp.core import PublicationState
14 |
15 |
class TestBiorxivIntegration:
    """Live integration tests against the bioRxiv API."""

    @pytest.mark.asyncio
    async def test_biorxiv_real_search(self):
        """Hit the real bioRxiv API and sanity-check the first result."""
        client = BiorxivClient()

        # Probe a list of broad terms until one yields results.
        candidate_terms = ["cancer", "gene", "cell", "protein", "RNA", "DNA"]
        results = []
        successful_term = None

        for candidate in candidate_terms:
            results = await client.search(candidate)
            if results:
                successful_term = candidate
                break

        # Nothing matched: treat this as an availability problem, not a failure.
        if not results:
            pytest.skip(
                "No results found with any search term - API may be down or have no matching recent articles"
            )

        # Spot-check the structure of the first hit.
        top = results[0]
        assert top.doi is not None
        assert top.title is not None
        assert top.publication_state == PublicationState.PREPRINT
        assert "preprint" in top.journal.lower()

        print(
            f"Found {len(results)} bioRxiv results for term '{successful_term}'"
        )
        print(f"First result: {top.title}")
52 |
53 |
class TestEuropePMCIntegration:
    """Live integration tests against the Europe PMC API."""

    @pytest.mark.asyncio
    async def test_europe_pmc_real_search(self):
        """Hit the real Europe PMC API and sanity-check preprint results."""
        client = EuropePMCClient()

        # Probe broad terms until one returns preprints.
        candidate_terms = [
            "cancer",
            "gene",
            "cell",
            "protein",
            "SARS-CoV-2",
            "COVID",
        ]
        results = []
        successful_term = None

        for candidate in candidate_terms:
            results = await client.search(candidate)
            if results:
                successful_term = candidate
                break

        # No term matched: assume a service outage rather than a regression.
        if not results:
            pytest.skip(
                "No results found with any search term - Europe PMC API may be down"
            )

        # Spot-check the structure of the first hit.
        top = results[0]
        assert top.title is not None
        assert top.publication_state == PublicationState.PREPRINT

        print(
            f"Found {len(results)} Europe PMC preprint results for term '{successful_term}'"
        )
        print(f"First result: {top.title}")
        if top.doi:
            print(f"DOI: {top.doi}")
97 |
98 |
class TestPreprintSearcherIntegration:
    """Integration tests for combined preprint search."""

    @pytest.mark.asyncio
    async def test_combined_search_real(self):
        """Search across both preprint sources with several fallback queries."""
        searcher = PreprintSearcher()

        # Query variations, tried in order until one returns results.
        candidate_configs = [
            {"genes": ["TP53"], "diseases": ["cancer"]},
            {"keywords": ["protein", "structure"]},
            {"genes": ["BRAF"], "diseases": ["melanoma"]},
            {"keywords": ["gene", "expression"]},
        ]

        response = None
        successful_config = None

        for candidate in candidate_configs:
            response = await searcher.search(PubmedRequest(**candidate))
            if response.count > 0:
                successful_config = candidate
                break

        print(f"Total results: {response.count if response else 0}")

        # Without any hits there is nothing to assert on; skip instead of failing.
        if not (response and response.count > 0):
            pytest.skip(
                "No results found with any search configuration - APIs may be down"
            )

        # Spot-check the structure of the first hit.
        first = response.results[0]
        assert first.title is not None
        assert first.publication_state == PublicationState.PREPRINT

        print(f"Successful search config: {successful_config}")
        print(f"First result: {first.title}")
        print(f"Date: {first.date}")
        print(f"Journal: {first.journal}")
142 |
143 |
if __name__ == "__main__":
    # Allow running the integration tests directly, outside pytest.
    separator = "\n" + "=" * 50 + "\n"
    asyncio.run(TestBiorxivIntegration().test_biorxiv_real_search())
    print(separator)
    asyncio.run(TestEuropePMCIntegration().test_europe_pmc_real_search())
    print(separator)
    asyncio.run(TestPreprintSearcherIntegration().test_combined_search_real())
151 |
```
--------------------------------------------------------------------------------
/docs/developer-guides/05-error-handling.md:
--------------------------------------------------------------------------------
```markdown
1 | # Error Handling Guide
2 |
3 | ## Overview
4 |
5 | BioMCP uses a consistent error handling pattern across all HTTP operations. This guide explains the error types, when they occur, and how to handle them.
6 |
7 | ## Error Structure
8 |
9 | All HTTP operations return a tuple: `(data, error)`; exactly one of the two is always `None` — `error` is `None` on success, `data` is `None` on failure.
10 |
11 | ```python
12 | data, error = await http_client.request_api(...)
13 | if error:
14 | # Handle error
15 | logger.error(f"Request failed: {error.code} - {error.message}")
16 | else:
17 | # Process data
18 | process_result(data)
19 | ```
20 |
21 | ## Error Types
22 |
23 | ### Network Errors
24 |
25 | - **When**: Connection timeout, DNS resolution failure, network unreachable
26 | - **Error Code**: Various HTTP client exceptions
27 | - **Handling**: Retry with exponential backoff or fail gracefully
28 |
29 | ### HTTP Status Errors
30 |
31 | - **When**: Server returns 4xx or 5xx status codes
32 | - **Error Codes**:
33 | - `400-499`: Client errors (bad request, unauthorized, not found)
34 | - `500-599`: Server errors (internal error, service unavailable)
35 | - **Handling**:
36 | - 4xx: Fix request parameters or authentication
37 | - 5xx: Retry with backoff or use cached data
38 |
39 | ### Circuit Breaker Errors
40 |
41 | - **When**: Too many consecutive failures to a domain
42 | - **Error**: Circuit breaker opens to prevent cascading failures
43 | - **Handling**: Wait for recovery timeout or use alternative data source
44 |
45 | ### Offline Mode Errors
46 |
47 | - **When**: `BIOMCP_OFFLINE=true` and no cached data available
48 | - **Error**: Request blocked in offline mode
49 | - **Handling**: Use cached data only or inform user about offline status
50 |
51 | ### Parse Errors
52 |
53 | - **When**: Response is not valid JSON or doesn't match expected schema
54 | - **Error**: JSON decode error or validation error
55 | - **Handling**: Log error and treat as service issue
56 |
57 | ## Best Practices
58 |
59 | ### 1. Always Check Errors
60 |
61 | ```python
62 | # ❌ Bad - ignoring error
63 | data, _ = await http_client.request_api(...)
64 | process(data) # data might be None!
65 |
66 | # ✅ Good - checking error
67 | data, error = await http_client.request_api(...)
68 | if error:
69 | logger.warning(f"Failed to fetch data: {error}")
70 | return None
71 | process(data)
72 | ```
73 |
74 | ### 2. Provide Context in Error Messages
75 |
76 | ```python
77 | # ❌ Bad - generic error
78 | if error:
79 | logger.error("Request failed")
80 |
81 | # ✅ Good - contextual error
82 | if error:
83 | logger.error(f"Failed to fetch gene {gene_id} from cBioPortal: {error.message}")
84 | ```
85 |
86 | ### 3. Graceful Degradation
87 |
88 | ```python
89 | async def get_variant_with_fallback(variant_id: str):
90 | # Try primary source
91 | data, error = await primary_source.get_variant(variant_id)
92 | if not error:
93 | return data
94 |
95 | logger.warning(f"Primary source failed: {error}, trying secondary")
96 |
97 | # Try secondary source
98 | data, error = await secondary_source.get_variant(variant_id)
99 | if not error:
100 | return data
101 |
102 | # Use cached data as last resort
103 | return get_cached_variant(variant_id)
104 | ```
105 |
106 | ### 4. User-Friendly Error Messages
107 |
108 | ```python
109 | def format_error_for_user(error: RequestError) -> str:
110 | if error.code >= 500:
111 | return "The service is temporarily unavailable. Please try again later."
112 | elif error.code == 404:
113 | return "The requested data was not found."
114 | elif error.code == 401:
115 | return "Authentication required. Please check your credentials."
116 | elif "OFFLINE" in str(error):
117 | return "You are in offline mode. Only cached data is available."
118 | else:
119 | return "An error occurred while fetching data. Please try again."
120 | ```
121 |
122 | ## Testing Error Conditions
123 |
124 | ### 1. Simulate Network Errors
125 |
126 | ```python
127 | with patch("biomcp.http_client.call_http") as mock:
128 | mock.side_effect = Exception("Network error")
129 | data, error = await client.fetch_data()
130 | assert error is not None
131 | assert data is None
132 | ```
133 |
134 | ### 2. Test Circuit Breaker
135 |
136 | ```python
137 | # Simulate multiple failures
138 | for _ in range(5):
139 | with patch("biomcp.http_client.call_http") as mock:
140 | mock.return_value = (500, "Server Error")
141 | await client.fetch_data()
142 |
143 | # Circuit should be open
144 | data, error = await client.fetch_data()
145 | assert error is not None
146 | assert "circuit" in error.message.lower()
147 | ```
148 |
149 | ### 3. Test Offline Mode
150 |
151 | ```python
152 | with patch.dict(os.environ, {"BIOMCP_OFFLINE": "true"}):
153 | data, error = await client.fetch_data()
154 | # Should only return cached data or error
155 | ```
156 |
157 | ## Common Patterns
158 |
159 | ### Retry with Backoff
160 |
161 | The centralized HTTP client automatically retries with exponential backoff for:
162 |
163 | - Network errors
164 | - 5xx server errors
165 | - Rate limit errors (429)
166 |
167 | ### Caching
168 |
169 | Failed requests don't overwrite cached data, ensuring availability during outages.
170 |
171 | ### Rate Limiting
172 |
173 | Requests are automatically rate-limited per domain to prevent overwhelming services.
174 |
175 | ## Debugging
176 |
177 | Enable debug logging to see all HTTP requests and errors:
178 |
179 | ```python
180 | import logging
181 | logging.getLogger("biomcp.http_client").setLevel(logging.DEBUG)
182 | ```
183 |
184 | This will show:
185 |
186 | - All HTTP requests with URLs and methods
187 | - Response status codes and times
188 | - Error details and retry attempts
189 | - Circuit breaker state changes
190 |
```
--------------------------------------------------------------------------------
/src/biomcp/openfda/cache.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Simple in-memory caching for OpenFDA API responses.
3 |
4 | This module provides a time-based cache to reduce API calls and improve performance.
5 | Cache entries expire after a configurable TTL (time-to-live).
6 | """
7 |
8 | import hashlib
9 | import json
10 | import logging
11 | import os
12 | from datetime import datetime, timedelta
13 | from typing import Any
14 |
15 | logger = logging.getLogger(__name__)
16 |
# Cache configuration
# A TTL <= 0 disables caching entirely (see is_cacheable_request below).
CACHE_TTL_MINUTES = int(os.environ.get("BIOMCP_FDA_CACHE_TTL", "15"))
# Maximum number of entries held at once; the oldest are evicted FIFO
# by set_cached_response when the limit is reached.
MAX_CACHE_SIZE = int(os.environ.get("BIOMCP_FDA_MAX_CACHE_SIZE", "100"))
# Largest single response (bytes, measured on the JSON-serialized form)
# that set_cached_response will store.
MAX_RESPONSE_SIZE = int(
    os.environ.get("BIOMCP_FDA_MAX_RESPONSE_SIZE", str(1024 * 1024))
)  # 1MB default

# Global cache dictionary
# Maps cache key (SHA-256 hex digest) -> (response payload, insertion time).
_cache: dict[str, tuple[Any, datetime]] = {}
26 |
27 |
28 | def _generate_cache_key(endpoint: str, params: dict[str, Any]) -> str:
29 | """
30 | Generate a unique cache key for an API request.
31 |
32 | Args:
33 | endpoint: The API endpoint URL
34 | params: Query parameters
35 |
36 | Returns:
37 | A unique hash key for the request
38 | """
39 | # Remove sensitive parameters before hashing
40 | safe_params = {
41 | k: v
42 | for k, v in params.items()
43 | if k.lower() not in ["api_key", "apikey", "key", "token", "secret"]
44 | }
45 |
46 | # Sort params for consistent hashing
47 | sorted_params = json.dumps(safe_params, sort_keys=True)
48 | combined = f"{endpoint}:{sorted_params}"
49 |
50 | # Use SHA256 for cache key
51 | return hashlib.sha256(combined.encode()).hexdigest()
52 |
53 |
def get_cached_response(
    endpoint: str, params: dict[str, Any]
) -> dict[str, Any] | None:
    """
    Look up a previously cached response.

    Args:
        endpoint: The API endpoint URL
        params: Query parameters

    Returns:
        Cached response data, or None when absent or expired
    """
    cache_key = _generate_cache_key(endpoint, params)

    entry = _cache.get(cache_key)
    if entry is None:
        return None

    data, stored_at = entry
    age = datetime.now() - stored_at

    if age >= timedelta(minutes=CACHE_TTL_MINUTES):
        # Expired: evict eagerly so the dict does not accumulate dead entries.
        del _cache[cache_key]
        logger.debug(f"Cache expired for {endpoint}")
        return None

    logger.debug(
        f"Cache hit for {endpoint} (age: {age.total_seconds():.1f}s)"
    )
    return data
85 |
86 |
def set_cached_response(
    endpoint: str, params: dict[str, Any], response: dict[str, Any]
) -> None:
    """
    Store a response in the cache.

    Oversized responses are refused, and when the cache is full the oldest
    entries (by insertion timestamp) are evicted FIFO before inserting.

    Args:
        endpoint: The API endpoint URL
        params: Query parameters
        response: Response data to cache
    """
    # Note: `json` is already imported at module level; the previous local
    # `import json` here shadowed it redundantly and has been removed.
    # `sys` is only needed for the fallback path, so import it locally.
    import sys

    # Estimate the payload size via JSON serialization; fall back to a
    # shallow sys.getsizeof() for objects json cannot serialize.
    try:
        response_size = len(json.dumps(response).encode("utf-8"))
    except (TypeError, ValueError):
        response_size = sys.getsizeof(response)

    if response_size > MAX_RESPONSE_SIZE:
        logger.warning(
            f"Response too large to cache: {response_size} bytes > {MAX_RESPONSE_SIZE} bytes"
        )
        return

    # Enforce the entry-count limit before inserting (simple FIFO eviction).
    if len(_cache) >= MAX_CACHE_SIZE:
        oldest_keys = sorted(_cache.keys(), key=lambda k: _cache[k][1])[
            : len(_cache) - MAX_CACHE_SIZE + 1
        ]

        for key in oldest_keys:
            del _cache[key]

        logger.debug(
            f"Cache size limit reached, removed {len(oldest_keys)} entries"
        )

    cache_key = _generate_cache_key(endpoint, params)
    _cache[cache_key] = (response, datetime.now())

    logger.debug(f"Cached response for {endpoint} (cache size: {len(_cache)})")
134 |
135 |
def clear_cache() -> None:
    """Drop every cached response and log how many were evicted."""
    global _cache
    evicted = len(_cache)
    _cache = {}
    logger.info(f"Cleared FDA cache ({evicted} entries)")
142 |
143 |
def get_cache_stats() -> dict[str, Any]:
    """
    Get cache statistics.

    Returns:
        Dictionary with cache statistics
    """
    now = datetime.now()
    ttl_seconds = CACHE_TTL_MINUTES * 60

    # Ages (seconds) of only those entries still within the TTL window.
    fresh_ages = [
        age
        for _data, stored_at in _cache.values()
        if (age := (now - stored_at).total_seconds()) < ttl_seconds
    ]

    valid_count = len(fresh_ages)
    avg_age = sum(fresh_ages) / valid_count if valid_count > 0 else 0

    return {
        "total_entries": len(_cache),
        "valid_entries": valid_count,
        "expired_entries": len(_cache) - valid_count,
        "average_age_seconds": avg_age,
        "ttl_minutes": CACHE_TTL_MINUTES,
        "max_size": MAX_CACHE_SIZE,
    }
171 |
172 |
def is_cacheable_request(endpoint: str, params: dict[str, Any]) -> bool:
    """
    Determine if a request should be cached.

    Args:
        endpoint: The API endpoint URL
        params: Query parameters

    Returns:
        True if the request should be cached
    """
    # Caching must be enabled (positive TTL) and the request must not be a
    # bulk fetch (limit above 100 records).
    return CACHE_TTL_MINUTES > 0 and params.get("limit", 0) <= 100
190 |
```
--------------------------------------------------------------------------------
/tests/tdd/drugs/test_drug_getter.py:
--------------------------------------------------------------------------------
```python
1 | """Unit tests for drug information retrieval."""
2 |
3 | import json
4 |
5 | import pytest
6 |
7 | from biomcp.drugs.getter import get_drug
8 |
9 |
class TestDrugGetter:
    """Test drug information retrieval."""

    @pytest.fixture
    def mock_drug_response(self):
        """Mock drug response from MyChem.info."""
        # Shaped like a MyChem.info document: top-level identifiers plus
        # nested per-source sections (drugbank, chembl, pubchem, chebi).
        return {
            "_id": "CHEMBL941",
            "name": "Imatinib",
            "drugbank": {
                "id": "DB00619",
                "name": "Imatinib",
                "description": "Imatinib is a tyrosine kinase inhibitor...",
                "indication": "Treatment of chronic myeloid leukemia...",
                "mechanism_of_action": "Inhibits BCR-ABL tyrosine kinase...",
                "products": {"name": ["Gleevec", "Glivec"]},
            },
            "chembl": {
                "molecule_chembl_id": "CHEMBL941",
                "pref_name": "IMATINIB",
            },
            "pubchem": {"cid": 5291},
            "chebi": {"id": "CHEBI:45783", "name": "imatinib"},
            "inchikey": "KTUFNOKKBVMGRW-UHFFFAOYSA-N",
            "formula": "C29H31N7O",
        }

    @pytest.mark.asyncio
    async def test_get_drug_by_name(self, monkeypatch, mock_drug_response):
        """Test getting drug by name."""
        # Mock the API call
        # A name lookup makes two API calls in order: a query that resolves
        # the name to an ID, then a fetch of the full record.
        call_count = 0
        responses = [
            # Query response
            ({"hits": [{"_id": "CHEMBL941"}]}, None),
            # Get response
            (mock_drug_response, None),
        ]

        async def mock_request_api(url, request, method, domain):
            # Serve the canned responses sequentially, one per call.
            nonlocal call_count
            result = responses[call_count]
            call_count += 1
            return result

        monkeypatch.setattr("biomcp.http_client.request_api", mock_request_api)

        result = await get_drug("imatinib")

        # Key fields from the mock document should appear in the markdown.
        assert "## Drug: Imatinib" in result
        assert "DrugBank ID**: DB00619" in result
        assert "ChEMBL ID**: CHEMBL941" in result
        assert "Formula**: C29H31N7O" in result
        assert "Trade Names**: Gleevec, Glivec" in result
        assert "External Links" in result
        assert "DrugBank](https://www.drugbank.ca/drugs/DB00619)" in result

    @pytest.mark.asyncio
    async def test_get_drug_by_id(self, monkeypatch, mock_drug_response):
        """Test getting drug by DrugBank ID."""

        # Mock the API call
        # An ID lookup fetches the record directly (single API call).
        async def mock_request_api(url, request, method, domain):
            return (mock_drug_response, None)

        monkeypatch.setattr("biomcp.http_client.request_api", mock_request_api)

        result = await get_drug("DB00619")

        assert "## Drug: Imatinib" in result
        assert "DrugBank ID**: DB00619" in result

    @pytest.mark.asyncio
    async def test_get_drug_json_output(self, monkeypatch, mock_drug_response):
        """Test getting drug with JSON output."""

        # Mock the API call
        async def mock_request_api(url, request, method, domain):
            return (mock_drug_response, None)

        monkeypatch.setattr("biomcp.http_client.request_api", mock_request_api)

        result = await get_drug("DB00619", output_json=True)
        data = json.loads(result)

        # JSON mode returns structured fields plus an external-links map.
        assert data["drug_id"] == "CHEMBL941"
        assert data["name"] == "Imatinib"
        assert data["drugbank_id"] == "DB00619"
        assert (
            data["_links"]["DrugBank"]
            == "https://www.drugbank.ca/drugs/DB00619"
        )

    @pytest.mark.asyncio
    async def test_drug_not_found(self, monkeypatch):
        """Test drug not found."""

        # Mock the API call
        # An empty hits list means the name could not be resolved.
        async def mock_request_api(url, request, method, domain):
            return ({"hits": []}, None)

        monkeypatch.setattr("biomcp.http_client.request_api", mock_request_api)

        result = await get_drug("INVALID_DRUG_XYZ")

        assert "Drug 'INVALID_DRUG_XYZ' not found" in result

    @pytest.mark.asyncio
    async def test_drug_with_description_truncation(self, monkeypatch):
        """Test drug with long description gets truncated."""
        # 600 chars exceeds the 500-char display cap in the formatter.
        long_desc = "A" * 600
        mock_response = {
            "_id": "TEST001",
            "name": "TestDrug",
            "drugbank": {"id": "DB99999", "description": long_desc},
        }

        async def mock_request_api(url, request, method, domain):
            return (mock_response, None)

        monkeypatch.setattr("biomcp.http_client.request_api", mock_request_api)

        result = await get_drug("DB99999")

        assert "Description" in result
        assert "A" * 500 in result
        assert "..." in result  # Truncation indicator

    @pytest.mark.asyncio
    async def test_drug_error_handling(self, monkeypatch):
        """Test error handling."""

        # Mock the API call to raise an exception
        async def mock_request_api(url, request, method, domain):
            raise Exception("API error")

        monkeypatch.setattr("biomcp.http_client.request_api", mock_request_api)

        result = await get_drug("imatinib")

        # When an exception occurs, it's caught and the drug is reported as not found
        assert "Drug 'imatinib' not found in MyChem.info" in result
152 |
```
--------------------------------------------------------------------------------
/src/biomcp/drugs/getter.py:
--------------------------------------------------------------------------------
```python
1 | """Drug information retrieval from MyChem.info."""
2 |
3 | import json
4 | import logging
5 |
6 | from ..integrations import BioThingsClient
7 |
8 | logger = logging.getLogger(__name__)
9 |
10 |
11 | def _add_drug_links(drug_info, result: dict) -> None:
12 | """Add external database links for the drug."""
13 | links = {}
14 |
15 | if drug_info.drugbank_id:
16 | links["DrugBank"] = (
17 | f"https://www.drugbank.ca/drugs/{drug_info.drugbank_id}"
18 | )
19 |
20 | if drug_info.chembl_id:
21 | links["ChEMBL"] = (
22 | f"https://www.ebi.ac.uk/chembl/compound_report_card/{drug_info.chembl_id}/"
23 | )
24 |
25 | if drug_info.pubchem_cid:
26 | links["PubChem"] = (
27 | f"https://pubchem.ncbi.nlm.nih.gov/compound/{drug_info.pubchem_cid}"
28 | )
29 |
30 | if drug_info.chebi_id:
31 | chebi_id = drug_info.chebi_id.replace("CHEBI:", "")
32 | links["ChEBI"] = (
33 | f"https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:{chebi_id}"
34 | )
35 |
36 | if links:
37 | result["_links"] = links
38 |
39 |
40 | def _format_basic_info(drug_info, output_lines: list[str]) -> None:
41 | """Format basic drug information."""
42 | if drug_info.formula:
43 | output_lines.append(f"- **Formula**: {drug_info.formula}")
44 |
45 | if drug_info.drugbank_id:
46 | output_lines.append(f"- **DrugBank ID**: {drug_info.drugbank_id}")
47 |
48 | if drug_info.chembl_id:
49 | output_lines.append(f"- **ChEMBL ID**: {drug_info.chembl_id}")
50 |
51 | if drug_info.pubchem_cid:
52 | output_lines.append(f"- **PubChem CID**: {drug_info.pubchem_cid}")
53 |
54 | if drug_info.chebi_id:
55 | output_lines.append(f"- **ChEBI ID**: {drug_info.chebi_id}")
56 |
57 | if drug_info.inchikey:
58 | output_lines.append(f"- **InChIKey**: {drug_info.inchikey}")
59 |
60 |
61 | def _format_clinical_info(drug_info, output_lines: list[str]) -> None:
62 | """Format clinical drug information."""
63 | if drug_info.tradename:
64 | names = drug_info.tradename[:5] # Limit to first 5
65 | output_lines.append(f"- **Trade Names**: {', '.join(names)}")
66 | if len(drug_info.tradename) > 5:
67 | output_lines.append(f" (and {len(drug_info.tradename) - 5} more)")
68 |
69 | if drug_info.description:
70 | desc = drug_info.description[:500]
71 | if len(drug_info.description) > 500:
72 | desc += "..."
73 | output_lines.append(f"\n### Description\n{desc}")
74 |
75 | if drug_info.indication:
76 | ind = drug_info.indication[:500]
77 | if len(drug_info.indication) > 500:
78 | ind += "..."
79 | output_lines.append(f"\n### Indication\n{ind}")
80 |
81 | if drug_info.mechanism_of_action:
82 | moa = drug_info.mechanism_of_action[:500]
83 | if len(drug_info.mechanism_of_action) > 500:
84 | moa += "..."
85 | output_lines.append(f"\n### Mechanism of Action\n{moa}")
86 |
87 |
def _format_drug_output(drug_info, result: dict) -> None:
    """Render the drug record as markdown and store it in result["_formatted"]."""
    lines = [f"## Drug: {drug_info.name or 'Unknown'}"]

    _format_basic_info(drug_info, lines)
    _format_clinical_info(drug_info, lines)

    links = result.get("_links")
    if links:
        lines.append("\n### External Links")
        lines.extend(f"- [{name}]({url})" for name, url in links.items())

    result["_formatted"] = "\n".join(lines)
101 |
102 |
async def get_drug(drug_id_or_name: str, output_json: bool = False) -> str:
    """Get drug information from MyChem.info.

    Args:
        drug_id_or_name: Drug ID (DrugBank, ChEMBL, etc.) or name
        output_json: Return JSON instead of formatted text

    Returns:
        Formatted drug information or JSON string
    """

    def _render_error(message: str) -> str:
        # Honor the caller's requested output format even for errors.
        return (
            json.dumps({"error": message}, indent=2) if output_json else message
        )

    try:
        client = BioThingsClient()
        drug_info = await client.get_drug_info(drug_id_or_name)

        if not drug_info:
            return _render_error(
                f"Drug '{drug_id_or_name}' not found in MyChem.info"
            )

        # Serialize the model, then decorate with external database links.
        result = drug_info.model_dump(by_alias=False, exclude_none=True)
        _add_drug_links(drug_info, result)

        if output_json:
            return json.dumps(result, indent=2)

        # Format for text output
        _format_drug_output(drug_info, result)
        return result["_formatted"]

    except Exception as e:
        logger.error(f"Error getting drug info: {e}")
        return _render_error(f"Error retrieving drug information: {e!s}")
142 |
143 |
# MCP tool function
async def _drug_details(drug_id_or_name: str) -> str:
    """Get drug/chemical information from MyChem.info.

    Retrieves a comprehensive drug record covering:
    - Drug identifiers (DrugBank, ChEMBL, PubChem, etc.)
    - Chemical properties (formula, InChIKey)
    - Trade names and synonyms
    - Clinical indications
    - Mechanism of action
    - Links to external databases

    Args:
        drug_id_or_name: Drug name (e.g., "aspirin") or ID (e.g., "DB00945", "CHEMBL25")

    Returns:
        Formatted drug information with external database links
    """
    # Thin wrapper: MCP tools always return markdown, never JSON.
    return await get_drug(drug_id_or_name, output_json=False)
163 |
```
--------------------------------------------------------------------------------
/docs/getting-started/01-quickstart-cli.md:
--------------------------------------------------------------------------------
```markdown
1 | # Quickstart: BioMCP CLI
2 |
3 | Get started with BioMCP in under 5 minutes! This guide walks you through installation and your first biomedical search.
4 |
5 | ## Prerequisites
6 |
7 | - Python 3.10 or higher
8 | - [uv](https://docs.astral.sh/uv/) package manager (recommended) or pip
9 |
10 | ## Installation
11 |
12 | ### Option 1: Using uv (Recommended)
13 |
14 | ```bash
15 | # Install uv if you haven't already
16 | curl -LsSf https://astral.sh/uv/install.sh | sh
17 |
18 | # Install BioMCP
19 | uv tool install biomcp-python
20 | ```
21 |
22 | ### Option 2: Using pip
23 |
24 | ```bash
25 | pip install biomcp-python
26 | ```
27 |
28 | ## Your First Search
29 |
30 | Let's search for recent articles about BRAF mutations in melanoma:
31 |
32 | ```bash
33 | biomcp article search \
34 | --gene BRAF --disease melanoma
35 | ```
36 |
37 | This command:
38 |
39 | - Searches PubMed/PubTator3 for articles
40 | - Filters by BRAF gene and melanoma disease
41 | - Returns the 5 most recent results
42 | - Automatically includes cBioPortal cancer genomics data
43 | - Includes preprints from bioRxiv/medRxiv by default
44 |
45 | ## Understanding the Output
46 |
47 | The search returns:
48 |
49 | 1. **cBioPortal Summary** (if gene specified): Cancer genomics data showing mutation frequencies and hotspots
50 | 2. **Article Results**: Each result includes:
51 | - Title and authors
52 | - Journal and publication date
53 | - PubMed ID and direct link
54 | - Abstract snippet
55 | - Annotated entities (genes, diseases, chemicals)
56 |
57 | ## Essential Commands
58 |
59 | ### Search Clinical Trials
60 |
61 | Find active trials for lung cancer:
62 |
63 | ```bash
64 | biomcp trial search \
65 | --condition "lung cancer" \
66 | --status open
67 | ```
68 |
69 | ### Get Gene Information
70 |
71 | Retrieve details about the TP53 tumor suppressor:
72 |
73 | ```bash
74 | biomcp variant search --gene TP53 # get variants for a gene
75 | biomcp article search --gene TP53 # find articles about a gene
76 | ```
77 |
78 | ### Look Up Drug Information
79 |
80 | Get details about imatinib (Gleevec):
81 |
82 | ```bash
83 | biomcp intervention search imatinib
84 | ```
85 |
86 | ### Search for Genetic Variants
87 |
88 | Find pathogenic variants in the BRCA1 gene:
89 |
90 | ```bash
91 | biomcp variant search \
92 | --gene BRCA1 --significance pathogenic
93 | ```
94 |
95 | ### Analyze a Clinically Actionable Variant
96 |
97 | Get OncoKB clinical interpretations for known cancer variants. BioMCP uses a demo server for key genes like BRAF out-of-the-box, no setup required!
98 |
99 | ```bash
100 | # Get clinical actionability for BRAF V600E
101 | biomcp variant search --gene BRAF
102 | ```
103 |
104 | This will automatically prepend an "OncoKB Gene Summary" table to the search results.
105 |
106 | ## Going Further
107 |
108 | ### Set Up API Keys (Optional but Recommended)
109 |
110 | Some features require API keys for enhanced functionality:
111 |
112 | ```bash
113 | # For NCI clinical trials database
114 | export NCI_API_KEY="your-key-here"
115 |
116 | # For AlphaGenome variant predictions
117 | export ALPHAGENOME_API_KEY="your-key-here"
118 |
119 | # For additional cBioPortal features
120 | export CBIO_TOKEN="your-token-here"
121 | ```
122 |
123 | See [Authentication and API Keys](03-authentication-and-api-keys.md) for detailed setup.
124 |
125 | ### Explore Advanced Features
126 |
127 | - **Combine Multiple Filters**:
128 |
129 | ```bash
130 | biomcp article search \
131 | --gene EGFR --disease "lung cancer" \
132 | --chemical erlotinib
133 | ```
134 |
135 | - **Use OR Logic in Keywords**:
136 |
137 | ```bash
138 | biomcp article search --gene BRAF --keyword "V600E|p.V600E|c.1799T>A"
139 | ```
140 |
141 | - **Exclude Preprints**:
142 | ```bash
143 | biomcp article search --gene TP53 --no-preprints
144 | ```
145 |
146 | ### Get Help
147 |
148 | View all available commands:
149 |
150 | ```bash
151 | biomcp --help
152 | ```
153 |
154 | Get help for a specific command:
155 |
156 | ```bash
157 | biomcp article search --help
158 | ```
159 |
160 | ## Common Use Cases
161 |
162 | ### 1. Research a Specific Mutation
163 |
164 | ```bash
165 | # Find articles about EGFR T790M resistance mutation
166 | biomcp article search --gene EGFR \
167 | --keyword "T790M|p.T790M" \
168 | --disease "lung cancer"
169 | ```
170 |
171 | ### 2. Find Trials for a Patient
172 |
173 | ```bash
174 | # Active trials for HER2-positive breast cancer
175 | biomcp trial search \
176 | --condition "breast cancer" \
177 | --keyword "HER2 positive" \
178 | --status RECRUITING
179 | ```
180 |
181 | ### 3. Investigate Drug Mechanisms
182 |
183 | ```bash
184 | # Get information about pembrolizumab
185 | biomcp drug get pembrolizumab
186 |
187 | # Find articles about its use in melanoma
188 | biomcp article search --chemical pembrolizumab --disease melanoma
189 | ```
190 |
191 | ## Troubleshooting
192 |
193 | ### Command Not Found
194 |
195 | If `biomcp` is not recognized:
196 |
197 | - Ensure your PATH includes the installation directory
198 | - Try running with full path: `~/.local/bin/biomcp`
199 | - Restart your terminal after installation
200 |
201 | ### No Results Found
202 |
203 | If searches return no results:
204 |
205 | - Check spelling of gene names (use official symbols)
206 | - Try broader search terms
207 | - Remove filters one by one to identify the constraint
208 |
209 | ### API Rate Limits
210 |
211 | If you encounter rate limit errors:
212 |
213 | - Add delays between requests
214 | - Consider setting up API keys for higher limits
215 | - Use the `--limit` parameter to reduce result count
216 |
217 | ## Next Steps
218 |
219 | Now that you've run your first searches, explore these resources:
220 |
221 | 1. **[Complete CLI Reference](../user-guides/01-command-line-interface.md)** - Comprehensive documentation for all commands and options
222 | 2. **[Claude Desktop Integration](02-claude-desktop-integration.md)** - Use BioMCP with AI assistants
223 | 3. **[Set up API Keys](03-authentication-and-api-keys.md)** - Enable advanced features with NCI, AlphaGenome, and cBioPortal
224 | 4. **[How-to Guides](../how-to-guides/01-find-articles-and-cbioportal-data.md)** - Step-by-step tutorials for complex research workflows
225 | 5. **[Deep Researcher Persona](../concepts/02-the-deep-researcher-persona.md)** - Learn about BioMCP's philosophy and methodology
226 |
227 | Happy researching! 🧬🔬
228 |
```
--------------------------------------------------------------------------------
/docs/backend-services-reference/01-overview.md:
--------------------------------------------------------------------------------
```markdown
1 | # Backend Services Reference Overview
2 |
3 | BioMCP integrates with multiple biomedical databases and services to provide comprehensive research capabilities. This reference documents the underlying APIs and their capabilities.
4 |
5 | ## Service Categories
6 |
7 | ### Literature and Publications
8 |
9 | - **[PubTator3](06-pubtator3.md)**: Biomedical literature with entity annotations
10 | - **Europe PMC**: Preprints from bioRxiv and medRxiv
11 |
12 | ### Clinical Trials
13 |
14 | - **[ClinicalTrials.gov](04-clinicaltrials-gov.md)**: U.S. and international clinical trials registry
15 | - **[NCI CTS API](05-nci-cts-api.md)**: National Cancer Institute's enhanced trial search
16 |
17 | ### Biomedical Annotations
18 |
19 | - **[BioThings Suite](02-biothings-suite.md)**:
20 | - MyGene.info - Gene annotations
21 | - MyVariant.info - Variant annotations
22 | - MyDisease.info - Disease ontology
23 | - MyChem.info - Drug/chemical data
24 |
25 | ### Cancer Genomics
26 |
27 | - **[cBioPortal](03-cbioportal.md)**: Cancer genomics portal with mutation data
28 | - **TCGA**: The Cancer Genome Atlas (via MyVariant.info)
29 |
30 | ### Variant Effect Prediction
31 |
32 | - **[AlphaGenome](07-alphagenome.md)**: Google DeepMind's AI for regulatory predictions
33 |
34 | ## API Authentication
35 |
36 | | Service | Authentication Required | Type | Rate Limits |
37 | | ------------------ | ----------------------- | ------- | ------------------- |
38 | | PubTator3 | No | Public | 3 requests/second |
39 | | ClinicalTrials.gov | No | Public | 50,000 requests/day |
40 | | NCI CTS API | Yes | API Key | 1,000 requests/day |
41 | | BioThings APIs | No | Public | 1,000 requests/hour |
42 | | cBioPortal | Optional | Token | Higher with token |
43 | | AlphaGenome | Yes | API Key | Contact provider |
44 |
45 | ## Data Flow Architecture
46 |
47 | ```
48 | User Query → BioMCP Tools → Backend APIs → Unified Response
49 |
50 | Example Flow:
51 | 1. User: "Find articles about BRAF mutations"
52 | 2. BioMCP: article_searcher tool
53 | 3. APIs Called:
54 | - PubTator3 (articles)
55 | - cBioPortal (mutation data)
56 | - Europe PMC (preprints)
57 | 4. Response: Integrated results with citations
58 | ```
59 |
60 | ## Service Reliability
61 |
62 | ### Primary Services
63 |
64 | - **PubTator3**: 99.9% uptime, updated daily
65 | - **ClinicalTrials.gov**: 99.5% uptime, updated daily
66 | - **BioThings APIs**: 99.9% uptime, real-time data
67 |
68 | ### Fallback Strategies
69 |
70 | - Cache frequently accessed data
71 | - Implement exponential backoff
72 | - Use alternative endpoints when available
73 |
74 | ## Common Integration Patterns
75 |
76 | ### 1. Entity Recognition Enhancement
77 |
78 | ```
79 | PubTator3 → Extract entities → BioThings → Get detailed annotations
80 | ```
81 |
82 | ### 2. Variant to Trial Pipeline
83 |
84 | ```
85 | MyVariant.info → Get gene → ClinicalTrials.gov → Find relevant trials
86 | ```
87 |
88 | ### 3. Comprehensive Gene Analysis
89 |
90 | ```
91 | MyGene.info → Basic info
92 | cBioPortal → Cancer mutations
93 | PubTator3 → Literature
94 | AlphaGenome → Predictions
95 | ```
96 |
97 | ## Performance Considerations
98 |
99 | ### Response Times (typical)
100 |
101 | - PubTator3: 200-500ms
102 | - ClinicalTrials.gov: 300-800ms
103 | - BioThings APIs: 100-300ms
104 | - cBioPortal: 200-600ms
105 | - AlphaGenome: 1-3 seconds
106 |
107 | ### Optimization Strategies
108 |
109 | 1. **Batch requests** when APIs support it
110 | 2. **Cache static data** (gene names, ontologies)
111 | 3. **Parallelize independent** API calls
112 | 4. **Use pagination** for large result sets
113 |
114 | ## Error Handling
115 |
116 | ### Common Error Types
117 |
118 | - **Rate Limiting**: 429 errors, implement backoff
119 | - **Invalid Parameters**: 400 errors, validate inputs
120 | - **Service Unavailable**: 503 errors, retry with delay
121 | - **Authentication**: 401 errors, check API keys
122 |
123 | ### Error Response Format
124 |
125 | ```json
126 | {
127 | "error": {
128 | "code": "RATE_LIMIT_EXCEEDED",
129 | "message": "API rate limit exceeded",
130 | "retry_after": 3600
131 | }
132 | }
133 | ```
134 |
135 | ## Data Formats
136 |
137 | ### Input Formats
138 |
139 | - **Identifiers**: HGNC symbols, rsIDs, NCT numbers, PMIDs
140 | - **Coordinates**: GRCh38 genomic positions
141 | - **Terms**: MeSH, MONDO, HPO ontologies
142 |
143 | ### Output Formats
144 |
145 | - **JSON**: Primary format for all APIs
146 | - **XML**: Available for some services
147 | - **TSV/CSV**: Export options for bulk data
148 |
149 | ## Update Frequencies
150 |
151 | | Service | Update Frequency | Data Lag |
152 | | ------------------ | ---------------- | ---------- |
153 | | PubTator3 | Daily | 1-2 days |
154 | | ClinicalTrials.gov | Daily | Real-time |
155 | | NCI CTS | Daily | 1 day |
156 | | BioThings | Real-time | Minutes |
157 | | cBioPortal | Quarterly | 3-6 months |
158 |
159 | ## Best Practices
160 |
161 | ### 1. API Key Management
162 |
163 | - Store keys securely
164 | - Rotate keys periodically
165 | - Monitor usage against limits
166 |
167 | ### 2. Error Recovery
168 |
169 | - Implement retry logic
170 | - Log failed requests
171 | - Provide fallback data
172 |
173 | ### 3. Data Validation
174 |
175 | - Verify gene symbols
176 | - Validate genomic coordinates
177 | - Check identifier formats
178 |
179 | ### 4. Performance
180 |
181 | - Cache when appropriate
182 | - Batch similar requests
183 | - Use appropriate page sizes
184 |
185 | ## Getting Started
186 |
187 | 1. Review individual service documentation
188 | 2. Obtain necessary API keys
189 | 3. Test endpoints with sample data
190 | 4. Implement error handling
191 | 5. Monitor usage and performance
192 |
193 | ## Support Resources
194 |
195 | - **PubTator3**: [Support Forum](https://www.ncbi.nlm.nih.gov/research/pubtator3/)
196 | - **ClinicalTrials.gov**: [Help Desk](https://clinicaltrials.gov/help)
197 | - **BioThings**: [Documentation](https://docs.biothings.io/)
198 | - **cBioPortal**: [User Guide](https://docs.cbioportal.org/)
199 | - **NCI**: [API Support](https://clinicaltrialsapi.cancer.gov/support)
200 |
```
--------------------------------------------------------------------------------
/tests/tdd/test_concurrent_requests.py:
--------------------------------------------------------------------------------
```python
1 | """Test concurrent request handling in the HTTP client."""
2 |
3 | import asyncio
4 | from unittest.mock import AsyncMock, patch
5 |
6 | import pytest
7 |
8 | from biomcp import http_client
9 |
10 |
class TestConcurrentRequests:
    """Test concurrent request handling.

    Every test patches ``biomcp.http_client.call_http`` with an
    ``AsyncMock`` so no real network I/O occurs, then fans requests out
    with ``asyncio.gather`` to exercise the client's caching, rate
    limiting, and circuit-breaker paths under concurrency.
    """

    @pytest.mark.asyncio
    async def test_concurrent_requests_same_domain(self):
        """Test multiple concurrent requests to the same domain."""
        # Use patch instead of direct replacement
        with patch(
            "biomcp.http_client.call_http", new_callable=AsyncMock
        ) as mock_call:
            # Configure mock to return success
            mock_call.return_value = (200, '{"data": "response"}')

            # Make 10 concurrent requests with different URLs to avoid caching
            # and disable caching explicitly
            tasks = [
                http_client.request_api(
                    url=f"https://api.example.com/resource/{i}",
                    request={},
                    domain="example",
                    cache_ttl=0,  # Disable caching
                )
                for i in range(10)
            ]

            results = await asyncio.gather(*tasks)

            # All requests should succeed; request_api returns (data, error).
            assert len(results) == 10
            for data, error in results:
                assert error is None
                assert data == {"data": "response"}

            # Check that rate limiting was applied: every request must have
            # reached the underlying transport (nothing cached or dropped).
            assert mock_call.call_count == 10

    @pytest.mark.asyncio
    async def test_concurrent_requests_different_domains(self):
        """Test concurrent requests to different domains."""
        with patch(
            "biomcp.http_client.call_http", new_callable=AsyncMock
        ) as mock_call:
            # Return different responses based on URL so each result can be
            # traced back to the domain that produced it.
            async def side_effect(method, url, *args, **kwargs):
                if "domain1" in url:
                    return (200, '{"source": "domain1"}')
                elif "domain2" in url:
                    return (200, '{"source": "domain2"}')
                else:
                    return (200, '{"source": "other"}')

            mock_call.side_effect = side_effect

            # Make requests to different domains
            tasks = [
                http_client.request_api(
                    "https://domain1.com/api", {}, domain="domain1"
                ),
                http_client.request_api(
                    "https://domain2.com/api", {}, domain="domain2"
                ),
                http_client.request_api(
                    "https://domain3.com/api", {}, domain="domain3"
                ),
            ]

            results = await asyncio.gather(*tasks)

            # Check results: gather preserves task order, so results map
            # 1:1 to the domains above.
            assert results[0][0] == {"source": "domain1"}
            assert results[1][0] == {"source": "domain2"}
            assert results[2][0] == {"source": "other"}

    @pytest.mark.asyncio
    async def test_concurrent_cache_access(self):
        """Test that concurrent requests properly use cache."""
        with patch(
            "biomcp.http_client.call_http", new_callable=AsyncMock
        ) as mock_call:
            mock_call.return_value = (200, '{"data": "cached"}')

            # First request to populate cache
            await http_client.request_api(
                url="https://api.example.com/data",
                request={},
                domain="example",
                cache_ttl=60,
            )

            # Record the baseline so later calls can be detected.
            initial_calls = mock_call.call_count

            # Make 5 concurrent requests to same URL
            tasks = [
                http_client.request_api(
                    url="https://api.example.com/data",
                    request={},
                    domain="example",
                    cache_ttl=60,
                )
                for _ in range(5)
            ]

            results = await asyncio.gather(*tasks)

            # All should get cached response
            assert len(results) == 5
            for data, _error in results:
                assert data == {"data": "cached"}

            # No additional HTTP calls should have been made
            assert mock_call.call_count == initial_calls

    @pytest.mark.asyncio
    async def test_concurrent_circuit_breaker(self):
        """Test circuit breaker behavior with concurrent failures."""
        with patch(
            "biomcp.http_client.call_http", new_callable=AsyncMock
        ) as mock_call:
            # Simulate failures
            mock_call.return_value = (500, "Internal Server Error")

            # Make concurrent failing requests
            tasks = [
                http_client.request_api(
                    url=f"https://failing.com/api/{i}",
                    request={},
                    domain="failing",
                )
                for i in range(10)
            ]

            results = await asyncio.gather(*tasks, return_exceptions=True)

            # All should fail.
            # NOTE(review): the unpacking below assumes request_api reports
            # failures as (None, error) tuples rather than raising; if it
            # ever raised, gather would return the exception object and the
            # tuple unpack would blow up — confirm against http_client.
            error_count = sum(1 for _, error in results if error is not None)
            assert error_count == 10

            # Circuit should be open now
            # Additional requests should fail immediately
            _, error = await http_client.request_api(
                url="https://failing.com/api/test",
                request={},
                domain="failing",
            )

            assert error is not None
            # Check that circuit breaker is preventing calls
            # (exact behavior depends on implementation details)
160 |
```
--------------------------------------------------------------------------------
/tests/tdd/test_connection_pool.py:
--------------------------------------------------------------------------------
```python
1 | """Tests for connection pool management."""
2 |
3 | import asyncio
4 | import ssl
5 | import weakref
6 | from unittest.mock import patch
7 |
8 | import httpx
9 | import pytest
10 |
11 | from biomcp.connection_pool import (
12 | EventLoopConnectionPools,
13 | close_all_pools,
14 | get_connection_pool,
15 | )
16 |
17 |
@pytest.fixture
def pool_manager():
    """Create a fresh pool manager for testing.

    A brand-new ``EventLoopConnectionPools`` per test prevents pool
    state from leaking between tests.
    """
    return EventLoopConnectionPools()
22 |
23 |
@pytest.mark.asyncio
async def test_get_pool_creates_new_pool(pool_manager):
    """A fresh manager should lazily build a usable client pool."""
    # No pool exists yet, so this call must construct one.
    client = await pool_manager.get_pool(
        verify=True, timeout=httpx.Timeout(30)
    )

    assert client is not None
    assert isinstance(client, httpx.AsyncClient)
    assert not client.is_closed
34 |
35 |
@pytest.mark.asyncio
async def test_get_pool_reuses_existing_pool(pool_manager):
    """Two identical get_pool calls must hand back the same client."""
    cfg_timeout = httpx.Timeout(30)

    first = await pool_manager.get_pool(verify=True, timeout=cfg_timeout)
    second = await pool_manager.get_pool(verify=True, timeout=cfg_timeout)

    # Same settings -> same cached pool instance.
    assert first is second
45 |
46 |
@pytest.mark.asyncio
async def test_get_pool_different_verify_settings(pool_manager):
    """Pools are keyed by the verify setting, so True/False must differ."""
    shared_timeout = httpx.Timeout(30)

    verified = await pool_manager.get_pool(
        verify=True, timeout=shared_timeout
    )
    unverified = await pool_manager.get_pool(
        verify=False, timeout=shared_timeout
    )

    assert verified is not unverified
56 |
57 |
@pytest.mark.asyncio
async def test_get_pool_ssl_context(pool_manager):
    """A custom SSLContext is an acceptable verify argument."""
    ctx = ssl.create_default_context()

    client = await pool_manager.get_pool(
        verify=ctx, timeout=httpx.Timeout(30)
    )

    assert client is not None
    assert isinstance(client, httpx.AsyncClient)
68 |
69 |
@pytest.mark.asyncio
async def test_pool_cleanup_on_close_all(pool_manager):
    """close_all should drop every pool the manager created."""
    t = httpx.Timeout(30)

    # Populate two distinct pools (one per verify setting).
    await pool_manager.get_pool(verify=True, timeout=t)
    await pool_manager.get_pool(verify=False, timeout=t)

    await pool_manager.close_all()

    # The per-loop registry must be empty afterwards.
    assert len(pool_manager._loop_pools) == 0
82 |
83 |
@pytest.mark.asyncio
async def test_no_event_loop_returns_single_use_client(pool_manager):
    """Test behavior when no event loop is running.

    ``asyncio.get_running_loop`` is patched to raise ``RuntimeError`` so
    the manager takes its "no loop" fallback path; it must still return
    a usable client.
    """
    with patch("asyncio.get_running_loop", side_effect=RuntimeError):
        timeout = httpx.Timeout(30)

        pool = await pool_manager.get_pool(verify=True, timeout=timeout)

        assert pool is not None
        # Single-use client should have no keepalive
        # Note: httpx client internal structure may vary
95 |
96 |
@pytest.mark.asyncio
async def test_pool_recreation_after_close(pool_manager):
    """Closing a pool forces the manager to build a replacement."""
    t = httpx.Timeout(30)

    original = await pool_manager.get_pool(verify=True, timeout=t)
    await original.aclose()

    replacement = await pool_manager.get_pool(verify=True, timeout=t)

    # The closed client must never be handed out again.
    assert original is not replacement
    assert original.is_closed
    assert not replacement.is_closed
110 |
111 |
@pytest.mark.asyncio
async def test_weak_reference_cleanup():
    """Test that weak references are used for event loops.

    A ``WeakKeyDictionary`` keyed by the event loop means pool entries
    become collectable once their loop is garbage collected.
    """
    pool_manager = EventLoopConnectionPools()

    # Verify that the pool manager uses weak references
    assert isinstance(pool_manager._loop_pools, weakref.WeakKeyDictionary)

    # Create a pool
    timeout = httpx.Timeout(30)
    pool = await pool_manager.get_pool(verify=True, timeout=timeout)

    # Verify pool was created
    assert pool is not None

    # The current event loop should be in the weak key dict
    current_loop = asyncio.get_running_loop()
    assert current_loop in pool_manager._loop_pools
130 |
131 |
@pytest.mark.asyncio
async def test_global_get_connection_pool():
    """The module-level helper returns a client when pooling is enabled."""
    with patch.dict("os.environ", {"BIOMCP_USE_CONNECTION_POOL": "true"}):
        client = await get_connection_pool(
            verify=True, timeout=httpx.Timeout(30)
        )

        assert client is not None
        assert isinstance(client, httpx.AsyncClient)
142 |
143 |
@pytest.mark.asyncio
async def test_global_close_all_pools():
    """Test the global close_all_pools function."""
    # Create some pools
    timeout = httpx.Timeout(30)
    await get_connection_pool(verify=True, timeout=timeout)
    await get_connection_pool(verify=False, timeout=timeout)

    # Close all pools
    await close_all_pools()

    # Verify cleanup (this is implementation-specific):
    # reach into the module-private manager to confirm the registry
    # was emptied.
    from biomcp.connection_pool import _pool_manager

    assert len(_pool_manager._loop_pools) == 0
159 |
160 |
@pytest.mark.asyncio
async def test_concurrent_pool_creation(pool_manager):
    """Concurrent callers asking for the same settings share one pool."""
    t = httpx.Timeout(30)

    async def fetch_pool():
        return await pool_manager.get_pool(verify=True, timeout=t)

    # Fire 10 overlapping requests for the identical pool key.
    pools = await asyncio.gather(*(fetch_pool() for _ in range(10)))

    first = pools[0]
    assert all(candidate is first for candidate in pools)
174 |
175 |
@pytest.mark.asyncio
async def test_connection_pool_limits():
    """Pool creation succeeds with the manager's built-in limits."""
    manager = EventLoopConnectionPools()

    client = await manager.get_pool(verify=True, timeout=httpx.Timeout(30))

    # httpx keeps its limit configuration internal; just confirm a
    # well-formed client came back.
    assert client is not None
    assert isinstance(client, httpx.AsyncClient)
187 |
```
--------------------------------------------------------------------------------
/tests/data/myvariant/variants_part_braf_v600_multiple.json:
--------------------------------------------------------------------------------
```json
1 | [
2 | {
3 | "_id": "chr7:g.140453136A>G",
4 | "_score": 19.419012,
5 | "cadd": {
6 | "_license": "http://bit.ly/2TIuab9",
7 | "phred": 21.2
8 | },
9 | "chrom": "7",
10 | "clinvar": {
11 | "_license": "http://bit.ly/2SQdcI0",
12 | "rcv": {
13 | "clinical_significance": "Likely pathogenic"
14 | },
15 | "variant_id": 376288
16 | },
17 | "cosmic": {
18 | "_license": "http://bit.ly/2VMkY7R",
19 | "cosmic_id": "COSM18443"
20 | },
21 | "dbnsfp": {
22 | "_license": "http://bit.ly/2VLnQBz",
23 | "genename": ["BRAF", "BRAF", "BRAF", "BRAF"],
24 | "hgvsc": ["c.620T>C", "c.1919T>C", "c.1799T>C"],
25 | "hgvsp": ["p.V600A", "p.Val600Ala", "p.Val640Ala", "p.Val207Ala"],
26 | "polyphen2": {
27 | "hdiv": {
28 | "pred": "B",
29 | "score": 0.207
30 | }
31 | }
32 | },
33 | "dbsnp": {
34 | "_license": "http://bit.ly/2AqoLOc",
35 | "rsid": "rs113488022"
36 | },
37 | "vcf": {
38 | "alt": "G",
39 | "position": "140453136",
40 | "ref": "A"
41 | }
42 | },
43 | {
44 | "_id": "chr7:g.140453136A>T",
45 | "_score": 18.693962,
46 | "cadd": {
47 | "_license": "http://bit.ly/2TIuab9",
48 | "phred": 32
49 | },
50 | "chrom": "7",
51 | "civic": {
52 | "_license": "http://bit.ly/2FqS871",
53 | "id": 12,
54 | "openCravatUrl": "https://run.opencravat.org/webapps/variantreport/index.html?alt_base=T&chrom=chr7&pos=140753336&ref_base=A"
55 | },
56 | "clinvar": {
57 | "_license": "http://bit.ly/2SQdcI0",
58 | "rcv": [
59 | {
60 | "clinical_significance": "Pathogenic"
61 | },
62 | {
63 | "clinical_significance": "Pathogenic"
64 | },
65 | {
66 | "clinical_significance": "Pathogenic"
67 | },
68 | {
69 | "clinical_significance": "Pathogenic"
70 | },
71 | {
72 | "clinical_significance": "Pathogenic"
73 | },
74 | {
75 | "clinical_significance": "Pathogenic"
76 | },
77 | {
78 | "clinical_significance": "Pathogenic"
79 | },
80 | {
81 | "clinical_significance": "not provided"
82 | },
83 | {
84 | "clinical_significance": "Likely pathogenic"
85 | },
86 | {
87 | "clinical_significance": "Likely pathogenic"
88 | },
89 | {
90 | "clinical_significance": "Likely pathogenic"
91 | },
92 | {
93 | "clinical_significance": "Likely pathogenic"
94 | },
95 | {
96 | "clinical_significance": "Likely pathogenic"
97 | },
98 | {
99 | "clinical_significance": "Likely pathogenic"
100 | },
101 | {
102 | "clinical_significance": "Likely pathogenic"
103 | },
104 | {
105 | "clinical_significance": "Pathogenic"
106 | },
107 | {
108 | "clinical_significance": "Pathogenic"
109 | },
110 | {
111 | "clinical_significance": "Likely pathogenic"
112 | },
113 | {
114 | "clinical_significance": "Pathogenic"
115 | },
116 | {
117 | "clinical_significance": "Likely pathogenic"
118 | },
119 | {
120 | "clinical_significance": "Likely pathogenic"
121 | },
122 | {
123 | "clinical_significance": "Pathogenic"
124 | },
125 | {
126 | "clinical_significance": "Pathogenic"
127 | },
128 | {
129 | "clinical_significance": "Pathogenic"
130 | },
131 | {
132 | "clinical_significance": "Pathogenic"
133 | },
134 | {
135 | "clinical_significance": "Likely pathogenic"
136 | },
137 | {
138 | "clinical_significance": "Pathogenic"
139 | },
140 | {
141 | "clinical_significance": "Pathogenic"
142 | },
143 | {
144 | "clinical_significance": "Likely pathogenic"
145 | }
146 | ],
147 | "variant_id": 13961
148 | },
149 | "cosmic": {
150 | "_license": "http://bit.ly/2VMkY7R",
151 | "cosmic_id": "COSM476"
152 | },
153 | "dbnsfp": {
154 | "_license": "http://bit.ly/2VLnQBz",
155 | "genename": ["BRAF", "BRAF", "BRAF", "BRAF"],
156 | "hgvsc": ["c.620T>A", "c.1919T>A", "c.1799T>A"],
157 | "hgvsp": ["p.Val640Glu", "p.Val207Glu", "p.Val600Glu", "p.V600E"],
158 | "polyphen2": {
159 | "hdiv": {
160 | "pred": "D",
161 | "score": 0.971
162 | }
163 | }
164 | },
165 | "dbsnp": {
166 | "_license": "http://bit.ly/2AqoLOc",
167 | "rsid": "rs113488022"
168 | },
169 | "exac": {
170 | "_license": "http://bit.ly/2H9c4hg",
171 | "af": 1.647e-5
172 | },
173 | "gnomad_exome": {
174 | "_license": "http://bit.ly/2I1cl1I",
175 | "af": {
176 | "af": 3.97994e-6
177 | }
178 | },
179 | "vcf": {
180 | "alt": "T",
181 | "position": "140453136",
182 | "ref": "A"
183 | }
184 | },
185 | {
186 | "_id": "chr7:g.140453136A>C",
187 | "_score": 18.476965,
188 | "cadd": {
189 | "_license": "http://bit.ly/2TIuab9",
190 | "phred": 26.0
191 | },
192 | "chrom": "7",
193 | "clinvar": {
194 | "_license": "http://bit.ly/2SQdcI0",
195 | "rcv": [
196 | {
197 | "clinical_significance": "not provided"
198 | },
199 | {
200 | "clinical_significance": "Pathogenic"
201 | },
202 | {
203 | "clinical_significance": "Pathogenic"
204 | },
205 | {
206 | "clinical_significance": "Uncertain significance"
207 | }
208 | ],
209 | "variant_id": 40389
210 | },
211 | "cosmic": {
212 | "_license": "http://bit.ly/2VMkY7R",
213 | "cosmic_id": "COSM6137"
214 | },
215 | "dbnsfp": {
216 | "_license": "http://bit.ly/2VLnQBz",
217 | "genename": ["BRAF", "BRAF", "BRAF", "BRAF"],
218 | "hgvsc": ["c.1919T>G", "c.1799T>G", "c.620T>G"],
219 | "hgvsp": ["p.Val640Gly", "p.Val207Gly", "p.Val600Gly", "p.V600G"],
220 | "polyphen2": {
221 | "hdiv": {
222 | "pred": "P",
223 | "score": 0.822
224 | }
225 | }
226 | },
227 | "dbsnp": {
228 | "_license": "http://bit.ly/2AqoLOc",
229 | "rsid": "rs113488022"
230 | },
231 | "vcf": {
232 | "alt": "C",
233 | "position": "140453136",
234 | "ref": "A"
235 | }
236 | }
237 | ]
238 |
```
--------------------------------------------------------------------------------
/src/biomcp/http_client_simple.py:
--------------------------------------------------------------------------------
```python
1 | """Helper functions for simpler HTTP client operations."""
2 |
3 | import asyncio
4 | import contextlib
5 | import json
6 | import os
7 | import ssl
8 |
9 | import httpx
10 |
11 | # Global connection pools per SSL context
12 | _connection_pools: dict[str, httpx.AsyncClient] = {}
13 | _pool_lock = asyncio.Lock()
14 |
15 |
def close_all_pools():
    """Close all connection pools. Useful for cleanup in tests.

    Closing is best-effort: when an event loop is running, each pool is
    closed via a fire-and-forget task; otherwise a synchronous fallback
    is attempted. The registry is cleared unconditionally afterwards.
    """
    global _connection_pools
    for pool in _connection_pools.values():
        if pool and not pool.is_closed:
            # Schedule the close in a safe way
            try:
                # Store task reference to avoid garbage collection
                close_task = asyncio.create_task(pool.aclose())
                # Optionally add a callback to handle completion
                close_task.add_done_callback(lambda t: None)
            except RuntimeError:
                # If no event loop is running, close synchronously.
                # NOTE(review): this reaches into the private `_transport`
                # attribute and calls a sync `close()`; async httpx
                # transports expose `aclose()` — confirm this attribute
                # exists on the installed httpx version.
                pool._transport.close()
    _connection_pools.clear()
31 |
32 |
async def get_connection_pool(
    verify: ssl.SSLContext | str | bool,
    timeout: httpx.Timeout,
) -> httpx.AsyncClient:
    """Get or create a shared connection pool for the given SSL context."""
    global _connection_pools

    # Distinct SSLContext instances get their own pools (keyed by object
    # identity); bool/str verify settings are keyed by their string form.
    if isinstance(verify, ssl.SSLContext):
        cache_key = f"ssl_{id(verify)}"
    else:
        cache_key = str(verify)

    async with _pool_lock:
        existing = _connection_pools.get(cache_key)
        if existing is not None and not existing.is_closed:
            return existing

        # Build a fresh client with pooling-friendly settings.
        client = httpx.AsyncClient(
            verify=verify,
            http2=False,  # HTTP/2 can add overhead for simple requests
            timeout=timeout,
            limits=httpx.Limits(
                max_keepalive_connections=20,  # Reuse connections
                max_connections=100,  # Total connection limit
                keepalive_expiry=30,  # Keep connections alive for 30s
            ),
            # Retries are handled at a higher level, not by the transport.
            transport=httpx.AsyncHTTPTransport(
                retries=0,
            ),
        )
        _connection_pools[cache_key] = client
        return client
66 |
67 |
async def execute_http_request(  # noqa: C901
    method: str,
    url: str,
    params: dict,
    verify: ssl.SSLContext | str | bool,
    headers: dict[str, str] | None = None,
) -> tuple[int, str]:
    """Execute the actual HTTP request using connection pooling.

    Args:
        method: HTTP method (GET or POST)
        url: Target URL
        params: Request parameters. A JSON-encoded "_headers" entry, if
            present, is popped and merged into the request headers.
        verify: SSL verification settings
        headers: Optional custom headers

    Returns:
        Tuple of (status_code, response_text)

    Raises:
        ConnectionError: For connection failures
        TimeoutError: For timeout errors
    """
    from .constants import HTTP_TIMEOUT_SECONDS

    try:
        # Extract custom headers from params if present
        custom_headers = headers or {}
        if "_headers" in params:
            with contextlib.suppress(json.JSONDecodeError, TypeError):
                custom_headers.update(json.loads(params.pop("_headers")))

        # Use the configured timeout from constants
        timeout = httpx.Timeout(HTTP_TIMEOUT_SECONDS)

        # Use connection pooling with proper error handling
        use_pool = (
            os.getenv("BIOMCP_USE_CONNECTION_POOL", "true").lower() == "true"
        )

        if use_pool:
            try:
                # Use the shared connection pool manager.
                # BUG FIX: this was `from ..connection_pool import ...`,
                # which resolves *above* the `biomcp` package and always
                # raised ImportError, silently disabling pooling via the
                # fallback below. The single-dot form matches the
                # `.constants` imports elsewhere in this module.
                from .connection_pool import get_connection_pool as get_pool

                client = await get_pool(verify, timeout)
                should_close = False  # shared pool must stay open
            except Exception:
                # Fallback to creating a new single-use client
                client = httpx.AsyncClient(
                    verify=verify, http2=False, timeout=timeout
                )
                should_close = True
        else:
            # Pooling disabled: create a new client for each request
            client = httpx.AsyncClient(
                verify=verify, http2=False, timeout=timeout
            )
            should_close = True

        try:
            # Make the request
            if method.upper() == "GET":
                resp = await client.get(
                    url, params=params, headers=custom_headers
                )
            elif method.upper() == "POST":
                resp = await client.post(
                    url, json=params, headers=custom_headers
                )
            else:
                from .constants import HTTP_ERROR_CODE_UNSUPPORTED_METHOD

                return (
                    HTTP_ERROR_CODE_UNSUPPORTED_METHOD,
                    f"Unsupported method {method}",
                )

            # Normalize empty bodies to valid JSON text for downstream
            # parsers.
            if not resp.text:
                return resp.status_code, "{}"

            return resp.status_code, resp.text
        finally:
            # Only close if we created a new client
            if should_close:
                await client.aclose()

    except httpx.ConnectError as exc:
        raise ConnectionError(f"Failed to connect to {url}: {exc}") from exc
    except httpx.TimeoutException as exc:
        raise TimeoutError(f"Request to {url} timed out: {exc}") from exc
    except httpx.HTTPError as exc:
        error_msg = str(exc) if str(exc) else "Network connectivity error"
        from .constants import HTTP_ERROR_CODE_NETWORK

        return HTTP_ERROR_CODE_NETWORK, error_msg
165 |
```
--------------------------------------------------------------------------------
/src/biomcp/rate_limiter.py:
--------------------------------------------------------------------------------
```python
1 | """Rate limiting implementation for BioMCP API calls."""
2 |
3 | import asyncio
4 | import time
5 | from collections import defaultdict
6 | from contextlib import asynccontextmanager
7 |
8 | from .constants import (
9 | DEFAULT_BURST_SIZE,
10 | DEFAULT_RATE_LIMIT_PER_SECOND,
11 | )
12 | from .exceptions import BioMCPError
13 |
14 |
class RateLimitExceeded(BioMCPError):
    """Raised when rate limit is exceeded.

    Carries the offending domain plus the configured limit/window both
    in the human-readable message and in the structured details dict.
    """

    def __init__(self, domain: str, limit: int, window: int):
        # Keep the structured values in the details dict so callers can
        # log or surface them without parsing the message string.
        message = f"Rate limit exceeded for {domain}: {limit} requests per {window} seconds"
        super().__init__(
            message, {"domain": domain, "limit": limit, "window": window}
        )
23 |
24 |
class RateLimiter:
    """Token bucket rate limiter implementation.

    Tokens refill continuously at ``requests_per_second`` up to a cap of
    ``burst_size``. ``acquire`` sleeps rather than raising when the
    bucket is empty, so callers are throttled, not rejected.
    """

    def __init__(
        self,
        requests_per_second: float = DEFAULT_RATE_LIMIT_PER_SECOND,
        burst_size: int = DEFAULT_BURST_SIZE,
    ):
        """Initialize rate limiter.

        Args:
            requests_per_second: Sustained request rate
            burst_size: Maximum burst capacity
        """
        self.rate = requests_per_second
        self.burst_size = burst_size
        # Start with a full bucket so an initial burst is allowed.
        self.tokens = float(burst_size)
        # monotonic() is immune to wall-clock adjustments.
        self.last_update = time.monotonic()
        self._lock = asyncio.Lock()

    async def acquire(self, tokens: int = 1) -> None:
        """Acquire tokens from the bucket.

        Note: the sleep happens while holding the internal lock, which
        serializes waiters (roughly FIFO) but also blocks all concurrent
        acquirers for the duration of the wait.
        """
        async with self._lock:
            now = time.monotonic()
            elapsed = now - self.last_update
            self.last_update = now

            # Add tokens based on elapsed time, capped at burst capacity
            self.tokens = min(
                self.burst_size, self.tokens + elapsed * self.rate
            )

            if self.tokens < tokens:
                # Calculate wait time for the deficit to refill; after
                # sleeping exactly that long, the refilled tokens equal
                # the request, so the bucket is drained to zero.
                wait_time = (tokens - self.tokens) / self.rate
                await asyncio.sleep(wait_time)
                self.tokens = 0
            else:
                self.tokens -= tokens

    @asynccontextmanager
    async def limit(self):
        """Context manager for rate limiting (acquires one token)."""
        await self.acquire()
        yield
70 |
71 |
class DomainRateLimiter:
    """Rate limiter with per-domain limits.

    A ``RateLimiter`` is created lazily for each domain, using the tuned
    settings in ``domain_configs`` or falling back to the constructor
    defaults for unknown domains.
    """

    def __init__(self, default_rps: float = 10.0, default_burst: int = 20):
        """Initialize domain rate limiter.

        Args:
            default_rps: Default requests per second
            default_burst: Default burst size
        """
        self.default_rps = default_rps
        self.default_burst = default_burst
        self.limiters: dict[str, RateLimiter] = {}
        self.domain_configs = {
            "article": {"rps": 20.0, "burst": 40},  # PubMed can handle more
            "trial": {"rps": 10.0, "burst": 20},  # ClinicalTrials.gov standard
            "thinking": {"rps": 50.0, "burst": 100},  # Local processing
            "mygene": {"rps": 10.0, "burst": 20},  # MyGene.info
            "mydisease": {"rps": 10.0, "burst": 20},  # MyDisease.info
            "mychem": {"rps": 10.0, "burst": 20},  # MyChem.info
            "myvariant": {"rps": 15.0, "burst": 30},  # MyVariant.info
            "oncokb": {"rps": 5.0, "burst": 10},  # OncoKB conservative limits
        }

    def get_limiter(self, domain: str) -> RateLimiter:
        """Get or create rate limiter for domain."""
        try:
            return self.limiters[domain]
        except KeyError:
            overrides = self.domain_configs.get(domain, {})
            limiter = RateLimiter(
                overrides.get("rps", self.default_rps),
                int(overrides.get("burst", self.default_burst)),
            )
            self.limiters[domain] = limiter
            return limiter

    @asynccontextmanager
    async def limit(self, domain: str):
        """Rate limit context manager for a domain."""
        async with self.get_limiter(domain).limit():
            yield
111 |
112 |
class SlidingWindowRateLimiter:
    """Sliding window rate limiter for user/IP based limiting."""

    def __init__(self, requests: int = 100, window_seconds: int = 60):
        """Initialize sliding window rate limiter.

        Args:
            requests: Maximum requests per window
            window_seconds: Window size in seconds
        """
        self.max_requests = requests
        self.window_seconds = window_seconds
        self.requests: dict[str, list[float]] = defaultdict(list)
        self._lock = asyncio.Lock()

    async def check_limit(self, key: str) -> bool:
        """Record a request for ``key`` if it fits the current window.

        Returns True (and records the request) when under the limit,
        False otherwise.
        """
        async with self._lock:
            now = time.time()
            cutoff = now - self.window_seconds

            # Drop timestamps that have aged out of the window.
            recent = [t for t in self.requests[key] if t > cutoff]
            self.requests[key] = recent

            if len(recent) >= self.max_requests:
                return False

            # Record this request.
            recent.append(now)
            return True

    async def acquire(self, key: str) -> None:
        """Acquire permission to make request."""
        if not await self.check_limit(key):
            raise RateLimitExceeded(
                key, self.max_requests, self.window_seconds
            )
155 |
156 |
# Global instances shared by the helper functions below.
domain_limiter = DomainRateLimiter()
user_limiter = SlidingWindowRateLimiter(
    requests=1000, window_seconds=3600
) # 1000 req/hour


async def rate_limit_domain(domain: str) -> None:
    """Apply rate limiting for a domain.

    Waits (via the domain's token bucket) until capacity is available;
    the context manager is entered purely for its acquire side effect.
    """
    async with domain_limiter.limit(domain):
        pass


async def rate_limit_user(user_id: str | None = None) -> None:
    """Apply rate limiting for a user.

    No-op when ``user_id`` is falsy; otherwise raises RateLimitExceeded
    once the user exceeds the shared 1000 requests/hour window.
    """
    if user_id:
        await user_limiter.acquire(user_id)
```
--------------------------------------------------------------------------------
/docs/developer-guides/06-http-client-and-caching.md:
--------------------------------------------------------------------------------
```markdown
1 | # BioMCP HTTP Client Guide
2 |
3 | ## Overview
4 |
5 | BioMCP uses a centralized HTTP client for all external API calls. This provides:
6 |
7 | - Consistent error handling and retry logic
8 | - Request/response caching
9 | - Rate limiting per domain
10 | - Circuit breaker for fault tolerance
11 | - Offline mode support
12 | - Comprehensive endpoint tracking
13 |
14 | ## Migration from Direct HTTP Libraries
15 |
16 | ### Before (Direct httpx usage):
17 |
18 | ```python
19 | import httpx
20 |
21 | async def fetch_gene(gene: str):
22 | async with httpx.AsyncClient() as client:
23 | response = await client.get(f"https://api.example.com/genes/{gene}")
24 | response.raise_for_status()
25 | return response.json()
26 | ```
27 |
28 | ### After (Centralized client):
29 |
30 | ```python
31 | from biomcp import http_client
32 |
33 | async def fetch_gene(gene: str):
34 | data, error = await http_client.request_api(
35 | url=f"https://api.example.com/genes/{gene}",
36 | request={},
37 | domain="example"
38 | )
39 | if error:
40 | # Handle error consistently
41 | return None
42 | return data
43 | ```
44 |
45 | ## Error Handling
46 |
47 | The centralized client uses a consistent error handling pattern:
48 |
49 | ```python
50 | result, error = await http_client.request_api(...)
51 |
52 | if error:
53 | # error is a RequestError object with:
54 | # - error.code: HTTP status code or error type
55 | # - error.message: Human-readable error message
56 | # - error.details: Additional context
57 | logger.error(f"Request failed: {error.message}")
58 | return None # or handle appropriately
59 | ```
60 |
61 | ### Error Handling Guidelines
62 |
63 | 1. **For optional data**: Return `None` when the data is not critical
64 | 2. **For required data**: Raise an exception or return an error to the caller
65 | 3. **For batch operations**: Collect errors and report at the end
66 | 4. **For user-facing operations**: Provide clear, actionable error messages
67 |
68 | ## Creating Domain-Specific Adapters
69 |
70 | For complex APIs, create an adapter class:
71 |
72 | ```python
73 | from biomcp import http_client
74 | from biomcp.http_client import RequestError
75 |
76 | class MyAPIAdapter:
77 | """Adapter for MyAPI using centralized HTTP client."""
78 |
79 | def __init__(self):
80 | self.base_url = "https://api.example.com"
81 |
82 | async def get_resource(self, resource_id: str) -> tuple[dict | None, RequestError | None]:
83 | """Fetch a resource by ID.
84 |
85 | Returns:
86 | Tuple of (data, error) where one is always None
87 | """
88 | return await http_client.request_api(
89 | url=f"{self.base_url}/resources/{resource_id}",
90 | request={},
91 | domain="example",
92 | endpoint_key="example_resources"
93 | )
94 | ```
95 |
96 | ## Configuration
97 |
98 | ### Cache TTL (Time To Live)
99 |
100 | ```python
101 | # Cache for 1 hour (3600 seconds)
102 | data, error = await http_client.request_api(
103 | url=url,
104 | request=request,
105 | cache_ttl=3600
106 | )
107 |
108 | # Disable caching for this request
109 | data, error = await http_client.request_api(
110 | url=url,
111 | request=request,
112 | cache_ttl=0
113 | )
114 | ```
115 |
116 | ### Rate Limiting
117 |
118 | Rate limits are configured per domain in `http_client.py`:
119 |
120 | ```python
121 | # Default rate limits
122 | rate_limits = {
123 | "ncbi.nlm.nih.gov": 20, # 20 requests/second
124 | "clinicaltrials.gov": 10, # 10 requests/second
125 | "myvariant.info": 1000/3600, # 1000 requests/hour
126 | }
127 | ```
128 |
129 | ### Circuit Breaker
130 |
131 | The circuit breaker prevents cascading failures:
132 |
133 | - **Closed**: Normal operation
134 | - **Open**: Failing fast after threshold exceeded
135 | - **Half-Open**: Testing if service recovered
136 |
137 | Configure thresholds:
138 |
139 | ```python
140 | CIRCUIT_BREAKER_FAILURE_THRESHOLD = 5 # Open after 5 failures
141 | CIRCUIT_BREAKER_RECOVERY_TIMEOUT = 60 # Try again after 60 seconds
142 | ```
143 |
144 | ## Offline Mode
145 |
146 | Enable offline mode to only serve cached responses:
147 |
148 | ```bash
149 | export BIOMCP_OFFLINE=true
150 | biomcp run
151 | ```
152 |
153 | In offline mode:
154 |
155 | - Only cached responses are returned
156 | - No external HTTP requests are made
157 | - Missing cache entries return None with appropriate error
158 |
159 | ## Performance Tuning
160 |
161 | ### Connection Pooling
162 |
163 | The HTTP client maintains connection pools per domain:
164 |
165 | ```python
166 | # Configure in http_client_simple.py
167 | limits = httpx.Limits(
168 | max_keepalive_connections=20,
169 | max_connections=100,
170 | keepalive_expiry=30
171 | )
172 | ```
173 |
174 | ### Concurrent Requests
175 |
176 | For parallel requests to the same API:
177 |
178 | ```python
179 | import asyncio
180 |
181 | # Fetch multiple resources concurrently
182 | tasks = [
183 | http_client.request_api(f"/resource/{i}", {}, domain="example")
184 | for i in range(10)
185 | ]
186 | results = await asyncio.gather(*tasks)
187 | ```
188 |
189 | ## Monitoring and Debugging
190 |
191 | ### Request Metrics
192 |
193 | The client tracks metrics per endpoint:
194 |
195 | - Request count
196 | - Error count
197 | - Cache hit/miss ratio
198 | - Average response time
199 |
200 | Access metrics:
201 |
202 | ```python
203 | from biomcp.http_client import get_metrics
204 | metrics = get_metrics()
205 | ```
206 |
207 | ### Debug Logging
208 |
209 | Enable debug logging to see all HTTP requests:
210 |
211 | ```python
212 | import logging
213 | logging.getLogger("biomcp.http_client").setLevel(logging.DEBUG)
214 | ```
215 |
216 | ## Best Practices
217 |
218 | 1. **Always use the centralized client** for external HTTP calls
219 | 2. **Register new endpoints** in the endpoint registry
220 | 3. **Set appropriate cache TTLs** based on data volatility
221 | 4. **Handle errors gracefully** with user-friendly messages
222 | 5. **Test with offline mode** to ensure cache coverage
223 | 6. **Monitor rate limits** to avoid API throttling
224 | 7. **Use domain-specific adapters** for complex APIs
225 |
226 | ## Endpoint Registration
227 |
228 | Register new endpoints in `endpoint_registry.py`:
229 |
230 | ```python
231 | registry.register(
232 | "my_api_endpoint",
233 | EndpointInfo(
234 | url="https://api.example.com/v1/data",
235 | category=EndpointCategory.BIOMEDICAL_LITERATURE,
236 | data_types=[DataType.RESEARCH_ARTICLES],
237 | description="My API for fetching data",
238 | compliance_notes="Public API, no PII",
239 | rate_limit="100 requests/minute"
240 | )
241 | )
242 | ```
243 |
244 | This ensures the endpoint is documented and tracked properly.
245 |
```
--------------------------------------------------------------------------------
/tests/tdd/articles/test_cbioportal_integration.py:
--------------------------------------------------------------------------------
```python
1 | """Test cBioPortal integration with article searches."""
2 |
3 | import json
4 |
5 | import pytest
6 |
7 | from biomcp.articles.search import PubmedRequest
8 | from biomcp.articles.unified import search_articles_unified
9 |
10 |
class TestArticleCBioPortalIntegration:
    """Test that cBioPortal summaries appear in article searches.

    These are live integration tests: they hit PubMed/PubTator and the
    cBioPortal API, so assertions are deliberately tolerant of empty or
    changing upstream data.
    """

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_article_search_with_gene_includes_cbioportal(self):
        """Test that searching articles for a gene includes cBioPortal summary."""
        request = PubmedRequest(
            genes=["BRAF"],
            keywords=["melanoma"],
        )

        # Test markdown output
        result = await search_articles_unified(
            request,
            include_pubmed=True,
            include_preprints=False,
            output_json=False,
        )

        # Should include cBioPortal summary
        assert "cBioPortal Summary for BRAF" in result
        assert "Mutation Frequency" in result
        # Top Hotspots is only included when mutations are found
        # When cBioPortal API returns empty data, it won't be present
        if "0.0%" not in result: # If mutation frequency is not 0
            assert "Top Hotspots" in result
        assert "---" in result # Separator between summary and articles

        # Should still include article results
        assert "pmid" in result or "Title" in result or "Record" in result

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_article_search_json_with_gene(self):
        """Test JSON output includes cBioPortal summary."""
        request = PubmedRequest(
            genes=["TP53"],
            keywords=["cancer"],
        )

        result = await search_articles_unified(
            request,
            include_pubmed=True,
            include_preprints=False,
            output_json=True,
        )

        # Parse JSON
        data = json.loads(result)

        # Should have both summary and articles
        assert "cbioportal_summary" in data
        assert "articles" in data
        assert "TP53" in data["cbioportal_summary"]
        assert isinstance(data["articles"], list)
        assert len(data["articles"]) > 0

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_article_search_without_gene_no_cbioportal(self):
        """Test that searches without genes don't include cBioPortal summary."""
        request = PubmedRequest(
            diseases=["hypertension"],
            keywords=["treatment"],
        )

        # Test markdown output
        result = await search_articles_unified(
            request,
            include_pubmed=True,
            include_preprints=False,
            output_json=False,
        )

        # Should NOT include cBioPortal summary
        assert "cBioPortal Summary" not in result
        assert "Mutation Frequency" not in result

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_article_search_multiple_genes(self):
        """Test that searching with multiple genes uses the first one."""
        request = PubmedRequest(
            genes=["KRAS", "NRAS", "BRAF"],
            diseases=["colorectal cancer"],
        )

        result = await search_articles_unified(
            request,
            include_pubmed=True,
            include_preprints=False,
            output_json=False,
        )

        # Should include cBioPortal summary for KRAS (first gene)
        assert "cBioPortal Summary for KRAS" in result
        # Common KRAS hotspot
        assert "G12" in result or "mutation" in result

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_article_search_with_invalid_gene(self):
        """Test graceful handling of invalid gene names."""
        request = PubmedRequest(
            genes=["BRCA1"],  # Valid gene
            keywords=["cancer"],
        )

        # First check that we handle invalid genes gracefully
        # by using a real gene that might have cBioPortal data
        result = await search_articles_unified(
            request,
            include_pubmed=True,
            include_preprints=False,
            output_json=False,
        )

        # Should have some content - either cBioPortal summary or articles
        assert len(result) > 50  # Some reasonable content

        # Now test with a gene that's valid for search but not in cBioPortal
        request2 = PubmedRequest(
            genes=["ACE2"],  # Real gene but might not be in cancer studies
            keywords=["COVID-19"],
        )

        result2 = await search_articles_unified(
            request2,
            include_pubmed=True,
            include_preprints=False,
            output_json=False,
        )

        # Should return results even if cBioPortal data is not available
        assert len(result2) > 50

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_article_search_with_preprints_and_cbioportal(self):
        """Test that cBioPortal summary works with preprint searches too."""
        request = PubmedRequest(
            genes=["EGFR"],
            keywords=["lung cancer", "osimertinib"],
        )

        result = await search_articles_unified(
            request,
            include_pubmed=True,
            include_preprints=True,
            output_json=False,
        )

        # Should include cBioPortal summary
        assert "cBioPortal Summary for EGFR" in result
        # Should include both peer-reviewed and preprint results
        assert ("pmid" in result or "Title" in result) and (
            "Preprint" in result
            or "bioRxiv" in result
            or "peer_reviewed" in result
        )
```
--------------------------------------------------------------------------------
/src/biomcp/diseases/getter.py:
--------------------------------------------------------------------------------
```python
1 | """Disease information retrieval from MyDisease.info."""
2 |
3 | import json
4 | import logging
5 | from typing import Annotated
6 |
7 | from pydantic import Field
8 |
9 | from ..integrations import BioThingsClient
10 | from ..render import to_markdown
11 |
12 | logger = logging.getLogger(__name__)
13 |
14 |
def _add_disease_links(disease_info, result: dict) -> None:
    """Attach a ``_links`` section of browser/search URLs to ``result``."""
    links: dict[str, str] = {}

    # MONDO browser link, only for well-formed "MONDO:..." identifiers.
    if (
        disease_info.mondo
        and isinstance(disease_info.mondo, dict)
        and (mondo_id := disease_info.mondo.get("mondo"))
        and isinstance(mondo_id, str)
        and mondo_id.startswith("MONDO:")
    ):
        obo_id = mondo_id.replace(":", "_")
        links["MONDO Browser"] = (
            f"https://www.ebi.ac.uk/ols/ontologies/mondo/terms?iri=http://purl.obolibrary.org/obo/{obo_id}"
        )

    # Disease Ontology link from the DOID cross-reference, if present.
    if (
        disease_info.xrefs
        and isinstance(disease_info.xrefs, dict)
        and (doid := disease_info.xrefs.get("doid"))
    ):
        if isinstance(doid, list) and doid:
            # Take the first entry, coercing non-strings defensively.
            first = doid[0] if isinstance(doid[0], str) else str(doid[0])
            links["Disease Ontology"] = (
                f"https://www.disease-ontology.org/?id={first}"
            )
        elif isinstance(doid, str):
            links["Disease Ontology"] = (
                f"https://www.disease-ontology.org/?id={doid}"
            )

    # PubMed search for the canonical disease name.
    if disease_info.name:
        links["PubMed Search"] = (
            f"https://pubmed.ncbi.nlm.nih.gov/?term={disease_info.name.replace(' ', '+')}"
        )

    if links:
        result["_links"] = links
55 |
56 |
def _format_disease_output(disease_info, result: dict) -> None:
    """Condense synonyms and phenotypes in ``result`` into display strings."""
    synonyms = disease_info.synonyms
    if synonyms:
        # Show at most the first 10 synonyms, noting how many were omitted.
        shown = ", ".join(synonyms[:10])
        if len(synonyms) > 10:
            shown += f" (and {len(synonyms) - 10} more)"
        result["synonyms"] = shown

    phenotypes = disease_info.phenotypes
    if phenotypes:
        # Summarize with up to five phenotype names plus an omission count.
        names = [
            pheno["phenotype"]
            for pheno in phenotypes[:5]
            if isinstance(pheno, dict) and "phenotype" in pheno
        ]
        if names:
            summary = ", ".join(names)
            if len(phenotypes) > 5:
                summary += f" (and {len(phenotypes) - 5} more)"
            result["associated_phenotypes"] = summary
        # Remove the raw phenotypes data for cleaner output
        result.pop("phenotypes", None)
84 |
85 |
async def get_disease(
    disease_id_or_name: str,
    output_json: bool = False,
) -> str:
    """
    Get disease information from MyDisease.info.

    Args:
        disease_id_or_name: Disease ID (MONDO, DOID) or name (e.g., "melanoma", "MONDO:0016575")
        output_json: Return as JSON instead of markdown

    Returns:
        Disease information as markdown or JSON string
    """

    def _render(payload: dict) -> str:
        # Single place that honors the output_json flag.
        return (
            json.dumps(payload, indent=2)
            if output_json
            else to_markdown([payload])
        )

    client = BioThingsClient()

    try:
        disease_info = await client.get_disease_info(disease_id_or_name)

        if not disease_info:
            return _render({
                "error": f"Disease '{disease_id_or_name}' not found",
                "suggestion": "Please check the disease name or ID (MONDO:, DOID:, OMIM:, MESH:)",
            })

        # Convert to dict for rendering, then enrich with links and
        # display-friendly summaries.
        result = disease_info.model_dump(exclude_none=True)
        _add_disease_links(disease_info, result)
        _format_disease_output(disease_info, result)

        return _render(result)

    except Exception as e:
        logger.error(
            f"Error fetching disease info for {disease_id_or_name}: {e}"
        )
        return _render({
            "error": "Failed to retrieve disease information",
            "details": str(e),
        })
143 |
144 |
async def _disease_details(
    call_benefit: Annotated[
        str,
        "Define and summarize why this function is being called and the intended benefit",
    ],
    disease_id_or_name: Annotated[
        str,
        Field(
            description="Disease name (e.g., melanoma, GIST) or ID (e.g., MONDO:0016575, DOID:1909)"
        ),
    ],
) -> str:
    """
    Retrieves detailed information for a disease from MyDisease.info.

    This tool provides real-time disease annotations including:
    - Official disease name and definition
    - Disease synonyms and alternative names
    - Ontology mappings (MONDO, DOID, OMIM, etc.)
    - Associated phenotypes
    - Links to disease databases

    Parameters:
    - call_benefit: Define why this function is being called
    - disease_id_or_name: Disease name or ontology ID

    Process: Queries MyDisease.info API for up-to-date disease information
    Output: Markdown formatted disease information with definition and metadata

    Note: For clinical trials about diseases, use trial_searcher. For articles about diseases, use article_searcher.
    """
    # Thin MCP tool wrapper: delegate to the shared implementation and
    # always return markdown (call_benefit is for agent auditing only).
    return await get_disease(disease_id_or_name, output_json=False)
177 |
```
--------------------------------------------------------------------------------
/src/biomcp/connection_pool.py:
--------------------------------------------------------------------------------
```python
1 | """Connection pool manager with proper event loop lifecycle management.
2 |
3 | This module provides HTTP connection pooling that is properly integrated
4 | with asyncio event loops. It ensures that connection pools are:
5 | - Created per event loop to avoid cross-loop usage
6 | - Automatically cleaned up when event loops are garbage collected
7 | - Reused across requests for better performance
8 |
9 | Key Features:
10 | - Event loop isolation - each loop gets its own pools
11 | - Weak references prevent memory leaks
12 | - Automatic cleanup on loop destruction
13 | - Thread-safe pool management
14 |
15 | Example:
16 | ```python
17 | # Get a connection pool for the current event loop
18 | pool = await get_connection_pool(verify=True, timeout=httpx.Timeout(30))
19 |
20 | # Use the pool for multiple requests (no need to close)
21 | response = await pool.get("https://api.example.com/data")
22 | ```
23 |
24 | Environment Variables:
25 | BIOMCP_USE_CONNECTION_POOL: Enable/disable pooling (default: "true")
26 | """
27 |
28 | import asyncio
29 | import ssl
30 | import weakref
31 |
32 | # NOTE: httpx import is allowed in this file for connection pooling infrastructure
33 | import httpx
34 |
35 |
class EventLoopConnectionPools:
    """Manages connection pools per event loop.

    This class ensures that each asyncio event loop has its own set of
    connection pools, preventing cross-loop contamination and ensuring
    proper cleanup when event loops are destroyed.

    Attributes:
        _loop_pools: Weak key dictionary mapping event loops to their pools
        _lock: Asyncio lock for thread-safe pool creation
    """

    def __init__(self):
        # Use weak references to avoid keeping event loops alive
        self._loop_pools: weakref.WeakKeyDictionary = (
            weakref.WeakKeyDictionary()
        )
        # NOTE(review): this Lock is created at import time but may be
        # awaited from different event loops over the process lifetime;
        # on Python 3.10+ a Lock binds to the first loop that awaits it,
        # so cross-loop use can raise RuntimeError — confirm only one
        # loop is active at a time in deployment.
        self._lock = asyncio.Lock()

    async def get_pool(
        self, verify: ssl.SSLContext | str | bool, timeout: httpx.Timeout
    ) -> httpx.AsyncClient:
        """Get or create a connection pool for the current event loop.

        Args:
            verify: TLS verification setting (SSL context, CA path, or bool).
            timeout: httpx timeout configuration for the client.

        Returns:
            A pooled client bound to the running loop, or a single-use
            client when called with no event loop running.
        """
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No event loop running, return a single-use client
            return self._create_client(verify, timeout, pooled=False)

        # Get or create pools dict for this event loop
        async with self._lock:
            if loop not in self._loop_pools:
                self._loop_pools[loop] = {}
                # Register cleanup when loop is garbage collected
                self._register_loop_cleanup(loop)

            pools = self._loop_pools[loop]
            pool_key = self._get_pool_key(verify)

            # Reuse an existing, still-open pool when available
            if pool_key in pools and not pools[pool_key].is_closed:
                return pools[pool_key]

            # Create new pool
            client = self._create_client(verify, timeout, pooled=True)
            pools[pool_key] = client
            return client

    def _get_pool_key(self, verify: ssl.SSLContext | str | bool) -> str:
        """Generate a key for the connection pool.

        SSL contexts are keyed by identity: two distinct contexts with
        equal settings still get separate clients.
        """
        if isinstance(verify, ssl.SSLContext):
            return f"ssl_{id(verify)}"
        return str(verify)

    def _create_client(
        self,
        verify: ssl.SSLContext | str | bool,
        timeout: httpx.Timeout,
        pooled: bool = True,
    ) -> httpx.AsyncClient:
        """Create a new HTTP client.

        Args:
            verify: TLS verification setting passed through to httpx.
            timeout: httpx timeout configuration.
            pooled: When True, enable keep-alive limits for reuse; when
                False, build a single-use client with no keep-alive.
        """
        if pooled:
            limits = httpx.Limits(
                max_keepalive_connections=20,
                max_connections=100,
                keepalive_expiry=30,
            )
        else:
            # Single-use client
            limits = httpx.Limits(max_keepalive_connections=0)

        return httpx.AsyncClient(
            verify=verify,
            http2=False,  # HTTP/2 can add overhead
            timeout=timeout,
            limits=limits,
        )

    def _register_loop_cleanup(self, loop: asyncio.AbstractEventLoop):
        """Register cleanup for when the event loop is garbage collected.

        Bug fix: the finalizer callback must NOT reference ``loop``.
        ``weakref.finalize`` keeps strong references to its callback (and
        that callback's closure/defaults), so a cleanup function that
        captured the loop would keep the loop alive forever — the
        finalizer would never fire and the pools would leak. We therefore
        capture only this loop's pools dict.
        """
        pools = self._loop_pools[loop]

        def cleanup(pools=pools):
            # Best-effort close of all clients for the dead loop. The
            # loop itself may already be gone, so close the transports
            # synchronously rather than awaiting aclose().
            import contextlib

            for client in list(pools.values()):
                if client and not client.is_closed:
                    with contextlib.suppress(Exception):
                        client._transport.close()

        # Register finalizer on the loop itself
        weakref.finalize(loop, cleanup)

    async def close_all(self):
        """Close all connection pools across every tracked event loop."""
        async with self._lock:
            all_clients = []
            for pools in self._loop_pools.values():
                all_clients.extend(pools.values())

            # Close all open clients concurrently; ignore individual
            # failures so one bad client can't block shutdown.
            close_tasks = [
                client.aclose()
                for client in all_clients
                if client and not client.is_closed
            ]
            if close_tasks:
                await asyncio.gather(*close_tasks, return_exceptions=True)

            self._loop_pools.clear()
153 |
154 |
# Global instance: one pool manager shared by the whole process.
_pool_manager = EventLoopConnectionPools()


async def get_connection_pool(
    verify: ssl.SSLContext | str | bool,
    timeout: httpx.Timeout,
) -> httpx.AsyncClient:
    """Get a connection pool for the current event loop.

    Thin module-level facade over the shared pool manager; see
    ``EventLoopConnectionPools.get_pool`` for semantics.
    """
    return await _pool_manager.get_pool(verify, timeout)


async def close_all_pools():
    """Close all connection pools.

    Intended for application shutdown; safe to call more than once.
    """
    await _pool_manager.close_all()
170 |
```