This is page 3 of 19. Use http://codebase.md/genomoncology/biomcp?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .github │ ├── actions │ │ └── setup-python-env │ │ └── action.yml │ ├── dependabot.yml │ └── workflows │ ├── ci.yml │ ├── deploy-docs.yml │ ├── main.yml.disabled │ ├── on-release-main.yml │ └── validate-codecov-config.yml ├── .gitignore ├── .pre-commit-config.yaml ├── BIOMCP_DATA_FLOW.md ├── CHANGELOG.md ├── CNAME ├── codecov.yaml ├── docker-compose.yml ├── Dockerfile ├── docs │ ├── apis │ │ ├── error-codes.md │ │ ├── overview.md │ │ └── python-sdk.md │ ├── assets │ │ ├── biomcp-cursor-locations.png │ │ ├── favicon.ico │ │ ├── icon.png │ │ ├── logo.png │ │ ├── mcp_architecture.txt │ │ └── remote-connection │ │ ├── 00_connectors.png │ │ ├── 01_add_custom_connector.png │ │ ├── 02_connector_enabled.png │ │ ├── 03_connect_to_biomcp.png │ │ ├── 04_select_google_oauth.png │ │ └── 05_success_connect.png │ ├── backend-services-reference │ │ ├── 01-overview.md │ │ ├── 02-biothings-suite.md │ │ ├── 03-cbioportal.md │ │ ├── 04-clinicaltrials-gov.md │ │ ├── 05-nci-cts-api.md │ │ ├── 06-pubtator3.md │ │ └── 07-alphagenome.md │ ├── blog │ │ ├── ai-assisted-clinical-trial-search-analysis.md │ │ ├── images │ │ │ ├── deep-researcher-video.png │ │ │ ├── researcher-announce.png │ │ │ ├── researcher-drop-down.png │ │ │ ├── researcher-prompt.png │ │ │ ├── trial-search-assistant.png │ │ │ └── what_is_biomcp_thumbnail.png │ │ └── researcher-persona-resource.md │ ├── changelog.md │ ├── CNAME │ ├── concepts │ │ ├── 01-what-is-biomcp.md │ │ ├── 02-the-deep-researcher-persona.md │ │ └── 03-sequential-thinking-with-the-think-tool.md │ ├── developer-guides │ │ ├── 01-server-deployment.md │ │ ├── 02-contributing-and-testing.md │ │ ├── 03-third-party-endpoints.md │ │ ├── 04-transport-protocol.md │ │ ├── 05-error-handling.md │ │ ├── 06-http-client-and-caching.md │ │ ├── 07-performance-optimizations.md │ │ └── generate_endpoints.py │ ├── faq-condensed.md │ 
├── FDA_SECURITY.md │ ├── genomoncology.md │ ├── getting-started │ │ ├── 01-quickstart-cli.md │ │ ├── 02-claude-desktop-integration.md │ │ └── 03-authentication-and-api-keys.md │ ├── how-to-guides │ │ ├── 01-find-articles-and-cbioportal-data.md │ │ ├── 02-find-trials-with-nci-and-biothings.md │ │ ├── 03-get-comprehensive-variant-annotations.md │ │ ├── 04-predict-variant-effects-with-alphagenome.md │ │ ├── 05-logging-and-monitoring-with-bigquery.md │ │ └── 06-search-nci-organizations-and-interventions.md │ ├── index.md │ ├── policies.md │ ├── reference │ │ ├── architecture-diagrams.md │ │ ├── quick-architecture.md │ │ ├── quick-reference.md │ │ └── visual-architecture.md │ ├── robots.txt │ ├── stylesheets │ │ ├── announcement.css │ │ └── extra.css │ ├── troubleshooting.md │ ├── tutorials │ │ ├── biothings-prompts.md │ │ ├── claude-code-biomcp-alphagenome.md │ │ ├── nci-prompts.md │ │ ├── openfda-integration.md │ │ ├── openfda-prompts.md │ │ ├── pydantic-ai-integration.md │ │ └── remote-connection.md │ ├── user-guides │ │ ├── 01-command-line-interface.md │ │ ├── 02-mcp-tools-reference.md │ │ └── 03-integrating-with-ides-and-clients.md │ └── workflows │ └── all-workflows.md ├── example_scripts │ ├── mcp_integration.py │ └── python_sdk.py ├── glama.json ├── LICENSE ├── lzyank.toml ├── Makefile ├── mkdocs.yml ├── package-lock.json ├── package.json ├── pyproject.toml ├── README.md ├── scripts │ ├── check_docs_in_mkdocs.py │ ├── check_http_imports.py │ └── generate_endpoints_doc.py ├── smithery.yaml ├── src │ └── biomcp │ ├── __init__.py │ ├── __main__.py │ ├── articles │ │ ├── __init__.py │ │ ├── autocomplete.py │ │ ├── fetch.py │ │ ├── preprints.py │ │ ├── search_optimized.py │ │ ├── search.py │ │ └── unified.py │ ├── biomarkers │ │ ├── __init__.py │ │ └── search.py │ ├── cbioportal_helper.py │ ├── circuit_breaker.py │ ├── cli │ │ ├── __init__.py │ │ ├── articles.py │ │ ├── biomarkers.py │ │ ├── diseases.py │ │ ├── health.py │ │ ├── interventions.py │ │ ├── main.py │ │ 
├── openfda.py │ │ ├── organizations.py │ │ ├── server.py │ │ ├── trials.py │ │ └── variants.py │ ├── connection_pool.py │ ├── constants.py │ ├── core.py │ ├── diseases │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── domain_handlers.py │ ├── drugs │ │ ├── __init__.py │ │ └── getter.py │ ├── exceptions.py │ ├── genes │ │ ├── __init__.py │ │ └── getter.py │ ├── http_client_simple.py │ ├── http_client.py │ ├── individual_tools.py │ ├── integrations │ │ ├── __init__.py │ │ ├── biothings_client.py │ │ └── cts_api.py │ ├── interventions │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── logging_filter.py │ ├── metrics_handler.py │ ├── metrics.py │ ├── openfda │ │ ├── __init__.py │ │ ├── adverse_events_helpers.py │ │ ├── adverse_events.py │ │ ├── cache.py │ │ ├── constants.py │ │ ├── device_events_helpers.py │ │ ├── device_events.py │ │ ├── drug_approvals.py │ │ ├── drug_labels_helpers.py │ │ ├── drug_labels.py │ │ ├── drug_recalls_helpers.py │ │ ├── drug_recalls.py │ │ ├── drug_shortages_detail_helpers.py │ │ ├── drug_shortages_helpers.py │ │ ├── drug_shortages.py │ │ ├── exceptions.py │ │ ├── input_validation.py │ │ ├── rate_limiter.py │ │ ├── utils.py │ │ └── validation.py │ ├── organizations │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── parameter_parser.py │ ├── prefetch.py │ ├── query_parser.py │ ├── query_router.py │ ├── rate_limiter.py │ ├── render.py │ ├── request_batcher.py │ ├── resources │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── instructions.md │ │ └── researcher.md │ ├── retry.py │ ├── router_handlers.py │ ├── router.py │ ├── shared_context.py │ ├── thinking │ │ ├── __init__.py │ │ ├── sequential.py │ │ └── session.py │ ├── thinking_tool.py │ ├── thinking_tracker.py │ ├── trials │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── nci_getter.py │ │ ├── nci_search.py │ │ └── search.py │ ├── utils │ │ ├── __init__.py │ │ ├── cancer_types_api.py │ │ ├── cbio_http_adapter.py │ │ ├── endpoint_registry.py │ │ ├── 
gene_validator.py │ │ ├── metrics.py │ │ ├── mutation_filter.py │ │ ├── query_utils.py │ │ ├── rate_limiter.py │ │ └── request_cache.py │ ├── variants │ │ ├── __init__.py │ │ ├── alphagenome.py │ │ ├── cancer_types.py │ │ ├── cbio_external_client.py │ │ ├── cbioportal_mutations.py │ │ ├── cbioportal_search_helpers.py │ │ ├── cbioportal_search.py │ │ ├── constants.py │ │ ├── external.py │ │ ├── filters.py │ │ ├── getter.py │ │ ├── links.py │ │ └── search.py │ └── workers │ ├── __init__.py │ ├── worker_entry_stytch.js │ ├── worker_entry.js │ └── worker.py ├── tests │ ├── bdd │ │ ├── cli_help │ │ │ ├── help.feature │ │ │ └── test_help.py │ │ ├── conftest.py │ │ ├── features │ │ │ └── alphagenome_integration.feature │ │ ├── fetch_articles │ │ │ ├── fetch.feature │ │ │ └── test_fetch.py │ │ ├── get_trials │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── get_variants │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── search_articles │ │ │ ├── autocomplete.feature │ │ │ ├── search.feature │ │ │ ├── test_autocomplete.py │ │ │ └── test_search.py │ │ ├── search_trials │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ ├── search_variants │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ └── steps │ │ └── test_alphagenome_steps.py │ ├── config │ │ └── test_smithery_config.py │ ├── conftest.py │ ├── data │ │ ├── ct_gov │ │ │ ├── clinical_trials_api_v2.yaml │ │ │ ├── trials_NCT04280705.json │ │ │ └── trials_NCT04280705.txt │ │ ├── myvariant │ │ │ ├── myvariant_api.yaml │ │ │ ├── myvariant_field_descriptions.csv │ │ │ ├── variants_full_braf_v600e.json │ │ │ ├── variants_full_braf_v600e.txt │ │ │ └── variants_part_braf_v600_multiple.json │ │ ├── openfda │ │ │ ├── drugsfda_detail.json │ │ │ ├── drugsfda_search.json │ │ │ ├── enforcement_detail.json │ │ │ └── enforcement_search.json │ │ └── pubtator │ │ ├── pubtator_autocomplete.json │ │ └── pubtator3_paper.txt │ ├── integration │ │ ├── test_openfda_integration.py │ │ ├── test_preprints_integration.py │ │ ├── 
test_simple.py │ │ └── test_variants_integration.py │ ├── tdd │ │ ├── articles │ │ │ ├── test_autocomplete.py │ │ │ ├── test_cbioportal_integration.py │ │ │ ├── test_fetch.py │ │ │ ├── test_preprints.py │ │ │ ├── test_search.py │ │ │ └── test_unified.py │ │ ├── conftest.py │ │ ├── drugs │ │ │ ├── __init__.py │ │ │ └── test_drug_getter.py │ │ ├── openfda │ │ │ ├── __init__.py │ │ │ ├── test_adverse_events.py │ │ │ ├── test_device_events.py │ │ │ ├── test_drug_approvals.py │ │ │ ├── test_drug_labels.py │ │ │ ├── test_drug_recalls.py │ │ │ ├── test_drug_shortages.py │ │ │ └── test_security.py │ │ ├── test_biothings_integration_real.py │ │ ├── test_biothings_integration.py │ │ ├── test_circuit_breaker.py │ │ ├── test_concurrent_requests.py │ │ ├── test_connection_pool.py │ │ ├── test_domain_handlers.py │ │ ├── test_drug_approvals.py │ │ ├── test_drug_recalls.py │ │ ├── test_drug_shortages.py │ │ ├── test_endpoint_documentation.py │ │ ├── test_error_scenarios.py │ │ ├── test_europe_pmc_fetch.py │ │ ├── test_mcp_integration.py │ │ ├── test_mcp_tools.py │ │ ├── test_metrics.py │ │ ├── test_nci_integration.py │ │ ├── test_nci_mcp_tools.py │ │ ├── test_network_policies.py │ │ ├── test_offline_mode.py │ │ ├── test_openfda_unified.py │ │ ├── test_pten_r173_search.py │ │ ├── test_render.py │ │ ├── test_request_batcher.py.disabled │ │ ├── test_retry.py │ │ ├── test_router.py │ │ ├── test_shared_context.py.disabled │ │ ├── test_unified_biothings.py │ │ ├── thinking │ │ │ ├── __init__.py │ │ │ └── test_sequential.py │ │ ├── trials │ │ │ ├── test_backward_compatibility.py │ │ │ ├── test_getter.py │ │ │ └── test_search.py │ │ ├── utils │ │ │ ├── test_gene_validator.py │ │ │ ├── test_mutation_filter.py │ │ │ ├── test_rate_limiter.py │ │ │ └── test_request_cache.py │ │ ├── variants │ │ │ ├── constants.py │ │ │ ├── test_alphagenome_api_key.py │ │ │ ├── test_alphagenome_comprehensive.py │ │ │ ├── test_alphagenome.py │ │ │ ├── test_cbioportal_mutations.py │ │ │ ├── 
test_cbioportal_search.py │ │ │ ├── test_external_integration.py │ │ │ ├── test_external.py │ │ │ ├── test_extract_gene_aa_change.py │ │ │ ├── test_filters.py │ │ │ ├── test_getter.py │ │ │ ├── test_links.py │ │ │ └── test_search.py │ │ └── workers │ │ └── test_worker_sanitization.js │ └── test_pydantic_ai_integration.py ├── THIRD_PARTY_ENDPOINTS.md ├── tox.ini ├── uv.lock └── wrangler.toml ``` # Files -------------------------------------------------------------------------------- /src/biomcp/interventions/getter.py: -------------------------------------------------------------------------------- ```python 1 | """Get specific intervention details via NCI CTS API.""" 2 | 3 | import logging 4 | from typing import Any 5 | 6 | from ..constants import NCI_INTERVENTIONS_URL 7 | from ..integrations.cts_api import CTSAPIError, make_cts_request 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | async def get_intervention( 13 | intervention_id: str, 14 | api_key: str | None = None, 15 | ) -> dict[str, Any]: 16 | """ 17 | Get detailed information about a specific intervention. 
18 | 19 | Args: 20 | intervention_id: Intervention ID 21 | api_key: Optional API key (if not provided, uses NCI_API_KEY env var) 22 | 23 | Returns: 24 | Dictionary with intervention details 25 | 26 | Raises: 27 | CTSAPIError: If the API request fails or intervention not found 28 | """ 29 | try: 30 | # Make API request 31 | url = f"{NCI_INTERVENTIONS_URL}/{intervention_id}" 32 | response = await make_cts_request( 33 | url=url, 34 | api_key=api_key, 35 | ) 36 | 37 | # Return the intervention data 38 | if "data" in response: 39 | return response["data"] 40 | elif "intervention" in response: 41 | return response["intervention"] 42 | else: 43 | return response 44 | 45 | except CTSAPIError: 46 | raise 47 | except Exception as e: 48 | logger.error(f"Failed to get intervention {intervention_id}: {e}") 49 | raise CTSAPIError(f"Failed to retrieve intervention: {e!s}") from e 50 | 51 | 52 | def _format_intervention_header(intervention: dict[str, Any]) -> list[str]: 53 | """Format intervention header and basic info.""" 54 | int_id = intervention.get( 55 | "id", intervention.get("intervention_id", "Unknown") 56 | ) 57 | name = intervention.get("name", "Unknown Intervention") 58 | int_type = intervention.get( 59 | "type", intervention.get("category", "Unknown") 60 | ) 61 | 62 | return [ 63 | f"## Intervention: {name}", 64 | "", 65 | "### Basic Information", 66 | f"- **ID**: {int_id}", 67 | f"- **Type**: {int_type}", 68 | ] 69 | 70 | 71 | def _format_intervention_synonyms(synonyms: Any) -> list[str]: 72 | """Format intervention synonyms section.""" 73 | if not synonyms: 74 | return [] 75 | 76 | lines = ["", "### Synonyms"] 77 | if isinstance(synonyms, list): 78 | for syn in synonyms: 79 | lines.append(f"- {syn}") 80 | else: 81 | lines.append(f"- {synonyms}") 82 | 83 | return lines 84 | 85 | 86 | def _format_intervention_regulatory(intervention: dict[str, Any]) -> list[str]: 87 | """Format regulatory information section.""" 88 | if not intervention.get("fda_approved"): 89 | return 
[] 90 | 91 | lines = [ 92 | "", 93 | "### Regulatory Status", 94 | f"- **FDA Approved**: {'Yes' if intervention['fda_approved'] else 'No'}", 95 | ] 96 | 97 | if intervention.get("approval_date"): 98 | lines.append(f"- **Approval Date**: {intervention['approval_date']}") 99 | 100 | return lines 101 | 102 | 103 | def _format_intervention_indications(indications: Any) -> list[str]: 104 | """Format clinical indications section.""" 105 | if not indications: 106 | return [] 107 | 108 | lines = ["", "### Clinical Indications"] 109 | if isinstance(indications, list): 110 | for indication in indications: 111 | lines.append(f"- {indication}") 112 | else: 113 | lines.append(f"- {indications}") 114 | 115 | return lines 116 | 117 | 118 | def format_intervention_details(intervention: dict[str, Any]) -> str: 119 | """ 120 | Format intervention details as markdown. 121 | 122 | Args: 123 | intervention: Intervention data dictionary 124 | 125 | Returns: 126 | Formatted markdown string 127 | """ 128 | lines = _format_intervention_header(intervention) 129 | 130 | # Add synonyms 131 | lines.extend( 132 | _format_intervention_synonyms(intervention.get("synonyms", [])) 133 | ) 134 | 135 | # Add description 136 | if intervention.get("description"): 137 | lines.extend([ 138 | "", 139 | "### Description", 140 | intervention["description"], 141 | ]) 142 | 143 | # Add mechanism of action for drugs 144 | if intervention.get("mechanism_of_action"): 145 | lines.extend([ 146 | "", 147 | "### Mechanism of Action", 148 | intervention["mechanism_of_action"], 149 | ]) 150 | 151 | # Add regulatory info 152 | lines.extend(_format_intervention_regulatory(intervention)) 153 | 154 | # Add clinical indications 155 | lines.extend( 156 | _format_intervention_indications(intervention.get("indications")) 157 | ) 158 | 159 | # Add related trials count if available 160 | if intervention.get("trial_count"): 161 | lines.extend([ 162 | "", 163 | "### Clinical Trial Activity", 164 | f"- **Number of Trials**: 
{intervention['trial_count']}", 165 | ]) 166 | 167 | return "\n".join(lines) 168 | ``` -------------------------------------------------------------------------------- /src/biomcp/thinking/session.py: -------------------------------------------------------------------------------- ```python 1 | """Session management for sequential thinking.""" 2 | 3 | import uuid 4 | from collections import defaultdict 5 | from dataclasses import dataclass, field 6 | from datetime import datetime 7 | from typing import Any 8 | 9 | 10 | @dataclass 11 | class ThoughtEntry: 12 | """Represents a single thought in the thinking process.""" 13 | 14 | thought: str 15 | thought_number: int 16 | total_thoughts: int 17 | next_thought_needed: bool 18 | timestamp: datetime = field(default_factory=datetime.now) 19 | is_revision: bool = False 20 | revises_thought: int | None = None 21 | branch_from_thought: int | None = None 22 | branch_id: str | None = None 23 | metadata: dict[str, Any] = field(default_factory=dict) 24 | 25 | 26 | @dataclass 27 | class ThinkingSession: 28 | """Manages state for a thinking session.""" 29 | 30 | session_id: str = field(default_factory=lambda: str(uuid.uuid4())) 31 | created_at: datetime = field(default_factory=datetime.now) 32 | thought_history: list[ThoughtEntry] = field(default_factory=list) 33 | thought_branches: dict[str, list[ThoughtEntry]] = field( 34 | default_factory=lambda: defaultdict(list) 35 | ) 36 | metadata: dict[str, Any] = field(default_factory=dict) 37 | 38 | def add_thought(self, entry: ThoughtEntry) -> None: 39 | """Add a thought to the session.""" 40 | # If this is a revision, replace the original thought 41 | if entry.is_revision and entry.revises_thought: 42 | for i, thought in enumerate(self.thought_history): 43 | if thought.thought_number == entry.revises_thought: 44 | self.thought_history[i] = entry 45 | return 46 | 47 | # Add to appropriate collection 48 | if entry.branch_id: 49 | self.thought_branches[entry.branch_id].append(entry) 50 | 
else: 51 | self.thought_history.append(entry) 52 | 53 | def get_thought(self, thought_number: int) -> ThoughtEntry | None: 54 | """Get a specific thought by number.""" 55 | for thought in self.thought_history: 56 | if thought.thought_number == thought_number: 57 | return thought 58 | return None 59 | 60 | def get_branch_thoughts(self, branch_id: str) -> list[ThoughtEntry]: 61 | """Get all thoughts in a specific branch.""" 62 | return self.thought_branches.get(branch_id, []) 63 | 64 | def get_all_thoughts(self) -> list[ThoughtEntry]: 65 | """Get all thoughts across main history and branches.""" 66 | all_thoughts = list(self.thought_history) 67 | for branch_thoughts in self.thought_branches.values(): 68 | all_thoughts.extend(branch_thoughts) 69 | return sorted(all_thoughts, key=lambda t: t.timestamp) 70 | 71 | 72 | class SessionManager: 73 | """Manages multiple thinking sessions.""" 74 | 75 | def __init__(self): 76 | self.sessions: dict[str, ThinkingSession] = {} 77 | self._current_session_id: str | None = None 78 | 79 | def create_session(self) -> ThinkingSession: 80 | """Create a new thinking session.""" 81 | session = ThinkingSession() 82 | self.sessions[session.session_id] = session 83 | self._current_session_id = session.session_id 84 | return session 85 | 86 | def get_session( 87 | self, session_id: str | None = None 88 | ) -> ThinkingSession | None: 89 | """Get a session by ID or the current session.""" 90 | if session_id: 91 | return self.sessions.get(session_id) 92 | elif self._current_session_id: 93 | return self.sessions.get(self._current_session_id) 94 | return None 95 | 96 | def get_or_create_session( 97 | self, session_id: str | None = None 98 | ) -> ThinkingSession: 99 | """Get existing session or create new one.""" 100 | if session_id and session_id in self.sessions: 101 | self._current_session_id = session_id 102 | return self.sessions[session_id] 103 | 104 | session = self.get_session() 105 | if not session: 106 | session = self.create_session() 107 
| return session 108 | 109 | def clear_session(self, session_id: str | None = None) -> None: 110 | """Clear a specific session or the current session.""" 111 | if session_id: 112 | self.sessions.pop(session_id, None) 113 | if self._current_session_id == session_id: 114 | self._current_session_id = None 115 | elif self._current_session_id: 116 | self.sessions.pop(self._current_session_id, None) 117 | self._current_session_id = None 118 | 119 | def clear_all_sessions(self) -> None: 120 | """Clear all sessions.""" 121 | self.sessions.clear() 122 | self._current_session_id = None 123 | 124 | 125 | # Global session manager instance 126 | _session_manager = SessionManager() 127 | ``` -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- ```yaml 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [main, develop] 6 | pull_request: 7 | branches: [main] 8 | workflow_dispatch: 9 | 10 | env: 11 | PYTHON_VERSION: "3.12" 12 | UV_VERSION: "0.4.29" 13 | 14 | jobs: 15 | # Quality check from main.yml - uses make check 16 | quality: 17 | runs-on: ubuntu-latest 18 | name: Quality 19 | steps: 20 | - name: Check out 21 | uses: actions/checkout@v5 22 | 23 | - uses: actions/cache@v4 24 | with: 25 | path: ~/.cache/pre-commit 26 | key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }} 27 | 28 | - name: Set up Python 29 | uses: actions/setup-python@v6 30 | with: 31 | python-version: ${{ env.PYTHON_VERSION }} 32 | 33 | - name: Install uv 34 | uses: astral-sh/setup-uv@v7 35 | with: 36 | version: ${{ env.UV_VERSION }} 37 | 38 | - name: Install dependencies 39 | run: | 40 | uv sync --group dev 41 | 42 | - name: Run checks 43 | run: make check 44 | 45 | # Tests and type check specifically on Python 3.11 46 | tests-and-type-check: 47 | runs-on: ubuntu-latest 48 | name: Tests and Type Check (Python 3.11) 49 | steps: 50 | - name: Check out 51 | uses: 
actions/checkout@v5 52 | 53 | - name: Set up Python 54 | uses: actions/setup-python@v6 55 | with: 56 | python-version: "3.11" 57 | 58 | - name: Install uv 59 | uses: astral-sh/setup-uv@v7 60 | with: 61 | version: ${{ env.UV_VERSION }} 62 | 63 | - name: Install dependencies 64 | run: | 65 | uv sync --group dev 66 | 67 | - name: Run tests 68 | run: uv run python -m pytest tests -m "not integration" --cov --cov-config=pyproject.toml --cov-report=xml 69 | 70 | - name: Check typing 71 | run: uv run mypy 72 | 73 | - name: Upload coverage reports to Codecov with GitHub Action on Python 3.11 74 | uses: codecov/codecov-action@v5 75 | 76 | # Documentation check from main.yml 77 | check-docs: 78 | runs-on: ubuntu-latest 79 | name: Check Docs 80 | steps: 81 | - name: Check out 82 | uses: actions/checkout@v5 83 | 84 | - name: Set up Python 85 | uses: actions/setup-python@v6 86 | with: 87 | python-version: ${{ env.PYTHON_VERSION }} 88 | 89 | - name: Install uv 90 | uses: astral-sh/setup-uv@v7 91 | with: 92 | version: ${{ env.UV_VERSION }} 93 | 94 | - name: Install dependencies 95 | run: | 96 | uv sync --group dev 97 | 98 | - name: Check if documentation can be built 99 | run: uv run mkdocs build -s 100 | 101 | # Build package check 102 | build-package: 103 | runs-on: ubuntu-latest 104 | name: Build Package 105 | steps: 106 | - uses: actions/checkout@v5 107 | 108 | - name: Set up Python 109 | uses: actions/setup-python@v6 110 | with: 111 | python-version: ${{ env.PYTHON_VERSION }} 112 | 113 | - name: Install uv 114 | uses: astral-sh/setup-uv@v7 115 | with: 116 | version: ${{ env.UV_VERSION }} 117 | 118 | - name: Build package 119 | run: | 120 | uvx --from build pyproject-build --installer uv 121 | 122 | - name: Check package 123 | run: | 124 | uvx twine check dist/* 125 | 126 | - name: Upload artifacts 127 | uses: actions/upload-artifact@v4 128 | with: 129 | name: dist 130 | path: dist/ 131 | 132 | # MCP integration test - quick check 133 | test-mcp: 134 | runs-on: ubuntu-latest 
135 | name: Test MCP Integration 136 | steps: 137 | - uses: actions/checkout@v5 138 | 139 | - name: Set up Python 140 | uses: actions/setup-python@v6 141 | with: 142 | python-version: ${{ env.PYTHON_VERSION }} 143 | 144 | - name: Install uv 145 | uses: astral-sh/setup-uv@v7 146 | with: 147 | version: ${{ env.UV_VERSION }} 148 | 149 | - name: Install dependencies 150 | run: | 151 | uv sync --group dev 152 | 153 | - name: Test MCP server startup 154 | run: | 155 | timeout 10s uv run biomcp run || code=$?; if [[ $code -ne 124 && $code -ne 0 ]]; then exit $code; fi 156 | 157 | - name: Run MCP integration tests 158 | run: | 159 | uv run python -m pytest tests/tdd/test_mcp_integration.py -v 160 | 161 | # Run integration tests separately - allowed to fail 162 | integration-tests: 163 | runs-on: ubuntu-latest 164 | name: Integration Tests (Optional) 165 | continue-on-error: true 166 | steps: 167 | - name: Check out 168 | uses: actions/checkout@v5 169 | 170 | - name: Set up Python 171 | uses: actions/setup-python@v6 172 | with: 173 | python-version: "3.11" 174 | 175 | - name: Install uv 176 | uses: astral-sh/setup-uv@v7 177 | with: 178 | version: ${{ env.UV_VERSION }} 179 | 180 | - name: Install dependencies 181 | run: | 182 | uv sync --group dev 183 | 184 | - name: Run integration tests 185 | run: | 186 | uv run python -m pytest tests -m "integration" -v --tb=short 187 | continue-on-error: true 188 | ``` -------------------------------------------------------------------------------- /docs/backend-services-reference/03-cbioportal.md: -------------------------------------------------------------------------------- ```markdown 1 | # cBioPortal Integration 2 | 3 | BioMCP integrates with [cBioPortal](https://www.cbioportal.org/), a comprehensive cancer genomics portal that provides visualization and analysis tools for large-scale cancer genomics datasets. 
4 | 5 | ## Overview 6 | 7 | The cBioPortal integration enhances article searches by automatically including relevant cancer genomics data when searching for genes. This integration provides: 8 | 9 | 1. **Gene-level summaries** - Mutation frequency and distribution across cancer studies 10 | 2. **Mutation-specific searches** - Find studies containing specific mutations (e.g., BRAF V600E) 11 | 3. **Cancer type resolution** - Accurate cancer type categorization using cBioPortal's API 12 | 13 | ## How It Works 14 | 15 | ### Automatic Integration 16 | 17 | When you search for articles with a gene parameter, BioMCP automatically queries cBioPortal to provide additional context: 18 | 19 | ```python 20 | # Basic gene search includes cBioPortal summary 21 | search(domain="article", genes=["BRAF"], diseases=["melanoma"]) 22 | ``` 23 | 24 | This returns: 25 | 26 | - Standard PubMed/PubTator3 article results 27 | - cBioPortal summary showing mutation frequency across cancer studies 28 | - Top cancer types where the gene is mutated 29 | 30 | ### Mutation-Specific Searches 31 | 32 | To search for specific mutations, include the mutation notation in keywords: 33 | 34 | ```python 35 | # Search for BRAF V600E mutation 36 | search(domain="article", genes=["BRAF"], keywords=["V600E"]) 37 | 38 | # Search for SRSF2 F57Y mutation 39 | search(domain="article", genes=["SRSF2"], keywords=["F57Y"]) 40 | 41 | # Use wildcards for mutation patterns (e.g., any amino acid at position 57) 42 | search(domain="article", genes=["SRSF2"], keywords=["F57*"]) 43 | ``` 44 | 45 | Mutation-specific searches return: 46 | 47 | - Total number of studies in cBioPortal 48 | - Number of studies containing the mutation 49 | - Top studies ranked by mutation count 50 | - Cancer type distribution 51 | 52 | ## Example Output 53 | 54 | ### Gene-Level Summary 55 | 56 | ``` 57 | ### cBioPortal Summary for BRAF 58 | - **Mutation Frequency**: 76.7% (368 mutations in 480 samples) 59 | - **Top Cancer Types**: Melanoma 
(45%), Thyroid (23%), Colorectal (18%) 60 | - **Top Mutations**: V600E (89%), V600K (7%), G469A (2%) 61 | ``` 62 | 63 | ### Mutation-Specific Results 64 | 65 | ``` 66 | ### cBioPortal Mutation Search: BRAF 67 | **Specific Mutation**: V600E 68 | - **Total Studies**: 2340 69 | - **Studies with Mutation**: 170 70 | - **Total Mutations Found**: 5780 71 | 72 | **Top Studies by Mutation Count:** 73 | | Count | Study ID | Cancer Type | Study Name | 74 | |-------|----------|-------------|------------| 75 | | 804 | msk_met_2021 | Mixed Cancer Types | MSK MetTropism (MSK, Cell 2021) | 76 | | 555 | msk_chord_2024 | Mixed Cancer Types | MSK-CHORD (MSK, Nature 2024) | 77 | | 295 | msk_impact_2017 | Mixed Cancer Types | MSK-IMPACT Clinical Sequencing Cohort | 78 | ``` 79 | 80 | ## Supported Mutation Notations 81 | 82 | The integration recognizes standard protein change notation: 83 | 84 | - **Specific mutations**: `V600E`, `F57Y`, `T790M` 85 | - **Wildcard patterns**: `F57*` (matches F57Y, F57L, etc.) 86 | - **Multiple mutations**: Include multiple keywords for OR search 87 | 88 | ## API Details 89 | 90 | ### Endpoints Used 91 | 92 | 1. **Gene Information**: `/api/genes/{gene}` 93 | 2. **Cancer Types**: `/api/cancer-types` 94 | 3. **Mutation Data**: `/api/mutations/fetch` 95 | 4. **Study Information**: `/api/studies` 96 | 97 | ### Rate Limiting 98 | 99 | - Conservative rate limit of 5 requests/second 100 | - Results cached for 15-30 minutes (mutations) or 24 hours (cancer types) 101 | 102 | ### Authentication 103 | 104 | Optional authentication via environment variable: 105 | 106 | ```bash 107 | export CBIO_TOKEN="your-api-token" 108 | ``` 109 | 110 | Public cBioPortal instance works without authentication but may have rate limits. 111 | 112 | ## CLI Usage 113 | 114 | For detailed command-line options for searching articles with cBioPortal integration, see the [CLI User Guide](../user-guides/01-command-line-interface.md#article-commands). 
115 | 116 | ## Performance Considerations 117 | 118 | 1. **Caching**: Results are cached to minimize API calls 119 | 120 | - Gene summaries: 15 minutes 121 | - Mutation searches: 30 minutes 122 | - Cancer types: 24 hours 123 | 124 | 2. **Graceful Degradation**: If cBioPortal is unavailable, searches continue without the additional data 125 | 126 | 3. **Parallel Processing**: API calls are made in parallel with article searches for optimal performance 127 | 128 | ## Limitations 129 | 130 | 1. Only works with valid HUGO gene symbols 131 | 2. Mutation searches require exact protein change notation 132 | 3. Limited to mutations in cBioPortal's curated studies 133 | 4. Rate limits may apply for high-volume usage 134 | 135 | ## Error Handling 136 | 137 | The integration handles various error scenarios: 138 | 139 | - Invalid gene symbols are validated before API calls 140 | - Network timeouts fall back to article-only results 141 | - API errors are logged but don't block search results 142 | ``` -------------------------------------------------------------------------------- /src/biomcp/utils/cancer_types_api.py: -------------------------------------------------------------------------------- ```python 1 | """Cancer type utilities using cBioPortal API.""" 2 | 3 | import logging 4 | 5 | from ..utils.cbio_http_adapter import CBioHTTPAdapter 6 | from ..utils.request_cache import request_cache 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class CancerTypeAPIClient: 12 | """Client for fetching cancer types from cBioPortal API.""" 13 | 14 | def __init__(self): 15 | """Initialize the cancer type API client.""" 16 | self.http_adapter = CBioHTTPAdapter() 17 | # Cache for cancer types 18 | self._cancer_types_cache: dict[str, str] | None = None 19 | 20 | @request_cache(ttl=86400) # Cache for 24 hours 21 | async def get_all_cancer_types(self) -> dict[str, str]: 22 | """Fetch all cancer types from cBioPortal API. 
23 | 24 | Returns: 25 | Dictionary mapping cancer type IDs to display names 26 | """ 27 | if self._cancer_types_cache is not None: 28 | return self._cancer_types_cache 29 | 30 | try: 31 | cancer_types, error = await self.http_adapter.get( 32 | "/cancer-types", 33 | endpoint_key="cbioportal_cancer_types", 34 | cache_ttl=86400, # 24 hours 35 | ) 36 | 37 | if error: 38 | logger.error(f"Failed to fetch cancer types: {error.message}") 39 | return {} 40 | 41 | if cancer_types: 42 | # Build mapping from ID to name 43 | result = {} 44 | for ct in cancer_types: 45 | cancer_type_id = ct.get("cancerTypeId", "") 46 | name = ct.get("name", "") 47 | 48 | if cancer_type_id and name: 49 | result[cancer_type_id.lower()] = name 50 | 51 | # Also add common abbreviations 52 | short_name = ct.get("shortName", "") 53 | if short_name and short_name != cancer_type_id: 54 | result[short_name.lower()] = name 55 | 56 | self._cancer_types_cache = result 57 | logger.info(f"Loaded {len(result)} cancer types from API") 58 | return result 59 | 60 | return {} 61 | 62 | except Exception as e: 63 | logger.error(f"Error fetching cancer types: {e}") 64 | return {} 65 | 66 | async def get_cancer_type_name(self, cancer_type_id: str) -> str: 67 | """Get the display name for a cancer type ID. 
68 | 69 | Args: 70 | cancer_type_id: The cancer type identifier 71 | 72 | Returns: 73 | Display name or the original ID if not found 74 | """ 75 | if not cancer_type_id: 76 | return "Unknown" 77 | 78 | cancer_types = await self.get_all_cancer_types() 79 | 80 | # Try exact match (case-insensitive) 81 | normalized_id = cancer_type_id.lower() 82 | if normalized_id in cancer_types: 83 | return cancer_types[normalized_id] 84 | 85 | # If not found, return the original ID with title case 86 | if cancer_type_id == cancer_type_id.lower(): 87 | return cancer_type_id.title() 88 | return cancer_type_id 89 | 90 | @request_cache(ttl=3600) # Cache for 1 hour 91 | async def get_study_cancer_type(self, study_id: str) -> str: 92 | """Get cancer type for a specific study. 93 | 94 | Args: 95 | study_id: The study identifier 96 | 97 | Returns: 98 | Cancer type name or "Unknown" 99 | """ 100 | try: 101 | study_data, error = await self.http_adapter.get( 102 | f"/studies/{study_id}", 103 | endpoint_key="cbioportal_studies", 104 | cache_ttl=3600, # 1 hour 105 | ) 106 | 107 | if error or not study_data: 108 | logger.debug(f"Study {study_id} not found") 109 | return "Unknown" 110 | 111 | cancer_type_id = study_data.get("cancerType", {}).get( 112 | "cancerTypeId", "" 113 | ) 114 | 115 | if cancer_type_id and cancer_type_id != "unknown": 116 | return await self.get_cancer_type_name(cancer_type_id) 117 | 118 | # Fallback to the cancer type name directly 119 | cancer_type_name = study_data.get("cancerType", {}).get("name", "") 120 | if cancer_type_name: 121 | return cancer_type_name 122 | 123 | return "Unknown" 124 | 125 | except Exception as e: 126 | logger.debug(f"Error fetching study {study_id}: {e}") 127 | return "Unknown" 128 | 129 | 130 | # Global instance for reuse 131 | _cancer_type_client: CancerTypeAPIClient | None = None 132 | 133 | 134 | def get_cancer_type_client() -> CancerTypeAPIClient: 135 | """Get or create the global cancer type client.""" 136 | global _cancer_type_client 137 | 
if _cancer_type_client is None: 138 | _cancer_type_client = CancerTypeAPIClient() 139 | return _cancer_type_client 140 | ``` -------------------------------------------------------------------------------- /tests/tdd/utils/test_mutation_filter.py: -------------------------------------------------------------------------------- ```python 1 | """Tests for mutation filter utility.""" 2 | 3 | from biomcp.utils.mutation_filter import MutationFilter 4 | 5 | 6 | class MockMutation: 7 | """Mock mutation object for testing.""" 8 | 9 | def __init__(self, protein_change: str): 10 | self.protein_change = protein_change 11 | 12 | 13 | class TestMutationFilter: 14 | """Test mutation filtering functionality.""" 15 | 16 | def test_specific_mutation_filter(self): 17 | """Test filtering for specific mutations.""" 18 | mutation_filter = MutationFilter(specific_mutation="V600E") 19 | 20 | assert mutation_filter.matches("V600E") 21 | assert not mutation_filter.matches("V600K") 22 | assert not mutation_filter.matches("V600") 23 | assert not mutation_filter.matches("") 24 | 25 | def test_wildcard_pattern_filter(self): 26 | """Test filtering with wildcard patterns.""" 27 | mutation_filter = MutationFilter(pattern="V600*") 28 | 29 | assert mutation_filter.matches("V600E") 30 | assert mutation_filter.matches("V600K") 31 | assert mutation_filter.matches("V600D") 32 | assert not mutation_filter.matches("V601E") 33 | assert not mutation_filter.matches("K600E") 34 | 35 | def test_pattern_without_wildcard(self): 36 | """Test pattern matching without wildcard.""" 37 | # Pattern does exact match via regex (no prefix matching without *) 38 | mutation_filter = MutationFilter(pattern="F57") 39 | 40 | # Exact match works 41 | assert mutation_filter.matches("F57") 42 | # No prefix matching without wildcard 43 | assert not mutation_filter.matches("F57Y") 44 | assert not mutation_filter.matches("F57L") 45 | assert not mutation_filter.matches("F58Y") 46 | 47 | def test_no_filter(self): 48 | """Test when 
no filter is specified.""" 49 | mutation_filter = MutationFilter() 50 | 51 | assert mutation_filter.matches("V600E") 52 | assert mutation_filter.matches("anything") 53 | # Empty protein change returns False even with no filter 54 | assert not mutation_filter.matches("") 55 | 56 | def test_filter_mutations_list(self): 57 | """Test filtering a list of mutations.""" 58 | mutations = [ 59 | MockMutation("V600E"), 60 | MockMutation("V600K"), 61 | MockMutation("V600D"), 62 | MockMutation("T790M"), 63 | MockMutation("L858R"), 64 | ] 65 | 66 | # Test specific mutation 67 | mutation_filter1 = MutationFilter(specific_mutation="V600E") 68 | filtered1 = mutation_filter1.filter_mutations(mutations) 69 | assert len(filtered1) == 1 70 | assert filtered1[0].protein_change == "V600E" 71 | 72 | # Test pattern 73 | mutation_filter2 = MutationFilter(pattern="V600*") 74 | filtered2 = mutation_filter2.filter_mutations(mutations) 75 | assert len(filtered2) == 3 76 | assert all(m.protein_change.startswith("V600") for m in filtered2) 77 | 78 | # Test no filter 79 | mutation_filter3 = MutationFilter() 80 | filtered3 = mutation_filter3.filter_mutations(mutations) 81 | assert len(filtered3) == 5 82 | 83 | def test_string_representations(self): 84 | """Test string representations of filters.""" 85 | mutation_filter1 = MutationFilter(specific_mutation="V600E") 86 | assert str(mutation_filter1) == "MutationFilter(specific=V600E)" 87 | assert ( 88 | repr(mutation_filter1) 89 | == "MutationFilter(specific_mutation='V600E', pattern=None)" 90 | ) 91 | 92 | mutation_filter2 = MutationFilter(pattern="V600*") 93 | assert str(mutation_filter2) == "MutationFilter(pattern=V600*)" 94 | 95 | mutation_filter3 = MutationFilter() 96 | assert str(mutation_filter3) == "MutationFilter(no_filter)" 97 | 98 | def test_edge_cases(self): 99 | """Test edge cases in mutation matching.""" 100 | # Empty protein change 101 | mutation_filter = MutationFilter(specific_mutation="V600E") 102 | assert not 
mutation_filter.matches("") 103 | assert not mutation_filter.matches(None) 104 | 105 | # Complex patterns 106 | mutation_filter2 = MutationFilter(pattern="[VL]600*") 107 | # This will use regex escaping, so won't work as expected 108 | # But should not crash 109 | assert not mutation_filter2.matches("V600E") # Because [ is escaped 110 | 111 | def test_filter_mutations_preserves_type(self): 112 | """Test that filter preserves the original list type.""" 113 | mutations = [ 114 | MockMutation("V600E"), 115 | MockMutation("V600K"), 116 | ] 117 | 118 | mutation_filter = MutationFilter(pattern="V600*") 119 | result = mutation_filter.filter_mutations(mutations) 120 | 121 | # Result should be a list 122 | assert isinstance(result, list) 123 | assert len(result) == 2 124 | ``` -------------------------------------------------------------------------------- /src/biomcp/variants/getter.py: -------------------------------------------------------------------------------- ```python 1 | """Getter module for retrieving variant details.""" 2 | 3 | import json 4 | import logging 5 | from typing import Annotated 6 | 7 | from .. import ensure_list, http_client, render 8 | from ..constants import DEFAULT_ASSEMBLY, MYVARIANT_GET_URL 9 | from .external import ExternalVariantAggregator, format_enhanced_annotations 10 | from .filters import filter_variants 11 | from .links import inject_links 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | async def get_variant( 17 | variant_id: str, 18 | output_json: bool = False, 19 | include_external: bool = False, 20 | assembly: str = DEFAULT_ASSEMBLY, 21 | ) -> str: 22 | """ 23 | Get variant details from MyVariant.info using the variant identifier. 24 | 25 | The identifier can be a full HGVS-style string (e.g. "chr7:g.140453136A>T") 26 | or an rsID (e.g. "rs113488022"). The API response is expected to include a 27 | "hits" array; this function extracts the first hit. 
28 | 29 | Args: 30 | variant_id: Variant identifier (HGVS or rsID) 31 | output_json: Return JSON format if True, else Markdown 32 | include_external: Include external annotations (TCGA, 1000 Genomes, cBioPortal) 33 | assembly: Genome assembly (hg19 or hg38), defaults to hg19 34 | 35 | Returns: 36 | Formatted variant data as JSON or Markdown string 37 | 38 | If output_json is True, the result is returned as a formatted JSON string; 39 | otherwise, it is rendered as Markdown. 40 | """ 41 | response, error = await http_client.request_api( 42 | url=f"{MYVARIANT_GET_URL}/{variant_id}", 43 | request={"fields": "all", "assembly": assembly}, 44 | method="GET", 45 | domain="myvariant", 46 | ) 47 | 48 | data_to_return: list = ensure_list(response) 49 | 50 | # Inject database links into the variant data 51 | if not error: 52 | data_to_return = inject_links(data_to_return) 53 | data_to_return = filter_variants(data_to_return) 54 | 55 | # Add external annotations if requested 56 | if include_external and data_to_return: 57 | logger.info( 58 | f"Adding external annotations for {len(data_to_return)} variants" 59 | ) 60 | aggregator = ExternalVariantAggregator() 61 | 62 | for _i, variant_data in enumerate(data_to_return): 63 | logger.info( 64 | f"Processing variant {_i}: keys={list(variant_data.keys())}" 65 | ) 66 | # Get enhanced annotations 67 | enhanced = await aggregator.get_enhanced_annotations( 68 | variant_id, 69 | include_tcga=True, 70 | include_1000g=True, 71 | include_cbioportal=True, 72 | variant_data=variant_data, 73 | ) 74 | 75 | # Add formatted annotations to the variant data 76 | formatted = format_enhanced_annotations(enhanced) 77 | logger.info( 78 | f"Formatted external annotations: {formatted['external_annotations'].keys()}" 79 | ) 80 | variant_data.update(formatted["external_annotations"]) 81 | 82 | if error: 83 | data_to_return = [{"error": f"Error {error.code}: {error.message}"}] 84 | 85 | if output_json: 86 | return json.dumps(data_to_return, indent=2) 87 | 
else: 88 | return render.to_markdown(data_to_return) 89 | 90 | 91 | async def _variant_details( 92 | call_benefit: Annotated[ 93 | str, 94 | "Define and summarize why this function is being called and the intended benefit", 95 | ], 96 | variant_id: str, 97 | include_external: Annotated[ 98 | bool, 99 | "Include annotations from external sources (TCGA, 1000 Genomes, cBioPortal)", 100 | ] = True, 101 | assembly: Annotated[ 102 | str, 103 | "Genome assembly (hg19 or hg38). Default: hg19", 104 | ] = DEFAULT_ASSEMBLY, 105 | ) -> str: 106 | """ 107 | Retrieves detailed information for a *single* genetic variant. 108 | 109 | Parameters: 110 | - call_benefit: Define and summarize why this function is being called and the intended benefit 111 | - variant_id: A variant identifier ("chr7:g.140453136A>T") 112 | - include_external: Include annotations from TCGA, 1000 Genomes, cBioPortal, and Mastermind 113 | - assembly: Genome assembly (hg19 or hg38). Default: hg19 114 | 115 | Process: Queries the MyVariant.info GET endpoint, optionally fetching 116 | additional annotations from external databases 117 | Output: A Markdown formatted string containing comprehensive 118 | variant annotations (genomic context, frequencies, 119 | predictions, clinical data, external annotations). Returns error if invalid. 120 | Note: Use the variant_searcher to find the variant id first. 
121 | """ 122 | return await get_variant( 123 | variant_id, 124 | output_json=False, 125 | include_external=include_external, 126 | assembly=assembly, 127 | ) 128 | ``` -------------------------------------------------------------------------------- /src/biomcp/integrations/cts_api.py: -------------------------------------------------------------------------------- ```python 1 | """NCI Clinical Trials Search API integration helper.""" 2 | 3 | import json 4 | import logging 5 | import os 6 | from typing import Any, Literal 7 | 8 | from ..constants import NCI_API_KEY_ENV 9 | from ..http_client import request_api 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class CTSAPIError(Exception): 15 | """Error raised when CTS API requests fail.""" 16 | 17 | pass 18 | 19 | 20 | def _validate_api_key(api_key: str | None) -> str: 21 | """Validate and return API key.""" 22 | if not api_key: 23 | api_key = os.getenv(NCI_API_KEY_ENV) 24 | 25 | if not api_key: 26 | raise CTSAPIError( 27 | f"NCI API key required. 
Please set {NCI_API_KEY_ENV} environment " 28 | "variable or provide api_key parameter.\n" 29 | "Get a free API key at: https://www.cancer.gov/research/participate/" 30 | "clinical-trials-search/developers" 31 | ) 32 | 33 | return api_key 34 | 35 | 36 | def _prepare_request_data( 37 | method: str, 38 | params: dict[str, Any] | None, 39 | json_data: dict[str, Any] | None, 40 | headers: dict[str, str], 41 | ) -> dict[str, Any]: 42 | """Prepare request data based on method.""" 43 | if method == "GET": 44 | request_data = params or {} 45 | logger.debug(f"CTS API GET request with params: {params}") 46 | else: 47 | request_data = json_data or {} 48 | if method == "POST": 49 | logger.debug(f"CTS API POST request with data: {json_data}") 50 | 51 | # Add headers to request data 52 | if headers: 53 | request_data["_headers"] = json.dumps(headers) 54 | 55 | return request_data 56 | 57 | 58 | def _handle_api_error(error: Any) -> None: 59 | """Handle API errors with appropriate messages.""" 60 | if error.code == 401: 61 | raise CTSAPIError( 62 | f"Invalid API key. Please check your {NCI_API_KEY_ENV} " 63 | "environment variable or api_key parameter." 64 | ) 65 | elif error.code == 403: 66 | raise CTSAPIError( 67 | "Access forbidden. Your API key may not have permission " 68 | "to access this resource." 69 | ) 70 | else: 71 | raise CTSAPIError(f"CTS API error: {error.message}") 72 | 73 | 74 | async def make_cts_request( 75 | url: str, 76 | method: Literal["GET", "POST"] = "GET", 77 | params: dict[str, Any] | None = None, 78 | json_data: dict[str, Any] | None = None, 79 | api_key: str | None = None, 80 | ) -> dict[str, Any]: 81 | """ 82 | Make a request to the NCI CTS API with proper authentication. 
83 | 84 | Args: 85 | url: Full URL to the CTS API endpoint 86 | method: HTTP method (GET or POST) 87 | params: Query parameters 88 | json_data: JSON data for POST requests 89 | api_key: Optional API key (if not provided, uses NCI_API_KEY env var) 90 | 91 | Returns: 92 | JSON response from the API 93 | 94 | Raises: 95 | CTSAPIError: If the request fails or API key is missing 96 | """ 97 | # Validate API key 98 | api_key = _validate_api_key(api_key) 99 | 100 | # Prepare headers 101 | headers = {"x-api-key": api_key, "Accept": "application/json"} 102 | 103 | try: 104 | # Prepare request data 105 | request_data = _prepare_request_data( 106 | method, params, json_data, headers 107 | ) 108 | 109 | # Make API request 110 | response, error = await request_api( 111 | url=url, 112 | request=request_data, 113 | method=method, 114 | cache_ttl=0, # Disable caching for NCI API to ensure fresh results 115 | ) 116 | 117 | # Handle errors 118 | if error: 119 | _handle_api_error(error) 120 | 121 | if response is None: 122 | raise CTSAPIError("No response received from NCI CTS API") 123 | 124 | return response 125 | 126 | except Exception as e: 127 | # Re-raise CTSAPIError as-is 128 | if isinstance(e, CTSAPIError): 129 | raise 130 | 131 | # Wrap other exceptions 132 | logger.error(f"CTS API request failed: {e}") 133 | raise CTSAPIError(f"Failed to connect to NCI CTS API: {e!s}") from e 134 | 135 | 136 | def get_api_key_instructions() -> str: 137 | """ 138 | Get user-friendly instructions for obtaining and setting the API key. 
139 | 140 | Returns: 141 | Formatted string with instructions 142 | """ 143 | return ( 144 | "## NCI Clinical Trials API Key Required\n\n" 145 | "To use NCI's Clinical Trials Search API, you need an API key.\n\n" 146 | "**Option 1: Set environment variable (recommended)**\n" 147 | "```bash\n" 148 | f"export {NCI_API_KEY_ENV}='your-api-key'\n" 149 | "```\n\n" 150 | "**Option 2: Provide via CLI**\n" 151 | "```bash\n" 152 | "biomcp trial search --api-key YOUR_KEY --condition melanoma\n" 153 | "```\n\n" 154 | "**Get your free API key:**\n" 155 | "Visit https://www.cancer.gov/research/participate/clinical-trials-search/developers\n\n" 156 | "The API key provides access to NCI's comprehensive cancer clinical trials " 157 | "database with advanced search capabilities." 158 | ) 159 | ``` -------------------------------------------------------------------------------- /tests/tdd/variants/test_alphagenome_api_key.py: -------------------------------------------------------------------------------- ```python 1 | """Test AlphaGenome per-request API key functionality.""" 2 | 3 | import os 4 | from unittest.mock import MagicMock, patch 5 | 6 | import pandas as pd 7 | import pytest 8 | 9 | from biomcp.variants.alphagenome import predict_variant_effects 10 | 11 | 12 | @pytest.mark.asyncio 13 | async def test_api_key_parameter_overrides_env_var(): 14 | """Test that api_key parameter takes precedence over environment variable.""" 15 | # Set up environment variable 16 | with patch.dict("os.environ", {"ALPHAGENOME_API_KEY": "env-key"}): 17 | # Mock AlphaGenome modules 18 | mock_genome = MagicMock() 19 | mock_client = MagicMock() 20 | mock_scorers = MagicMock() 21 | 22 | # Mock successful prediction 23 | test_scores_df = pd.DataFrame({ 24 | "output_type": ["RNA_SEQ"], 25 | "raw_score": [1.5], 26 | "gene_name": ["BRAF"], 27 | "track_name": [None], 28 | }) 29 | 30 | # Track which API key was used 31 | api_keys_used = [] 32 | 33 | def track_create(api_key): 34 | 
api_keys_used.append(api_key) 35 | mock_model = MagicMock() 36 | mock_model.score_variant.return_value = test_scores_df 37 | return mock_model 38 | 39 | mock_client.create.side_effect = track_create 40 | 41 | mock_scorers.tidy_scores.return_value = test_scores_df 42 | mock_scorers.get_recommended_scorers.return_value = [] 43 | 44 | # Create a mock module with the correct attributes 45 | mock_models = MagicMock() 46 | mock_models.dna_client = mock_client 47 | mock_models.variant_scorers = mock_scorers 48 | 49 | mock_data = MagicMock() 50 | mock_data.genome = mock_genome 51 | 52 | with patch.dict( 53 | "sys.modules", 54 | { 55 | "alphagenome.data": mock_data, 56 | "alphagenome.models": mock_models, 57 | }, 58 | ): 59 | # Test with parameter API key 60 | result = await predict_variant_effects( 61 | "chr7", 140753336, "A", "T", api_key="param-key" 62 | ) 63 | 64 | # Verify the parameter key was used, not the env var 65 | assert len(api_keys_used) == 1 66 | assert api_keys_used[0] == "param-key" 67 | assert "BRAF" in result 68 | 69 | 70 | @pytest.mark.asyncio 71 | async def test_no_api_key_shows_instructions(): 72 | """Test that missing API key shows helpful instructions.""" 73 | # Ensure no environment variable is set 74 | with patch.dict("os.environ", {}, clear=True): 75 | # Remove ALPHAGENOME_API_KEY if it exists 76 | os.environ.pop("ALPHAGENOME_API_KEY", None) 77 | 78 | result = await predict_variant_effects( 79 | "chr7", 140753336, "A", "T", skip_cache=True 80 | ) 81 | 82 | # Check for instructions 83 | assert "AlphaGenome API key required" in result 84 | assert "My AlphaGenome API key is" in result 85 | assert "ACTION REQUIRED" in result 86 | assert "https://deepmind.google.com/science/alphagenome" in result 87 | 88 | 89 | @pytest.mark.asyncio 90 | async def test_env_var_used_when_no_parameter(): 91 | """Test that environment variable is used when no parameter is provided.""" 92 | # Set up environment variable 93 | with patch.dict("os.environ", 
{"ALPHAGENOME_API_KEY": "env-key"}): 94 | # Mock AlphaGenome modules 95 | mock_genome = MagicMock() 96 | mock_client = MagicMock() 97 | mock_scorers = MagicMock() 98 | 99 | # Mock successful prediction 100 | test_scores_df = pd.DataFrame({ 101 | "output_type": ["RNA_SEQ"], 102 | "raw_score": [1.5], 103 | "gene_name": ["BRAF"], 104 | "track_name": [None], 105 | }) 106 | 107 | # Track which API key was used 108 | api_keys_used = [] 109 | 110 | def track_create(api_key): 111 | api_keys_used.append(api_key) 112 | mock_model = MagicMock() 113 | mock_model.score_variant.return_value = test_scores_df 114 | return mock_model 115 | 116 | mock_client.create.side_effect = track_create 117 | 118 | mock_scorers.tidy_scores.return_value = test_scores_df 119 | mock_scorers.get_recommended_scorers.return_value = [] 120 | 121 | # Create a mock module with the correct attributes 122 | mock_models = MagicMock() 123 | mock_models.dna_client = mock_client 124 | mock_models.variant_scorers = mock_scorers 125 | 126 | mock_data = MagicMock() 127 | mock_data.genome = mock_genome 128 | 129 | with patch.dict( 130 | "sys.modules", 131 | { 132 | "alphagenome.data": mock_data, 133 | "alphagenome.models": mock_models, 134 | }, 135 | ): 136 | # Test without parameter API key 137 | result = await predict_variant_effects("chr7", 140753336, "A", "T") 138 | 139 | # Verify the env var key was used 140 | assert len(api_keys_used) == 1 141 | assert api_keys_used[0] == "env-key" 142 | assert "BRAF" in result 143 | ``` -------------------------------------------------------------------------------- /src/biomcp/request_batcher.py: -------------------------------------------------------------------------------- ```python 1 | """Request batching utility for combining multiple small requests. 2 | 3 | This module provides a request batcher that accumulates multiple requests 4 | and processes them together in batches, reducing the number of API calls 5 | and improving performance for bulk operations. 
6 | 7 | Key Features: 8 | - Automatic batching based on size or time threshold 9 | - Configurable batch size and timeout 10 | - Thread-safe request accumulation 11 | - Error propagation to individual requests 12 | 13 | Example: 14 | ```python 15 | async def batch_api_call(params_list): 16 | # Make a single API call with multiple parameters 17 | return await api.bulk_request(params_list) 18 | 19 | batcher = RequestBatcher( 20 | batch_func=batch_api_call, 21 | batch_size=10, 22 | batch_timeout=0.1 23 | ) 24 | 25 | # Individual requests are automatically batched 26 | result1 = await batcher.request({"id": 1}) 27 | result2 = await batcher.request({"id": 2}) 28 | ``` 29 | """ 30 | 31 | import asyncio 32 | from collections.abc import Callable, Coroutine 33 | from typing import Any, TypeVar 34 | 35 | T = TypeVar("T") 36 | 37 | 38 | class RequestBatcher: 39 | """Batches multiple requests together to reduce overhead. 40 | 41 | This is particularly useful for APIs that support batch operations 42 | or when network latency dominates over processing time. 43 | 44 | The batcher accumulates requests until either: 45 | 1. The batch size threshold is reached 46 | 2. The batch timeout expires 47 | 48 | At which point all accumulated requests are processed together. 49 | """ 50 | 51 | def __init__( 52 | self, 53 | batch_func: Callable[[list[Any]], Coroutine[Any, Any, list[Any]]], 54 | batch_size: int = 10, 55 | batch_timeout: float = 0.05, # 50ms 56 | ): 57 | """Initialize the batcher. 
58 | 59 | Args: 60 | batch_func: Async function that processes a batch of requests 61 | batch_size: Maximum number of requests to batch together 62 | batch_timeout: Maximum time to wait for batch to fill (seconds) 63 | """ 64 | self.batch_func = batch_func 65 | self.batch_size = batch_size 66 | self.batch_timeout = batch_timeout 67 | self.pending_requests: list[tuple[Any, asyncio.Future]] = [] 68 | self.batch_task: asyncio.Task | None = None 69 | self._lock = asyncio.Lock() 70 | 71 | async def request(self, params: Any) -> Any: 72 | """Add a request to the batch and wait for result.""" 73 | future: asyncio.Future[Any] = asyncio.Future() 74 | 75 | async with self._lock: 76 | self.pending_requests.append((params, future)) 77 | 78 | # Check if we should flush immediately 79 | if len(self.pending_requests) >= self.batch_size: 80 | await self._flush_batch() 81 | elif not self.batch_task or self.batch_task.done(): 82 | # Start a timer to flush the batch 83 | self.batch_task = asyncio.create_task(self._batch_timer()) 84 | 85 | return await future 86 | 87 | async def _batch_timer(self): 88 | """Timer that flushes the batch after timeout.""" 89 | await asyncio.sleep(self.batch_timeout) 90 | async with self._lock: 91 | await self._flush_batch() 92 | 93 | async def _flush_batch(self): 94 | """Process all pending requests as a batch.""" 95 | if not self.pending_requests: 96 | return 97 | 98 | # Extract current batch 99 | batch = self.pending_requests.copy() 100 | self.pending_requests.clear() 101 | 102 | # Cancel timer if running 103 | if self.batch_task and not self.batch_task.done(): 104 | self.batch_task.cancel() 105 | 106 | # Process batch 107 | try: 108 | params_list = [params for params, _ in batch] 109 | results = await self.batch_func(params_list) 110 | 111 | # Distribute results to futures 112 | for i, (_, future) in enumerate(batch): 113 | if not future.done(): 114 | if i < len(results): 115 | future.set_result(results[i]) 116 | else: 117 | future.set_exception( 118 
| Exception(f"No result for request at index {i}") 119 | ) 120 | except Exception as e: 121 | # Propagate error to all futures 122 | for _, future in batch: 123 | if not future.done(): 124 | future.set_exception(e) 125 | 126 | 127 | # Example usage for autocomplete batching 128 | async def batch_autocomplete_requests(requests: list[dict]) -> list[Any]: 129 | """Process multiple autocomplete requests in parallel. 130 | 131 | This is an example implementation that could be used to batch 132 | autocomplete requests more efficiently. 133 | """ 134 | from .articles.autocomplete import EntityRequest, autocomplete 135 | 136 | tasks = [] 137 | for req in requests: 138 | entity_req = EntityRequest(**req) 139 | tasks.append(autocomplete(entity_req)) 140 | 141 | return await asyncio.gather(*tasks) 142 | ``` -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- ```toml 1 | [project] 2 | name = "biomcp-python" 3 | version = "0.4.6" 4 | description = "Biomedical Model Context Protocol Server" 5 | authors = [{ name = "Ian Maurer", email = "[email protected]" }] 6 | readme = "README.md" 7 | keywords = ['python'] 8 | requires-python = ">=3.10,<4.0" 9 | classifiers = [ 10 | "Intended Audience :: Developers", 11 | "Programming Language :: Python", 12 | "Programming Language :: Python :: 3", 13 | "Programming Language :: Python :: 3.10", 14 | "Programming Language :: Python :: 3.11", 15 | "Programming Language :: Python :: 3.12", 16 | "Programming Language :: Python :: 3.13", 17 | "Topic :: Software Development :: Libraries :: Python Modules", 18 | ] 19 | dependencies = [ 20 | "certifi>=2025.1.31", 21 | "diskcache>=5.6.3", 22 | "httpx>=0.28.1", 23 | "mcp[cli]>=1.12.3,<2.0.0", 24 | "platformdirs>=4.3.6", 25 | "psutil>=7.0.0", 26 | "pydantic>=2.10.6", 27 | "python-dotenv>=1.0.0", 28 | "rich>=14.0.0", 29 | "typer>=0.15.2", 30 | "uvicorn>=0.34.2", 31 | 
"alphagenome>=0.1.0", 32 | ] 33 | 34 | [project.urls] 35 | Homepage = "https://genomoncology.com/biomcp/" 36 | Repository = "https://github.com/genomoncology/biomcp" 37 | Documentation = "https://genomoncology.com/biomcp/" 38 | 39 | [dependency-groups] 40 | dev = [ 41 | "pytest>=7.2.0", 42 | "pytest-xdist>=3.5.0", 43 | "pre-commit>=2.20.0", 44 | "tox-uv>=1.11.3", 45 | "deptry>=0.22.0", 46 | "mypy>=0.991", 47 | "pytest-cov>=4.0.0", 48 | "pytest-asyncio>=0.24.0", 49 | "ruff>=0.9.2", 50 | "mkdocs>=1.4.2", 51 | "mkdocs-material>=8.5.10", 52 | "mkdocstrings[python]>=0.26.1", 53 | "anyio>=4.8.0", 54 | # "ipython>=9.0.2", 55 | "pytest-bdd>=8.1.0", 56 | "tomlkit>=0.13.2", 57 | "assertpy>=1.1", 58 | "twine>=4.0.0", 59 | "pandas>=2.0.0", # Used for mocking AlphaGenome responses in tests 60 | "PyYAML>=6.0.0", # Used for mkdocs.yml parsing in scripts 61 | "pydantic-ai>=0.0.14", # For testing Pydantic AI integration 62 | ] 63 | 64 | [project.optional-dependencies] 65 | api = [ 66 | ] 67 | 68 | worker = [ 69 | "fastapi>=0.110.0", 70 | "starlette>=0.36.0", 71 | "uvicorn>=0.28.0", 72 | ] 73 | 74 | [build-system] 75 | requires = ["setuptools >= 61.0"] 76 | build-backend = "setuptools.build_meta" 77 | 78 | [tool.setuptools.package-data] 79 | biomcp = ["resources/*.md"] 80 | 81 | [project.scripts] 82 | biomcp = "biomcp.__main__:main" 83 | 84 | [tool.mypy] 85 | files = ["src"] 86 | ignore_missing_imports = true 87 | disallow_untyped_defs = false 88 | disallow_any_unimported = false 89 | no_implicit_optional = true 90 | check_untyped_defs = false 91 | warn_return_any = false 92 | warn_unused_ignores = true 93 | show_error_codes = true 94 | plugins = [ 95 | "pydantic.mypy" 96 | ] 97 | disable_error_code = [ 98 | "union-attr", 99 | "prop-decorator", 100 | ] 101 | 102 | [tool.pytest.ini_options] 103 | testpaths = ["tests"] 104 | addopts = "--import-mode=importlib" 105 | asyncio_mode = "auto" 106 | asyncio_default_fixture_loop_scope = "function" 107 | markers = [ 108 | "integration: marks 
tests as integration tests (deselect with '-m \"not integration\"')", 109 | ] 110 | filterwarnings = [ 111 | # Ignore protobuf version warnings from AlphaGenome 112 | "ignore:Protobuf gencode version.*is exactly one major version older.*:UserWarning", 113 | # Ignore false positive warning from pytest-xdist about coroutines 114 | # This occurs during parallel test execution when mock objects are cleaned up 115 | "ignore:coroutine 'search_trials_unified' was never awaited:RuntimeWarning", 116 | ] 117 | 118 | [tool.ruff] 119 | target-version = "py310" 120 | line-length = 79 121 | fix = true 122 | unsafe-fixes = true 123 | 124 | [tool.ruff.lint] 125 | select = [ 126 | # flake8-2020 127 | "YTT", 128 | # flake8-bandit 129 | "S", 130 | # flake8-bugbear 131 | "B", 132 | # flake8-builtins 133 | "A", 134 | # flake8-comprehensions 135 | "C4", 136 | # flake8-debugger 137 | "T10", 138 | # flake8-simplify 139 | "SIM", 140 | # isort 141 | "I", 142 | # mccabe 143 | "C90", 144 | # pycodestyle 145 | "E", "W", 146 | # pyflakes 147 | "F", 148 | # pygrep-hooks 149 | "PGH", 150 | # pyupgrade 151 | "UP", 152 | # ruff 153 | "RUF", 154 | ] 155 | ignore = [ 156 | # LineTooLong 157 | "E501", 158 | # DoNotAssignLambda 159 | "E731", 160 | # Consider unpacking 161 | "RUF005", 162 | # Union for type annotations 163 | "UP007", 164 | # Asserts are ok when I say they are ok. 
165 | "S101", 166 | ] 167 | 168 | [tool.ruff.lint.per-file-ignores] 169 | "tests/*" = ["S101"] 170 | "__init__.py" = ["I001"] 171 | "src/biomcp/variants/external.py" = ["C901"] # Complex API interactions are acceptable 172 | 173 | [tool.ruff.format] 174 | preview = true 175 | 176 | [tool.ruff.lint.flake8-bugbear] 177 | extend-immutable-calls = [ 178 | "fastapi.Depends", 179 | "fastapi.Query", 180 | "typer.Argument", 181 | "typer.Option", 182 | ] 183 | 184 | [tool.coverage.report] 185 | skip_empty = true 186 | 187 | [tool.coverage.run] 188 | branch = true 189 | source = ["src"] 190 | omit = [ 191 | "src/*/__main__.py", 192 | "src/*/server.py", 193 | "src/*/http_client.py", 194 | ] 195 | 196 | [tool.deptry] 197 | exclude = [ 198 | "example_scripts/python_sdk.py", 199 | "venv", 200 | ".venv", 201 | ".direnv", 202 | "tests", 203 | ".git", 204 | "build", 205 | "dist", 206 | "scripts", 207 | "spike", 208 | ] 209 | 210 | [tool.deptry.per_rule_ignores] 211 | DEP001 = ["alphagenome"] # Optional dependency, must be installed manually 212 | DEP002 = ["uvicorn"] 213 | DEP003 = ["biomcp", "alphagenome"] 214 | ``` -------------------------------------------------------------------------------- /docs/getting-started/01-quickstart-cli.md: -------------------------------------------------------------------------------- ```markdown 1 | # Quickstart: BioMCP CLI 2 | 3 | Get started with BioMCP in under 5 minutes! This guide walks you through installation and your first biomedical search. 
4 | 5 | ## Prerequisites 6 | 7 | - Python 3.10 or higher 8 | - [uv](https://docs.astral.sh/uv/) package manager (recommended) or pip 9 | 10 | ## Installation 11 | 12 | ### Option 1: Using uv (Recommended) 13 | 14 | ```bash 15 | # Install uv if you haven't already 16 | curl -LsSf https://astral.sh/uv/install.sh | sh 17 | 18 | # Install BioMCP 19 | uv tool install biomcp 20 | ``` 21 | 22 | ### Option 2: Using pip 23 | 24 | ```bash 25 | pip install biomcp 26 | ``` 27 | 28 | ## Your First Search 29 | 30 | Let's search for recent articles about BRAF mutations in melanoma: 31 | 32 | ```bash 33 | biomcp article search \ 34 | --gene BRAF --disease melanoma --limit 5 35 | ``` 36 | 37 | This command: 38 | 39 | - Searches PubMed/PubTator3 for articles 40 | - Filters by BRAF gene and melanoma disease 41 | - Returns the 5 most recent results 42 | - Automatically includes cBioPortal cancer genomics data 43 | - Includes preprints from bioRxiv/medRxiv by default 44 | 45 | ## Understanding the Output 46 | 47 | The search returns: 48 | 49 | 1. **cBioPortal Summary** (if gene specified): Cancer genomics data showing mutation frequencies and hotspots 50 | 2. 
**Article Results**: Each result includes: 51 | - Title and authors 52 | - Journal and publication date 53 | - PubMed ID and direct link 54 | - Abstract snippet 55 | - Annotated entities (genes, diseases, chemicals) 56 | 57 | ## Essential Commands 58 | 59 | ### Search Clinical Trials 60 | 61 | Find active trials for lung cancer: 62 | 63 | ```bash 64 | biomcp trial search \ 65 | --condition "lung cancer" \ 66 | --status RECRUITING --limit 5 67 | ``` 68 | 69 | ### Get Gene Information 70 | 71 | Retrieve details about the TP53 tumor suppressor: 72 | 73 | ```bash 74 | biomcp gene get TP53 75 | ``` 76 | 77 | ### Look Up Drug Information 78 | 79 | Get details about imatinib (Gleevec): 80 | 81 | ```bash 82 | biomcp drug get imatinib 83 | ``` 84 | 85 | ### Search for Genetic Variants 86 | 87 | Find pathogenic variants in the BRCA1 gene: 88 | 89 | ```bash 90 | biomcp variant search \ 91 | --gene BRCA1 --significance pathogenic \ 92 | --limit 5 93 | ``` 94 | 95 | ## Configure and Explore 96 | 97 | ### Set Up API Keys (Optional but Recommended) 98 | 99 | Some features require API keys for enhanced functionality: 100 | 101 | ```bash 102 | # For NCI clinical trials database 103 | export NCI_API_KEY="your-key-here" 104 | 105 | # For AlphaGenome variant predictions 106 | export ALPHAGENOME_API_KEY="your-key-here" 107 | 108 | # For additional cBioPortal features 109 | export CBIO_TOKEN="your-token-here" 110 | ``` 111 | 112 | See [Authentication and API Keys](03-authentication-and-api-keys.md) for detailed setup. 
113 | 114 | ### Explore Advanced Features 115 | 116 | - **Combine Multiple Filters**: 117 | 118 | ```bash 119 | biomcp article search \ 120 | --gene EGFR --disease "lung cancer" \ 121 | --chemical erlotinib 122 | ``` 123 | 124 | - **Use OR Logic in Keywords**: 125 | 126 | ```bash 127 | biomcp article search --gene BRAF --keyword "V600E|p.V600E|c.1799T>A" 128 | ``` 129 | 130 | - **Exclude Preprints**: 131 | ```bash 132 | biomcp article search --gene TP53 --no-preprints 133 | ``` 134 | 135 | ### Get Help 136 | 137 | View all available commands: 138 | 139 | ```bash 140 | biomcp --help 141 | ``` 142 | 143 | Get help for a specific command: 144 | 145 | ```bash 146 | biomcp article search --help 147 | ``` 148 | 149 | ## Common Use Cases 150 | 151 | ### 1. Research a Specific Mutation 152 | 153 | ```bash 154 | # Find articles about EGFR T790M resistance mutation 155 | biomcp article search --gene EGFR \ 156 | --keyword "T790M|p.T790M" \ 157 | --disease "lung cancer" 158 | ``` 159 | 160 | ### 2. Find Trials for a Patient 161 | 162 | ```bash 163 | # Active trials for HER2-positive breast cancer 164 | biomcp trial search \ 165 | --condition "breast cancer" \ 166 | --keyword "HER2 positive" \ 167 | --status RECRUITING 168 | ``` 169 | 170 | ### 3. 
Investigate Drug Mechanisms 171 | 172 | ```bash 173 | # Get information about pembrolizumab 174 | biomcp drug get pembrolizumab 175 | 176 | # Find articles about its use in melanoma 177 | biomcp article search --chemical pembrolizumab --disease melanoma 178 | ``` 179 | 180 | ## Troubleshooting 181 | 182 | ### Command Not Found 183 | 184 | If `biomcp` is not recognized: 185 | 186 | - Ensure your PATH includes the installation directory 187 | - Try running with full path: `~/.local/bin/biomcp` 188 | - Restart your terminal after installation 189 | 190 | ### No Results Found 191 | 192 | If searches return no results: 193 | 194 | - Check spelling of gene names (use official symbols) 195 | - Try broader search terms 196 | - Remove filters one by one to identify the constraint 197 | 198 | ### API Rate Limits 199 | 200 | If you encounter rate limit errors: 201 | 202 | - Add delays between requests 203 | - Consider setting up API keys for higher limits 204 | - Use the `--limit` parameter to reduce result count 205 | 206 | ## Next Steps 207 | 208 | Now that you've run your first searches, explore these resources: 209 | 210 | 1. **[Complete CLI Reference](../user-guides/01-command-line-interface.md)** - Comprehensive documentation for all commands and options 211 | 2. **[Claude Desktop Integration](02-claude-desktop-integration.md)** - Use BioMCP with AI assistants 212 | 3. **[Set up API Keys](03-authentication-and-api-keys.md)** - Enable advanced features with NCI, AlphaGenome, and cBioPortal 213 | 4. **[How-to Guides](../how-to-guides/01-find-articles-and-cbioportal-data.md)** - Step-by-step tutorials for complex research workflows 214 | 5. **[Deep Researcher Persona](../concepts/02-the-deep-researcher-persona.md)** - Learn about BioMCP's philosophy and methodology 215 | 216 | Happy researching! 
🧬🔬 217 | ``` -------------------------------------------------------------------------------- /tests/integration/test_preprints_integration.py: -------------------------------------------------------------------------------- ```python 1 | """Integration tests for preprint search functionality.""" 2 | 3 | import asyncio 4 | 5 | import pytest 6 | 7 | from biomcp.articles.preprints import ( 8 | BiorxivClient, 9 | EuropePMCClient, 10 | PreprintSearcher, 11 | ) 12 | from biomcp.articles.search import PubmedRequest 13 | from biomcp.core import PublicationState 14 | 15 | 16 | class TestBiorxivIntegration: 17 | """Integration tests for bioRxiv API.""" 18 | 19 | @pytest.mark.asyncio 20 | async def test_biorxiv_real_search(self): 21 | """Test real bioRxiv API search.""" 22 | client = BiorxivClient() 23 | 24 | # Try multiple search terms to find one with results 25 | search_terms = ["cancer", "gene", "cell", "protein", "RNA", "DNA"] 26 | results = [] 27 | successful_term = None 28 | 29 | for term in search_terms: 30 | results = await client.search(term) 31 | if len(results) > 0: 32 | successful_term = term 33 | break 34 | 35 | # If no results with any term, the API might be down or have no recent articles 36 | if len(results) == 0: 37 | pytest.skip( 38 | "No results found with any search term - API may be down or have no matching recent articles" 39 | ) 40 | 41 | # Check the structure of results 42 | first_result = results[0] 43 | assert first_result.doi is not None 44 | assert first_result.title is not None 45 | assert first_result.publication_state == PublicationState.PREPRINT 46 | assert "preprint" in first_result.journal.lower() 47 | 48 | print( 49 | f"Found {len(results)} bioRxiv results for term '{successful_term}'" 50 | ) 51 | print(f"First result: {first_result.title}") 52 | 53 | 54 | class TestEuropePMCIntegration: 55 | """Integration tests for Europe PMC API.""" 56 | 57 | @pytest.mark.asyncio 58 | async def test_europe_pmc_real_search(self): 59 | """Test real Europe 
PMC API search for preprints.""" 60 | client = EuropePMCClient() 61 | 62 | # Try multiple search terms to find one with results 63 | search_terms = [ 64 | "cancer", 65 | "gene", 66 | "cell", 67 | "protein", 68 | "SARS-CoV-2", 69 | "COVID", 70 | ] 71 | results = [] 72 | successful_term = None 73 | 74 | for term in search_terms: 75 | results = await client.search(term) 76 | if len(results) > 0: 77 | successful_term = term 78 | break 79 | 80 | # If no results with any term, the API might be down 81 | if len(results) == 0: 82 | pytest.skip( 83 | "No results found with any search term - Europe PMC API may be down" 84 | ) 85 | 86 | # Check the structure 87 | first_result = results[0] 88 | assert first_result.title is not None 89 | assert first_result.publication_state == PublicationState.PREPRINT 90 | 91 | print( 92 | f"Found {len(results)} Europe PMC preprint results for term '{successful_term}'" 93 | ) 94 | print(f"First result: {first_result.title}") 95 | if first_result.doi: 96 | print(f"DOI: {first_result.doi}") 97 | 98 | 99 | class TestPreprintSearcherIntegration: 100 | """Integration tests for combined preprint search.""" 101 | 102 | @pytest.mark.asyncio 103 | async def test_combined_search_real(self): 104 | """Test searching across both preprint sources.""" 105 | searcher = PreprintSearcher() 106 | 107 | # Try different search combinations 108 | search_configs = [ 109 | {"genes": ["TP53"], "diseases": ["cancer"]}, 110 | {"keywords": ["protein", "structure"]}, 111 | {"genes": ["BRAF"], "diseases": ["melanoma"]}, 112 | {"keywords": ["gene", "expression"]}, 113 | ] 114 | 115 | response = None 116 | successful_config = None 117 | 118 | for config in search_configs: 119 | request = PubmedRequest(**config) 120 | response = await searcher.search(request) 121 | if response.count > 0: 122 | successful_config = config 123 | break 124 | 125 | print(f"Total results: {response.count if response else 0}") 126 | 127 | # Check if we got any results 128 | if response and 
response.count > 0: 129 | # Check result structure 130 | first = response.results[0] 131 | assert first.title is not None 132 | assert first.publication_state == PublicationState.PREPRINT 133 | 134 | print(f"Successful search config: {successful_config}") 135 | print(f"First result: {first.title}") 136 | print(f"Date: {first.date}") 137 | print(f"Journal: {first.journal}") 138 | else: 139 | pytest.skip( 140 | "No results found with any search configuration - APIs may be down" 141 | ) 142 | 143 | 144 | if __name__ == "__main__": 145 | # Run the tests directly 146 | asyncio.run(TestBiorxivIntegration().test_biorxiv_real_search()) 147 | print("\n" + "=" * 50 + "\n") 148 | asyncio.run(TestEuropePMCIntegration().test_europe_pmc_real_search()) 149 | print("\n" + "=" * 50 + "\n") 150 | asyncio.run(TestPreprintSearcherIntegration().test_combined_search_real()) 151 | ``` -------------------------------------------------------------------------------- /docs/developer-guides/05-error-handling.md: -------------------------------------------------------------------------------- ```markdown 1 | # Error Handling Guide 2 | 3 | ## Overview 4 | 5 | BioMCP uses a consistent error handling pattern across all HTTP operations. This guide explains the error types, when they occur, and how to handle them. 6 | 7 | ## Error Structure 8 | 9 | All HTTP operations return a tuple: `(data, error)` where one is always `None`. 10 | 11 | ```python 12 | data, error = await http_client.request_api(...) 
13 | if error: 14 | # Handle error 15 | logger.error(f"Request failed: {error.code} - {error.message}") 16 | else: 17 | # Process data 18 | process_result(data) 19 | ``` 20 | 21 | ## Error Types 22 | 23 | ### Network Errors 24 | 25 | - **When**: Connection timeout, DNS resolution failure, network unreachable 26 | - **Error Code**: Various HTTP client exceptions 27 | - **Handling**: Retry with exponential backoff or fail gracefully 28 | 29 | ### HTTP Status Errors 30 | 31 | - **When**: Server returns 4xx or 5xx status codes 32 | - **Error Codes**: 33 | - `400-499`: Client errors (bad request, unauthorized, not found) 34 | - `500-599`: Server errors (internal error, service unavailable) 35 | - **Handling**: 36 | - 4xx: Fix request parameters or authentication 37 | - 5xx: Retry with backoff or use cached data 38 | 39 | ### Circuit Breaker Errors 40 | 41 | - **When**: Too many consecutive failures to a domain 42 | - **Error**: Circuit breaker opens to prevent cascading failures 43 | - **Handling**: Wait for recovery timeout or use alternative data source 44 | 45 | ### Offline Mode Errors 46 | 47 | - **When**: `BIOMCP_OFFLINE=true` and no cached data available 48 | - **Error**: Request blocked in offline mode 49 | - **Handling**: Use cached data only or inform user about offline status 50 | 51 | ### Parse Errors 52 | 53 | - **When**: Response is not valid JSON or doesn't match expected schema 54 | - **Error**: JSON decode error or validation error 55 | - **Handling**: Log error and treat as service issue 56 | 57 | ## Best Practices 58 | 59 | ### 1. Always Check Errors 60 | 61 | ```python 62 | # ❌ Bad - ignoring error 63 | data, _ = await http_client.request_api(...) 64 | process(data) # data might be None! 65 | 66 | # ✅ Good - checking error 67 | data, error = await http_client.request_api(...) 68 | if error: 69 | logger.warning(f"Failed to fetch data: {error}") 70 | return None 71 | process(data) 72 | ``` 73 | 74 | ### 2. 
Provide Context in Error Messages 75 | 76 | ```python 77 | # ❌ Bad - generic error 78 | if error: 79 | logger.error("Request failed") 80 | 81 | # ✅ Good - contextual error 82 | if error: 83 | logger.error(f"Failed to fetch gene {gene_id} from cBioPortal: {error.message}") 84 | ``` 85 | 86 | ### 3. Graceful Degradation 87 | 88 | ```python 89 | async def get_variant_with_fallback(variant_id: str): 90 | # Try primary source 91 | data, error = await primary_source.get_variant(variant_id) 92 | if not error: 93 | return data 94 | 95 | logger.warning(f"Primary source failed: {error}, trying secondary") 96 | 97 | # Try secondary source 98 | data, error = await secondary_source.get_variant(variant_id) 99 | if not error: 100 | return data 101 | 102 | # Use cached data as last resort 103 | return get_cached_variant(variant_id) 104 | ``` 105 | 106 | ### 4. User-Friendly Error Messages 107 | 108 | ```python 109 | def format_error_for_user(error: RequestError) -> str: 110 | if error.code >= 500: 111 | return "The service is temporarily unavailable. Please try again later." 112 | elif error.code == 404: 113 | return "The requested data was not found." 114 | elif error.code == 401: 115 | return "Authentication required. Please check your credentials." 116 | elif "OFFLINE" in str(error): 117 | return "You are in offline mode. Only cached data is available." 118 | else: 119 | return "An error occurred while fetching data. Please try again." 120 | ``` 121 | 122 | ## Testing Error Conditions 123 | 124 | ### 1. Simulate Network Errors 125 | 126 | ```python 127 | with patch("biomcp.http_client.call_http") as mock: 128 | mock.side_effect = Exception("Network error") 129 | data, error = await client.fetch_data() 130 | assert error is not None 131 | assert data is None 132 | ``` 133 | 134 | ### 2. 
Test Circuit Breaker 135 | 136 | ```python 137 | # Simulate multiple failures 138 | for _ in range(5): 139 | with patch("biomcp.http_client.call_http") as mock: 140 | mock.return_value = (500, "Server Error") 141 | await client.fetch_data() 142 | 143 | # Circuit should be open 144 | data, error = await client.fetch_data() 145 | assert error is not None 146 | assert "circuit" in error.message.lower() 147 | ``` 148 | 149 | ### 3. Test Offline Mode 150 | 151 | ```python 152 | with patch.dict(os.environ, {"BIOMCP_OFFLINE": "true"}): 153 | data, error = await client.fetch_data() 154 | # Should only return cached data or error 155 | ``` 156 | 157 | ## Common Patterns 158 | 159 | ### Retry with Backoff 160 | 161 | The centralized HTTP client automatically retries with exponential backoff for: 162 | 163 | - Network errors 164 | - 5xx server errors 165 | - Rate limit errors (429) 166 | 167 | ### Caching 168 | 169 | Failed requests don't overwrite cached data, ensuring availability during outages. 170 | 171 | ### Rate Limiting 172 | 173 | Requests are automatically rate-limited per domain to prevent overwhelming services. 174 | 175 | ## Debugging 176 | 177 | Enable debug logging to see all HTTP requests and errors: 178 | 179 | ```python 180 | import logging 181 | logging.getLogger("biomcp.http_client").setLevel(logging.DEBUG) 182 | ``` 183 | 184 | This will show: 185 | 186 | - All HTTP requests with URLs and methods 187 | - Response status codes and times 188 | - Error details and retry attempts 189 | - Circuit breaker state changes 190 | ``` -------------------------------------------------------------------------------- /src/biomcp/openfda/cache.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | Simple in-memory caching for OpenFDA API responses. 3 | 4 | This module provides a time-based cache to reduce API calls and improve performance. 5 | Cache entries expire after a configurable TTL (time-to-live). 
6 | """ 7 | 8 | import hashlib 9 | import json 10 | import logging 11 | import os 12 | from datetime import datetime, timedelta 13 | from typing import Any 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | # Cache configuration 18 | CACHE_TTL_MINUTES = int(os.environ.get("BIOMCP_FDA_CACHE_TTL", "15")) 19 | MAX_CACHE_SIZE = int(os.environ.get("BIOMCP_FDA_MAX_CACHE_SIZE", "100")) 20 | MAX_RESPONSE_SIZE = int( 21 | os.environ.get("BIOMCP_FDA_MAX_RESPONSE_SIZE", str(1024 * 1024)) 22 | ) # 1MB default 23 | 24 | # Global cache dictionary 25 | _cache: dict[str, tuple[Any, datetime]] = {} 26 | 27 | 28 | def _generate_cache_key(endpoint: str, params: dict[str, Any]) -> str: 29 | """ 30 | Generate a unique cache key for an API request. 31 | 32 | Args: 33 | endpoint: The API endpoint URL 34 | params: Query parameters 35 | 36 | Returns: 37 | A unique hash key for the request 38 | """ 39 | # Remove sensitive parameters before hashing 40 | safe_params = { 41 | k: v 42 | for k, v in params.items() 43 | if k.lower() not in ["api_key", "apikey", "key", "token", "secret"] 44 | } 45 | 46 | # Sort params for consistent hashing 47 | sorted_params = json.dumps(safe_params, sort_keys=True) 48 | combined = f"{endpoint}:{sorted_params}" 49 | 50 | # Use SHA256 for cache key 51 | return hashlib.sha256(combined.encode()).hexdigest() 52 | 53 | 54 | def get_cached_response( 55 | endpoint: str, params: dict[str, Any] 56 | ) -> dict[str, Any] | None: 57 | """ 58 | Retrieve a cached response if available and not expired. 
59 | 60 | Args: 61 | endpoint: The API endpoint URL 62 | params: Query parameters 63 | 64 | Returns: 65 | Cached response data or None if not found/expired 66 | """ 67 | cache_key = _generate_cache_key(endpoint, params) 68 | 69 | if cache_key in _cache: 70 | data, timestamp = _cache[cache_key] 71 | 72 | # Check if cache entry is still valid 73 | age = datetime.now() - timestamp 74 | if age < timedelta(minutes=CACHE_TTL_MINUTES): 75 | logger.debug( 76 | f"Cache hit for {endpoint} (age: {age.total_seconds():.1f}s)" 77 | ) 78 | return data 79 | else: 80 | # Remove expired entry 81 | del _cache[cache_key] 82 | logger.debug(f"Cache expired for {endpoint}") 83 | 84 | return None 85 | 86 | 87 | def set_cached_response( 88 | endpoint: str, params: dict[str, Any], response: dict[str, Any] 89 | ) -> None: 90 | """ 91 | Store a response in the cache. 92 | 93 | Args: 94 | endpoint: The API endpoint URL 95 | params: Query parameters 96 | response: Response data to cache 97 | """ 98 | # Check response size limit 99 | import json 100 | import sys 101 | 102 | # Better size estimation using JSON serialization 103 | try: 104 | response_json = json.dumps(response) 105 | response_size = len(response_json.encode("utf-8")) 106 | except (TypeError, ValueError): 107 | # If can't serialize, use sys.getsizeof 108 | response_size = sys.getsizeof(response) 109 | 110 | if response_size > MAX_RESPONSE_SIZE: 111 | logger.warning( 112 | f"Response too large to cache: {response_size} bytes > {MAX_RESPONSE_SIZE} bytes" 113 | ) 114 | return 115 | 116 | # Check cache size limit 117 | if len(_cache) >= MAX_CACHE_SIZE: 118 | # Remove oldest entries (simple FIFO) 119 | oldest_keys = sorted(_cache.keys(), key=lambda k: _cache[k][1])[ 120 | : len(_cache) - MAX_CACHE_SIZE + 1 121 | ] 122 | 123 | for key in oldest_keys: 124 | del _cache[key] 125 | 126 | logger.debug( 127 | f"Cache size limit reached, removed {len(oldest_keys)} entries" 128 | ) 129 | 130 | cache_key = _generate_cache_key(endpoint, params) 
131 | _cache[cache_key] = (response, datetime.now()) 132 | 133 | logger.debug(f"Cached response for {endpoint} (cache size: {len(_cache)})") 134 | 135 | 136 | def clear_cache() -> None: 137 | """Clear all cached responses.""" 138 | global _cache 139 | size = len(_cache) 140 | _cache = {} 141 | logger.info(f"Cleared FDA cache ({size} entries)") 142 | 143 | 144 | def get_cache_stats() -> dict[str, Any]: 145 | """ 146 | Get cache statistics. 147 | 148 | Returns: 149 | Dictionary with cache statistics 150 | """ 151 | now = datetime.now() 152 | valid_count = 0 153 | total_age = 0.0 154 | 155 | for _data, timestamp in _cache.values(): 156 | age = (now - timestamp).total_seconds() 157 | if age < CACHE_TTL_MINUTES * 60: 158 | valid_count += 1 159 | total_age += age 160 | 161 | avg_age = total_age / valid_count if valid_count > 0 else 0 162 | 163 | return { 164 | "total_entries": len(_cache), 165 | "valid_entries": valid_count, 166 | "expired_entries": len(_cache) - valid_count, 167 | "average_age_seconds": avg_age, 168 | "ttl_minutes": CACHE_TTL_MINUTES, 169 | "max_size": MAX_CACHE_SIZE, 170 | } 171 | 172 | 173 | def is_cacheable_request(endpoint: str, params: dict[str, Any]) -> bool: 174 | """ 175 | Determine if a request should be cached. 
176 | 177 | Args: 178 | endpoint: The API endpoint URL 179 | params: Query parameters 180 | 181 | Returns: 182 | True if the request should be cached 183 | """ 184 | # Don't cache if caching is disabled 185 | if CACHE_TTL_MINUTES <= 0: 186 | return False 187 | 188 | # Don't cache very large requests 189 | return params.get("limit", 0) <= 100 190 | ``` -------------------------------------------------------------------------------- /tests/tdd/drugs/test_drug_getter.py: -------------------------------------------------------------------------------- ```python 1 | """Unit tests for drug information retrieval.""" 2 | 3 | import json 4 | 5 | import pytest 6 | 7 | from biomcp.drugs.getter import get_drug 8 | 9 | 10 | class TestDrugGetter: 11 | """Test drug information retrieval.""" 12 | 13 | @pytest.fixture 14 | def mock_drug_response(self): 15 | """Mock drug response from MyChem.info.""" 16 | return { 17 | "_id": "CHEMBL941", 18 | "name": "Imatinib", 19 | "drugbank": { 20 | "id": "DB00619", 21 | "name": "Imatinib", 22 | "description": "Imatinib is a tyrosine kinase inhibitor...", 23 | "indication": "Treatment of chronic myeloid leukemia...", 24 | "mechanism_of_action": "Inhibits BCR-ABL tyrosine kinase...", 25 | "products": {"name": ["Gleevec", "Glivec"]}, 26 | }, 27 | "chembl": { 28 | "molecule_chembl_id": "CHEMBL941", 29 | "pref_name": "IMATINIB", 30 | }, 31 | "pubchem": {"cid": 5291}, 32 | "chebi": {"id": "CHEBI:45783", "name": "imatinib"}, 33 | "inchikey": "KTUFNOKKBVMGRW-UHFFFAOYSA-N", 34 | "formula": "C29H31N7O", 35 | } 36 | 37 | @pytest.mark.asyncio 38 | async def test_get_drug_by_name(self, monkeypatch, mock_drug_response): 39 | """Test getting drug by name.""" 40 | # Mock the API call 41 | call_count = 0 42 | responses = [ 43 | # Query response 44 | ({"hits": [{"_id": "CHEMBL941"}]}, None), 45 | # Get response 46 | (mock_drug_response, None), 47 | ] 48 | 49 | async def mock_request_api(url, request, method, domain): 50 | nonlocal call_count 51 | result = 
responses[call_count] 52 | call_count += 1 53 | return result 54 | 55 | monkeypatch.setattr("biomcp.http_client.request_api", mock_request_api) 56 | 57 | result = await get_drug("imatinib") 58 | 59 | assert "## Drug: Imatinib" in result 60 | assert "DrugBank ID**: DB00619" in result 61 | assert "ChEMBL ID**: CHEMBL941" in result 62 | assert "Formula**: C29H31N7O" in result 63 | assert "Trade Names**: Gleevec, Glivec" in result 64 | assert "External Links" in result 65 | assert "DrugBank](https://www.drugbank.ca/drugs/DB00619)" in result 66 | 67 | @pytest.mark.asyncio 68 | async def test_get_drug_by_id(self, monkeypatch, mock_drug_response): 69 | """Test getting drug by DrugBank ID.""" 70 | 71 | # Mock the API call 72 | async def mock_request_api(url, request, method, domain): 73 | return (mock_drug_response, None) 74 | 75 | monkeypatch.setattr("biomcp.http_client.request_api", mock_request_api) 76 | 77 | result = await get_drug("DB00619") 78 | 79 | assert "## Drug: Imatinib" in result 80 | assert "DrugBank ID**: DB00619" in result 81 | 82 | @pytest.mark.asyncio 83 | async def test_get_drug_json_output(self, monkeypatch, mock_drug_response): 84 | """Test getting drug with JSON output.""" 85 | 86 | # Mock the API call 87 | async def mock_request_api(url, request, method, domain): 88 | return (mock_drug_response, None) 89 | 90 | monkeypatch.setattr("biomcp.http_client.request_api", mock_request_api) 91 | 92 | result = await get_drug("DB00619", output_json=True) 93 | data = json.loads(result) 94 | 95 | assert data["drug_id"] == "CHEMBL941" 96 | assert data["name"] == "Imatinib" 97 | assert data["drugbank_id"] == "DB00619" 98 | assert ( 99 | data["_links"]["DrugBank"] 100 | == "https://www.drugbank.ca/drugs/DB00619" 101 | ) 102 | 103 | @pytest.mark.asyncio 104 | async def test_drug_not_found(self, monkeypatch): 105 | """Test drug not found.""" 106 | 107 | # Mock the API call 108 | async def mock_request_api(url, request, method, domain): 109 | return ({"hits": []}, 
None) 110 | 111 | monkeypatch.setattr("biomcp.http_client.request_api", mock_request_api) 112 | 113 | result = await get_drug("INVALID_DRUG_XYZ") 114 | 115 | assert "Drug 'INVALID_DRUG_XYZ' not found" in result 116 | 117 | @pytest.mark.asyncio 118 | async def test_drug_with_description_truncation(self, monkeypatch): 119 | """Test drug with long description gets truncated.""" 120 | long_desc = "A" * 600 121 | mock_response = { 122 | "_id": "TEST001", 123 | "name": "TestDrug", 124 | "drugbank": {"id": "DB99999", "description": long_desc}, 125 | } 126 | 127 | async def mock_request_api(url, request, method, domain): 128 | return (mock_response, None) 129 | 130 | monkeypatch.setattr("biomcp.http_client.request_api", mock_request_api) 131 | 132 | result = await get_drug("DB99999") 133 | 134 | assert "Description" in result 135 | assert "A" * 500 in result 136 | assert "..." in result # Truncation indicator 137 | 138 | @pytest.mark.asyncio 139 | async def test_drug_error_handling(self, monkeypatch): 140 | """Test error handling.""" 141 | 142 | # Mock the API call to raise an exception 143 | async def mock_request_api(url, request, method, domain): 144 | raise Exception("API error") 145 | 146 | monkeypatch.setattr("biomcp.http_client.request_api", mock_request_api) 147 | 148 | result = await get_drug("imatinib") 149 | 150 | # When an exception occurs, it's caught and the drug is reported as not found 151 | assert "Drug 'imatinib' not found in MyChem.info" in result 152 | ``` -------------------------------------------------------------------------------- /src/biomcp/drugs/getter.py: -------------------------------------------------------------------------------- ```python 1 | """Drug information retrieval from MyChem.info.""" 2 | 3 | import json 4 | import logging 5 | 6 | from ..integrations import BioThingsClient 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def _add_drug_links(drug_info, result: dict) -> None: 12 | """Add external database links for the 
drug.""" 13 | links = {} 14 | 15 | if drug_info.drugbank_id: 16 | links["DrugBank"] = ( 17 | f"https://www.drugbank.ca/drugs/{drug_info.drugbank_id}" 18 | ) 19 | 20 | if drug_info.chembl_id: 21 | links["ChEMBL"] = ( 22 | f"https://www.ebi.ac.uk/chembl/compound_report_card/{drug_info.chembl_id}/" 23 | ) 24 | 25 | if drug_info.pubchem_cid: 26 | links["PubChem"] = ( 27 | f"https://pubchem.ncbi.nlm.nih.gov/compound/{drug_info.pubchem_cid}" 28 | ) 29 | 30 | if drug_info.chebi_id: 31 | chebi_id = drug_info.chebi_id.replace("CHEBI:", "") 32 | links["ChEBI"] = ( 33 | f"https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:{chebi_id}" 34 | ) 35 | 36 | if links: 37 | result["_links"] = links 38 | 39 | 40 | def _format_basic_info(drug_info, output_lines: list[str]) -> None: 41 | """Format basic drug information.""" 42 | if drug_info.formula: 43 | output_lines.append(f"- **Formula**: {drug_info.formula}") 44 | 45 | if drug_info.drugbank_id: 46 | output_lines.append(f"- **DrugBank ID**: {drug_info.drugbank_id}") 47 | 48 | if drug_info.chembl_id: 49 | output_lines.append(f"- **ChEMBL ID**: {drug_info.chembl_id}") 50 | 51 | if drug_info.pubchem_cid: 52 | output_lines.append(f"- **PubChem CID**: {drug_info.pubchem_cid}") 53 | 54 | if drug_info.chebi_id: 55 | output_lines.append(f"- **ChEBI ID**: {drug_info.chebi_id}") 56 | 57 | if drug_info.inchikey: 58 | output_lines.append(f"- **InChIKey**: {drug_info.inchikey}") 59 | 60 | 61 | def _format_clinical_info(drug_info, output_lines: list[str]) -> None: 62 | """Format clinical drug information.""" 63 | if drug_info.tradename: 64 | names = drug_info.tradename[:5] # Limit to first 5 65 | output_lines.append(f"- **Trade Names**: {', '.join(names)}") 66 | if len(drug_info.tradename) > 5: 67 | output_lines.append(f" (and {len(drug_info.tradename) - 5} more)") 68 | 69 | if drug_info.description: 70 | desc = drug_info.description[:500] 71 | if len(drug_info.description) > 500: 72 | desc += "..." 
73 | output_lines.append(f"\n### Description\n{desc}") 74 | 75 | if drug_info.indication: 76 | ind = drug_info.indication[:500] 77 | if len(drug_info.indication) > 500: 78 | ind += "..." 79 | output_lines.append(f"\n### Indication\n{ind}") 80 | 81 | if drug_info.mechanism_of_action: 82 | moa = drug_info.mechanism_of_action[:500] 83 | if len(drug_info.mechanism_of_action) > 500: 84 | moa += "..." 85 | output_lines.append(f"\n### Mechanism of Action\n{moa}") 86 | 87 | 88 | def _format_drug_output(drug_info, result: dict) -> None: 89 | """Format drug information for text output.""" 90 | output_lines = [f"## Drug: {drug_info.name or 'Unknown'}"] 91 | 92 | _format_basic_info(drug_info, output_lines) 93 | _format_clinical_info(drug_info, output_lines) 94 | 95 | if result.get("_links"): 96 | output_lines.append("\n### External Links") 97 | for name, url in result["_links"].items(): 98 | output_lines.append(f"- [{name}]({url})") 99 | 100 | result["_formatted"] = "\n".join(output_lines) 101 | 102 | 103 | async def get_drug(drug_id_or_name: str, output_json: bool = False) -> str: 104 | """Get drug information from MyChem.info. 105 | 106 | Args: 107 | drug_id_or_name: Drug ID (DrugBank, ChEMBL, etc.) 
or name 108 | output_json: Return JSON instead of formatted text 109 | 110 | Returns: 111 | Formatted drug information or JSON string 112 | """ 113 | try: 114 | client = BioThingsClient() 115 | drug_info = await client.get_drug_info(drug_id_or_name) 116 | 117 | if not drug_info: 118 | error_msg = f"Drug '{drug_id_or_name}' not found in MyChem.info" 119 | if output_json: 120 | return json.dumps({"error": error_msg}, indent=2) 121 | return error_msg 122 | 123 | # Build result dictionary 124 | result = drug_info.model_dump(by_alias=False, exclude_none=True) 125 | 126 | # Add external links 127 | _add_drug_links(drug_info, result) 128 | 129 | if output_json: 130 | return json.dumps(result, indent=2) 131 | 132 | # Format for text output 133 | _format_drug_output(drug_info, result) 134 | return result["_formatted"] 135 | 136 | except Exception as e: 137 | logger.error(f"Error getting drug info: {e}") 138 | error_msg = f"Error retrieving drug information: {e!s}" 139 | if output_json: 140 | return json.dumps({"error": error_msg}, indent=2) 141 | return error_msg 142 | 143 | 144 | # MCP tool function 145 | async def _drug_details(drug_id_or_name: str) -> str: 146 | """Get drug/chemical information from MyChem.info. 147 | 148 | This tool retrieves comprehensive drug information including: 149 | - Drug identifiers (DrugBank, ChEMBL, PubChem, etc.) 
150 | - Chemical properties (formula, InChIKey) 151 | - Trade names and synonyms 152 | - Clinical indications 153 | - Mechanism of action 154 | - Links to external databases 155 | 156 | Args: 157 | drug_id_or_name: Drug name (e.g., "aspirin") or ID (e.g., "DB00945", "CHEMBL25") 158 | 159 | Returns: 160 | Formatted drug information with external database links 161 | """ 162 | return await get_drug(drug_id_or_name, output_json=False) 163 | ``` -------------------------------------------------------------------------------- /src/biomcp/prefetch.py: -------------------------------------------------------------------------------- ```python 1 | """Prefetching system for common queries to improve performance. 2 | 3 | This module implements a prefetching mechanism that warms up caches with 4 | commonly searched biomedical entities during startup. This significantly 5 | improves response times for frequent queries. 6 | 7 | Key Features: 8 | - Prefetches common genes, diseases, and chemicals on startup 9 | - Runs asynchronously to avoid blocking server initialization 10 | - Includes timeout to prevent startup delays 11 | - Graceful error handling if prefetching fails 12 | 13 | The prefetching runs automatically when the MCP server starts via the 14 | lifespan hook in core.py. 15 | 16 | Configuration: 17 | The lists of entities to prefetch can be customized by modifying 18 | the COMMON_GENES, COMMON_DISEASES, and COMMON_CHEMICALS constants. 
19 | """ 20 | 21 | import asyncio 22 | import logging 23 | 24 | from .constants import ( 25 | PREFETCH_TIMEOUT, 26 | PREFETCH_TOP_CHEMICALS, 27 | PREFETCH_TOP_DISEASES, 28 | PREFETCH_TOP_GENES, 29 | ) 30 | 31 | logger = logging.getLogger(__name__) 32 | 33 | # Common genes that are frequently searched 34 | COMMON_GENES = [ 35 | "BRAF", 36 | "EGFR", 37 | "TP53", 38 | "KRAS", 39 | "ALK", 40 | "ROS1", 41 | "MET", 42 | "RET", 43 | "NTRK1", 44 | "NTRK2", 45 | "NTRK3", 46 | ] 47 | 48 | # Common cancer types 49 | COMMON_DISEASES = [ 50 | "lung cancer", 51 | "breast cancer", 52 | "colorectal cancer", 53 | "melanoma", 54 | "non-small cell lung cancer", 55 | "small cell lung cancer", 56 | ] 57 | 58 | # Common drug names 59 | COMMON_CHEMICALS = [ 60 | "osimertinib", 61 | "pembrolizumab", 62 | "nivolumab", 63 | "dabrafenib", 64 | "trametinib", 65 | "crizotinib", 66 | "alectinib", 67 | ] 68 | 69 | 70 | class PrefetchManager: 71 | """Manages prefetching of common queries.""" 72 | 73 | def __init__(self): 74 | self._prefetch_task: asyncio.Task | None = None 75 | self._is_prefetching = False 76 | self._prefetch_complete = False 77 | 78 | async def start_prefetching(self): 79 | """Start prefetching common queries in the background.""" 80 | if self._is_prefetching or self._prefetch_complete: 81 | return 82 | 83 | self._is_prefetching = True 84 | try: 85 | # Start prefetch task 86 | self._prefetch_task = asyncio.create_task( 87 | self._prefetch_common_queries() 88 | ) 89 | except Exception as e: 90 | logger.warning(f"Failed to start prefetching: {e}") 91 | self._is_prefetching = False 92 | 93 | async def _prefetch_common_queries(self): 94 | """Prefetch common queries to warm up the cache.""" 95 | try: 96 | # Import here to avoid circular imports 97 | from .articles.autocomplete import EntityRequest, autocomplete 98 | from .variants.cbioportal_search import CBioPortalSearchClient 99 | 100 | tasks = [] 101 | 102 | # Prefetch gene autocomplete 103 | for gene in COMMON_GENES[ 104 | 
:PREFETCH_TOP_GENES 105 | ]: # Limit to avoid overload 106 | request = EntityRequest(concept="gene", query=gene, limit=1) 107 | tasks.append(autocomplete(request)) 108 | 109 | # Prefetch disease autocomplete 110 | for disease in COMMON_DISEASES[:PREFETCH_TOP_DISEASES]: 111 | request = EntityRequest( 112 | concept="disease", query=disease, limit=1 113 | ) 114 | tasks.append(autocomplete(request)) 115 | 116 | # Prefetch chemical autocomplete 117 | for chemical in COMMON_CHEMICALS[:PREFETCH_TOP_CHEMICALS]: 118 | request = EntityRequest( 119 | concept="chemical", query=chemical, limit=1 120 | ) 121 | tasks.append(autocomplete(request)) 122 | 123 | # Execute all autocomplete prefetches 124 | if tasks: 125 | await asyncio.gather(*tasks, return_exceptions=True) 126 | 127 | # Prefetch cBioPortal summaries for common genes 128 | cbio_client = CBioPortalSearchClient() 129 | cbio_tasks = [] 130 | 131 | for gene in COMMON_GENES[:PREFETCH_TOP_GENES]: # Top genes 132 | cbio_tasks.append( 133 | cbio_client.get_gene_search_summary(gene, max_studies=5) 134 | ) 135 | 136 | if cbio_tasks: 137 | await asyncio.gather(*cbio_tasks, return_exceptions=True) 138 | 139 | logger.info("Prefetching completed successfully") 140 | 141 | except Exception as e: 142 | logger.warning(f"Error during prefetching: {e}") 143 | finally: 144 | self._is_prefetching = False 145 | self._prefetch_complete = True 146 | 147 | async def wait_for_prefetch(self, timeout: float = PREFETCH_TIMEOUT): 148 | """Wait for prefetch to complete with timeout.""" 149 | if not self._prefetch_task: 150 | return 151 | 152 | try: 153 | await asyncio.wait_for(self._prefetch_task, timeout=timeout) 154 | except asyncio.TimeoutError: 155 | # Prefetch taking too long, continue without waiting 156 | logger.debug("Prefetch timeout - continuing without waiting") 157 | except Exception as e: 158 | # Ignore prefetch errors 159 | logger.debug(f"Prefetch error ignored: {e}") 160 | 161 | 162 | # Global prefetch manager 163 | _prefetch_manager 
= PrefetchManager() 164 | 165 | 166 | async def start_prefetching(): 167 | """Start the prefetching process.""" 168 | await _prefetch_manager.start_prefetching() 169 | 170 | 171 | async def wait_for_prefetch(timeout: float = PREFETCH_TIMEOUT): 172 | """Wait for prefetch to complete.""" 173 | await _prefetch_manager.wait_for_prefetch(timeout) 174 | ``` -------------------------------------------------------------------------------- /docs/backend-services-reference/01-overview.md: -------------------------------------------------------------------------------- ```markdown 1 | # Backend Services Reference Overview 2 | 3 | BioMCP integrates with multiple biomedical databases and services to provide comprehensive research capabilities. This reference documents the underlying APIs and their capabilities. 4 | 5 | ## Service Categories 6 | 7 | ### Literature and Publications 8 | 9 | - **[PubTator3](06-pubtator3.md)**: Biomedical literature with entity annotations 10 | - **Europe PMC**: Preprints from bioRxiv and medRxiv 11 | 12 | ### Clinical Trials 13 | 14 | - **[ClinicalTrials.gov](04-clinicaltrials-gov.md)**: U.S. 
and international clinical trials registry 15 | - **[NCI CTS API](05-nci-cts-api.md)**: National Cancer Institute's enhanced trial search 16 | 17 | ### Biomedical Annotations 18 | 19 | - **[BioThings Suite](02-biothings-suite.md)**: 20 | - MyGene.info - Gene annotations 21 | - MyVariant.info - Variant annotations 22 | - MyDisease.info - Disease ontology 23 | - MyChem.info - Drug/chemical data 24 | 25 | ### Cancer Genomics 26 | 27 | - **[cBioPortal](03-cbioportal.md)**: Cancer genomics portal with mutation data 28 | - **TCGA**: The Cancer Genome Atlas (via MyVariant.info) 29 | 30 | ### Variant Effect Prediction 31 | 32 | - **[AlphaGenome](07-alphagenome.md)**: Google DeepMind's AI for regulatory predictions 33 | 34 | ## API Authentication 35 | 36 | | Service | Authentication Required | Type | Rate Limits | 37 | | ------------------ | ----------------------- | ------- | ------------------- | 38 | | PubTator3 | No | Public | 3 requests/second | 39 | | ClinicalTrials.gov | No | Public | 50,000 requests/day | 40 | | NCI CTS API | Yes | API Key | 1,000 requests/day | 41 | | BioThings APIs | No | Public | 1,000 requests/hour | 42 | | cBioPortal | Optional | Token | Higher with token | 43 | | AlphaGenome | Yes | API Key | Contact provider | 44 | 45 | ## Data Flow Architecture 46 | 47 | ``` 48 | User Query → BioMCP Tools → Backend APIs → Unified Response 49 | 50 | Example Flow: 51 | 1. User: "Find articles about BRAF mutations" 52 | 2. BioMCP: article_searcher tool 53 | 3. APIs Called: 54 | - PubTator3 (articles) 55 | - cBioPortal (mutation data) 56 | - Europe PMC (preprints) 57 | 4. 
Response: Integrated results with citations 58 | ``` 59 | 60 | ## Service Reliability 61 | 62 | ### Primary Services 63 | 64 | - **PubTator3**: 99.9% uptime, updated daily 65 | - **ClinicalTrials.gov**: 99.5% uptime, updated daily 66 | - **BioThings APIs**: 99.9% uptime, real-time data 67 | 68 | ### Fallback Strategies 69 | 70 | - Cache frequently accessed data 71 | - Implement exponential backoff 72 | - Use alternative endpoints when available 73 | 74 | ## Common Integration Patterns 75 | 76 | ### 1. Entity Recognition Enhancement 77 | 78 | ``` 79 | PubTator3 → Extract entities → BioThings → Get detailed annotations 80 | ``` 81 | 82 | ### 2. Variant to Trial Pipeline 83 | 84 | ``` 85 | MyVariant.info → Get gene → ClinicalTrials.gov → Find relevant trials 86 | ``` 87 | 88 | ### 3. Comprehensive Gene Analysis 89 | 90 | ``` 91 | MyGene.info → Basic info 92 | cBioPortal → Cancer mutations 93 | PubTator3 → Literature 94 | AlphaGenome → Predictions 95 | ``` 96 | 97 | ## Performance Considerations 98 | 99 | ### Response Times (typical) 100 | 101 | - PubTator3: 200-500ms 102 | - ClinicalTrials.gov: 300-800ms 103 | - BioThings APIs: 100-300ms 104 | - cBioPortal: 200-600ms 105 | - AlphaGenome: 1-3 seconds 106 | 107 | ### Optimization Strategies 108 | 109 | 1. **Batch requests** when APIs support it 110 | 2. **Cache static data** (gene names, ontologies) 111 | 3. **Parallelize independent** API calls 112 | 4. 
**Use pagination** for large result sets 113 | 114 | ## Error Handling 115 | 116 | ### Common Error Types 117 | 118 | - **Rate Limiting**: 429 errors, implement backoff 119 | - **Invalid Parameters**: 400 errors, validate inputs 120 | - **Service Unavailable**: 503 errors, retry with delay 121 | - **Authentication**: 401 errors, check API keys 122 | 123 | ### Error Response Format 124 | 125 | ```json 126 | { 127 | "error": { 128 | "code": "RATE_LIMIT_EXCEEDED", 129 | "message": "API rate limit exceeded", 130 | "retry_after": 3600 131 | } 132 | } 133 | ``` 134 | 135 | ## Data Formats 136 | 137 | ### Input Formats 138 | 139 | - **Identifiers**: HGNC symbols, rsIDs, NCT numbers, PMIDs 140 | - **Coordinates**: GRCh38 genomic positions 141 | - **Terms**: MeSH, MONDO, HPO ontologies 142 | 143 | ### Output Formats 144 | 145 | - **JSON**: Primary format for all APIs 146 | - **XML**: Available for some services 147 | - **TSV/CSV**: Export options for bulk data 148 | 149 | ## Update Frequencies 150 | 151 | | Service | Update Frequency | Data Lag | 152 | | ------------------ | ---------------- | ---------- | 153 | | PubTator3 | Daily | 1-2 days | 154 | | ClinicalTrials.gov | Daily | Real-time | 155 | | NCI CTS | Daily | 1 day | 156 | | BioThings | Real-time | Minutes | 157 | | cBioPortal | Quarterly | 3-6 months | 158 | 159 | ## Best Practices 160 | 161 | ### 1. API Key Management 162 | 163 | - Store keys securely 164 | - Rotate keys periodically 165 | - Monitor usage against limits 166 | 167 | ### 2. Error Recovery 168 | 169 | - Implement retry logic 170 | - Log failed requests 171 | - Provide fallback data 172 | 173 | ### 3. Data Validation 174 | 175 | - Verify gene symbols 176 | - Validate genomic coordinates 177 | - Check identifier formats 178 | 179 | ### 4. Performance 180 | 181 | - Cache when appropriate 182 | - Batch similar requests 183 | - Use appropriate page sizes 184 | 185 | ## Getting Started 186 | 187 | 1. Review individual service documentation 188 | 2. 
Obtain necessary API keys 189 | 3. Test endpoints with sample data 190 | 4. Implement error handling 191 | 5. Monitor usage and performance 192 | 193 | ## Support Resources 194 | 195 | - **PubTator3**: [Support Forum](https://www.ncbi.nlm.nih.gov/research/pubtator3/) 196 | - **ClinicalTrials.gov**: [Help Desk](https://clinicaltrials.gov/help) 197 | - **BioThings**: [Documentation](https://docs.biothings.io/) 198 | - **cBioPortal**: [User Guide](https://docs.cbioportal.org/) 199 | - **NCI**: [API Support](https://api.cancer.gov/support) 200 | ``` -------------------------------------------------------------------------------- /tests/tdd/test_concurrent_requests.py: -------------------------------------------------------------------------------- ```python 1 | """Test concurrent request handling in the HTTP client.""" 2 | 3 | import asyncio 4 | from unittest.mock import AsyncMock, patch 5 | 6 | import pytest 7 | 8 | from biomcp import http_client 9 | 10 | 11 | class TestConcurrentRequests: 12 | """Test concurrent request handling.""" 13 | 14 | @pytest.mark.asyncio 15 | async def test_concurrent_requests_same_domain(self): 16 | """Test multiple concurrent requests to the same domain.""" 17 | # Use patch instead of direct replacement 18 | with patch( 19 | "biomcp.http_client.call_http", new_callable=AsyncMock 20 | ) as mock_call: 21 | # Configure mock to return success 22 | mock_call.return_value = (200, '{"data": "response"}') 23 | 24 | # Make 10 concurrent requests with different URLs to avoid caching 25 | # and disable caching explicitly 26 | tasks = [ 27 | http_client.request_api( 28 | url=f"https://api.example.com/resource/{i}", 29 | request={}, 30 | domain="example", 31 | cache_ttl=0, # Disable caching 32 | ) 33 | for i in range(10) 34 | ] 35 | 36 | results = await asyncio.gather(*tasks) 37 | 38 | # All requests should succeed 39 | assert len(results) == 10 40 | for data, error in results: 41 | assert error is None 42 | assert data == {"data": "response"} 43 | 44 
| # Check that rate limiting was applied 45 | assert mock_call.call_count == 10 46 | 47 | @pytest.mark.asyncio 48 | async def test_concurrent_requests_different_domains(self): 49 | """Test concurrent requests to different domains.""" 50 | with patch( 51 | "biomcp.http_client.call_http", new_callable=AsyncMock 52 | ) as mock_call: 53 | # Return different responses based on URL 54 | async def side_effect(method, url, *args, **kwargs): 55 | if "domain1" in url: 56 | return (200, '{"source": "domain1"}') 57 | elif "domain2" in url: 58 | return (200, '{"source": "domain2"}') 59 | else: 60 | return (200, '{"source": "other"}') 61 | 62 | mock_call.side_effect = side_effect 63 | 64 | # Make requests to different domains 65 | tasks = [ 66 | http_client.request_api( 67 | "https://domain1.com/api", {}, domain="domain1" 68 | ), 69 | http_client.request_api( 70 | "https://domain2.com/api", {}, domain="domain2" 71 | ), 72 | http_client.request_api( 73 | "https://domain3.com/api", {}, domain="domain3" 74 | ), 75 | ] 76 | 77 | results = await asyncio.gather(*tasks) 78 | 79 | # Check results 80 | assert results[0][0] == {"source": "domain1"} 81 | assert results[1][0] == {"source": "domain2"} 82 | assert results[2][0] == {"source": "other"} 83 | 84 | @pytest.mark.asyncio 85 | async def test_concurrent_cache_access(self): 86 | """Test that concurrent requests properly use cache.""" 87 | with patch( 88 | "biomcp.http_client.call_http", new_callable=AsyncMock 89 | ) as mock_call: 90 | mock_call.return_value = (200, '{"data": "cached"}') 91 | 92 | # First request to populate cache 93 | await http_client.request_api( 94 | url="https://api.example.com/data", 95 | request={}, 96 | domain="example", 97 | cache_ttl=60, 98 | ) 99 | 100 | # Reset call count 101 | initial_calls = mock_call.call_count 102 | 103 | # Make 5 concurrent requests to same URL 104 | tasks = [ 105 | http_client.request_api( 106 | url="https://api.example.com/data", 107 | request={}, 108 | domain="example", 109 | 
cache_ttl=60, 110 | ) 111 | for _ in range(5) 112 | ] 113 | 114 | results = await asyncio.gather(*tasks) 115 | 116 | # All should get cached response 117 | assert len(results) == 5 118 | for data, _error in results: 119 | assert data == {"data": "cached"} 120 | 121 | # No additional HTTP calls should have been made 122 | assert mock_call.call_count == initial_calls 123 | 124 | @pytest.mark.asyncio 125 | async def test_concurrent_circuit_breaker(self): 126 | """Test circuit breaker behavior with concurrent failures.""" 127 | with patch( 128 | "biomcp.http_client.call_http", new_callable=AsyncMock 129 | ) as mock_call: 130 | # Simulate failures 131 | mock_call.return_value = (500, "Internal Server Error") 132 | 133 | # Make concurrent failing requests 134 | tasks = [ 135 | http_client.request_api( 136 | url=f"https://failing.com/api/{i}", 137 | request={}, 138 | domain="failing", 139 | ) 140 | for i in range(10) 141 | ] 142 | 143 | results = await asyncio.gather(*tasks, return_exceptions=True) 144 | 145 | # All should fail 146 | error_count = sum(1 for _, error in results if error is not None) 147 | assert error_count == 10 148 | 149 | # Circuit should be open now 150 | # Additional requests should fail immediately 151 | _, error = await http_client.request_api( 152 | url="https://failing.com/api/test", 153 | request={}, 154 | domain="failing", 155 | ) 156 | 157 | assert error is not None 158 | # Check that circuit breaker is preventing calls 159 | # (exact behavior depends on implementation details) 160 | ``` -------------------------------------------------------------------------------- /tests/tdd/test_connection_pool.py: -------------------------------------------------------------------------------- ```python 1 | """Tests for connection pool management.""" 2 | 3 | import asyncio 4 | import ssl 5 | import weakref 6 | from unittest.mock import patch 7 | 8 | import httpx 9 | import pytest 10 | 11 | from biomcp.connection_pool import ( 12 | EventLoopConnectionPools, 
13 | close_all_pools, 14 | get_connection_pool, 15 | ) 16 | 17 | 18 | @pytest.fixture 19 | def pool_manager(): 20 | """Create a fresh pool manager for testing.""" 21 | return EventLoopConnectionPools() 22 | 23 | 24 | @pytest.mark.asyncio 25 | async def test_get_pool_creates_new_pool(pool_manager): 26 | """Test that get_pool creates a new pool when none exists.""" 27 | timeout = httpx.Timeout(30) 28 | 29 | pool = await pool_manager.get_pool(verify=True, timeout=timeout) 30 | 31 | assert pool is not None 32 | assert isinstance(pool, httpx.AsyncClient) 33 | assert not pool.is_closed 34 | 35 | 36 | @pytest.mark.asyncio 37 | async def test_get_pool_reuses_existing_pool(pool_manager): 38 | """Test that get_pool reuses existing pools.""" 39 | timeout = httpx.Timeout(30) 40 | 41 | pool1 = await pool_manager.get_pool(verify=True, timeout=timeout) 42 | pool2 = await pool_manager.get_pool(verify=True, timeout=timeout) 43 | 44 | assert pool1 is pool2 45 | 46 | 47 | @pytest.mark.asyncio 48 | async def test_get_pool_different_verify_settings(pool_manager): 49 | """Test that different verify settings create different pools.""" 50 | timeout = httpx.Timeout(30) 51 | 52 | pool1 = await pool_manager.get_pool(verify=True, timeout=timeout) 53 | pool2 = await pool_manager.get_pool(verify=False, timeout=timeout) 54 | 55 | assert pool1 is not pool2 56 | 57 | 58 | @pytest.mark.asyncio 59 | async def test_get_pool_ssl_context(pool_manager): 60 | """Test pool creation with SSL context.""" 61 | ssl_context = ssl.create_default_context() 62 | timeout = httpx.Timeout(30) 63 | 64 | pool = await pool_manager.get_pool(verify=ssl_context, timeout=timeout) 65 | 66 | assert pool is not None 67 | assert isinstance(pool, httpx.AsyncClient) 68 | 69 | 70 | @pytest.mark.asyncio 71 | async def test_pool_cleanup_on_close_all(pool_manager): 72 | """Test that close_all properly closes all pools.""" 73 | timeout = httpx.Timeout(30) 74 | 75 | await pool_manager.get_pool(verify=True, timeout=timeout) 76 | await 
pool_manager.get_pool(verify=False, timeout=timeout) 77 | 78 | await pool_manager.close_all() 79 | 80 | # After close_all, pools should be cleared 81 | assert len(pool_manager._loop_pools) == 0 82 | 83 | 84 | @pytest.mark.asyncio 85 | async def test_no_event_loop_returns_single_use_client(pool_manager): 86 | """Test behavior when no event loop is running.""" 87 | with patch("asyncio.get_running_loop", side_effect=RuntimeError): 88 | timeout = httpx.Timeout(30) 89 | 90 | pool = await pool_manager.get_pool(verify=True, timeout=timeout) 91 | 92 | assert pool is not None 93 | # Single-use client should have no keepalive 94 | # Note: httpx client internal structure may vary 95 | 96 | 97 | @pytest.mark.asyncio 98 | async def test_pool_recreation_after_close(pool_manager): 99 | """Test that a new pool is created after the old one is closed.""" 100 | timeout = httpx.Timeout(30) 101 | 102 | pool1 = await pool_manager.get_pool(verify=True, timeout=timeout) 103 | await pool1.aclose() 104 | 105 | pool2 = await pool_manager.get_pool(verify=True, timeout=timeout) 106 | 107 | assert pool1 is not pool2 108 | assert pool1.is_closed 109 | assert not pool2.is_closed 110 | 111 | 112 | @pytest.mark.asyncio 113 | async def test_weak_reference_cleanup(): 114 | """Test that weak references are used for event loops.""" 115 | pool_manager = EventLoopConnectionPools() 116 | 117 | # Verify that the pool manager uses weak references 118 | assert isinstance(pool_manager._loop_pools, weakref.WeakKeyDictionary) 119 | 120 | # Create a pool 121 | timeout = httpx.Timeout(30) 122 | pool = await pool_manager.get_pool(verify=True, timeout=timeout) 123 | 124 | # Verify pool was created 125 | assert pool is not None 126 | 127 | # The current event loop should be in the weak key dict 128 | current_loop = asyncio.get_running_loop() 129 | assert current_loop in pool_manager._loop_pools 130 | 131 | 132 | @pytest.mark.asyncio 133 | async def test_global_get_connection_pool(): 134 | """Test the global 
get_connection_pool function.""" 135 | with patch.dict("os.environ", {"BIOMCP_USE_CONNECTION_POOL": "true"}): 136 | timeout = httpx.Timeout(30) 137 | 138 | pool = await get_connection_pool(verify=True, timeout=timeout) 139 | 140 | assert pool is not None 141 | assert isinstance(pool, httpx.AsyncClient) 142 | 143 | 144 | @pytest.mark.asyncio 145 | async def test_global_close_all_pools(): 146 | """Test the global close_all_pools function.""" 147 | # Create some pools 148 | timeout = httpx.Timeout(30) 149 | await get_connection_pool(verify=True, timeout=timeout) 150 | await get_connection_pool(verify=False, timeout=timeout) 151 | 152 | # Close all pools 153 | await close_all_pools() 154 | 155 | # Verify cleanup (this is implementation-specific) 156 | from biomcp.connection_pool import _pool_manager 157 | 158 | assert len(_pool_manager._loop_pools) == 0 159 | 160 | 161 | @pytest.mark.asyncio 162 | async def test_concurrent_pool_creation(pool_manager): 163 | """Test thread-safe pool creation under concurrent access.""" 164 | timeout = httpx.Timeout(30) 165 | 166 | async def get_pool(): 167 | return await pool_manager.get_pool(verify=True, timeout=timeout) 168 | 169 | # Create 10 concurrent requests for the same pool 170 | pools = await asyncio.gather(*[get_pool() for _ in range(10)]) 171 | 172 | # All should return the same pool instance 173 | assert all(pool is pools[0] for pool in pools) 174 | 175 | 176 | @pytest.mark.asyncio 177 | async def test_connection_pool_limits(): 178 | """Test that connection pools have proper limits set.""" 179 | pool_manager = EventLoopConnectionPools() 180 | timeout = httpx.Timeout(30) 181 | 182 | pool = await pool_manager.get_pool(verify=True, timeout=timeout) 183 | 184 | # Verify pool was created (actual limits are internal to httpx) 185 | assert pool is not None 186 | assert isinstance(pool, httpx.AsyncClient) 187 | ``` -------------------------------------------------------------------------------- 
/tests/data/myvariant/variants_part_braf_v600_multiple.json: -------------------------------------------------------------------------------- ```json 1 | [ 2 | { 3 | "_id": "chr7:g.140453136A>G", 4 | "_score": 19.419012, 5 | "cadd": { 6 | "_license": "http://bit.ly/2TIuab9", 7 | "phred": 21.2 8 | }, 9 | "chrom": "7", 10 | "clinvar": { 11 | "_license": "http://bit.ly/2SQdcI0", 12 | "rcv": { 13 | "clinical_significance": "Likely pathogenic" 14 | }, 15 | "variant_id": 376288 16 | }, 17 | "cosmic": { 18 | "_license": "http://bit.ly/2VMkY7R", 19 | "cosmic_id": "COSM18443" 20 | }, 21 | "dbnsfp": { 22 | "_license": "http://bit.ly/2VLnQBz", 23 | "genename": ["BRAF", "BRAF", "BRAF", "BRAF"], 24 | "hgvsc": ["c.620T>C", "c.1919T>C", "c.1799T>C"], 25 | "hgvsp": ["p.V600A", "p.Val600Ala", "p.Val640Ala", "p.Val207Ala"], 26 | "polyphen2": { 27 | "hdiv": { 28 | "pred": "B", 29 | "score": 0.207 30 | } 31 | } 32 | }, 33 | "dbsnp": { 34 | "_license": "http://bit.ly/2AqoLOc", 35 | "rsid": "rs113488022" 36 | }, 37 | "vcf": { 38 | "alt": "G", 39 | "position": "140453136", 40 | "ref": "A" 41 | } 42 | }, 43 | { 44 | "_id": "chr7:g.140453136A>T", 45 | "_score": 18.693962, 46 | "cadd": { 47 | "_license": "http://bit.ly/2TIuab9", 48 | "phred": 32 49 | }, 50 | "chrom": "7", 51 | "civic": { 52 | "_license": "http://bit.ly/2FqS871", 53 | "id": 12, 54 | "openCravatUrl": "https://run.opencravat.org/webapps/variantreport/index.html?alt_base=T&chrom=chr7&pos=140753336&ref_base=A" 55 | }, 56 | "clinvar": { 57 | "_license": "http://bit.ly/2SQdcI0", 58 | "rcv": [ 59 | { 60 | "clinical_significance": "Pathogenic" 61 | }, 62 | { 63 | "clinical_significance": "Pathogenic" 64 | }, 65 | { 66 | "clinical_significance": "Pathogenic" 67 | }, 68 | { 69 | "clinical_significance": "Pathogenic" 70 | }, 71 | { 72 | "clinical_significance": "Pathogenic" 73 | }, 74 | { 75 | "clinical_significance": "Pathogenic" 76 | }, 77 | { 78 | "clinical_significance": "Pathogenic" 79 | }, 80 | { 81 | "clinical_significance": 
"not provided" 82 | }, 83 | { 84 | "clinical_significance": "Likely pathogenic" 85 | }, 86 | { 87 | "clinical_significance": "Likely pathogenic" 88 | }, 89 | { 90 | "clinical_significance": "Likely pathogenic" 91 | }, 92 | { 93 | "clinical_significance": "Likely pathogenic" 94 | }, 95 | { 96 | "clinical_significance": "Likely pathogenic" 97 | }, 98 | { 99 | "clinical_significance": "Likely pathogenic" 100 | }, 101 | { 102 | "clinical_significance": "Likely pathogenic" 103 | }, 104 | { 105 | "clinical_significance": "Pathogenic" 106 | }, 107 | { 108 | "clinical_significance": "Pathogenic" 109 | }, 110 | { 111 | "clinical_significance": "Likely pathogenic" 112 | }, 113 | { 114 | "clinical_significance": "Pathogenic" 115 | }, 116 | { 117 | "clinical_significance": "Likely pathogenic" 118 | }, 119 | { 120 | "clinical_significance": "Likely pathogenic" 121 | }, 122 | { 123 | "clinical_significance": "Pathogenic" 124 | }, 125 | { 126 | "clinical_significance": "Pathogenic" 127 | }, 128 | { 129 | "clinical_significance": "Pathogenic" 130 | }, 131 | { 132 | "clinical_significance": "Pathogenic" 133 | }, 134 | { 135 | "clinical_significance": "Likely pathogenic" 136 | }, 137 | { 138 | "clinical_significance": "Pathogenic" 139 | }, 140 | { 141 | "clinical_significance": "Pathogenic" 142 | }, 143 | { 144 | "clinical_significance": "Likely pathogenic" 145 | } 146 | ], 147 | "variant_id": 13961 148 | }, 149 | "cosmic": { 150 | "_license": "http://bit.ly/2VMkY7R", 151 | "cosmic_id": "COSM476" 152 | }, 153 | "dbnsfp": { 154 | "_license": "http://bit.ly/2VLnQBz", 155 | "genename": ["BRAF", "BRAF", "BRAF", "BRAF"], 156 | "hgvsc": ["c.620T>A", "c.1919T>A", "c.1799T>A"], 157 | "hgvsp": ["p.Val640Glu", "p.Val207Glu", "p.Val600Glu", "p.V600E"], 158 | "polyphen2": { 159 | "hdiv": { 160 | "pred": "D", 161 | "score": 0.971 162 | } 163 | } 164 | }, 165 | "dbsnp": { 166 | "_license": "http://bit.ly/2AqoLOc", 167 | "rsid": "rs113488022" 168 | }, 169 | "exac": { 170 | "_license": 
"http://bit.ly/2H9c4hg", 171 | "af": 1.647e-5 172 | }, 173 | "gnomad_exome": { 174 | "_license": "http://bit.ly/2I1cl1I", 175 | "af": { 176 | "af": 3.97994e-6 177 | } 178 | }, 179 | "vcf": { 180 | "alt": "T", 181 | "position": "140453136", 182 | "ref": "A" 183 | } 184 | }, 185 | { 186 | "_id": "chr7:g.140453136A>C", 187 | "_score": 18.476965, 188 | "cadd": { 189 | "_license": "http://bit.ly/2TIuab9", 190 | "phred": 26.0 191 | }, 192 | "chrom": "7", 193 | "clinvar": { 194 | "_license": "http://bit.ly/2SQdcI0", 195 | "rcv": [ 196 | { 197 | "clinical_significance": "not provided" 198 | }, 199 | { 200 | "clinical_significance": "Pathogenic" 201 | }, 202 | { 203 | "clinical_significance": "Pathogenic" 204 | }, 205 | { 206 | "clinical_significance": "Uncertain significance" 207 | } 208 | ], 209 | "variant_id": 40389 210 | }, 211 | "cosmic": { 212 | "_license": "http://bit.ly/2VMkY7R", 213 | "cosmic_id": "COSM6137" 214 | }, 215 | "dbnsfp": { 216 | "_license": "http://bit.ly/2VLnQBz", 217 | "genename": ["BRAF", "BRAF", "BRAF", "BRAF"], 218 | "hgvsc": ["c.1919T>G", "c.1799T>G", "c.620T>G"], 219 | "hgvsp": ["p.Val640Gly", "p.Val207Gly", "p.Val600Gly", "p.V600G"], 220 | "polyphen2": { 221 | "hdiv": { 222 | "pred": "P", 223 | "score": 0.822 224 | } 225 | } 226 | }, 227 | "dbsnp": { 228 | "_license": "http://bit.ly/2AqoLOc", 229 | "rsid": "rs113488022" 230 | }, 231 | "vcf": { 232 | "alt": "C", 233 | "position": "140453136", 234 | "ref": "A" 235 | } 236 | } 237 | ] 238 | ``` -------------------------------------------------------------------------------- /src/biomcp/rate_limiter.py: -------------------------------------------------------------------------------- ```python 1 | """Rate limiting implementation for BioMCP API calls.""" 2 | 3 | import asyncio 4 | import time 5 | from collections import defaultdict 6 | from contextlib import asynccontextmanager 7 | 8 | from .constants import ( 9 | DEFAULT_BURST_SIZE, 10 | DEFAULT_RATE_LIMIT_PER_SECOND, 11 | ) 12 | from .exceptions 
import BioMCPError 13 | 14 | 15 | class RateLimitExceeded(BioMCPError): 16 | """Raised when rate limit is exceeded.""" 17 | 18 | def __init__(self, domain: str, limit: int, window: int): 19 | message = f"Rate limit exceeded for {domain}: {limit} requests per {window} seconds" 20 | super().__init__( 21 | message, {"domain": domain, "limit": limit, "window": window} 22 | ) 23 | 24 | 25 | class RateLimiter: 26 | """Token bucket rate limiter implementation.""" 27 | 28 | def __init__( 29 | self, 30 | requests_per_second: float = DEFAULT_RATE_LIMIT_PER_SECOND, 31 | burst_size: int = DEFAULT_BURST_SIZE, 32 | ): 33 | """Initialize rate limiter. 34 | 35 | Args: 36 | requests_per_second: Sustained request rate 37 | burst_size: Maximum burst capacity 38 | """ 39 | self.rate = requests_per_second 40 | self.burst_size = burst_size 41 | self.tokens = float(burst_size) 42 | self.last_update = time.monotonic() 43 | self._lock = asyncio.Lock() 44 | 45 | async def acquire(self, tokens: int = 1) -> None: 46 | """Acquire tokens from the bucket.""" 47 | async with self._lock: 48 | now = time.monotonic() 49 | elapsed = now - self.last_update 50 | self.last_update = now 51 | 52 | # Add tokens based on elapsed time 53 | self.tokens = min( 54 | self.burst_size, self.tokens + elapsed * self.rate 55 | ) 56 | 57 | if self.tokens < tokens: 58 | # Calculate wait time 59 | wait_time = (tokens - self.tokens) / self.rate 60 | await asyncio.sleep(wait_time) 61 | self.tokens = 0 62 | else: 63 | self.tokens -= tokens 64 | 65 | @asynccontextmanager 66 | async def limit(self): 67 | """Context manager for rate limiting.""" 68 | await self.acquire() 69 | yield 70 | 71 | 72 | class DomainRateLimiter: 73 | """Rate limiter with per-domain limits.""" 74 | 75 | def __init__(self, default_rps: float = 10.0, default_burst: int = 20): 76 | """Initialize domain rate limiter. 
77 | 78 | Args: 79 | default_rps: Default requests per second 80 | default_burst: Default burst size 81 | """ 82 | self.default_rps = default_rps 83 | self.default_burst = default_burst 84 | self.limiters: dict[str, RateLimiter] = {} 85 | self.domain_configs = { 86 | "article": {"rps": 20.0, "burst": 40}, # PubMed can handle more 87 | "trial": {"rps": 10.0, "burst": 20}, # ClinicalTrials.gov standard 88 | "thinking": {"rps": 50.0, "burst": 100}, # Local processing 89 | "mygene": {"rps": 10.0, "burst": 20}, # MyGene.info 90 | "mydisease": {"rps": 10.0, "burst": 20}, # MyDisease.info 91 | "mychem": {"rps": 10.0, "burst": 20}, # MyChem.info 92 | "myvariant": {"rps": 15.0, "burst": 30}, # MyVariant.info 93 | } 94 | 95 | def get_limiter(self, domain: str) -> RateLimiter: 96 | """Get or create rate limiter for domain.""" 97 | if domain not in self.limiters: 98 | config = self.domain_configs.get(domain, {}) 99 | rps = config.get("rps", self.default_rps) 100 | burst = config.get("burst", self.default_burst) 101 | self.limiters[domain] = RateLimiter(rps, int(burst)) 102 | return self.limiters[domain] 103 | 104 | @asynccontextmanager 105 | async def limit(self, domain: str): 106 | """Rate limit context manager for a domain.""" 107 | limiter = self.get_limiter(domain) 108 | async with limiter.limit(): 109 | yield 110 | 111 | 112 | class SlidingWindowRateLimiter: 113 | """Sliding window rate limiter for user/IP based limiting.""" 114 | 115 | def __init__(self, requests: int = 100, window_seconds: int = 60): 116 | """Initialize sliding window rate limiter. 
117 | 118 | Args: 119 | requests: Maximum requests per window 120 | window_seconds: Window size in seconds 121 | """ 122 | self.max_requests = requests 123 | self.window_seconds = window_seconds 124 | self.requests: dict[str, list[float]] = defaultdict(list) 125 | self._lock = asyncio.Lock() 126 | 127 | async def check_limit(self, key: str) -> bool: 128 | """Check if request is allowed for key.""" 129 | async with self._lock: 130 | now = time.time() 131 | cutoff = now - self.window_seconds 132 | 133 | # Remove old requests 134 | self.requests[key] = [ 135 | req_time 136 | for req_time in self.requests[key] 137 | if req_time > cutoff 138 | ] 139 | 140 | # Check limit 141 | if len(self.requests[key]) >= self.max_requests: 142 | return False 143 | 144 | # Add current request 145 | self.requests[key].append(now) 146 | return True 147 | 148 | async def acquire(self, key: str) -> None: 149 | """Acquire permission to make request.""" 150 | if not await self.check_limit(key): 151 | raise RateLimitExceeded( 152 | key, self.max_requests, self.window_seconds 153 | ) 154 | 155 | 156 | # Global instances 157 | domain_limiter = DomainRateLimiter() 158 | user_limiter = SlidingWindowRateLimiter( 159 | requests=1000, window_seconds=3600 160 | ) # 1000 req/hour 161 | 162 | 163 | async def rate_limit_domain(domain: str) -> None: 164 | """Apply rate limiting for a domain.""" 165 | async with domain_limiter.limit(domain): 166 | pass 167 | 168 | 169 | async def rate_limit_user(user_id: str | None = None) -> None: 170 | """Apply rate limiting for a user.""" 171 | if user_id: 172 | await user_limiter.acquire(user_id) 173 | ``` -------------------------------------------------------------------------------- /src/biomcp/http_client_simple.py: -------------------------------------------------------------------------------- ```python 1 | """Helper functions for simpler HTTP client operations.""" 2 | 3 | import asyncio 4 | import contextlib 5 | import json 6 | import os 7 | import ssl 8 | 9 | 
import httpx

# Global connection pools per SSL-verification setting. Each distinct
# `verify` value gets its own pooled AsyncClient so TLS contexts are
# never mixed between requests.
_connection_pools: dict[str, httpx.AsyncClient] = {}
_pool_lock = asyncio.Lock()


def close_all_pools() -> None:
    """Close all connection pools. Useful for cleanup in tests.

    Closing is best-effort: when an event loop is running, each close is
    scheduled as a task; without a loop, the underlying transport is
    closed synchronously.
    """
    for pool in _connection_pools.values():
        if pool and not pool.is_closed:
            try:
                # Keep a reference to the task so it is not garbage
                # collected before it completes.
                close_task = asyncio.create_task(pool.aclose())
                close_task.add_done_callback(lambda t: None)
            except RuntimeError:
                # No running event loop: fall back to closing the
                # transport synchronously (private httpx API, but the
                # only synchronous option available here).
                pool._transport.close()
    _connection_pools.clear()


async def get_connection_pool(
    verify: ssl.SSLContext | str | bool,
    timeout: httpx.Timeout,
) -> httpx.AsyncClient:
    """Get or create a shared connection pool for the given SSL context.

    Args:
        verify: SSL verification setting (context object, CA path, or bool).
        timeout: Timeout configuration applied when a new pool is created.

    Returns:
        A shared ``httpx.AsyncClient`` with keep-alive connection pooling.
    """
    # Key pools by the verify setting; SSLContext objects are keyed by
    # identity since they are not reliably hashable/comparable by value.
    if isinstance(verify, ssl.SSLContext):
        pool_key = f"ssl_{id(verify)}"
    else:
        pool_key = str(verify)

    async with _pool_lock:
        pool = _connection_pools.get(pool_key)
        if pool is None or pool.is_closed:
            # Create a new connection pool with optimized settings
            pool = httpx.AsyncClient(
                verify=verify,
                http2=False,  # HTTP/2 can add overhead for simple requests
                timeout=timeout,
                limits=httpx.Limits(
                    max_keepalive_connections=20,  # Reuse connections
                    max_connections=100,  # Total connection limit
                    keepalive_expiry=30,  # Keep connections alive for 30s
                ),
                # We handle retries at a higher level
                transport=httpx.AsyncHTTPTransport(
                    retries=0,
                ),
            )
            _connection_pools[pool_key] = pool
        return pool


async def execute_http_request(  # noqa: C901
    method: str,
    url: str,
    params: dict,
    verify: ssl.SSLContext | str | bool,
    headers: dict[str, str] | None = None,
) -> tuple[int, str]:
    """Execute the actual HTTP request using connection pooling.

    Args:
        method: HTTP method (GET or POST)
        url: Target URL
        params: Request parameters (mutated: a ``_headers`` key, if
            present, is popped and merged into the request headers)
        verify: SSL verification settings
        headers: Optional custom headers

    Returns:
        Tuple of (status_code, response_text)

    Raises:
        ConnectionError: For connection failures
        TimeoutError: For timeout errors
    """
    from .constants import HTTP_TIMEOUT_SECONDS

    try:
        # Extract custom headers from params if present
        custom_headers = headers or {}
        if "_headers" in params:
            with contextlib.suppress(json.JSONDecodeError, TypeError):
                custom_headers.update(json.loads(params.pop("_headers")))

        # Use the configured timeout from constants
        timeout = httpx.Timeout(HTTP_TIMEOUT_SECONDS)

        # Connection pooling is opt-out via BIOMCP_USE_CONNECTION_POOL
        use_pool = (
            os.getenv("BIOMCP_USE_CONNECTION_POOL", "true").lower() == "true"
        )

        if use_pool:
            try:
                # Use the shared connection pool manager.
                # BUGFIX: this was `from ..connection_pool import ...`,
                # which escapes the top-level `biomcp` package, always
                # raises ImportError, and was silently swallowed by the
                # except below - so pooling was never actually used.
                from .connection_pool import get_connection_pool as get_pool

                client = await get_pool(verify, timeout)
                should_close = False
            except Exception:
                # Fallback to creating a new client
                client = httpx.AsyncClient(
                    verify=verify, http2=False, timeout=timeout
                )
                should_close = True
        else:
            # Create a new client for each request
            client = httpx.AsyncClient(
                verify=verify, http2=False, timeout=timeout
            )
            should_close = True

        try:
            # Make the request
            if method.upper() == "GET":
                resp = await client.get(
                    url, params=params, headers=custom_headers
                )
            elif method.upper() == "POST":
                resp = await client.post(
                    url, json=params, headers=custom_headers
                )
            else:
                from .constants import HTTP_ERROR_CODE_UNSUPPORTED_METHOD

                return (
                    HTTP_ERROR_CODE_UNSUPPORTED_METHOD,
                    f"Unsupported method {method}",
                )

            # Normalize an empty body to "{}" so callers can always
            # json.loads the response text.
            if not resp.text:
                return resp.status_code, "{}"

            return resp.status_code, resp.text
        finally:
            # Only close if we created a new client
            if should_close:
                await client.aclose()

    except httpx.ConnectError as exc:
        raise ConnectionError(f"Failed to connect to {url}: {exc}") from exc
    except httpx.TimeoutException as exc:
        raise TimeoutError(f"Request to {url} timed out: {exc}") from exc
    except httpx.HTTPError as exc:
        error_msg = str(exc) if str(exc) else "Network connectivity error"
        from .constants import HTTP_ERROR_CODE_NETWORK

        return HTTP_ERROR_CODE_NETWORK, error_msg
This provides: 6 | 7 | - Consistent error handling and retry logic 8 | - Request/response caching 9 | - Rate limiting per domain 10 | - Circuit breaker for fault tolerance 11 | - Offline mode support 12 | - Comprehensive endpoint tracking 13 | 14 | ## Migration from Direct HTTP Libraries 15 | 16 | ### Before (Direct httpx usage): 17 | 18 | ```python 19 | import httpx 20 | 21 | async def fetch_gene(gene: str): 22 | async with httpx.AsyncClient() as client: 23 | response = await client.get(f"https://api.example.com/genes/{gene}") 24 | response.raise_for_status() 25 | return response.json() 26 | ``` 27 | 28 | ### After (Centralized client): 29 | 30 | ```python 31 | from biomcp import http_client 32 | 33 | async def fetch_gene(gene: str): 34 | data, error = await http_client.request_api( 35 | url=f"https://api.example.com/genes/{gene}", 36 | request={}, 37 | domain="example" 38 | ) 39 | if error: 40 | # Handle error consistently 41 | return None 42 | return data 43 | ``` 44 | 45 | ## Error Handling 46 | 47 | The centralized client uses a consistent error handling pattern: 48 | 49 | ```python 50 | result, error = await http_client.request_api(...) 51 | 52 | if error: 53 | # error is a RequestError object with: 54 | # - error.code: HTTP status code or error type 55 | # - error.message: Human-readable error message 56 | # - error.details: Additional context 57 | logger.error(f"Request failed: {error.message}") 58 | return None # or handle appropriately 59 | ``` 60 | 61 | ### Error Handling Guidelines 62 | 63 | 1. **For optional data**: Return `None` when the data is not critical 64 | 2. **For required data**: Raise an exception or return an error to the caller 65 | 3. **For batch operations**: Collect errors and report at the end 66 | 4. 
**For user-facing operations**: Provide clear, actionable error messages 67 | 68 | ## Creating Domain-Specific Adapters 69 | 70 | For complex APIs, create an adapter class: 71 | 72 | ```python 73 | from biomcp import http_client 74 | from biomcp.http_client import RequestError 75 | 76 | class MyAPIAdapter: 77 | """Adapter for MyAPI using centralized HTTP client.""" 78 | 79 | def __init__(self): 80 | self.base_url = "https://api.example.com" 81 | 82 | async def get_resource(self, resource_id: str) -> tuple[dict | None, RequestError | None]: 83 | """Fetch a resource by ID. 84 | 85 | Returns: 86 | Tuple of (data, error) where one is always None 87 | """ 88 | return await http_client.request_api( 89 | url=f"{self.base_url}/resources/{resource_id}", 90 | request={}, 91 | domain="example", 92 | endpoint_key="example_resources" 93 | ) 94 | ``` 95 | 96 | ## Configuration 97 | 98 | ### Cache TTL (Time To Live) 99 | 100 | ```python 101 | # Cache for 1 hour (3600 seconds) 102 | data, error = await http_client.request_api( 103 | url=url, 104 | request=request, 105 | cache_ttl=3600 106 | ) 107 | 108 | # Disable caching for this request 109 | data, error = await http_client.request_api( 110 | url=url, 111 | request=request, 112 | cache_ttl=0 113 | ) 114 | ``` 115 | 116 | ### Rate Limiting 117 | 118 | Rate limits are configured per domain in `http_client.py`: 119 | 120 | ```python 121 | # Default rate limits 122 | rate_limits = { 123 | "ncbi.nlm.nih.gov": 20, # 20 requests/second 124 | "clinicaltrials.gov": 10, # 10 requests/second 125 | "myvariant.info": 1000/3600, # 1000 requests/hour 126 | } 127 | ``` 128 | 129 | ### Circuit Breaker 130 | 131 | The circuit breaker prevents cascading failures: 132 | 133 | - **Closed**: Normal operation 134 | - **Open**: Failing fast after threshold exceeded 135 | - **Half-Open**: Testing if service recovered 136 | 137 | Configure thresholds: 138 | 139 | ```python 140 | CIRCUIT_BREAKER_FAILURE_THRESHOLD = 5 # Open after 5 failures 141 | 
CIRCUIT_BREAKER_RECOVERY_TIMEOUT = 60 # Try again after 60 seconds 142 | ``` 143 | 144 | ## Offline Mode 145 | 146 | Enable offline mode to only serve cached responses: 147 | 148 | ```bash 149 | export BIOMCP_OFFLINE=true 150 | biomcp run 151 | ``` 152 | 153 | In offline mode: 154 | 155 | - Only cached responses are returned 156 | - No external HTTP requests are made 157 | - Missing cache entries return None with appropriate error 158 | 159 | ## Performance Tuning 160 | 161 | ### Connection Pooling 162 | 163 | The HTTP client maintains connection pools per domain: 164 | 165 | ```python 166 | # Configure in http_client_simple.py 167 | limits = httpx.Limits( 168 | max_keepalive_connections=20, 169 | max_connections=100, 170 | keepalive_expiry=30 171 | ) 172 | ``` 173 | 174 | ### Concurrent Requests 175 | 176 | For parallel requests to the same API: 177 | 178 | ```python 179 | import asyncio 180 | 181 | # Fetch multiple resources concurrently 182 | tasks = [ 183 | http_client.request_api(f"/resource/{i}", {}, domain="example") 184 | for i in range(10) 185 | ] 186 | results = await asyncio.gather(*tasks) 187 | ``` 188 | 189 | ## Monitoring and Debugging 190 | 191 | ### Request Metrics 192 | 193 | The client tracks metrics per endpoint: 194 | 195 | - Request count 196 | - Error count 197 | - Cache hit/miss ratio 198 | - Average response time 199 | 200 | Access metrics: 201 | 202 | ```python 203 | from biomcp.http_client import get_metrics 204 | metrics = get_metrics() 205 | ``` 206 | 207 | ### Debug Logging 208 | 209 | Enable debug logging to see all HTTP requests: 210 | 211 | ```python 212 | import logging 213 | logging.getLogger("biomcp.http_client").setLevel(logging.DEBUG) 214 | ``` 215 | 216 | ## Best Practices 217 | 218 | 1. **Always use the centralized client** for external HTTP calls 219 | 2. **Register new endpoints** in the endpoint registry 220 | 3. **Set appropriate cache TTLs** based on data volatility 221 | 4. 
**Handle errors gracefully** with user-friendly messages 222 | 5. **Test with offline mode** to ensure cache coverage 223 | 6. **Monitor rate limits** to avoid API throttling 224 | 7. **Use domain-specific adapters** for complex APIs 225 | 226 | ## Endpoint Registration 227 | 228 | Register new endpoints in `endpoint_registry.py`: 229 | 230 | ```python 231 | registry.register( 232 | "my_api_endpoint", 233 | EndpointInfo( 234 | url="https://api.example.com/v1/data", 235 | category=EndpointCategory.BIOMEDICAL_LITERATURE, 236 | data_types=[DataType.RESEARCH_ARTICLES], 237 | description="My API for fetching data", 238 | compliance_notes="Public API, no PII", 239 | rate_limit="100 requests/minute" 240 | ) 241 | ) 242 | ``` 243 | 244 | This ensures the endpoint is documented and tracked properly. 245 | ``` -------------------------------------------------------------------------------- /tests/tdd/articles/test_cbioportal_integration.py: -------------------------------------------------------------------------------- ```python 1 | """Test cBioPortal integration with article searches.""" 2 | 3 | import json 4 | 5 | import pytest 6 | 7 | from biomcp.articles.search import PubmedRequest 8 | from biomcp.articles.unified import search_articles_unified 9 | 10 | 11 | class TestArticleCBioPortalIntegration: 12 | """Test that cBioPortal summaries appear in article searches.""" 13 | 14 | @pytest.mark.asyncio 15 | @pytest.mark.integration 16 | async def test_article_search_with_gene_includes_cbioportal(self): 17 | """Test that searching articles for a gene includes cBioPortal summary.""" 18 | request = PubmedRequest( 19 | genes=["BRAF"], 20 | keywords=["melanoma"], 21 | ) 22 | 23 | # Test markdown output 24 | result = await search_articles_unified( 25 | request, 26 | include_pubmed=True, 27 | include_preprints=False, 28 | output_json=False, 29 | ) 30 | 31 | # Should include cBioPortal summary 32 | assert "cBioPortal Summary for BRAF" in result 33 | assert "Mutation Frequency" in 
    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_article_search_json_with_gene(self):
        """Test JSON output includes cBioPortal summary."""
        # A gene filter plus output_json=True should produce a JSON
        # document with both the cBioPortal block and the article list.
        request = PubmedRequest(
            genes=["TP53"],
            keywords=["cancer"],
        )

        result = await search_articles_unified(
            request,
            include_pubmed=True,
            include_preprints=False,
            output_json=True,
        )

        # Parse JSON
        data = json.loads(result)

        # Should have both summary and articles
        assert "cbioportal_summary" in data
        assert "articles" in data
        assert "TP53" in data["cbioportal_summary"]
        assert isinstance(data["articles"], list)
        assert len(data["articles"]) > 0

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_article_search_without_gene_no_cbioportal(self):
        """Test that searches without genes don't include cBioPortal summary."""
        # No gene in the request -> no cBioPortal lookup should occur.
        request = PubmedRequest(
            diseases=["hypertension"],
            keywords=["treatment"],
        )

        # Test markdown output
        result = await search_articles_unified(
            request,
            include_pubmed=True,
            include_preprints=False,
            output_json=False,
        )

        # Should NOT include cBioPortal summary
        assert "cBioPortal Summary" not in result
        assert "Mutation Frequency" not in result

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_article_search_multiple_genes(self):
        """Test that searching with multiple genes uses the first one."""
        # Only the first listed gene drives the cBioPortal summary.
        request = PubmedRequest(
            genes=["KRAS", "NRAS", "BRAF"],
            diseases=["colorectal cancer"],
        )

        result = await search_articles_unified(
            request,
            include_pubmed=True,
            include_preprints=False,
            output_json=False,
        )

        # Should include cBioPortal summary for KRAS (first gene)
        assert "cBioPortal Summary for KRAS" in result
        # Common KRAS hotspot
        assert "G12" in result or "mutation" in result

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_article_search_with_invalid_gene(self):
        """Test graceful handling of invalid gene names."""
        # NOTE(review): despite the name, this uses real genes - BRCA1
        # (likely in cBioPortal) and ACE2 (likely absent from cancer
        # studies) - to exercise the "no cBioPortal data" path without
        # tripping upstream gene-name validation.
        request = PubmedRequest(
            genes=["BRCA1"],  # Valid gene
            keywords=["cancer"],
        )

        # First check that we handle invalid genes gracefully
        # by using a real gene that might have cBioPortal data
        result = await search_articles_unified(
            request,
            include_pubmed=True,
            include_preprints=False,
            output_json=False,
        )

        # Should have some content - either cBioPortal summary or articles
        assert len(result) > 50  # Some reasonable content

        # Now test with a gene that's valid for search but not in cBioPortal
        request2 = PubmedRequest(
            genes=["ACE2"],  # Real gene but might not be in cancer studies
            keywords=["COVID-19"],
        )

        result2 = await search_articles_unified(
            request2,
            include_pubmed=True,
            include_preprints=False,
            output_json=False,
        )

        # Should return results even if cBioPortal data is not available
        assert len(result2) > 50

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_article_search_with_preprints_and_cbioportal(self):
        """Test that cBioPortal summary works with preprint searches too."""
        request = PubmedRequest(
            genes=["EGFR"],
            keywords=["lung cancer", "osimertinib"],
        )

        result = await search_articles_unified(
            request,
            include_pubmed=True,
            include_preprints=True,  # also pull bioRxiv/medRxiv results
            output_json=False,
        )

        # Should include cBioPortal summary
        assert "cBioPortal Summary for EGFR" in result
        # Should include both peer-reviewed and preprint results
        assert ("pmid" in result or "Title" in result) and (
            "Preprint" in result
            or "bioRxiv" in result
            or "peer_reviewed" in result
        )
def _format_disease_output(disease_info, result: dict) -> None:
    """Reshape a disease result dict in place for compact display."""
    # Collapse the synonym list into one capped, comma-separated string.
    synonyms = disease_info.synonyms
    if synonyms:
        summary = ", ".join(synonyms[:10])  # show at most the first 10
        overflow = len(synonyms) - 10
        if overflow > 0:
            summary += f" (and {overflow} more)"
        result["synonyms"] = summary

    # Replace raw phenotype records with a short named preview.
    phenotypes = disease_info.phenotypes
    if phenotypes:
        names = [
            entry["phenotype"]
            for entry in phenotypes[:5]
            if isinstance(entry, dict) and "phenotype" in entry
        ]
        if names:
            preview = ", ".join(names)
            overflow = len(phenotypes) - 5
            if overflow > 0:
                preview += f" (and {overflow} more)"
            result["associated_phenotypes"] = preview
        # Drop the raw phenotypes data for cleaner output
        result.pop("phenotypes", None)


async def get_disease(
    disease_id_or_name: str,
    output_json: bool = False,
) -> str:
    """
    Get disease information from MyDisease.info.

    Args:
        disease_id_or_name: Disease ID (MONDO, DOID) or name (e.g., "melanoma", "MONDO:0016575")
        output_json: Return as JSON instead of markdown

    Returns:
        Disease information as markdown or JSON string
    """

    def render(payload: dict) -> str:
        # Serialize one payload in the caller's requested format.
        return (
            json.dumps(payload, indent=2)
            if output_json
            else to_markdown([payload])
        )

    client = BioThingsClient()

    try:
        disease_info = await client.get_disease_info(disease_id_or_name)

        if not disease_info:
            return render({
                "error": f"Disease '{disease_id_or_name}' not found",
                "suggestion": "Please check the disease name or ID (MONDO:, DOID:, OMIM:, MESH:)",
            })

        # Convert to dict, then enrich with links and prettify for display.
        result = disease_info.model_dump(exclude_none=True)
        _add_disease_links(disease_info, result)
        _format_disease_output(disease_info, result)
        return render(result)

    except Exception as e:
        logger.error(
            f"Error fetching disease info for {disease_id_or_name}: {e}"
        )
        return render({
            "error": "Failed to retrieve disease information",
            "details": str(e),
        })


async def _disease_details(
    call_benefit: Annotated[
        str,
        "Define and summarize why this function is being called and the intended benefit",
    ],
    disease_id_or_name: Annotated[
        str,
        Field(
            description="Disease name (e.g., melanoma, GIST) or ID (e.g., MONDO:0016575, DOID:1909)"
        ),
    ],
) -> str:
    """
    Retrieves detailed information for a disease from MyDisease.info.

    This tool provides real-time disease annotations including:
    - Official disease name and definition
    - Disease synonyms and alternative names
    - Ontology mappings (MONDO, DOID, OMIM, etc.)
    - Associated phenotypes
    - Links to disease databases

    Parameters:
    - call_benefit: Define why this function is being called
    - disease_id_or_name: Disease name or ontology ID

    Process: Queries MyDisease.info API for up-to-date disease information
    Output: Markdown formatted disease information with definition and metadata

    Note: For clinical trials about diseases, use trial_searcher. For articles about diseases, use article_searcher.
    """
    # Thin MCP-facing wrapper; markdown is the fixed output format here.
    return await get_disease(disease_id_or_name, output_json=False)