#
tokens: 49302/50000 23/303 files (page 4/19)
lines: on (toggle) GitHub
raw markdown copy reset
This is page 4 of 19. To view another page, replace `{x}` with the page number in http://codebase.md/genomoncology/biomcp?lines=true&page={x}.

# Directory Structure

```
├── .github
│   ├── actions
│   │   └── setup-python-env
│   │       └── action.yml
│   ├── dependabot.yml
│   └── workflows
│       ├── ci.yml
│       ├── deploy-docs.yml
│       ├── main.yml.disabled
│       ├── on-release-main.yml
│       └── validate-codecov-config.yml
├── .gitignore
├── .pre-commit-config.yaml
├── BIOMCP_DATA_FLOW.md
├── CHANGELOG.md
├── CNAME
├── codecov.yaml
├── docker-compose.yml
├── Dockerfile
├── docs
│   ├── apis
│   │   ├── error-codes.md
│   │   ├── overview.md
│   │   └── python-sdk.md
│   ├── assets
│   │   ├── biomcp-cursor-locations.png
│   │   ├── favicon.ico
│   │   ├── icon.png
│   │   ├── logo.png
│   │   ├── mcp_architecture.txt
│   │   └── remote-connection
│   │       ├── 00_connectors.png
│   │       ├── 01_add_custom_connector.png
│   │       ├── 02_connector_enabled.png
│   │       ├── 03_connect_to_biomcp.png
│   │       ├── 04_select_google_oauth.png
│   │       └── 05_success_connect.png
│   ├── backend-services-reference
│   │   ├── 01-overview.md
│   │   ├── 02-biothings-suite.md
│   │   ├── 03-cbioportal.md
│   │   ├── 04-clinicaltrials-gov.md
│   │   ├── 05-nci-cts-api.md
│   │   ├── 06-pubtator3.md
│   │   └── 07-alphagenome.md
│   ├── blog
│   │   ├── ai-assisted-clinical-trial-search-analysis.md
│   │   ├── images
│   │   │   ├── deep-researcher-video.png
│   │   │   ├── researcher-announce.png
│   │   │   ├── researcher-drop-down.png
│   │   │   ├── researcher-prompt.png
│   │   │   ├── trial-search-assistant.png
│   │   │   └── what_is_biomcp_thumbnail.png
│   │   └── researcher-persona-resource.md
│   ├── changelog.md
│   ├── CNAME
│   ├── concepts
│   │   ├── 01-what-is-biomcp.md
│   │   ├── 02-the-deep-researcher-persona.md
│   │   └── 03-sequential-thinking-with-the-think-tool.md
│   ├── developer-guides
│   │   ├── 01-server-deployment.md
│   │   ├── 02-contributing-and-testing.md
│   │   ├── 03-third-party-endpoints.md
│   │   ├── 04-transport-protocol.md
│   │   ├── 05-error-handling.md
│   │   ├── 06-http-client-and-caching.md
│   │   ├── 07-performance-optimizations.md
│   │   └── generate_endpoints.py
│   ├── faq-condensed.md
│   ├── FDA_SECURITY.md
│   ├── genomoncology.md
│   ├── getting-started
│   │   ├── 01-quickstart-cli.md
│   │   ├── 02-claude-desktop-integration.md
│   │   └── 03-authentication-and-api-keys.md
│   ├── how-to-guides
│   │   ├── 01-find-articles-and-cbioportal-data.md
│   │   ├── 02-find-trials-with-nci-and-biothings.md
│   │   ├── 03-get-comprehensive-variant-annotations.md
│   │   ├── 04-predict-variant-effects-with-alphagenome.md
│   │   ├── 05-logging-and-monitoring-with-bigquery.md
│   │   └── 06-search-nci-organizations-and-interventions.md
│   ├── index.md
│   ├── policies.md
│   ├── reference
│   │   ├── architecture-diagrams.md
│   │   ├── quick-architecture.md
│   │   ├── quick-reference.md
│   │   └── visual-architecture.md
│   ├── robots.txt
│   ├── stylesheets
│   │   ├── announcement.css
│   │   └── extra.css
│   ├── troubleshooting.md
│   ├── tutorials
│   │   ├── biothings-prompts.md
│   │   ├── claude-code-biomcp-alphagenome.md
│   │   ├── nci-prompts.md
│   │   ├── openfda-integration.md
│   │   ├── openfda-prompts.md
│   │   ├── pydantic-ai-integration.md
│   │   └── remote-connection.md
│   ├── user-guides
│   │   ├── 01-command-line-interface.md
│   │   ├── 02-mcp-tools-reference.md
│   │   └── 03-integrating-with-ides-and-clients.md
│   └── workflows
│       └── all-workflows.md
├── example_scripts
│   ├── mcp_integration.py
│   └── python_sdk.py
├── glama.json
├── LICENSE
├── lzyank.toml
├── Makefile
├── mkdocs.yml
├── package-lock.json
├── package.json
├── pyproject.toml
├── README.md
├── scripts
│   ├── check_docs_in_mkdocs.py
│   ├── check_http_imports.py
│   └── generate_endpoints_doc.py
├── smithery.yaml
├── src
│   └── biomcp
│       ├── __init__.py
│       ├── __main__.py
│       ├── articles
│       │   ├── __init__.py
│       │   ├── autocomplete.py
│       │   ├── fetch.py
│       │   ├── preprints.py
│       │   ├── search_optimized.py
│       │   ├── search.py
│       │   └── unified.py
│       ├── biomarkers
│       │   ├── __init__.py
│       │   └── search.py
│       ├── cbioportal_helper.py
│       ├── circuit_breaker.py
│       ├── cli
│       │   ├── __init__.py
│       │   ├── articles.py
│       │   ├── biomarkers.py
│       │   ├── diseases.py
│       │   ├── health.py
│       │   ├── interventions.py
│       │   ├── main.py
│       │   ├── openfda.py
│       │   ├── organizations.py
│       │   ├── server.py
│       │   ├── trials.py
│       │   └── variants.py
│       ├── connection_pool.py
│       ├── constants.py
│       ├── core.py
│       ├── diseases
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── domain_handlers.py
│       ├── drugs
│       │   ├── __init__.py
│       │   └── getter.py
│       ├── exceptions.py
│       ├── genes
│       │   ├── __init__.py
│       │   └── getter.py
│       ├── http_client_simple.py
│       ├── http_client.py
│       ├── individual_tools.py
│       ├── integrations
│       │   ├── __init__.py
│       │   ├── biothings_client.py
│       │   └── cts_api.py
│       ├── interventions
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── logging_filter.py
│       ├── metrics_handler.py
│       ├── metrics.py
│       ├── openfda
│       │   ├── __init__.py
│       │   ├── adverse_events_helpers.py
│       │   ├── adverse_events.py
│       │   ├── cache.py
│       │   ├── constants.py
│       │   ├── device_events_helpers.py
│       │   ├── device_events.py
│       │   ├── drug_approvals.py
│       │   ├── drug_labels_helpers.py
│       │   ├── drug_labels.py
│       │   ├── drug_recalls_helpers.py
│       │   ├── drug_recalls.py
│       │   ├── drug_shortages_detail_helpers.py
│       │   ├── drug_shortages_helpers.py
│       │   ├── drug_shortages.py
│       │   ├── exceptions.py
│       │   ├── input_validation.py
│       │   ├── rate_limiter.py
│       │   ├── utils.py
│       │   └── validation.py
│       ├── organizations
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── parameter_parser.py
│       ├── prefetch.py
│       ├── query_parser.py
│       ├── query_router.py
│       ├── rate_limiter.py
│       ├── render.py
│       ├── request_batcher.py
│       ├── resources
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   ├── instructions.md
│       │   └── researcher.md
│       ├── retry.py
│       ├── router_handlers.py
│       ├── router.py
│       ├── shared_context.py
│       ├── thinking
│       │   ├── __init__.py
│       │   ├── sequential.py
│       │   └── session.py
│       ├── thinking_tool.py
│       ├── thinking_tracker.py
│       ├── trials
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   ├── nci_getter.py
│       │   ├── nci_search.py
│       │   └── search.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── cancer_types_api.py
│       │   ├── cbio_http_adapter.py
│       │   ├── endpoint_registry.py
│       │   ├── gene_validator.py
│       │   ├── metrics.py
│       │   ├── mutation_filter.py
│       │   ├── query_utils.py
│       │   ├── rate_limiter.py
│       │   └── request_cache.py
│       ├── variants
│       │   ├── __init__.py
│       │   ├── alphagenome.py
│       │   ├── cancer_types.py
│       │   ├── cbio_external_client.py
│       │   ├── cbioportal_mutations.py
│       │   ├── cbioportal_search_helpers.py
│       │   ├── cbioportal_search.py
│       │   ├── constants.py
│       │   ├── external.py
│       │   ├── filters.py
│       │   ├── getter.py
│       │   ├── links.py
│       │   └── search.py
│       └── workers
│           ├── __init__.py
│           ├── worker_entry_stytch.js
│           ├── worker_entry.js
│           └── worker.py
├── tests
│   ├── bdd
│   │   ├── cli_help
│   │   │   ├── help.feature
│   │   │   └── test_help.py
│   │   ├── conftest.py
│   │   ├── features
│   │   │   └── alphagenome_integration.feature
│   │   ├── fetch_articles
│   │   │   ├── fetch.feature
│   │   │   └── test_fetch.py
│   │   ├── get_trials
│   │   │   ├── get.feature
│   │   │   └── test_get.py
│   │   ├── get_variants
│   │   │   ├── get.feature
│   │   │   └── test_get.py
│   │   ├── search_articles
│   │   │   ├── autocomplete.feature
│   │   │   ├── search.feature
│   │   │   ├── test_autocomplete.py
│   │   │   └── test_search.py
│   │   ├── search_trials
│   │   │   ├── search.feature
│   │   │   └── test_search.py
│   │   ├── search_variants
│   │   │   ├── search.feature
│   │   │   └── test_search.py
│   │   └── steps
│   │       └── test_alphagenome_steps.py
│   ├── config
│   │   └── test_smithery_config.py
│   ├── conftest.py
│   ├── data
│   │   ├── ct_gov
│   │   │   ├── clinical_trials_api_v2.yaml
│   │   │   ├── trials_NCT04280705.json
│   │   │   └── trials_NCT04280705.txt
│   │   ├── myvariant
│   │   │   ├── myvariant_api.yaml
│   │   │   ├── myvariant_field_descriptions.csv
│   │   │   ├── variants_full_braf_v600e.json
│   │   │   ├── variants_full_braf_v600e.txt
│   │   │   └── variants_part_braf_v600_multiple.json
│   │   ├── openfda
│   │   │   ├── drugsfda_detail.json
│   │   │   ├── drugsfda_search.json
│   │   │   ├── enforcement_detail.json
│   │   │   └── enforcement_search.json
│   │   └── pubtator
│   │       ├── pubtator_autocomplete.json
│   │       └── pubtator3_paper.txt
│   ├── integration
│   │   ├── test_openfda_integration.py
│   │   ├── test_preprints_integration.py
│   │   ├── test_simple.py
│   │   └── test_variants_integration.py
│   ├── tdd
│   │   ├── articles
│   │   │   ├── test_autocomplete.py
│   │   │   ├── test_cbioportal_integration.py
│   │   │   ├── test_fetch.py
│   │   │   ├── test_preprints.py
│   │   │   ├── test_search.py
│   │   │   └── test_unified.py
│   │   ├── conftest.py
│   │   ├── drugs
│   │   │   ├── __init__.py
│   │   │   └── test_drug_getter.py
│   │   ├── openfda
│   │   │   ├── __init__.py
│   │   │   ├── test_adverse_events.py
│   │   │   ├── test_device_events.py
│   │   │   ├── test_drug_approvals.py
│   │   │   ├── test_drug_labels.py
│   │   │   ├── test_drug_recalls.py
│   │   │   ├── test_drug_shortages.py
│   │   │   └── test_security.py
│   │   ├── test_biothings_integration_real.py
│   │   ├── test_biothings_integration.py
│   │   ├── test_circuit_breaker.py
│   │   ├── test_concurrent_requests.py
│   │   ├── test_connection_pool.py
│   │   ├── test_domain_handlers.py
│   │   ├── test_drug_approvals.py
│   │   ├── test_drug_recalls.py
│   │   ├── test_drug_shortages.py
│   │   ├── test_endpoint_documentation.py
│   │   ├── test_error_scenarios.py
│   │   ├── test_europe_pmc_fetch.py
│   │   ├── test_mcp_integration.py
│   │   ├── test_mcp_tools.py
│   │   ├── test_metrics.py
│   │   ├── test_nci_integration.py
│   │   ├── test_nci_mcp_tools.py
│   │   ├── test_network_policies.py
│   │   ├── test_offline_mode.py
│   │   ├── test_openfda_unified.py
│   │   ├── test_pten_r173_search.py
│   │   ├── test_render.py
│   │   ├── test_request_batcher.py.disabled
│   │   ├── test_retry.py
│   │   ├── test_router.py
│   │   ├── test_shared_context.py.disabled
│   │   ├── test_unified_biothings.py
│   │   ├── thinking
│   │   │   ├── __init__.py
│   │   │   └── test_sequential.py
│   │   ├── trials
│   │   │   ├── test_backward_compatibility.py
│   │   │   ├── test_getter.py
│   │   │   └── test_search.py
│   │   ├── utils
│   │   │   ├── test_gene_validator.py
│   │   │   ├── test_mutation_filter.py
│   │   │   ├── test_rate_limiter.py
│   │   │   └── test_request_cache.py
│   │   ├── variants
│   │   │   ├── constants.py
│   │   │   ├── test_alphagenome_api_key.py
│   │   │   ├── test_alphagenome_comprehensive.py
│   │   │   ├── test_alphagenome.py
│   │   │   ├── test_cbioportal_mutations.py
│   │   │   ├── test_cbioportal_search.py
│   │   │   ├── test_external_integration.py
│   │   │   ├── test_external.py
│   │   │   ├── test_extract_gene_aa_change.py
│   │   │   ├── test_filters.py
│   │   │   ├── test_getter.py
│   │   │   ├── test_links.py
│   │   │   └── test_search.py
│   │   └── workers
│   │       └── test_worker_sanitization.js
│   └── test_pydantic_ai_integration.py
├── THIRD_PARTY_ENDPOINTS.md
├── tox.ini
├── uv.lock
└── wrangler.toml
```

# Files

--------------------------------------------------------------------------------
/src/biomcp/connection_pool.py:
--------------------------------------------------------------------------------

```python
  1 | """Connection pool manager with proper event loop lifecycle management.
  2 | 
  3 | This module provides HTTP connection pooling that is properly integrated
  4 | with asyncio event loops. It ensures that connection pools are:
  5 | - Created per event loop to avoid cross-loop usage
  6 | - Automatically cleaned up when event loops are garbage collected
  7 | - Reused across requests for better performance
  8 | 
  9 | Key Features:
 10 | - Event loop isolation - each loop gets its own pools
 11 | - Weak references prevent memory leaks
 12 | - Automatic cleanup on loop destruction
 13 | - Pool creation serialized with an asyncio lock (coroutine-safe, not thread-safe)
 14 | 
 15 | Example:
 16 |     ```python
 17 |     # Get a connection pool for the current event loop
 18 |     pool = await get_connection_pool(verify=True, timeout=httpx.Timeout(30))
 19 | 
 20 |     # Use the pool for multiple requests (no need to close)
 21 |     response = await pool.get("https://api.example.com/data")
 22 |     ```
 23 | 
 24 | Environment Variables:
 25 |     BIOMCP_USE_CONNECTION_POOL: Enable/disable pooling (default: "true")
 26 | """
 27 | 
 28 | import asyncio
 29 | import ssl
 30 | import weakref
 31 | 
 32 | # NOTE: httpx import is allowed in this file for connection pooling infrastructure
 33 | import httpx
 34 | 
 35 | 
 36 | class EventLoopConnectionPools:
 37 |     """Manages connection pools per event loop.
 38 | 
 39 |     This class ensures that each asyncio event loop has its own set of
 40 |     connection pools, preventing cross-loop contamination and ensuring
 41 |     proper cleanup when event loops are destroyed.
 42 | 
 43 |     Attributes:
 44 |         _loop_pools: Weak key dictionary mapping event loops to their pools
 45 |         _lock: Asyncio lock for thread-safe pool creation
 46 |     """
 47 | 
 48 |     def __init__(self):
 49 |         # Use weak references to avoid keeping event loops alive
 50 |         self._loop_pools: weakref.WeakKeyDictionary = (
 51 |             weakref.WeakKeyDictionary()
 52 |         )
 53 |         self._lock = asyncio.Lock()
 54 | 
 55 |     async def get_pool(
 56 |         self, verify: ssl.SSLContext | str | bool, timeout: httpx.Timeout
 57 |     ) -> httpx.AsyncClient:
 58 |         """Get or create a connection pool for the current event loop."""
 59 |         try:
 60 |             loop = asyncio.get_running_loop()
 61 |         except RuntimeError:
 62 |             # No event loop running, return a single-use client
 63 |             return self._create_client(verify, timeout, pooled=False)
 64 | 
 65 |         # Get or create pools dict for this event loop
 66 |         async with self._lock:
 67 |             if loop not in self._loop_pools:
 68 |                 self._loop_pools[loop] = {}
 69 |                 # Register cleanup when loop is garbage collected
 70 |                 self._register_loop_cleanup(loop)
 71 | 
 72 |             pools = self._loop_pools[loop]
 73 |             pool_key = self._get_pool_key(verify)
 74 | 
 75 |             # Check if we have a valid pool
 76 |             if pool_key in pools and not pools[pool_key].is_closed:
 77 |                 return pools[pool_key]
 78 | 
 79 |             # Create new pool
 80 |             client = self._create_client(verify, timeout, pooled=True)
 81 |             pools[pool_key] = client
 82 |             return client
 83 | 
 84 |     def _get_pool_key(self, verify: ssl.SSLContext | str | bool) -> str:
 85 |         """Generate a key for the connection pool."""
 86 |         if isinstance(verify, ssl.SSLContext):
 87 |             return f"ssl_{id(verify)}"
 88 |         return str(verify)
 89 | 
 90 |     def _create_client(
 91 |         self,
 92 |         verify: ssl.SSLContext | str | bool,
 93 |         timeout: httpx.Timeout,
 94 |         pooled: bool = True,
 95 |     ) -> httpx.AsyncClient:
 96 |         """Create a new HTTP client."""
 97 |         if pooled:
 98 |             limits = httpx.Limits(
 99 |                 max_keepalive_connections=20,
100 |                 max_connections=100,
101 |                 keepalive_expiry=30,
102 |             )
103 |         else:
104 |             # Single-use client
105 |             limits = httpx.Limits(max_keepalive_connections=0)
106 | 
107 |         return httpx.AsyncClient(
108 |             verify=verify,
109 |             http2=False,  # HTTP/2 can add overhead
110 |             timeout=timeout,
111 |             limits=limits,
112 |         )
113 | 
114 |     def _register_loop_cleanup(self, loop: asyncio.AbstractEventLoop):
115 |         """Register cleanup when event loop is garbage collected."""
116 |         # Store pools to close when loop is garbage collected
117 |         # Note: We can't create weak references to dicts, so we'll
118 |         # clean up pools when the loop itself is garbage collected
119 | 
120 |         def cleanup():
121 |             # Get pools for this loop if they still exist
122 |             pools = self._loop_pools.get(loop, {})
123 |             if pools:
124 |                 # Try to close all clients gracefully
125 |                 for client in list(pools.values()):
126 |                     if client and not client.is_closed:
127 |                         # Close synchronously since loop might be gone
128 |                         import contextlib
129 | 
130 |                         with contextlib.suppress(Exception):
131 |                             client._transport.close()
132 | 
133 |         # Register finalizer on the loop itself
134 |         weakref.finalize(loop, cleanup)
135 | 
136 |     async def close_all(self):
137 |         """Close all connection pools."""
138 |         async with self._lock:
139 |             all_clients = []
140 |             for pools in self._loop_pools.values():
141 |                 all_clients.extend(pools.values())
142 | 
143 |             # Close all clients
144 |             close_tasks = []
145 |             for client in all_clients:
146 |                 if client and not client.is_closed:
147 |                     close_tasks.append(client.aclose())
148 | 
149 |             if close_tasks:
150 |                 await asyncio.gather(*close_tasks, return_exceptions=True)
151 | 
152 |             self._loop_pools.clear()
153 | 
154 | 
# Global instance: created once at import time and used by the module-level
# helper functions below.
_pool_manager = EventLoopConnectionPools()
157 | 
158 | 
async def get_connection_pool(
    verify: ssl.SSLContext | str | bool,
    timeout: httpx.Timeout,
) -> httpx.AsyncClient:
    """Return the HTTP client pool for the currently running event loop.

    Thin wrapper that forwards both arguments to the module-level pool
    manager's ``get_pool``.
    """
    client = await _pool_manager.get_pool(verify, timeout)
    return client
165 | 
166 | 
async def close_all_pools():
    """Close every pool held by the module-level pool manager."""
    manager = _pool_manager
    await manager.close_all()
170 | 
```

--------------------------------------------------------------------------------
/src/biomcp/parameter_parser.py:
--------------------------------------------------------------------------------

```python
  1 | """Parameter parsing and validation for BioMCP."""
  2 | 
  3 | import json
  4 | import logging
  5 | from typing import Any
  6 | 
  7 | from biomcp.exceptions import InvalidParameterError
  8 | 
  9 | logger = logging.getLogger(__name__)
 10 | 
 11 | 
 12 | class ParameterParser:
 13 |     """Handles parameter parsing and validation for search requests."""
 14 | 
 15 |     @staticmethod
 16 |     def parse_list_param(
 17 |         param: str | list[str] | None, param_name: str
 18 |     ) -> list[str] | None:
 19 |         """Convert various input formats to lists.
 20 | 
 21 |         Handles:
 22 |         - JSON arrays: '["item1", "item2"]' -> ['item1', 'item2']
 23 |         - Comma-separated: 'item1, item2' -> ['item1', 'item2']
 24 |         - Single values: 'item' -> ['item']
 25 |         - None values: None -> None
 26 |         - Already parsed lists: ['item'] -> ['item']
 27 | 
 28 |         Args:
 29 |             param: The parameter to parse
 30 |             param_name: Name of the parameter for error messages
 31 | 
 32 |         Returns:
 33 |             Parsed list or None
 34 | 
 35 |         Raises:
 36 |             InvalidParameterError: If parameter cannot be parsed
 37 |         """
 38 |         if param is None:
 39 |             return None
 40 | 
 41 |         if isinstance(param, str):
 42 |             # First try to parse as JSON array
 43 |             if param.startswith("["):
 44 |                 try:
 45 |                     parsed = json.loads(param)
 46 |                     if not isinstance(parsed, list):
 47 |                         raise InvalidParameterError(
 48 |                             param_name,
 49 |                             param,
 50 |                             "JSON array or comma-separated string",
 51 |                         )
 52 |                     return parsed
 53 |                 except (json.JSONDecodeError, TypeError) as e:
 54 |                     logger.debug(f"Failed to parse {param_name} as JSON: {e}")
 55 | 
 56 |             # If it's a comma-separated string, split it
 57 |             if "," in param:
 58 |                 return [item.strip() for item in param.split(",")]
 59 | 
 60 |             # Otherwise return as single-item list
 61 |             return [param]
 62 | 
 63 |         # If it's already a list, validate and return as-is
 64 |         if isinstance(param, list):
 65 |             # Validate all items are strings
 66 |             if not all(isinstance(item, str) for item in param):
 67 |                 raise InvalidParameterError(
 68 |                     param_name, param, "list of strings"
 69 |                 )
 70 |             return param
 71 | 
 72 |         # Invalid type
 73 |         raise InvalidParameterError(
 74 |             param_name, param, "string, list of strings, or None"
 75 |         )
 76 | 
 77 |     @staticmethod
 78 |     def normalize_phase(phase: str | None) -> str | None:
 79 |         """Normalize phase values for clinical trials.
 80 | 
 81 |         Converts various formats to standard enum values:
 82 |         - "Phase 3" -> "PHASE3"
 83 |         - "phase 3" -> "PHASE3"
 84 |         - "PHASE 3" -> "PHASE3"
 85 |         - "phase3" -> "PHASE3"
 86 | 
 87 |         Args:
 88 |             phase: Phase value to normalize
 89 | 
 90 |         Returns:
 91 |             Normalized phase value or None
 92 |         """
 93 |         if phase is None:
 94 |             return None
 95 | 
 96 |         # Convert to uppercase and remove spaces
 97 |         normalized = phase.upper().replace(" ", "")
 98 | 
 99 |         # Validate it matches expected pattern
100 |         valid_phases = [
101 |             "EARLYPHASE1",
102 |             "PHASE1",
103 |             "PHASE2",
104 |             "PHASE3",
105 |             "PHASE4",
106 |             "NOTAPPLICABLE",
107 |         ]
108 |         if normalized not in valid_phases:
109 |             # Try to be helpful with common mistakes
110 |             if "EARLY" in normalized and "1" in normalized:
111 |                 return "EARLYPHASE1"
112 |             if "NOT" in normalized and "APPLICABLE" in normalized:
113 |                 return "NOTAPPLICABLE"
114 | 
115 |             raise InvalidParameterError(
116 |                 "phase", phase, f"one of: {', '.join(valid_phases)}"
117 |             )
118 | 
119 |         return normalized
120 | 
121 |     @staticmethod
122 |     def validate_page_params(page: int, page_size: int) -> tuple[int, int]:
123 |         """Validate pagination parameters.
124 | 
125 |         Args:
126 |             page: Page number (minimum 1)
127 |             page_size: Results per page (1-100)
128 | 
129 |         Returns:
130 |             Validated (page, page_size) tuple
131 | 
132 |         Raises:
133 |             InvalidParameterError: If parameters are invalid
134 |         """
135 |         if page < 1:
136 |             raise InvalidParameterError("page", page, "integer >= 1")
137 | 
138 |         if page_size < 1 or page_size > 100:
139 |             raise InvalidParameterError(
140 |                 "page_size", page_size, "integer between 1 and 100"
141 |             )
142 | 
143 |         return page, page_size
144 | 
145 |     @staticmethod
146 |     def parse_search_params(
147 |         params: dict[str, Any], domain: str
148 |     ) -> dict[str, Any]:
149 |         """Parse and validate all search parameters for a domain.
150 | 
151 |         Args:
152 |             params: Raw parameters dictionary
153 |             domain: Domain being searched
154 | 
155 |         Returns:
156 |             Validated parameters dictionary
157 |         """
158 |         parsed: dict[str, Any] = {}
159 | 
160 |         # Common list parameters
161 |         list_params = [
162 |             "genes",
163 |             "diseases",
164 |             "variants",
165 |             "chemicals",
166 |             "keywords",
167 |             "conditions",
168 |             "interventions",
169 |         ]
170 | 
171 |         for param_name in list_params:
172 |             if param_name in params and params[param_name] is not None:
173 |                 parsed[param_name] = ParameterParser.parse_list_param(
174 |                     params[param_name], param_name
175 |                 )
176 | 
177 |         # Domain-specific parameters
178 |         if (
179 |             domain == "trial"
180 |             and "phase" in params
181 |             and params.get("phase") is not None
182 |         ):
183 |             parsed["phase"] = ParameterParser.normalize_phase(
184 |                 params.get("phase")
185 |             )
186 | 
187 |         # Pass through other parameters
188 |         for key, value in params.items():
189 |             if key not in parsed and key not in list_params and key != "phase":
190 |                 parsed[key] = value
191 | 
192 |         return parsed
193 | 
```

--------------------------------------------------------------------------------
/src/biomcp/openfda/drug_labels.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | OpenFDA Drug Labels (SPL) integration.
  3 | """
  4 | 
  5 | import logging
  6 | 
  7 | from .constants import (
  8 |     OPENFDA_DEFAULT_LIMIT,
  9 |     OPENFDA_DISCLAIMER,
 10 |     OPENFDA_DRUG_LABELS_URL,
 11 |     OPENFDA_MAX_LIMIT,
 12 | )
 13 | from .drug_labels_helpers import (
 14 |     build_label_search_query,
 15 |     format_label_header,
 16 |     format_label_section,
 17 |     format_label_summary,
 18 |     get_default_sections,
 19 |     get_section_titles,
 20 | )
 21 | from .utils import clean_text, format_count, make_openfda_request
 22 | 
 23 | logger = logging.getLogger(__name__)
 24 | 
 25 | 
 26 | async def search_drug_labels(
 27 |     name: str | None = None,
 28 |     indication: str | None = None,
 29 |     boxed_warning: bool = False,
 30 |     section: str | None = None,
 31 |     limit: int = OPENFDA_DEFAULT_LIMIT,
 32 |     skip: int = 0,
 33 |     api_key: str | None = None,
 34 | ) -> str:
 35 |     """
 36 |     Search FDA drug product labels (SPL).
 37 | 
 38 |     Args:
 39 |         name: Drug name to search for
 40 |         indication: Search for drugs indicated for this condition
 41 |         boxed_warning: Filter for drugs with boxed warnings
 42 |         section: Specific label section to search
 43 |         limit: Maximum number of results
 44 |         skip: Number of results to skip
 45 | 
 46 |         api_key: Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)
 47 | 
 48 |     Returns:
 49 |         Formatted string with drug label information
 50 |     """
 51 |     if not name and not indication and not section and not boxed_warning:
 52 |         return (
 53 |             "⚠️ Please specify a drug name, indication, or label section to search.\n\n"
 54 |             "Examples:\n"
 55 |             "- Search by name: --name 'pembrolizumab'\n"
 56 |             "- Search by indication: --indication 'melanoma'\n"
 57 |             "- Search by section: --section 'contraindications'"
 58 |         )
 59 | 
 60 |     # Build and execute search
 61 |     search_query = build_label_search_query(
 62 |         name, indication, boxed_warning, section
 63 |     )
 64 |     params = {
 65 |         "search": search_query,
 66 |         "limit": min(limit, OPENFDA_MAX_LIMIT),
 67 |         "skip": skip,
 68 |     }
 69 | 
 70 |     response, error = await make_openfda_request(
 71 |         OPENFDA_DRUG_LABELS_URL, params, "openfda_drug_labels", api_key
 72 |     )
 73 | 
 74 |     if error:
 75 |         return f"⚠️ Error searching drug labels: {error}"
 76 | 
 77 |     if not response or not response.get("results"):
 78 |         return _format_no_results(name, indication, section)
 79 | 
 80 |     results = response["results"]
 81 |     total = (
 82 |         response.get("meta", {}).get("results", {}).get("total", len(results))
 83 |     )
 84 | 
 85 |     # Build output
 86 |     output = ["## FDA Drug Labels\n"]
 87 |     output.extend(_format_search_summary(name, indication, section, total))
 88 | 
 89 |     # Display results
 90 |     output.append(
 91 |         f"### Results (showing {min(len(results), 5)} of {total}):\n"
 92 |     )
 93 |     for i, result in enumerate(results[:5], 1):
 94 |         output.extend(format_label_summary(result, i))
 95 | 
 96 |     # Add tip for getting full labels
 97 |     if total > 0 and results and "set_id" in results[0]:
 98 |         output.append(
 99 |             "\n💡 **Tip**: Use `biomcp openfda label-get <label_id>` to retrieve "
100 |             "the complete label for any drug."
101 |         )
102 | 
103 |     output.append(f"\n{OPENFDA_DISCLAIMER}")
104 |     return "\n".join(output)
105 | 
106 | 
107 | def _format_no_results(
108 |     name: str | None, indication: str | None, section: str | None
109 | ) -> str:
110 |     """Format no results message."""
111 |     search_desc = []
112 |     if name:
113 |         search_desc.append(f"drug '{name}'")
114 |     if indication:
115 |         search_desc.append(f"indication '{indication}'")
116 |     if section:
117 |         search_desc.append(f"section '{section}'")
118 |     return f"No drug labels found for {' and '.join(search_desc)}."
119 | 
120 | 
def _format_search_summary(
    name: str | None, indication: str | None, section: str | None, total: int
) -> list[str]:
    """Build the summary lines placed at the top of the search output.

    The first line (only present when at least one criterion is set) lists
    the criteria separated by " | "; the last line reports the total number
    of labels via ``format_count``.
    """
    candidates = (
        (name, f"**Drug**: {name}"),
        (indication, f"**Indication**: {indication}"),
        (section, f"**Section**: {section}"),
    )
    criteria = [text for value, text in candidates if value]

    lines = [" | ".join(criteria)] if criteria else []
    lines.append(f"**Total Labels Found**: {format_count(total, 'label')}\n")
    return lines
140 | 
141 | 
142 | async def get_drug_label(
143 |     set_id: str,
144 |     sections: list[str] | None = None,
145 |     api_key: str | None = None,
146 | ) -> str:
147 |     """
148 |     Get detailed drug label information by set ID.
149 | 
150 |     Args:
151 |         set_id: Label set ID
152 |         sections: Specific sections to retrieve; when omitted or empty,
153 |             the defaults from get_default_sections() are used
154 |         api_key: Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)
155 | 
156 |     Returns:
157 |         Formatted string with detailed label information
158 |     """
159 |     params = {
160 |         "search": f'set_id:"{set_id}"',
161 |         "limit": 1,
162 |     }
163 | 
164 |     response, error = await make_openfda_request(
165 |         OPENFDA_DRUG_LABELS_URL, params, "openfda_drug_label_detail", api_key
166 |     )
167 | 
168 |     if error:
169 |         return f"⚠️ Error retrieving drug label: {error}"
170 | 
171 |     if not response or not response.get("results"):
172 |         return f"Drug label with ID '{set_id}' not found."
173 | 
174 |     result = response["results"][0]
175 | 
176 |     # Use default sections if not specified
177 |     if not sections:
178 |         sections = get_default_sections()
179 | 
180 |     # Build output
181 |     output = format_label_header(result, set_id)
182 | 
183 |     # Boxed warning (if exists)
184 |     if "boxed_warning" in result:
185 |         output.extend(_format_boxed_warning(result["boxed_warning"]))
186 | 
187 |     # Display requested sections
188 |     section_titles = get_section_titles()
189 |     for section in sections:
190 |         output.extend(format_label_section(result, section, section_titles))
191 | 
192 |     output.append(f"\n{OPENFDA_DISCLAIMER}")
193 |     return "\n".join(output)
194 | 
195 | 
196 | def _format_boxed_warning(boxed_warning: list) -> list[str]:
197 |     """Format boxed warning section."""
198 |     output = ["### ⚠️ BOXED WARNING\n"]
199 |     warning_text = clean_text(" ".join(boxed_warning))
200 |     output.append(warning_text)
201 |     output.append("")
202 |     return output
203 | 
```

--------------------------------------------------------------------------------
/src/biomcp/cli/articles.py:
--------------------------------------------------------------------------------

```python
  1 | import asyncio
  2 | import json
  3 | from typing import Annotated
  4 | 
  5 | import typer
  6 | 
  7 | from ..articles import fetch
  8 | from ..articles.search import PubmedRequest, search_articles
  9 | from ..articles.unified import search_articles_unified
 10 | 
 11 | article_app = typer.Typer(help="Search and retrieve biomedical articles.")
 12 | 
 13 | 
 14 | async def get_article_details(
 15 |     identifier: str, output_json: bool = False
 16 | ) -> str:
 17 |     """Get article details handling both PMIDs and DOIs with proper output format."""
 18 |     # Use the fetch module functions directly to control output format
 19 |     if fetch.is_doi(identifier):
 20 |         from ..articles.preprints import fetch_europe_pmc_article
 21 | 
 22 |         return await fetch_europe_pmc_article(
 23 |             identifier, output_json=output_json
 24 |         )
 25 |     elif fetch.is_pmid(identifier):
 26 |         return await fetch.fetch_articles(
 27 |             [int(identifier)], full=True, output_json=output_json
 28 |         )
 29 |     else:
 30 |         # Unknown identifier format
 31 |         error_data = [
 32 |             {
 33 |                 "error": f"Invalid identifier format: {identifier}. Expected either a PMID (numeric) or DOI (10.xxxx/xxxx format)."
 34 |             }
 35 |         ]
 36 |         if output_json:
 37 |             return json.dumps(error_data, indent=2)
 38 |         else:
 39 |             from .. import render
 40 | 
 41 |             return render.to_markdown(error_data)
 42 | 
 43 | 
 44 | @article_app.command("search")
 45 | def search_article(
 46 |     genes: Annotated[
 47 |         list[str] | None,
 48 |         typer.Option(
 49 |             "--gene",
 50 |             "-g",
 51 |             help="Gene name to search for (can be specified multiple times)",
 52 |         ),
 53 |     ] = None,
 54 |     variants: Annotated[
 55 |         list[str] | None,
 56 |         typer.Option(
 57 |             "--variant",
 58 |             "-v",
 59 |             help="Genetic variant to search for (can be specified multiple times)",
 60 |         ),
 61 |     ] = None,
 62 |     diseases: Annotated[
 63 |         list[str] | None,
 64 |         typer.Option(
 65 |             "--disease",
 66 |             "-d",
 67 |             help="Disease name to search for (can be specified multiple times)",
 68 |         ),
 69 |     ] = None,
 70 |     chemicals: Annotated[
 71 |         list[str] | None,
 72 |         typer.Option(
 73 |             "--chemical",
 74 |             "-c",
 75 |             help="Chemical name to search for (can be specified multiple times)",
 76 |         ),
 77 |     ] = None,
 78 |     keywords: Annotated[
 79 |         list[str] | None,
 80 |         typer.Option(
 81 |             "--keyword",
 82 |             "-k",
 83 |             help="Keyword to search for (can be specified multiple times)",
 84 |         ),
 85 |     ] = None,
 86 |     page: Annotated[
 87 |         int,
 88 |         typer.Option(
 89 |             "--page",
 90 |             "-p",
 91 |             help="Page number for pagination (starts at 1)",
 92 |         ),
 93 |     ] = 1,
 94 |     output_json: Annotated[
 95 |         bool,
 96 |         typer.Option(
 97 |             "--json",
 98 |             "-j",
 99 |             help="Render in JSON format",
100 |             case_sensitive=False,
101 |         ),
102 |     ] = False,
103 |     include_preprints: Annotated[
104 |         bool,
105 |         typer.Option(
106 |             "--include-preprints/--no-preprints",
107 |             help="Include preprint articles from bioRxiv/medRxiv and Europe PMC",
108 |         ),
109 |     ] = True,
110 | ):
111 |     """Search biomedical research articles"""
112 |     request = PubmedRequest(
113 |         genes=genes or [],
114 |         variants=variants or [],
115 |         diseases=diseases or [],
116 |         chemicals=chemicals or [],
117 |         keywords=keywords or [],
118 |     )
119 | 
120 |     if include_preprints:
121 |         result = asyncio.run(
122 |             search_articles_unified(
123 |                 request,
124 |                 include_pubmed=True,
125 |                 include_preprints=True,
126 |                 output_json=output_json,
127 |             )
128 |         )
129 |     else:
130 |         result = asyncio.run(search_articles(request, output_json))
131 |     typer.echo(result)
132 | 
133 | 
134 | @article_app.command("get")
135 | def get_article(
136 |     identifiers: Annotated[
137 |         list[str],
138 |         typer.Argument(
139 |             help="Article identifiers - PubMed IDs (e.g., 38768446) or DOIs (e.g., 10.1101/2024.01.20.23288905)",
140 |         ),
141 |     ],
142 |     full: Annotated[
143 |         bool,
144 |         typer.Option(
145 |             "--full",
146 |             "-f",
147 |             help="Whether to fetch full article text (PubMed only)",
148 |         ),
149 |     ] = False,
150 |     output_json: Annotated[
151 |         bool,
152 |         typer.Option(
153 |             "--json",
154 |             "-j",
155 |             help="Render in JSON format",
156 |             case_sensitive=False,
157 |         ),
158 |     ] = False,
159 | ):
160 |     """
161 |     Retrieve articles by PubMed ID or DOI.
162 | 
163 |     Supports:
164 |     - PubMed IDs for published articles (e.g., 38768446)
165 |     - DOIs for Europe PMC preprints (e.g., 10.1101/2024.01.20.23288905)
166 | 
167 |     For multiple articles, results are returned as a list.
168 |     """
169 |     # Handle single identifier
170 |     if len(identifiers) == 1:
171 |         result = asyncio.run(
172 |             get_article_details(identifiers[0], output_json=output_json)
173 |         )
174 |     else:
175 |         # For multiple identifiers, we need to handle them individually
176 |         # since they might be a mix of PMIDs and DOIs
177 |         results = []
178 |         for identifier in identifiers:
179 |             article_result = asyncio.run(
180 |                 get_article_details(identifier, output_json=True)
181 |             )
182 |             # Parse the result and add to list
183 |             try:
184 |                 article_data = json.loads(article_result)
185 |                 if isinstance(article_data, list):
186 |                     results.extend(article_data)
187 |                 else:
188 |                     results.append(article_data)
189 |             except json.JSONDecodeError:
190 |                 # Defensive: output_json=True should always yield valid JSON
191 |                 results.append({
192 |                     "error": f"Failed to parse result for {identifier}"
193 |                 })
194 | 
195 |         if output_json:
196 |             result = json.dumps(results, indent=2)
197 |         else:
198 |             from .. import render
199 | 
200 |             result = render.to_markdown(results)
201 | 
202 |     typer.echo(result)
203 | 
```

--------------------------------------------------------------------------------
/tests/tdd/variants/test_extract_gene_aa_change.py:
--------------------------------------------------------------------------------

```python
  1 | """Tests for _extract_gene_aa_change method in external.py."""
  2 | 
  3 | import pytest
  4 | 
  5 | from biomcp.variants.external import ExternalVariantAggregator
  6 | 
  7 | 
  8 | class TestExtractGeneAAChange:
  9 |     """Test the _extract_gene_aa_change method."""
 10 | 
 11 |     @pytest.fixture
 12 |     def aggregator(self):
 13 |         """Create an ExternalVariantAggregator instance."""
 14 |         return ExternalVariantAggregator()
 15 | 
 16 |     def test_extract_from_docm(self, aggregator):
 17 |         """Test extraction from DOCM data."""
 18 |         variant_data = {"docm": {"gene": "BRAF", "aa_change": "p.V600E"}}
 19 | 
 20 |         result = aggregator._extract_gene_aa_change(variant_data)
 21 |         assert result == "BRAF V600E"
 22 | 
 23 |     def test_extract_from_hgvsp_long_format(self, aggregator):
 24 |         """Test extraction from hgvsp with long amino acid names."""
 25 |         variant_data = {
 26 |             "cadd": {"gene": {"genename": "TP53"}},
 27 |             "hgvsp": ["p.Arg175His"],
 28 |         }
 29 | 
 30 |         result = aggregator._extract_gene_aa_change(variant_data)
 31 |         # The code doesn't convert all long forms, just checks for Val/Ala
 32 |         assert result == "TP53 Arg175His"
 33 | 
 34 |     def test_extract_from_hgvsp_with_dbnsfp(self, aggregator):
 35 |         """Test extraction from hgvsp with dbnsfp gene name."""
 36 |         variant_data = {
 37 |             "dbnsfp": {"genename": "EGFR"},
 38 |             "hgvsp": ["p.Leu858Arg"],
 39 |         }
 40 | 
 41 |         result = aggregator._extract_gene_aa_change(variant_data)
 42 |         # The code doesn't convert Leu/Arg to L/R
 43 |         assert result == "EGFR Leu858Arg"
 44 | 
 45 |     def test_extract_from_cadd_data(self, aggregator):
 46 |         """Test extraction from CADD annotations."""
 47 |         variant_data = {
 48 |             "cadd": {
 49 |                 "gene": {"genename": "KRAS", "prot": {"protpos": 12}},
 50 |                 "oaa": "G",
 51 |                 "naa": "D",
 52 |             }
 53 |         }
 54 | 
 55 |         result = aggregator._extract_gene_aa_change(variant_data)
 56 |         assert result == "KRAS G12D"
 57 | 
 58 |     def test_extract_from_docm_without_p_prefix(self, aggregator):
 59 |         """Test extraction from DOCM without p. prefix."""
 60 |         variant_data = {"docm": {"gene": "PIK3CA", "aa_change": "E545K"}}
 61 | 
 62 |         result = aggregator._extract_gene_aa_change(variant_data)
 63 |         assert result == "PIK3CA E545K"
 64 | 
 65 |     def test_extract_with_multiple_hgvsp(self, aggregator):
 66 |         """Test handling of multiple hgvsp entries - should take first."""
 67 |         variant_data = {
 68 |             "cadd": {"gene": {"genename": "BRCA1"}},
 69 |             "hgvsp": ["p.Gln1756Ter", "p.Gln1756*"],
 70 |         }
 71 | 
 72 |         result = aggregator._extract_gene_aa_change(variant_data)
 73 |         # Takes the first one, doesn't convert Gln/Ter
 74 |         assert result == "BRCA1 Gln1756Ter"
 75 | 
 76 |     def test_extract_with_special_characters(self, aggregator):
 77 |         """Test extraction with special characters in protein change."""
 78 |         variant_data = {
 79 |             "cadd": {"gene": {"genename": "MLH1"}},
 80 |             "hgvsp": ["p.Lys618Alafs*9"],
 81 |         }
 82 | 
 83 |         result = aggregator._extract_gene_aa_change(variant_data)
 84 |         # Should extract the basic AA change pattern
 85 |         assert result is not None
 86 |         assert "MLH1" in result
 87 | 
 88 |     def test_extract_no_gene_name(self, aggregator):
 89 |         """Test when gene name is missing."""
 90 |         variant_data = {"hgvsp": ["p.Val600Glu"]}
 91 | 
 92 |         result = aggregator._extract_gene_aa_change(variant_data)
 93 |         assert result is None
 94 | 
 95 |     def test_extract_no_aa_change(self, aggregator):
 96 |         """Test when AA change is missing."""
 97 |         variant_data = {"cadd": {"gene": {"genename": "BRAF"}}}
 98 | 
 99 |         result = aggregator._extract_gene_aa_change(variant_data)
100 |         assert result is None
101 | 
102 |     def test_extract_empty_variant_data(self, aggregator):
103 |         """Test with empty variant data."""
104 |         result = aggregator._extract_gene_aa_change({})
105 |         assert result is None
106 | 
107 |     def test_extract_malformed_hgvsp(self, aggregator):
108 |         """Test with malformed HGVS protein notation."""
109 |         variant_data = {
110 |             "clinvar": {
111 |                 "gene": {"symbol": "MYC"},
112 |                 "hgvs": {"protein": ["invalid_format"]},
113 |             }
114 |         }
115 | 
116 |         result = aggregator._extract_gene_aa_change(variant_data)
117 |         assert result is None
118 | 
119 |     def test_extract_priority_order(self, aggregator):
120 |         """Test that DOCM is prioritized for AA change, CADD for gene name."""
121 |         variant_data = {
122 |             "docm": {"gene": "BRAF", "aa_change": "p.V600E"},
123 |             "hgvsp": ["p.Val600Lys"],  # Different change
124 |             "cadd": {
125 |                 "gene": {"genename": "WRONG", "prot": {"protpos": 600}},
126 |                 "oaa": "V",
127 |                 "naa": "K",
128 |             },
129 |         }
130 | 
131 |         result = aggregator._extract_gene_aa_change(variant_data)
132 |         # CADD is prioritized for gene name, DOCM for AA change
133 |         assert result == "WRONG V600E"
134 | 
135 |     def test_extract_regex_with_val_ala(self, aggregator):
136 |         """Test regex extraction when Val/Ala are present."""
137 |         # The code specifically looks for Val or Ala to trigger regex
138 |         variant_data = {
139 |             "cadd": {"gene": {"genename": "TEST1"}},
140 |             "hgvsp": ["p.Val600Ala"],
141 |         }
142 | 
143 |         result = aggregator._extract_gene_aa_change(variant_data)
144 |         # The regex doesn't find a match in "Val600Ala" because it's looking for [A-Z]\d+[A-Z]
145 |         # which would match "V600A" but not "Val600Ala"
146 |         assert result == "TEST1 Val600Ala"
147 | 
148 |     def test_extract_handles_exceptions_gracefully(self, aggregator):
149 |         """Test that exceptions are handled gracefully."""
150 |         # This should trigger an exception internally but return None
151 |         variant_data = {
152 |             "cadd": {"gene": {"genename": "GENE"}},
153 |             "hgvsp": None,  # None where a list of HGVS strings is expected
154 |         }
155 | 
156 |         result = aggregator._extract_gene_aa_change(variant_data)
157 |         assert result is None
158 | 
```

--------------------------------------------------------------------------------
/tests/tdd/test_openfda_unified.py:
--------------------------------------------------------------------------------

```python
  1 | """Tests for OpenFDA integration with unified search/fetch tools."""
  2 | 
  3 | import pytest
  4 | 
  5 | 
  6 | class TestOpenFDAUnifiedIntegration:
  7 |     """Test OpenFDA domain integration in unified tools."""
  8 | 
  9 |     def test_openfda_domains_registered(self):
 10 |         """Test that OpenFDA domains are properly registered in constants."""
 11 |         from biomcp.constants import (
 12 |             DOMAIN_TO_PLURAL,
 13 |             PLURAL_TO_DOMAIN,
 14 |             VALID_DOMAINS,
 15 |             VALID_DOMAINS_PLURAL,
 16 |         )
 17 | 
 18 |         # List of OpenFDA domains
 19 |         openfda_domains = [
 20 |             "fda_adverse",
 21 |             "fda_label",
 22 |             "fda_device",
 23 |             "fda_approval",
 24 |             "fda_recall",
 25 |             "fda_shortage",
 26 |         ]
 27 | 
 28 |         openfda_plurals = [
 29 |             "fda_adverse_events",
 30 |             "fda_labels",
 31 |             "fda_device_events",
 32 |             "fda_approvals",
 33 |             "fda_recalls",
 34 |             "fda_shortages",
 35 |         ]
 36 | 
 37 |         # Check that all OpenFDA domains are registered
 38 |         for domain in openfda_domains:
 39 |             assert domain in VALID_DOMAINS, f"{domain} not in VALID_DOMAINS"
 40 |             assert (
 41 |                 domain in DOMAIN_TO_PLURAL
 42 |             ), f"{domain} not in DOMAIN_TO_PLURAL"
 43 | 
 44 |         # Check plural forms
 45 |         for plural in openfda_plurals:
 46 |             assert (
 47 |                 plural in VALID_DOMAINS_PLURAL
 48 |             ), f"{plural} not in VALID_DOMAINS_PLURAL"
 49 |             assert (
 50 |                 plural in PLURAL_TO_DOMAIN
 51 |             ), f"{plural} not in PLURAL_TO_DOMAIN"
 52 | 
 53 |         # Check mappings are correct
 54 |         assert DOMAIN_TO_PLURAL["fda_adverse"] == "fda_adverse_events"
 55 |         assert DOMAIN_TO_PLURAL["fda_label"] == "fda_labels"
 56 |         assert DOMAIN_TO_PLURAL["fda_device"] == "fda_device_events"
 57 |         assert DOMAIN_TO_PLURAL["fda_approval"] == "fda_approvals"
 58 |         assert DOMAIN_TO_PLURAL["fda_recall"] == "fda_recalls"
 59 |         assert DOMAIN_TO_PLURAL["fda_shortage"] == "fda_shortages"
 60 | 
 61 |         assert PLURAL_TO_DOMAIN["fda_adverse_events"] == "fda_adverse"
 62 |         assert PLURAL_TO_DOMAIN["fda_labels"] == "fda_label"
 63 |         assert PLURAL_TO_DOMAIN["fda_device_events"] == "fda_device"
 64 |         assert PLURAL_TO_DOMAIN["fda_approvals"] == "fda_approval"
 65 |         assert PLURAL_TO_DOMAIN["fda_recalls"] == "fda_recall"
 66 |         assert PLURAL_TO_DOMAIN["fda_shortages"] == "fda_shortage"
 67 | 
 68 |     def test_openfda_search_domain_type_hints(self):
 69 |         """Test that OpenFDA domains are in search tool type hints."""
 70 |         import inspect
 71 | 
 72 |         from biomcp.router import search
 73 | 
 74 |         # Get the function signature
 75 |         sig = inspect.signature(search)
 76 |         domain_param = sig.parameters.get("domain")
 77 | 
 78 |         # Check if domain parameter exists
 79 |         assert (
 80 |             domain_param is not None
 81 |         ), "domain parameter not found in search function"
 82 | 
 83 |         # Get the annotation
 84 |         annotation = domain_param.annotation
 85 | 
 86 |         # The annotation is expected to be a Literal that includes the OpenFDA
 87 |         # domains. NOTE: a missing annotation is inspect.Parameter.empty, not
 88 |         # None, so the assertion below cannot catch an unannotated parameter.
 89 |         assert (
 90 |             annotation is not None
 91 |         ), "domain parameter has no type annotation"
 92 | 
 93 |     def test_openfda_fetch_domain_type_hints(self):
 94 |         """Test that OpenFDA domains are in fetch tool type hints."""
 95 |         import inspect
 96 | 
 97 |         from biomcp.router import fetch
 98 | 
 99 |         # Get the function signature
100 |         sig = inspect.signature(fetch)
101 |         domain_param = sig.parameters.get("domain")
102 | 
103 |         # Check if domain parameter exists
104 |         assert (
105 |             domain_param is not None
106 |         ), "domain parameter not found in fetch function"
107 | 
108 |         # Get the annotation
109 |         annotation = domain_param.annotation
110 | 
111 |         # The annotation should be a Literal type that includes OpenFDA domains
112 |         assert (
113 |             annotation is not None
114 |         ), "domain parameter has no type annotation"
115 | 
116 |     @pytest.mark.asyncio
117 |     async def test_openfda_search_basic_call(self):
118 |         """Test that OpenFDA domain search doesn't raise errors with basic call."""
119 |         from unittest.mock import AsyncMock, patch
120 | 
121 |         # Mock the OpenFDA search function that will be imported
122 |         with patch(
123 |             "biomcp.openfda.adverse_events.search_adverse_events",
124 |             new_callable=AsyncMock,
125 |         ) as mock_search:
126 |             mock_search.return_value = (
127 |                 "## FDA Adverse Event Reports\n\nTest results"
128 |             )
129 | 
130 |             from biomcp.router import search
131 | 
132 |             # This should not raise an error
133 |             result = await search(
134 |                 query=None,  # Required parameter
135 |                 domain="fda_adverse",
136 |                 chemicals=["test"],
137 |                 page_size=1,
138 |             )
139 | 
140 |             # Basic check that result has expected structure
141 |             assert isinstance(result, dict)
142 |             assert "results" in result
143 | 
144 |     @pytest.mark.asyncio
145 |     async def test_openfda_fetch_basic_call(self):
146 |         """Test that OpenFDA domain fetch doesn't raise errors with basic call."""
147 |         from unittest.mock import AsyncMock, patch
148 | 
149 |         # Mock the OpenFDA get function that will be imported
150 |         with patch(
151 |             "biomcp.openfda.drug_approvals.get_drug_approval",
152 |             new_callable=AsyncMock,
153 |         ) as mock_get:
154 |             mock_get.return_value = "## Drug Approval Details\n\nTest details"
155 | 
156 |             from biomcp.router import fetch
157 | 
158 |             # This should not raise an error
159 |             result = await fetch(
160 |                 id="TEST123",
161 |                 domain="fda_approval",
162 |             )
163 | 
164 |             # Basic check that result has expected structure
165 |             assert isinstance(result, dict)
166 |             assert "title" in result
167 |             assert "text" in result
168 |             assert "metadata" in result
169 | 
```

--------------------------------------------------------------------------------
/tests/tdd/articles/test_preprints.py:
--------------------------------------------------------------------------------

```python
  1 | """Tests for preprint search functionality."""
  2 | 
  3 | from unittest.mock import AsyncMock, patch
  4 | 
  5 | import pytest
  6 | 
  7 | from biomcp.articles.preprints import (
  8 |     BiorxivClient,
  9 |     BiorxivResponse,
 10 |     BiorxivResult,
 11 |     EuropePMCClient,
 12 |     EuropePMCResponse,
 13 |     PreprintSearcher,
 14 | )
 15 | from biomcp.articles.search import PubmedRequest, ResultItem
 16 | from biomcp.core import PublicationState
 17 | 
 18 | 
 19 | class TestBiorxivClient:
 20 |     """Tests for BiorxivClient."""
 21 | 
 22 |     @pytest.mark.asyncio
 23 |     async def test_search_biorxiv_success(self):
 24 |         """Test successful bioRxiv search."""
 25 |         client = BiorxivClient()
 26 | 
 27 |         # Mock response
 28 |         mock_response = BiorxivResponse(
 29 |             collection=[
 30 |                 BiorxivResult(
 31 |                     doi="10.1101/2024.01.01.123456",
 32 |                     title="Test BRAF Mutation Study",
 33 |                     authors="Smith, J.; Doe, J.",
 34 |                     date="2024-01-01",
 35 |                     abstract="Study about BRAF mutations in cancer.",
 36 |                     server="biorxiv",
 37 |                 )
 38 |             ],
 39 |             total=1,
 40 |         )
 41 | 
 42 |         with patch("biomcp.http_client.request_api") as mock_request:
 43 |             mock_request.return_value = (mock_response, None)
 44 | 
 45 |             results = await client.search("BRAF")
 46 | 
 47 |             assert len(results) == 1
 48 |             assert results[0].doi == "10.1101/2024.01.01.123456"
 49 |             assert results[0].title == "Test BRAF Mutation Study"
 50 |             assert results[0].publication_state == PublicationState.PREPRINT
 51 |             assert "preprint" in results[0].journal.lower()
 52 | 
 53 |     @pytest.mark.asyncio
 54 |     async def test_search_biorxiv_no_results(self):
 55 |         """Test bioRxiv search with no results."""
 56 |         client = BiorxivClient()
 57 | 
 58 |         with patch("biomcp.http_client.request_api") as mock_request:
 59 |             mock_request.return_value = (
 60 |                 None,
 61 |                 {"code": 404, "message": "Not found"},
 62 |             )
 63 | 
 64 |             results = await client.search("nonexistent")
 65 | 
 66 |             assert len(results) == 0
 67 | 
 68 | 
 69 | class TestEuropePMCClient:
 70 |     """Tests for EuropePMCClient."""
 71 | 
 72 |     @pytest.mark.asyncio
 73 |     async def test_search_europe_pmc_success(self):
 74 |         """Test successful Europe PMC search."""
 75 |         client = EuropePMCClient()
 76 | 
 77 |         # Mock response
 78 |         mock_response = EuropePMCResponse(
 79 |             hitCount=1,
 80 |             resultList={
 81 |                 "result": [
 82 |                     {
 83 |                         "id": "PPR123456",
 84 |                         "doi": "10.1101/2024.01.02.654321",
 85 |                         "title": "TP53 Mutation Analysis",
 86 |                         "authorString": "Johnson, A., Williams, B.",
 87 |                         "journalTitle": "bioRxiv",
 88 |                         "firstPublicationDate": "2024-01-02",
 89 |                         "abstractText": "Analysis of TP53 mutations.",
 90 |                     }
 91 |                 ]
 92 |             },
 93 |         )
 94 | 
 95 |         with patch("biomcp.http_client.request_api") as mock_request:
 96 |             mock_request.return_value = (mock_response, None)
 97 | 
 98 |             results = await client.search("TP53")
 99 | 
100 |             assert len(results) == 1
101 |             assert results[0].doi == "10.1101/2024.01.02.654321"
102 |             assert results[0].title == "TP53 Mutation Analysis"
103 |             assert results[0].publication_state == PublicationState.PREPRINT
104 | 
105 | 
106 | class TestPreprintSearcher:
107 |     """Tests for PreprintSearcher."""
108 | 
109 |     @pytest.mark.asyncio
110 |     async def test_search_combined_sources(self):
111 |         """Test searching across multiple preprint sources."""
112 |         searcher = PreprintSearcher()
113 | 
114 |         # Mock both clients
115 |         mock_biorxiv_results = [
116 |             ResultItem(
117 |                 doi="10.1101/2024.01.01.111111",
118 |                 title="BRAF Study 1",
119 |                 date="2024-01-01",
120 |                 publication_state=PublicationState.PREPRINT,
121 |             )
122 |         ]
123 | 
124 |         mock_europe_results = [
125 |             ResultItem(
126 |                 doi="10.1101/2024.01.02.222222",
127 |                 title="BRAF Study 2",
128 |                 date="2024-01-02",
129 |                 publication_state=PublicationState.PREPRINT,
130 |             )
131 |         ]
132 | 
133 |         searcher.biorxiv_client.search = AsyncMock(
134 |             return_value=mock_biorxiv_results
135 |         )
136 |         searcher.europe_pmc_client.search = AsyncMock(
137 |             return_value=mock_europe_results
138 |         )
139 | 
140 |         request = PubmedRequest(genes=["BRAF"])
141 |         response = await searcher.search(request)
142 | 
143 |         assert response.count == 2
144 |         assert len(response.results) == 2
145 |         # Results should be sorted by date (newest first)
146 |         assert response.results[0].doi == "10.1101/2024.01.02.222222"
147 |         assert response.results[1].doi == "10.1101/2024.01.01.111111"
148 | 
149 |     @pytest.mark.asyncio
150 |     async def test_search_duplicate_removal(self):
151 |         """Test that duplicate DOIs are removed."""
152 |         searcher = PreprintSearcher()
153 | 
154 |         # Create duplicate results with same DOI
155 |         duplicate_doi = "10.1101/2024.01.01.999999"
156 | 
157 |         mock_biorxiv_results = [
158 |             ResultItem(
159 |                 doi=duplicate_doi,
160 |                 title="Duplicate Study",
161 |                 date="2024-01-01",
162 |                 publication_state=PublicationState.PREPRINT,
163 |             )
164 |         ]
165 | 
166 |         mock_europe_results = [
167 |             ResultItem(
168 |                 doi=duplicate_doi,
169 |                 title="Duplicate Study",
170 |                 date="2024-01-01",
171 |                 publication_state=PublicationState.PREPRINT,
172 |             )
173 |         ]
174 | 
175 |         searcher.biorxiv_client.search = AsyncMock(
176 |             return_value=mock_biorxiv_results
177 |         )
178 |         searcher.europe_pmc_client.search = AsyncMock(
179 |             return_value=mock_europe_results
180 |         )
181 | 
182 |         request = PubmedRequest(keywords=["test"])
183 |         response = await searcher.search(request)
184 | 
185 |         assert response.count == 1
186 |         assert len(response.results) == 1
187 |         assert response.results[0].doi == duplicate_doi
188 | 
```

--------------------------------------------------------------------------------
/tests/tdd/test_render.py:
--------------------------------------------------------------------------------

```python
  1 | from biomcp import render
  2 | 
  3 | 
  4 | def test_render_full_json(data_dir):
  5 |     input_data = (data_dir / "ct_gov/trials_NCT04280705.json").read_text()
  6 |     expect_markdown = (data_dir / "ct_gov/trials_NCT04280705.txt").read_text()
  7 |     markdown = render.to_markdown(input_data)
  8 |     assert markdown == expect_markdown
  9 | 
 10 |     input_data = (
 11 |         data_dir / "myvariant/variants_full_braf_v600e.json"
 12 |     ).read_text()
 13 |     expect_markdown = (
 14 |         data_dir / "myvariant/variants_full_braf_v600e.txt"
 15 |     ).read_text()
 16 |     markdown = render.to_markdown(input_data)
 17 |     print("==" * 100)
 18 |     print(markdown)
 19 |     print("==" * 100)
 20 |     assert markdown == expect_markdown
 21 | 
 22 | 
 23 | def test_render_with_nones():
 24 |     markdown = render.to_markdown(data)
 25 |     assert (
 26 |         markdown
 27 |         == """# Studies
 28 | 
 29 | ## Protocol Section
 30 | 
 31 | ### Design Module
 32 | Study Type: interventional
 33 | Phases: phase2
 34 | 
 35 | ### Identification Module
 36 | Brief Title:
 37 |   study of autologous tumor infiltrating lymphocytes in patients with
 38 |   solid tumors
 39 | Nct Id: nct03645928
 40 | 
 41 | ### Status Module
 42 | Overall Status: recruiting
 43 | 
 44 | #### Completion Date Struct
 45 | Date: 2029-08-09
 46 | 
 47 | #### Start Date Struct
 48 | Date: 2019-05-07
 49 | """
 50 |     )
 51 | 
 52 | 
 53 | data = {
 54 |     "next_page_token": None,
 55 |     "studies": [
 56 |         {
 57 |             "derived_section": None,
 58 |             "document_section": None,
 59 |             "has_results": None,
 60 |             "protocol_section": {
 61 |                 "arms_interventions_module": None,
 62 |                 "conditions_module": None,
 63 |                 "contacts_locations_module": None,
 64 |                 "description_module": None,
 65 |                 "design_module": {
 66 |                     "design_info": None,
 67 |                     "enrollment_info": None,
 68 |                     "phases": ["phase2"],
 69 |                     "study_type": "interventional",
 70 |                 },
 71 |                 "eligibility_module": None,
 72 |                 "identification_module": {
 73 |                     "acronym": None,
 74 |                     "brief_title": "study "
 75 |                     "of "
 76 |                     "autologous "
 77 |                     "tumor "
 78 |                     "infiltrating "
 79 |                     "lymphocytes "
 80 |                     "in "
 81 |                     "patients "
 82 |                     "with "
 83 |                     "solid "
 84 |                     "tumors",
 85 |                     "nct_id": "nct03645928",
 86 |                     "official_title": None,
 87 |                     "org_study_id_info": None,
 88 |                     "organization": None,
 89 |                     "secondary_id_infos": None,
 90 |                 },
 91 |                 "outcomes_module": None,
 92 |                 "oversight_module": None,
 93 |                 "references_module": None,
 94 |                 "sponsor_collaborators_module": None,
 95 |                 "status_module": {
 96 |                     "completion_date_struct": {
 97 |                         "date": "2029-08-09",
 98 |                         "type": None,
 99 |                     },
100 |                     "expanded_access_info": None,
101 |                     "last_known_status": None,
102 |                     "last_update_post_date_struct": None,
103 |                     "last_update_submit_date": None,
104 |                     "overall_status": "recruiting",
105 |                     "primary_completion_date_struct": None,
106 |                     "results_first_post_date_struct": None,
107 |                     "results_first_submit_date": None,
108 |                     "results_first_submit_qc_date": None,
109 |                     "start_date_struct": {"date": "2019-05-07", "type": None},
110 |                     "status_verified_date": None,
111 |                     "study_first_post_date_struct": None,
112 |                     "study_first_submit_date": None,
113 |                     "study_first_submit_qc_date": None,
114 |                     "why_stopped": None,
115 |                 },
116 |             },
117 |             "results_section": None,
118 |         },
119 |     ],
120 | }
121 | 
122 | 
123 | def test_transform_key_protocol_section():
124 |     assert render.transform_key("protocol_section") == "Protocol Section"
125 | 
126 | 
127 | def test_transform_key_nct_number():
128 |     assert render.transform_key("nct_number") == "Nct Number"
129 | 
130 | 
131 | def test_transform_key_study_url():
132 |     assert render.transform_key("study_url") == "Study Url"
133 | 
134 | 
135 | def test_transform_key_allcaps():
136 |     assert render.transform_key("allcaps") == "Allcaps"
137 | 
138 | 
139 | def test_transform_key_primary_purpose():
140 |     assert render.transform_key("primary_purpose") == "Primary Purpose"
141 | 
142 | 
143 | def test_transform_key_underscores():
144 |     assert render.transform_key("some_key_name") == "Some Key Name"
145 | 
146 | 
147 | def test_transform_key_lowercase():
148 |     assert render.transform_key("somekey") == "Somekey"
149 | 
150 | 
151 | def test_transform_key_nctid():
152 |     assert render.transform_key("nct_id") == "Nct Id"
153 | 
154 | 
155 | def test_transform_key_4dct():
156 |     assert render.transform_key("4dct") == "4dct"
157 | 
158 | 
159 | def test_wrap_preserve_newlines_blank():
160 |     assert render.wrap_preserve_newlines("", 20) == []
161 | 
162 | 
163 | def test_wrap_preserve_newlines_short_line():
164 |     text = "hello world"
165 |     assert render.wrap_preserve_newlines(text, 20) == ["hello world"]
166 | 
167 | 
168 | def test_wrap_preserve_newlines_long():
169 |     text = "this line is definitely longer than twenty characters"
170 |     lines = render.wrap_preserve_newlines(text, 20)
171 |     assert len(lines) > 1
172 |     assert "this line is" in lines[0]
173 | 
174 | 
175 | def test_process_scalar_list_fits():
176 |     lines = []
177 |     render.process_scalar_list(
178 |         "conditions",
179 |         lines,
180 |         ["condition1", "condition2"],
181 |     )
182 |     assert lines == ["Conditions: condition1, condition2"]
183 | 
184 | 
185 | def test_process_scalar_list_too_long():
186 |     lines = []
187 |     big_list = ["test_value" * 10, "another" * 5]
188 |     render.process_scalar_list("giant_field", lines, big_list)
189 |     assert lines[0].startswith("Giant Field:")
190 |     assert lines[1].startswith("- test_value")
191 | 
192 | 
193 | def test_render_key_value_short():
194 |     lines = []
195 |     render.render_key_value(lines, "nct_number", "nct100")
196 |     assert lines == ["Nct Number: nct100"]
197 | 
198 | 
199 | def test_render_key_value_long():
200 |     lines = []
201 |     render.render_key_value(lines, "brief_summary", "hello " * 15)
202 |     # first line "brief summary:"
203 |     assert lines[0] == "Brief Summary:"
204 |     assert lines[1].startswith("  hello hello")
205 | 
```

--------------------------------------------------------------------------------
/src/biomcp/articles/search_optimized.py:
--------------------------------------------------------------------------------

```python
  1 | """Optimized article search with caching and parallel processing."""
  2 | 
  3 | import asyncio
  4 | import hashlib
  5 | 
  6 | from .. import ensure_list
  7 | from ..shared_context import get_search_context
  8 | from ..utils.request_cache import get_cache
  9 | from .search import PubmedRequest
 10 | from .unified import search_articles_unified
 11 | 
 12 | # Cache for article search results (5 minute TTL)
 13 | _search_cache = get_cache("article_search", ttl_seconds=300)
 14 | 
 15 | 
 16 | def _get_search_cache_key(
 17 |     request: PubmedRequest, include_preprints: bool, include_cbioportal: bool
 18 | ) -> str:
 19 |     """Generate a cache key for search requests."""
 20 |     # Create a deterministic key from search parameters
 21 |     key_parts = [
 22 |         f"chemicals:{sorted(request.chemicals)}",
 23 |         f"diseases:{sorted(request.diseases)}",
 24 |         f"genes:{sorted(request.genes)}",
 25 |         f"keywords:{sorted(request.keywords)}",
 26 |         f"variants:{sorted(request.variants)}",
 27 |         f"preprints:{include_preprints}",
 28 |         f"cbioportal:{include_cbioportal}",
 29 |     ]
 30 |     key_string = "|".join(key_parts)
 31 |     return hashlib.sha256(key_string.encode()).hexdigest()
 32 | 
 33 | 
 34 | async def article_searcher_optimized(
 35 |     call_benefit: str,
 36 |     chemicals: list[str] | str | None = None,
 37 |     diseases: list[str] | str | None = None,
 38 |     genes: list[str] | str | None = None,
 39 |     keywords: list[str] | str | None = None,
 40 |     variants: list[str] | str | None = None,
 41 |     include_preprints: bool = True,
 42 |     include_cbioportal: bool = True,
 43 | ) -> str:
 44 |     """Run a unified article search, caching the result for five minutes."""
 45 |     # ``call_benefit`` is accepted but never read in this function.
 46 |     # Normalize every term argument to a list before building the request.
 47 |     request = PubmedRequest(
 48 |         chemicals=ensure_list(chemicals, split_strings=True),
 49 |         diseases=ensure_list(diseases, split_strings=True),
 50 |         genes=ensure_list(genes, split_strings=True),
 51 |         keywords=ensure_list(keywords, split_strings=True),
 52 |         variants=ensure_list(variants, split_strings=True),
 53 |     )
 54 | 
 55 |     # Return a cached result if present; the key uses the terms as supplied.
 56 |     cache_key = _get_search_cache_key(
 57 |         request, include_preprints, include_cbioportal
 58 |     )
 59 |     cached_result = await _search_cache.get(cache_key)
 60 |     if cached_result is not None:
 61 |         return cached_result
 62 | 
 63 |     # If a shared search context exists, keep only the genes it validates.
 64 |     context = get_search_context()
 65 |     if context and request.genes:
 66 |         # Validation runs one gene at a time, preserving the input order.
 67 |         valid_genes = []
 68 |         for gene in request.genes:
 69 |             if await context.validate_gene(gene):
 70 |                 valid_genes.append(gene)
 71 |         request.genes = valid_genes
 72 | 
 73 |         # NOTE(review): this lookup does not alter the search below.
 74 |         if include_cbioportal and request.genes:
 75 |             for gene in request.genes[:1]:  # Only the first gene is looked at
 76 |                 summary = context.get_gene_summary(gene)
 77 |                 if summary:
 78 |                     # Nothing is skipped here; the summary is not used.
 79 |                     pass
 80 | 
 81 |     # Cache miss: run the unified search (PubMed always included).
 82 |     result = await search_articles_unified(
 83 |         request,
 84 |         include_pubmed=True,
 85 |         include_preprints=include_preprints,
 86 |         include_cbioportal=include_cbioportal,
 87 |     )
 88 | 
 89 |     # Store the result for later identical requests (5 minute TTL).
 90 |     await _search_cache.set(cache_key, result, ttl=300)
 91 | 
 92 |     return result
 93 | 
 94 | 
 95 | # Additional optimization: Batch article searches
 96 | class ArticleSearchBatcher:
 97 |     """Batch multiple article searches to reduce overhead."""
 98 | 
 99 |     def __init__(self, batch_size: int = 5, timeout: float = 0.1):
100 |         self.batch_size = batch_size
101 |         self.timeout = timeout
102 |         self._pending_searches: list[tuple[PubmedRequest, asyncio.Future]] = []
103 |         self._batch_task: asyncio.Task | None = None
104 | 
105 |     async def search(self, request: PubmedRequest) -> str:
106 |         """Add a search to the batch."""
107 |         future = asyncio.get_event_loop().create_future()
108 |         self._pending_searches.append((request, future))
109 | 
110 |         # Start batch processing if not already running
111 |         if self._batch_task is None or self._batch_task.done():
112 |             self._batch_task = asyncio.create_task(self._process_batch())
113 | 
114 |         return await future
115 | 
116 |     async def _process_batch(self):
117 |         """Process pending searches in batch."""
118 |         await asyncio.sleep(self.timeout)  # Wait for more requests
119 | 
120 |         if not self._pending_searches:
121 |             return
122 | 
123 |         # Take up to batch_size searches
124 |         batch = self._pending_searches[: self.batch_size]
125 |         self._pending_searches = self._pending_searches[self.batch_size :]
126 | 
127 |         # Process searches in parallel
128 |         search_tasks = []
129 |         for request, _ in batch:
130 |             task = search_articles_unified(request, include_pubmed=True)
131 |             search_tasks.append(task)
132 | 
133 |         results = await asyncio.gather(*search_tasks, return_exceptions=True)
134 | 
135 |         # Set results on futures
136 |         for (_, future), result in zip(batch, results, strict=False):
137 |             if isinstance(result, Exception):
138 |                 future.set_exception(result)
139 |             else:
140 |                 future.set_result(result)
141 | 
142 | 
143 | # Global batcher instance
144 | _article_batcher = ArticleSearchBatcher()
145 | 
146 | 
147 | async def article_searcher_batched(
148 |     call_benefit: str,
149 |     chemicals: list[str] | str | None = None,
150 |     diseases: list[str] | str | None = None,
151 |     genes: list[str] | str | None = None,
152 |     keywords: list[str] | str | None = None,
153 |     variants: list[str] | str | None = None,
154 |     include_preprints: bool = True,
155 |     include_cbioportal: bool = True,
156 | ) -> str:
157 |     """Normalize the search terms, then run the cached optimized search."""
158 |     # NOTE(review): despite the name, ``_article_batcher`` is not used here.
159 |     request = PubmedRequest(
160 |         chemicals=ensure_list(chemicals, split_strings=True),
161 |         diseases=ensure_list(diseases, split_strings=True),
162 |         genes=ensure_list(genes, split_strings=True),
163 |         keywords=ensure_list(keywords, split_strings=True),
164 |         variants=ensure_list(variants, split_strings=True),
165 |     )
166 | 
167 |     # Delegate to the caching implementation, passing the normalized lists.
168 |     return await article_searcher_optimized(
169 |         call_benefit=call_benefit,
170 |         chemicals=request.chemicals,
171 |         diseases=request.diseases,
172 |         genes=request.genes,
173 |         keywords=request.keywords,
174 |         variants=request.variants,
175 |         include_preprints=include_preprints,
176 |         include_cbioportal=include_cbioportal,
177 |     )
178 | 
```

--------------------------------------------------------------------------------
/tests/tdd/variants/test_cbioportal_mutations.py:
--------------------------------------------------------------------------------

```python
  1 | """Tests for cBioPortal mutation-specific search functionality."""
  2 | 
  3 | import pytest
  4 | 
  5 | from biomcp.utils.mutation_filter import MutationFilter
  6 | from biomcp.variants.cbioportal_mutations import (
  7 |     CBioPortalMutationClient,
  8 |     MutationHit,
  9 |     StudyMutationSummary,
 10 |     format_mutation_search_result,
 11 | )
 12 | 
 13 | 
 14 | class TestCBioPortalMutationSearch:
 15 |     """Test mutation-specific search functionality."""
 16 | 
 17 |     @pytest.mark.asyncio
 18 |     @pytest.mark.integration
 19 |     async def test_search_specific_mutation_srsf2_f57y(self):
 20 |         """Test searching for SRSF2 F57Y mutation."""
 21 |         client = CBioPortalMutationClient()
 22 | 
 23 |         result = await client.search_specific_mutation(
 24 |             gene="SRSF2", mutation="F57Y", max_studies=10
 25 |         )
 26 | 
 27 |         assert result is not None
 28 |         assert result.gene == "SRSF2"
 29 |         assert result.specific_mutation == "F57Y"
 30 |         assert result.studies_with_mutation >= 0
 31 | 
 32 |         # If mutations found, check structure
 33 |         if result.studies_with_mutation > 0:
 34 |             assert len(result.top_studies) > 0
 35 |             top_study = result.top_studies[0]
 36 |             assert isinstance(top_study, StudyMutationSummary)
 37 |             assert top_study.mutation_count > 0
 38 | 
 39 |     @pytest.mark.asyncio
 40 |     @pytest.mark.integration
 41 |     async def test_search_mutation_pattern_srsf2_f57(self):
 42 |         """Test searching for SRSF2 F57* mutations."""
 43 |         client = CBioPortalMutationClient()
 44 | 
 45 |         result = await client.search_specific_mutation(
 46 |             gene="SRSF2", pattern="F57*", max_studies=10
 47 |         )
 48 | 
 49 |         assert result is not None
 50 |         assert result.gene == "SRSF2"
 51 |         assert result.pattern == "F57*"
 52 | 
 53 |         # F57* should match F57Y, F57C, etc.
 54 |         if result.total_mutations > 0:
 55 |             assert result.mutation_types is not None
 56 |             # Check that we found some F57 mutations
 57 |             f57_mutations = [
 58 |                 mut for mut in result.mutation_types if mut.startswith("F57")
 59 |             ]
 60 |             assert len(f57_mutations) > 0
 61 | 
 62 |     @pytest.mark.asyncio
 63 |     @pytest.mark.integration
 64 |     async def test_search_braf_v600e(self):
 65 |         """Test searching for BRAF V600E - a very common mutation."""
 66 |         client = CBioPortalMutationClient()
 67 | 
 68 |         result = await client.search_specific_mutation(
 69 |             gene="BRAF", mutation="V600E", max_studies=20
 70 |         )
 71 | 
 72 |         assert result is not None
 73 |         assert result.gene == "BRAF"
 74 |         assert result.specific_mutation == "V600E"
 75 |         # V600E is very common, should have many studies
 76 |         assert result.studies_with_mutation > 10
 77 |         assert len(result.top_studies) > 0
 78 | 
 79 |         # Check melanoma is in top cancer types
 80 |         cancer_types = [s.cancer_type for s in result.top_studies]
 81 |         # At least some melanoma studies should have V600E
 82 |         assert any("melanoma" in ct.lower() for ct in cancer_types)
 83 | 
 84 |     def test_filter_mutations_specific(self):
 85 |         """Test filtering for specific mutations."""
 86 |         mutations = [
 87 |             MutationHit(
 88 |                 study_id="study1",
 89 |                 molecular_profile_id="study1_mutations",
 90 |                 protein_change="F57Y",
 91 |                 mutation_type="Missense",
 92 |             ),
 93 |             MutationHit(
 94 |                 study_id="study1",
 95 |                 molecular_profile_id="study1_mutations",
 96 |                 protein_change="F57C",
 97 |                 mutation_type="Missense",
 98 |             ),
 99 |             MutationHit(
100 |                 study_id="study2",
101 |                 molecular_profile_id="study2_mutations",
102 |                 protein_change="R88Q",
103 |                 mutation_type="Missense",
104 |             ),
105 |         ]
106 | 
107 |         # Filter for F57Y
108 |         mutation_filter = MutationFilter(specific_mutation="F57Y")
109 |         filtered = mutation_filter.filter_mutations(mutations)
110 |         assert len(filtered) == 1
111 |         assert filtered[0].protein_change == "F57Y"
112 | 
113 |     def test_filter_mutations_pattern(self):
114 |         """Test filtering with wildcard patterns."""
115 |         mutations = [
116 |             MutationHit(
117 |                 study_id="study1",
118 |                 molecular_profile_id="study1_mutations",
119 |                 protein_change="F57Y",
120 |                 mutation_type="Missense",
121 |             ),
122 |             MutationHit(
123 |                 study_id="study1",
124 |                 molecular_profile_id="study1_mutations",
125 |                 protein_change="F57C",
126 |                 mutation_type="Missense",
127 |             ),
128 |             MutationHit(
129 |                 study_id="study2",
130 |                 molecular_profile_id="study2_mutations",
131 |                 protein_change="R88Q",
132 |                 mutation_type="Missense",
133 |             ),
134 |         ]
135 | 
136 |         # Filter for F57*
137 |         mutation_filter = MutationFilter(pattern="F57*")
138 |         filtered = mutation_filter.filter_mutations(mutations)
139 |         assert len(filtered) == 2
140 |         assert all(m.protein_change.startswith("F57") for m in filtered)
141 | 
142 |     def test_format_mutation_search_result(self):
143 |         """Test formatting of mutation search results."""
144 |         from biomcp.variants.cbioportal_mutations import MutationSearchResult
145 | 
146 |         result = MutationSearchResult(
147 |             gene="SRSF2",
148 |             specific_mutation="F57Y",
149 |             total_studies=100,
150 |             studies_with_mutation=3,
151 |             total_mutations=5,
152 |             top_studies=[
153 |                 StudyMutationSummary(
154 |                     study_id="msk_ch_2023",
155 |                     study_name="Cancer Therapy and Clonal Hematopoiesis",
156 |                     cancer_type="mixed",
157 |                     mutation_count=5,
158 |                     sample_count=100,
159 |                 ),
160 |                 StudyMutationSummary(
161 |                     study_id="mds_mskcc_2020",
162 |                     study_name="Myelodysplastic Syndrome Study",
163 |                     cancer_type="mds",
164 |                     mutation_count=2,
165 |                     sample_count=50,
166 |                 ),
167 |             ],
168 |             mutation_types={"F57Y": 5},
169 |         )
170 | 
171 |         formatted = format_mutation_search_result(result)
172 | 
173 |         assert "SRSF2" in formatted
174 |         assert "F57Y" in formatted
175 |         assert "**Studies with Mutation**: 3" in formatted
176 |         assert "msk_ch_2023" in formatted
177 |         assert "|     5 |" in formatted  # mutation count
178 | 
```

--------------------------------------------------------------------------------
/docs/backend-services-reference/06-pubtator3.md:
--------------------------------------------------------------------------------

```markdown
  1 | # PubTator3 API
  2 | 
  3 | This document describes the PubTator3 API used by BioMCP for searching biomedical literature and retrieving article details with annotations. Understanding this API provides context for how BioMCP's article commands function.
  4 | 
  5 | ## Overview
  6 | 
  7 | The PubTator3 API provides a way to search for and retrieve biomedical articles
  8 | with entity annotations. This document outlines the API implementation details.
  9 | PubTator3 is a web-based tool that provides annotations of biomedical entities
 10 | in PubMed abstracts and PMC full-text articles. BioMCP uses the PubTator3 API
 11 | to search for and retrieve biomedical articles and their annotated entities
 12 | (genes, variants, diseases, chemicals, etc.).
 13 | 
 14 | > **CLI Documentation**: For information on using these APIs through the BioMCP
 15 | > command line interface, see
 16 | > the [Articles CLI Documentation](../user-guides/01-command-line-interface.md#article-commands).
 17 | 
 18 | ## Usage Guide
 19 | 
 20 | For practical examples of searching articles with PubTator3, see [How to Find Articles and cBioPortal Data](../how-to-guides/01-find-articles-and-cbioportal-data.md).
 21 | 
 22 | ## API Workflow
 23 | 
 24 | The PubTator3 integration follows a three-step workflow:
 25 | 
 26 | 1. **Entity Autocomplete**: Get standardized entity identifiers
 27 | 2. **Search**: Find articles using entity identifiers and keywords
 28 | 3. **Fetch**: Retrieve full article details by PMID
 29 | 
 30 | ## API Endpoints
 31 | 
 32 | ### Entity Autocomplete API
 33 | 
 34 | **Endpoint:**
 35 | `https://www.ncbi.nlm.nih.gov/research/pubtator3-api/entity/autocomplete/`
 36 | 
 37 | This endpoint helps normalize entity names to their standard identifiers,
 38 | improving search precision.
 39 | 
 40 | #### Parameters
 41 | 
 42 | | Parameter | Description                 | Example                             |
 43 | | --------- | --------------------------- | ----------------------------------- |
 44 | | `query`   | Text to autocomplete        | `BRAF`                              |
 45 | | `concept` | Entity type                 | `GENE`, `CHEMICAL`, `DISEASE`, etc. |
 46 | | `limit`   | Number of results to return | `2`                                 |
 47 | 
 48 | #### Example Request and Response
 49 | 
 50 | ```bash
 51 | curl "https://www.ncbi.nlm.nih.gov/research/pubtator3-api/entity/autocomplete/?query=BRAF&concept=GENE&limit=2"
 52 | ```
 53 | 
 54 | Response:
 55 | 
 56 | ```json
 57 | [
 58 |   {
 59 |     "_id": "@GENE_BRAF",
 60 |     "biotype": "gene",
 61 |     "name": "BRAF",
 62 |     "description": "All Species",
 63 |     "match": "Matched on name <m>BRAF</m>"
 64 |   },
 65 |   {
 66 |     "_id": "@GENE_BRAFP1",
 67 |     "biotype": "gene",
 68 |     "name": "BRAFP1",
 69 |     "description": "All Species",
 70 |     "match": "Matched on name <m>BRAFP1</m>"
 71 |   }
 72 | ]
 73 | ```
 74 | 
 75 | ### Entity Search API
 76 | 
 77 | **Endpoint:** `https://www.ncbi.nlm.nih.gov/research/pubtator3-api/search/`
 78 | 
 79 | This endpoint allows searching for PMIDs (PubMed IDs) based on entity
 80 | identifiers and keywords.
 81 | 
 82 | #### Parameters
 83 | 
 84 | | Parameter | Description                     | Example                |
 85 | | --------- | ------------------------------- | ---------------------- |
 86 | | `text`    | Entity identifier or text query | `@CHEMICAL_remdesivir` |
 87 | 
 88 | #### Example Request and Response
 89 | 
 90 | ```bash
 91 | curl "https://www.ncbi.nlm.nih.gov/research/pubtator3-api/search/?text=@CHEMICAL_remdesivir"
 92 | ```
 93 | 
 94 | Response (truncated):
 95 | 
 96 | ```json
 97 | {
 98 |   "results": [
 99 |     {
100 |       "_id": "37711410",
101 |       "pmid": 37711410,
102 |       "title": "Remdesivir.",
103 |       "journal": "Hosp Pharm",
104 |       "authors": ["Levien TL", "Baker DE"],
105 |       "date": "2023-10-01T00:00:00Z",
106 |       "doi": "10.1177/0018578721999804",
107 |       "meta_date_publication": "2023 Oct",
108 |       "meta_volume": "58"
109 |     }
110 |     // More results...
111 |   ]
112 | }
113 | ```
114 | 
115 | ### Article Fetch API
116 | 
117 | **Endpoint:**
118 | `https://www.ncbi.nlm.nih.gov/research/pubtator3-api/publications/export/biocjson`
119 | 
120 | This endpoint retrieves detailed information about specific articles, including
121 | annotations.
122 | 
123 | #### Parameters
124 | 
125 | | Parameter   | Description                                   | Example    |
126 | | ----------- | --------------------------------------------- | ---------- |
127 | | `pmids`     | List of PubMed IDs to retrieve                | `29355051` |
128 | | `full`      | Whether to include full text (when available) | `true`     |
129 | 
130 | #### Example Request
131 | 
132 | ```bash
133 | curl "https://www.ncbi.nlm.nih.gov/research/pubtator3-api/publications/export/biocjson?pmids=29355051&full=true"
134 | ```
135 | 
136 | Response format (truncated):
137 | 
138 | ```json
139 | {
140 |   "PubTator3": [
141 |     {
142 |       "_id": "29355051|PMC6142073",
143 |       "id": "6142073",
144 |       "infons": {},
145 |       "passages": [
146 |         {
147 |           "infons": {
148 |             "name_3": "surname:Hu;given-names:Minghua",
149 |             "name_2": "surname:Luo;given-names:Xia",
150 |             "name_1": "surname:Luo;given-names:Shuang",
151 |             "article-id_pmid": "29355051"
152 |             // More metadata...
153 |           }
154 |         }
155 |         // More passages...
156 |       ]
157 |     }
158 |   ]
159 | }
160 | ```
161 | 
162 | ## Entity Types
163 | 
164 | PubTator3 annotates several types of biomedical entities:
165 | 
166 | 1. **Genes/Proteins**: Gene or protein names (e.g., BRAF, TP53)
167 | 2. **Genetic Variants**: Genetic variations (e.g., BRAF V600E)
168 | 3. **Diseases**: Disease names and conditions (e.g., Melanoma)
169 | 4. **Chemicals/Drugs**: Chemical substances or drugs (e.g., Vemurafenib)
170 | 
171 | ## Integration Strategy for BioMCP
172 | 
173 | The recommended workflow for integrating with PubTator3 in BioMCP is:
174 | 
175 | 1. **Entity Normalization**: Use the autocomplete API to convert user-provided
176 |    entity names to standardized identifiers
177 | 2. **Literature Search**: Use the search API with these identifiers to find
178 |    relevant PMIDs
179 | 3. **Data Retrieval**: Fetch detailed article data with annotations using the
180 |    fetch API
181 | 
182 | This workflow ensures consistent entity handling and optimal search results.
183 | 
184 | ## Authentication
185 | 
186 | The PubTator3 API is public and does not require authentication for basic
187 | usage. However, there are rate limits in place to prevent abuse.
188 | 
189 | ## Rate Limits and Best Practices
190 | 
191 | - **Request Limits**: Approximately 30 requests per minute
192 | - **Batch Requests**: For article retrieval, batch multiple PMIDs in a single
193 |   request
194 | - **Caching**: Implement caching to minimize repeated requests
195 | - **Specific Queries**: Use specific entity names rather than general terms for
196 |   better results
197 | 
198 | ## Error Handling
199 | 
200 | Common error responses:
201 | 
202 | - **400**: Invalid parameters
203 | - **404**: Articles not found
204 | - **429**: Rate limit exceeded
205 | - **500**: Server error
206 | 
207 | ## More Information
208 | 
209 | For complete API documentation, visit
210 | the [PubTator3 API Documentation](https://www.ncbi.nlm.nih.gov/research/pubtator3/api).
211 | 
```

--------------------------------------------------------------------------------
/docs/backend-services-reference/04-clinicaltrials-gov.md:
--------------------------------------------------------------------------------

```markdown
  1 | # ClinicalTrials.gov API
  2 | 
  3 | This document outlines the key aspects of the public ClinicalTrials.gov v2 API utilized by BioMCP. Understanding these details can be helpful for advanced users interpreting BioMCP results or for developers extending its capabilities. BioMCP's CLI commands often simplify or combine these parameters for ease of use; refer to the [Trials CLI Documentation](../user-guides/01-command-line-interface.md#trial-commands) for specific command options.
  4 | 
  5 | ## Overview
  6 | 
  7 | The [ClinicalTrials.gov](https://clinicaltrials.gov/) API provides programmatic
  8 | access to clinical trial information. This document outlines the API
  9 | implementation details for searching and retrieving clinical trial data.
 10 | 
 11 | > **CLI Documentation**: For information on using these APIs through the BioMCP
 12 | > command line interface, see the [Trials CLI Documentation](../user-guides/01-command-line-interface.md#trial-commands).
 13 | 
 14 | ## API Endpoints
 15 | 
 16 | ### Search API
 17 | 
 18 | **Endpoint:** `https://clinicaltrials.gov/api/v2/studies`
 19 | 
 20 | This endpoint allows searching for clinical trials using various parameters.
 21 | 
 22 | #### Key Parameters
 23 | 
 24 | | Parameter              | Description                         | Example Value                                   |
 25 | | ---------------------- | ----------------------------------- | ----------------------------------------------- |
 26 | | `query.cond`           | "Conditions or disease" query       | `lung cancer`                                   |
 27 | | `query.term`           | "Other terms" query                 | `AREA[LastUpdatePostDate]RANGE[2023-01-15,MAX]` |
 28 | | `query.intr`           | "Intervention/treatment" query      | `Vemurafenib`                                   |
 29 | | `query.locn`           | "Location terms" query              | `New York`                                      |
 30 | | `query.titles`         | "Title/acronym" query               | `BRAF Melanoma`                                 |
 31 | | `query.outc`           | "Outcome measure" query             | `overall survival`                              |
 32 | | `query.spons`          | "Sponsor/collaborator" query        | `National Cancer Institute`                     |
 33 | | `query.lead`           | Searches in "LeadSponsorName" field | `MD Anderson`                                   |
 34 | | `query.id`             | "Study IDs" query (OR semantics)    | `NCT04267848`                                   |
 35 | | `filter.overallStatus` | Comma-separated list of statuses    | `NOT_YET_RECRUITING,RECRUITING`                 |
 36 | | `filter.geo`           | Geo-location filter                 | `distance(39.0035707,-77.1013313,50mi)`         |
 37 | | `filter.ids`           | Filter by NCT IDs (AND semantics)   | `NCT04852770,NCT01728545`                       |
 38 | | `filter.advanced`      | Advanced filter query               | `AREA[StartDate]2022`                           |
 39 | | `sort`                 | Sort order                          | `LastUpdatePostDate:desc`                       |
 40 | | `fields`               | Fields to return                    | `NCTId,BriefTitle,OverallStatus,HasResults`     |
 41 | | `countTotal`           | Count total number of studies       | `true` or `false`                               |
 42 | 
 43 | 
 44 | #### Example Request
 45 | 
 46 | ```bash
 47 | curl -X GET "https://clinicaltrials.gov/api/v2/studies?query.cond=Melanoma&query.intr=BRAF"
 48 | ```
 49 | 
 50 | ### Study Details API
 51 | 
 52 | **Endpoint:** `https://clinicaltrials.gov/api/v2/studies/{NCT_ID}`
 53 | 
 54 | This endpoint retrieves detailed information about a specific clinical trial.
 55 | 
 56 | #### Example Request
 57 | 
 58 | ```bash
 59 | curl -X GET "https://clinicaltrials.gov/api/v2/studies/NCT04267848"
 60 | ```
 61 | 
 62 | #### Response Modules
 63 | 
 64 | The API response contains various modules of information:
 65 | 
 66 | - **protocolSection**: Basic study information, eligibility criteria, and
 67 |   design
 68 | - **resultsSection**: Study outcomes and results (when available)
 69 | - **documentSection**: Related documents
 70 | - **derivedSection**: Derived data elements
 71 | - **annotationSection**: Additional annotations
 72 | 
 73 | ## Implementation Details
 74 | 
 75 | ### NCT ID Filtering Semantics
 76 | 
 77 | BioMCP uses intelligent filtering when NCT IDs are provided:
 78 | 
 79 | - **ID-only mode**: When NCT IDs are the only filter criteria, `query.id` is used for fast direct lookup
 80 | - **Intersection mode**: When NCT IDs are combined with other filters (conditions, interventions, etc.), `filter.ids` is used to ensure results match ALL criteria
 81 | 
 82 | This ensures that specifying NCT IDs restricts results rather than expanding them.
 83 | 
 84 | ### Query Building
 85 | 
 86 | When constructing API queries, parameters must be properly formatted according to the API documentation.
 87 | 
 88 | For implementation details on query building in BioMCP, see the [HTTP Client Developer Guide](../developer-guides/06-http-client-and-caching.md).
 89 | 
 90 | ### Response Parsing
 91 | 
 92 | The API returns data in JSON format (or CSV if specified). Key sections in the
 93 | response include:
 94 | 
 95 | - `protocolSection`: Contains study protocol details
 96 |   - `identificationModule`: Basic identifiers including NCT ID and title
 97 |   - `statusModule`: Current recruitment status and study dates
 98 |   - `sponsorCollaboratorsModule`: Information about sponsors and
 99 |     collaborators
100 |   - `designModule`: Study design information (interventions are described in `armsInterventionsModule`)
101 |   - `eligibilityModule`: Inclusion/exclusion criteria and eligible population
102 |   - `contactsLocationsModule`: Study sites and contact information
103 |   - `referencesModule`: Related publications
104 | 
105 | ### Error Handling
106 | 
107 | The API returns standard HTTP status codes. Common error scenarios include:
108 | 
109 | - **404**: Trial not found
110 | - **429**: Rate limit exceeded
111 | - **400**: Invalid query parameters
112 | 
113 | For implementation details on error handling in BioMCP, see the [Error Handling Developer Guide](../developer-guides/05-error-handling.md).
114 | 
115 | ## Authentication
116 | 
117 | The ClinicalTrials.gov API is public and does not require authentication for
118 | basic usage. However, there are rate limits in place.
119 | 
120 | ## Rate Limits and Best Practices
121 | 
122 | - **Rate Limit**: Approximately 50 requests per minute per IP address
123 | - **Caching**: Implement caching to minimize repeated requests
124 | - **Pagination**: For large result sets, use the pagination functionality with
125 |   the `pageSize` and `pageToken` parameters
126 | - **Focused Queries**: Use specific search terms rather than broad queries to
127 |   get more relevant results
128 | - **Field Selection**: Use the fields parameter to request only the data you
129 |   need
130 | 
131 | ## More Information
132 | 
133 | For complete API documentation, visit
134 | the [ClinicalTrials.gov API Documentation](https://clinicaltrials.gov/data-api/about-api).
135 | 
```

--------------------------------------------------------------------------------
/docs/how-to-guides/05-logging-and-monitoring-with-bigquery.md:
--------------------------------------------------------------------------------

```markdown
  1 | # BigQuery Logging for BioMCP
  2 | 
  3 | This document outlines how BioMCP uses Google BigQuery for logging user interactions and API usage.
  4 | 
  5 | ## Overview
  6 | 
  7 | BioMCP integrates with Google BigQuery to log user interactions, queries, and API usage. This logging provides valuable insights into how the system is being used, helps with debugging, and enables analytics for improving the service.
  8 | 
  9 | ## Prerequisites
 10 | 
 11 | - A Google Cloud Platform (GCP) account
 12 | - A BigQuery dataset and table created in your GCP project
 13 | - A GCP service account with BigQuery permissions
 14 | 
 15 | ## Setting Up BigQuery for BioMCP
 16 | 
 17 | 1. **Create a BigQuery Dataset and Table**
 18 | 
 19 |    - In the Google Cloud Console, navigate to BigQuery
 20 |    - Create a new dataset (e.g., `biomcp_logs`)
 21 |    - Create a table within the dataset (e.g., `worker_logs`) with the following schema:
 22 |      ```
 23 |      timestamp: TIMESTAMP
 24 |      userEmail: STRING
 25 |      query: STRING
 26 |      ```
 27 |    - Adjust the schema as needed for your specific logging requirements
 28 | 
 29 | 2. **Create a Service Account**
 30 | 
 31 |    - Navigate to "IAM & Admin" > "Service Accounts" in the Google Cloud Console
 32 |    - Create a new service account with a descriptive name (e.g., `biomcp-bigquery-logger`)
 33 |    - Assign the "BigQuery Data Editor" role to the service account
 34 |    - Create and download a JSON key for the service account
 35 | 
 36 | 3. **Configure BioMCP with BigQuery Credentials**
 37 | 
 38 |    - Open `wrangler.toml` in the BioMCP project
 39 |    - Update the following variables with your BigQuery information:
 40 |      ```toml
 41 |      BQ_PROJECT_ID = "your-gcp-project-id"
 42 |      BQ_DATASET = "biomcp_logs"
 43 |      BQ_TABLE = "worker_logs"
 44 |      ```
 45 |    - For the service account key, use Cloudflare's secret management:
 46 |      ```bash
 47 |      npx wrangler secret put BQ_SA_KEY_JSON
 48 |      ```
 49 |      When prompted, paste the entire JSON content of your service account key file
 50 | 
 51 | ## How BigQuery Logging Works
 52 | 
 53 | The BioMCP worker uses the following process to log data to BigQuery:
 54 | 
 55 | 1. **Authentication**: The worker generates a JWT token using the service account credentials
 56 | 2. **Token Exchange**: The JWT is exchanged for a Google OAuth access token
 57 | 3. **Data Insertion**: The worker uses BigQuery's streaming insert API to log events
 58 | 
 59 | The implementation includes:
 60 | 
 61 | - Token caching to minimize authentication requests
 62 | - Error handling for failed logging attempts
 63 | - Automatic retry logic for transient failures
 64 | 
 65 | ## Logged Information
 66 | 
 67 | By default, the following information is logged to BigQuery:
 68 | 
 69 | - **timestamp**: When the event occurred
 70 | - **userEmail**: The email address of the authenticated user (if available)
 71 | - **query**: The query or request that was made
 72 | 
 73 | You can extend the logging schema to include additional information as needed.
 74 | 
 75 | ## Accessing and Analyzing Logs
 76 | 
 77 | To access and analyze the logs:
 78 | 
 79 | 1. **Query the BigQuery Table**
 80 | 
 81 |    - Use the BigQuery console or SQL to query your logs
 82 |    - Example query to see recent logs:
 83 |      ```sql
 84 |      SELECT timestamp, userEmail, query
 85 |      FROM `your-project.biomcp_logs.worker_logs`
 86 |      ORDER BY timestamp DESC
 87 |      LIMIT 100
 88 |      ```
 89 | 
 90 | 2. **Create Visualizations**
 91 | 
 92 |    - Use Looker Studio (formerly Google Data Studio) to create dashboards based on your BigQuery data
 93 |    - Connect Looker Studio to your BigQuery table and create visualizations
 94 | 
 95 | ## Security Considerations
 96 | 
 97 | - The service account key is sensitive information and should be protected
 98 | - Use Cloudflare's secret management to store the key securely
 99 | - Consider implementing field-level encryption for sensitive data
100 | - Implement data retention policies to comply with privacy regulations
101 | - **IMPORTANT: Never include PHI (Protected Health Information) or PII (Personally Identifiable Information) in queries or logs**
102 |   - Ensure all queries are sanitized to remove patient identifiers, medical record numbers, and other sensitive information
103 |   - Consider implementing automatic redaction of potential PHI/PII from logs
104 |   - Regularly audit logs to ensure compliance with HIPAA and other privacy regulations
105 |   - Remember that BigQuery logs are not designed for storing protected health information
106 | 
107 | ### Automatic Sanitization
108 | 
109 | BioMCP automatically sanitizes sensitive data before logging to BigQuery:
110 | 
111 | - **API Keys and Secrets**: Fields containing `api_key`, `apiKey`, `api-key`, `token`, `secret`, or `password` are automatically redacted
112 | - **Nested Objects**: Sanitization works recursively through nested objects and arrays
113 | - **Case-Insensitive**: Field name matching is case-insensitive to catch variations
114 | - **Preserved Structure**: The original request structure is maintained with sensitive values replaced by `[REDACTED]`
115 | 
116 | Example of sanitization:
117 | 
118 | ```javascript
119 | // Original request
120 | {
121 |   "params": {
122 |     "arguments": {
123 |       "api_key": "AIzaSyB1234567890",
124 |       "gene": "BRAF"
125 |     }
126 |   }
127 | }
128 | 
129 | // Sanitized for BigQuery
130 | {
131 |   "params": {
132 |     "arguments": {
133 |       "api_key": "[REDACTED]",
134 |       "gene": "BRAF"
135 |     }
136 |   }
137 | }
138 | ```
139 | 
140 | ### Excluded Queries
141 | 
142 | Certain types of queries are automatically excluded from BigQuery logging:
143 | 
144 | - **Think Tool Calls**: Any calls to the `think` tool are not logged
145 | - **Thinking Domain**: Queries with `domain="thinking"` or `domain="think"` are excluded
146 | - **Privacy-First Design**: This ensures that internal reasoning and analysis steps remain private
147 | 
148 | ## Troubleshooting
149 | 
150 | - **Authentication Failures**: Verify that the service account key is correctly formatted and has the necessary permissions
151 | - **Insertion Errors**: Check that the BigQuery table schema matches the data being inserted
152 | - **Missing Logs**: Ensure that the worker has network access to the BigQuery API
153 | 
154 | ## Example Code
155 | 
156 | The worker includes the following key functions for BigQuery logging:
157 | 
158 | - `getBQToken()`: Fetches and caches a BigQuery OAuth token
159 | - `insertEvent()`: Inserts a single row into BigQuery via streaming insert
160 | - `sanitizeObject()`: Recursively sanitizes sensitive fields from objects before logging
161 | 
162 | These functions handle the authentication and data insertion process automatically.
163 | 
164 | ## Testing
165 | 
166 | BioMCP includes comprehensive tests for the BigQuery logging functionality:
167 | 
168 | ### JavaScript Tests
169 | 
170 | The sanitization logic is tested using the Node.js built-in test runner:
171 | 
172 | ```bash
173 | # Run JavaScript worker tests
174 | make test-js
175 | 
176 | # Or run directly
177 | node --test tests/tdd/workers/test_worker_sanitization.js
178 | ```
179 | 
180 | Tests cover:
181 | 
182 | - API key redaction
183 | - Nested sensitive field handling
184 | - Array sanitization
185 | - Case-insensitive field matching
186 | - Think tool detection
187 | - Domain-based filtering
188 | 
```

--------------------------------------------------------------------------------
/src/biomcp/organizations/search.py:
--------------------------------------------------------------------------------

```python
  1 | """Search functionality for organizations via NCI CTS API."""
  2 | 
  3 | import logging
  4 | from typing import Any
  5 | 
  6 | from ..constants import NCI_ORGANIZATIONS_URL
  7 | from ..integrations.cts_api import CTSAPIError, make_cts_request
  8 | from ..utils import parse_or_query
  9 | 
 10 | logger = logging.getLogger(__name__)
 11 | 
 12 | 
 13 | async def search_organizations(
 14 |     name: str | None = None,
 15 |     org_type: str | None = None,
 16 |     city: str | None = None,
 17 |     state: str | None = None,
 18 |     page_size: int = 20,
 19 |     page: int = 1,
 20 |     api_key: str | None = None,
 21 | ) -> dict[str, Any]:
 22 |     """
 23 |     Search for organizations in the NCI CTS database.
 24 | 
 25 |     Args:
 26 |         name: Organization name to search for (partial match)
 27 |         org_type: Type of organization (e.g., "industry", "academic")
 28 |         city: City location
 29 |         state: State location (2-letter code)
 30 |         page_size: Number of results per page
 31 |         page: Page number
 32 |         api_key: Optional API key (if not provided, uses NCI_API_KEY env var)
 33 | 
 34 |     Returns:
 35 |         Dictionary with search results containing:
 36 |         - organizations: List of organization records
 37 |         - total: Total number of results
 38 |         - page: Current page
 39 |         - page_size: Results per page
 40 | 
 41 |     Raises:
 42 |         CTSAPIError: If the API request fails
 43 |     """
 44 |     # Build query parameters
 45 |     params: dict[str, Any] = {
 46 |         "size": page_size,
 47 |     }
 48 | 
 49 |     # Note: The NCI API doesn't support offset/page pagination for organizations
 50 |     # It uses cursor-based pagination or returns all results up to size limit
 51 | 
 52 |     # Add search filters with correct API parameter names
 53 |     if name:
 54 |         params["name"] = name
 55 |     if org_type:
 56 |         params["type"] = org_type
 57 |     if city:
 58 |         params["org_city"] = city
 59 |     if state:
 60 |         params["org_state_or_province"] = state
 61 | 
 62 |     try:
 63 |         # Make API request
 64 |         response = await make_cts_request(
 65 |             url=NCI_ORGANIZATIONS_URL,
 66 |             params=params,
 67 |             api_key=api_key,
 68 |         )
 69 | 
 70 |         # Process response - adapt to actual API format
 71 |         # This is a reasonable structure based on typical REST APIs
 72 |         organizations = response.get("data", response.get("organizations", []))
 73 |         total = response.get("total", len(organizations))
 74 | 
 75 |         return {
 76 |             "organizations": organizations,
 77 |             "total": total,
 78 |             "page": page,
 79 |             "page_size": page_size,
 80 |         }
 81 | 
 82 |     except CTSAPIError:
 83 |         raise
 84 |     except Exception as e:
 85 |         logger.error(f"Failed to search organizations: {e}")
 86 |         raise CTSAPIError(f"Organization search failed: {e!s}") from e
 87 | 
 88 | 
 89 | def format_organization_results(results: dict[str, Any]) -> str:
 90 |     """
 91 |     Format organization search results as markdown.
 92 | 
 93 |     Args:
 94 |         results: Search results dictionary
 95 | 
 96 |     Returns:
 97 |         Formatted markdown string
 98 |     """
 99 |     organizations = results.get("organizations", [])
100 |     total = results.get("total", 0)
101 | 
102 |     if not organizations:
103 |         return "No organizations found matching the search criteria."
104 | 
105 |     # Build markdown output
106 |     lines = [
107 |         f"## Organization Search Results ({total} found)",
108 |         "",
109 |     ]
110 | 
111 |     for org in organizations:
112 |         org_id = org.get("id", org.get("org_id", "Unknown"))
113 |         name = org.get("name", "Unknown Organization")
114 |         org_type = org.get("type", org.get("category", "Unknown"))
115 |         city = org.get("city", "")
116 |         state = org.get("state", "")
117 | 
118 |         lines.append(f"### {name}")
119 |         lines.append(f"- **ID**: {org_id}")
120 |         lines.append(f"- **Type**: {org_type}")
121 | 
122 |         if city or state:
123 |             location_parts = [p for p in [city, state] if p]
124 |             lines.append(f"- **Location**: {', '.join(location_parts)}")
125 | 
126 |         lines.append("")
127 | 
128 |     return "\n".join(lines)
129 | 
130 | 
131 | async def search_organizations_with_or(
132 |     name_query: str,
133 |     org_type: str | None = None,
134 |     city: str | None = None,
135 |     state: str | None = None,
136 |     page_size: int = 20,
137 |     page: int = 1,
138 |     api_key: str | None = None,
139 | ) -> dict[str, Any]:
140 |     """
141 |     Search for organizations with OR query support.
142 | 
143 |     This function handles OR queries by making multiple API calls and combining results.
144 |     For example: "MD Anderson OR Mayo Clinic" will search for each term.
145 | 
146 |     Args:
147 |         name_query: Name query that may contain OR operators
148 |         Other args same as search_organizations
149 | 
150 |     Returns:
151 |         Combined results from all searches with duplicates removed
152 |     """
153 |     # Check if this is an OR query
154 |     if " OR " in name_query or " or " in name_query:
155 |         search_terms = parse_or_query(name_query)
156 |         logger.info(f"Parsed OR query into terms: {search_terms}")
157 |     else:
158 |         # Single term search
159 |         search_terms = [name_query]
160 | 
161 |     # Collect all unique organizations
162 |     all_organizations = {}
163 |     total_found = 0
164 | 
165 |     # Search for each term
166 |     for term in search_terms:
167 |         logger.info(f"Searching organizations for term: {term}")
168 |         try:
169 |             results = await search_organizations(
170 |                 name=term,
171 |                 org_type=org_type,
172 |                 city=city,
173 |                 state=state,
174 |                 page_size=page_size,
175 |                 page=page,
176 |                 api_key=api_key,
177 |             )
178 | 
179 |             # Add unique organizations (deduplicate by ID)
180 |             for org in results.get("organizations", []):
181 |                 org_id = org.get("id", org.get("org_id"))
182 |                 if org_id and org_id not in all_organizations:
183 |                     all_organizations[org_id] = org
184 | 
185 |             total_found += results.get("total", 0)
186 | 
187 |         except Exception as e:
188 |             logger.warning(f"Failed to search for term '{term}': {e}")
189 |             # Continue with other terms
190 | 
191 |     # Convert back to list and apply pagination
192 |     unique_organizations = list(all_organizations.values())
193 | 
194 |     # Sort by name for consistent results
195 |     unique_organizations.sort(key=lambda x: x.get("name", "").lower())
196 | 
197 |     # Apply pagination to combined results
198 |     start_idx = (page - 1) * page_size
199 |     end_idx = start_idx + page_size
200 |     paginated_organizations = unique_organizations[start_idx:end_idx]
201 | 
202 |     return {
203 |         "organizations": paginated_organizations,
204 |         "total": len(unique_organizations),
205 |         "page": page,
206 |         "page_size": page_size,
207 |         "search_terms": search_terms,  # Include what we searched for
208 |         "total_found_across_terms": total_found,  # Total before deduplication
209 |     }
210 | 
```

--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------

```yaml
  1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml
  2 | 
  3 | startCommand:
  4 |   type: stdio
  5 |   configSchema:
  6 |     # JSON Schema defining the configuration options for the MCP.
  7 |     type: object
  8 |     properties: {}
  9 |   commandFunction:
 10 |     # A JS function that produces the CLI command based on the given config to start the MCP on stdio.
 11 |     |-
 12 |     (config) => ({ command: 'biomcp', args: ['run'], env: {} })
 13 |   exampleConfig: {}
 14 | 
 15 | schemas:
 16 |   TrialQuery:
 17 |     type: object
 18 |     properties:
 19 |       conditions:
 20 |         type: array
 21 |         items:
 22 |           type: string
 23 |         description: "List of condition terms."
 24 |       terms:
 25 |         type: array
 26 |         items:
 27 |           type: string
 28 |         description: "General search terms that don't fit specific categories."
 29 |       interventions:
 30 |         type: array
 31 |         items:
 32 |           type: string
 33 |         description: "Intervention names."
 34 |       recruiting_status:
 35 |         type: string
 36 |         description: "Study recruitment status."
 37 |       study_type:
 38 |         type: string
 39 |         description: "Type of study."
 40 |       nct_ids:
 41 |         type: array
 42 |         items:
 43 |           type: string
 44 |         description: "Clinical trial NCT IDs"
 45 |       lat:
 46 |         type: number
 47 |         description: "Latitude for location search"
 48 |       long:
 49 |         type: number
 50 |         description: "Longitude for location search"
 51 |       distance:
 52 |         type: integer
 53 |         description: "Distance from lat/long in miles"
 54 |       min_date:
 55 |         type: string
 56 |         description: "Minimum date for filtering"
 57 |       max_date:
 58 |         type: string
 59 |         description: "Maximum date for filtering"
 60 |       date_field:
 61 |         type: string
 62 |         description: "Date field to filter on"
 63 |       phase:
 64 |         type: string
 65 |         description: "Trial phase filter"
 66 |       age_group:
 67 |         type: string
 68 |         description: "Age group filter"
 69 |       primary_purpose:
 70 |         type: string
 71 |         description: "Primary purpose of the trial"
 72 |       intervention_type:
 73 |         type: string
 74 |         description: "Type of intervention"
 75 |       sponsor_type:
 76 |         type: string
 77 |         description: "Type of sponsor"
 78 |       study_design:
 79 |         type: string
 80 |         description: "Study design"
 81 |       sort:
 82 |         type: string
 83 |         description: "Sort order for results"
 84 |       next_page_hash:
 85 |         type: string
 86 |         description: "Token to retrieve the next page of results"
 87 | 
 88 |   VariantQuery:
 89 |     type: object
 90 |     properties:
 91 |       gene:
 92 |         type: string
 93 |         description: "Gene symbol to search for (e.g. BRAF, TP53)"
 94 |       hgvsp:
 95 |         type: string
 96 |         description: "Protein change notation (e.g., p.V600E, p.Arg557His)"
 97 |       hgvsc:
 98 |         type: string
 99 |         description: "cDNA notation (e.g., c.1799T>A)"
100 |       rsid:
101 |         type: string
102 |         description: "dbSNP rsID (e.g., rs113488022)"
103 |       region:
104 |         type: string
105 |         description: "Genomic region as chr:start-end (e.g. chr1:12345-67890)"
106 |       significance:
107 |         type: string
108 |         description: "ClinVar clinical significance"
109 |       max_frequency:
110 |         type: number
111 |         description: "Maximum population allele frequency threshold"
112 |       min_frequency:
113 |         type: number
114 |         description: "Minimum population allele frequency threshold"
115 |       cadd:
116 |         type: number
117 |         description: "Minimum CADD phred score"
118 |       polyphen:
119 |         type: string
120 |         description: "PolyPhen-2 prediction"
121 |       sift:
122 |         type: string
123 |         description: "SIFT prediction"
124 |       sources:
125 |         type: array
126 |         items:
127 |           type: string
128 |         description: "Include only specific data sources"
129 |       size:
130 |         type: integer
131 |         description: "Number of results to return"
132 |         default: 40
133 |       offset:
134 |         type: integer
135 |         description: "Result offset for pagination"
136 |         default: 0
137 | 
138 |   PubmedRequest:
139 |     type: object
140 |     properties:
141 |       chemicals:
142 |         type: array
143 |         items:
144 |           type: string
145 |         description: "List of chemicals for filtering results."
146 |       diseases:
147 |         type: array
148 |         items:
149 |           type: string
150 |         description: "Diseases such as Hypertension, Lung Adenocarcinoma, etc."
151 |       genes:
152 |         type: array
153 |         items:
154 |           type: string
155 |         description: "List of genes for filtering results."
156 |       keywords:
157 |         type: array
158 |         items:
159 |           type: string
160 |         description: "List of other keywords for filtering results."
161 |       variants:
162 |         type: array
163 |         items:
164 |           type: string
165 |         description: "List of variants for filtering results."
166 | 
167 | tools:
168 |   trial_searcher:
169 |     input:
170 |       schema:
171 |         type: object
172 |         properties:
173 |           query:
174 |             $ref: "#/schemas/TrialQuery"
175 |         required: ["query"]
176 | 
177 |   variant_searcher:
178 |     input:
179 |       schema:
180 |         type: object
181 |         properties:
182 |           query:
183 |             $ref: "#/schemas/VariantQuery"
184 |         required: ["query"]
185 | 
186 |   article_searcher:
187 |     input:
188 |       schema:
189 |         type: object
190 |         properties:
191 |           query:
192 |             $ref: "#/schemas/PubmedRequest"
193 |         required: ["query"]
194 | 
195 |   # Simple string parameter functions
196 |   trial_protocol:
197 |     input:
198 |       schema:
199 |         type: object
200 |         properties:
201 |           nct_id:
202 |             type: string
203 |             description: "A single NCT ID (e.g., NCT04280705)"
204 |         required: ["nct_id"]
205 | 
206 |   trial_locations:
207 |     input:
208 |       schema:
209 |         type: object
210 |         properties:
211 |           nct_id:
212 |             type: string
213 |             description: "A single NCT ID (e.g., NCT04280705)"
214 |         required: ["nct_id"]
215 | 
216 |   trial_outcomes:
217 |     input:
218 |       schema:
219 |         type: object
220 |         properties:
221 |           nct_id:
222 |             type: string
223 |             description: "A single NCT ID (e.g., NCT04280705)"
224 |         required: ["nct_id"]
225 | 
226 |   trial_references:
227 |     input:
228 |       schema:
229 |         type: object
230 |         properties:
231 |           nct_id:
232 |             type: string
233 |             description: "A single NCT ID (e.g., NCT04280705)"
234 |         required: ["nct_id"]
235 | 
236 |   article_details:
237 |     input:
238 |       schema:
239 |         type: object
240 |         properties:
241 |           pmid:
242 |             type: string
243 |             description: "A single PubMed ID (e.g., 34397683)"
244 |         required: ["pmid"]
245 | 
246 |   variant_details:
247 |     input:
248 |       schema:
249 |         type: object
250 |         properties:
251 |           variant_id:
252 |             type: string
253 |             description: "A variant identifier (e.g., chr7:g.140453136A>T)"
254 |         required: ["variant_id"]
255 | 
```

--------------------------------------------------------------------------------
/tests/tdd/openfda/test_adverse_events.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Unit tests for OpenFDA adverse events integration.
  3 | """
  4 | 
  5 | from unittest.mock import patch
  6 | 
  7 | import pytest
  8 | 
  9 | from biomcp.openfda.adverse_events import (
 10 |     get_adverse_event,
 11 |     search_adverse_events,
 12 | )
 13 | 
 14 | 
 15 | @pytest.mark.asyncio
 16 | async def test_search_adverse_events_by_drug():
 17 |     """Test searching adverse events by drug name."""
 18 |     mock_response = {
 19 |         "meta": {"results": {"total": 100}},
 20 |         "results": [
 21 |             {
 22 |                 "patient": {
 23 |                     "drug": [
 24 |                         {
 25 |                             "medicinalproduct": "IMATINIB",
 26 |                             "openfda": {
 27 |                                 "brand_name": ["GLEEVEC"],
 28 |                                 "generic_name": ["IMATINIB MESYLATE"],
 29 |                             },
 30 |                         }
 31 |                     ],
 32 |                     "reaction": [
 33 |                         {"reactionmeddrapt": "NAUSEA"},
 34 |                         {"reactionmeddrapt": "FATIGUE"},
 35 |                     ],
 36 |                     "patientonsetage": "45",
 37 |                     "patientsex": 2,
 38 |                 },
 39 |                 "serious": "1",
 40 |                 "seriousnesshospitalization": "1",
 41 |                 "receivedate": "20240115",
 42 |             }
 43 |         ],
 44 |     }
 45 | 
 46 |     with patch(
 47 |         "biomcp.openfda.adverse_events.make_openfda_request"
 48 |     ) as mock_request:
 49 |         mock_request.return_value = (mock_response, None)
 50 | 
 51 |         result = await search_adverse_events(drug="imatinib", limit=10)
 52 | 
 53 |         # Verify the request was made correctly
 54 |         mock_request.assert_called_once()
 55 |         call_args = mock_request.call_args
 56 |         assert "imatinib" in call_args[0][1]["search"].lower()
 57 | 
 58 |         # Check the output contains expected information
 59 |         assert "FDA Adverse Event Reports" in result
 60 |         assert "imatinib" in result.lower()
 61 |         assert "NAUSEA" in result
 62 |         assert "FATIGUE" in result
 63 |         assert "100 reports" in result
 64 | 
 65 | 
 66 | @pytest.mark.asyncio
 67 | async def test_search_adverse_events_by_reaction():
 68 |     """Test searching adverse events by reaction."""
 69 |     mock_response = {
 70 |         "meta": {"results": {"total": 50}},
 71 |         "results": [
 72 |             {
 73 |                 "patient": {
 74 |                     "drug": [{"medicinalproduct": "ASPIRIN"}],
 75 |                     "reaction": [{"reactionmeddrapt": "HEADACHE"}],
 76 |                 },
 77 |                 "serious": "0",
 78 |                 "receivedate": "20240201",
 79 |             }
 80 |         ],
 81 |     }
 82 | 
 83 |     with patch(
 84 |         "biomcp.openfda.adverse_events.make_openfda_request"
 85 |     ) as mock_request:
 86 |         mock_request.return_value = (mock_response, None)
 87 | 
 88 |         result = await search_adverse_events(reaction="headache", limit=10)
 89 | 
 90 |         # Verify the request
 91 |         mock_request.assert_called_once()
 92 |         call_args = mock_request.call_args
 93 |         assert "headache" in call_args[0][1]["search"].lower()
 94 | 
 95 |         # Check output
 96 |         assert "HEADACHE" in result
 97 |         assert "50 reports" in result
 98 | 
 99 | 
@pytest.mark.asyncio
async def test_search_adverse_events_no_params():
    """Calling the search without any filter should return usage guidance."""
    guidance = await search_adverse_events()

    expected_fragments = (
        "Please specify",
        "drug name or reaction",
        "Examples:",
    )
    for fragment in expected_fragments:
        assert fragment in guidance
108 | 
109 | 
@pytest.mark.asyncio
async def test_search_adverse_events_no_results():
    """An empty result list should produce a 'nothing found' message."""
    empty_payload = {"results": []}

    # Replace the request helper so it reports zero matches.
    with patch(
        "biomcp.openfda.adverse_events.make_openfda_request",
        return_value=(empty_payload, None),
    ):
        message = await search_adverse_events(drug="nonexistentdrug")

        # The message should say nothing was found and echo the drug name.
        for expected in ("No adverse event reports found", "nonexistentdrug"):
            assert expected in message
122 | 
123 | 
@pytest.mark.asyncio
async def test_search_adverse_events_error():
    """An error from the request helper should be surfaced in the output."""
    failure = "API rate limit exceeded"

    # The helper returns (data, error); simulate the error branch.
    with patch(
        "biomcp.openfda.adverse_events.make_openfda_request",
        return_value=(None, failure),
    ):
        message = await search_adverse_events(drug="aspirin")

        assert "Error searching adverse events" in message
        assert failure in message
136 | 
137 | 
@pytest.mark.asyncio
async def test_get_adverse_event_detail():
    """Fetching one report by ID renders patient, drug and reaction detail."""
    drug_entry = {
        "medicinalproduct": "DRUG A",
        "drugindication": "HYPERTENSION",
        "drugdosagetext": "100mg daily",
        "drugadministrationroute": "048",
        "actiondrug": 4,
    }
    reaction_entry = {"reactionmeddrapt": "DIZZINESS", "reactionoutcome": 1}
    report = {
        "safetyreportid": "12345678",
        "patient": {
            "patientonsetage": "55",
            "patientsex": 1,
            "patientweight": "75",
            "drug": [drug_entry],
            "reaction": [reaction_entry],
        },
        "serious": "1",
        "seriousnesshospitalization": "1",
        "receivedate": "20240115",
        "reporttype": 1,
    }

    # Replace the request helper so it returns the single canned report.
    with patch(
        "biomcp.openfda.adverse_events.make_openfda_request",
        return_value=({"results": [report]}, None),
    ) as fake_request:
        detail = await get_adverse_event("12345678")

        # One request, searching for the requested report ID.
        fake_request.assert_called_once()
        request_params = fake_request.call_args.args[1]
        assert "12345678" in request_params["search"]

        # Every piece of the canned report should show up in the text.
        expected_fragments = (
            "12345678",
            "Patient Information",
            "55 years",
            "Male",
            "75 kg",
            "DRUG A",
            "HYPERTENSION",
            "100mg daily",
            "DIZZINESS",
            "Recovered/Resolved",
        )
        for fragment in expected_fragments:
            assert fragment in detail
193 | 
194 | 
@pytest.mark.asyncio
async def test_get_adverse_event_not_found():
    """Looking up an unknown report ID should yield a 'not found' message."""
    missing_id = "NOTFOUND123"

    # Replace the request helper so it returns an empty result list.
    with patch(
        "biomcp.openfda.adverse_events.make_openfda_request",
        return_value=({"results": []}, None),
    ):
        message = await get_adverse_event(missing_id)

        assert missing_id in message
        assert "not found" in message
207 | 
```

--------------------------------------------------------------------------------
/src/biomcp/openfda/adverse_events_helpers.py:
--------------------------------------------------------------------------------

```python
  1 | """
  2 | Helper functions for OpenFDA adverse events to reduce complexity.
  3 | """
  4 | 
  5 | from collections import Counter
  6 | from typing import Any
  7 | 
  8 | from .utils import (
  9 |     extract_drug_names,
 10 |     extract_reactions,
 11 |     format_count,
 12 |     format_drug_list,
 13 | )
 14 | 
 15 | 
 16 | def format_search_summary(
 17 |     drug: str | None, reaction: str | None, serious: bool | None, total: int
 18 | ) -> list[str]:
 19 |     """Format the search summary section."""
 20 |     output = []
 21 | 
 22 |     # Add search criteria
 23 |     search_desc = []
 24 |     if drug:
 25 |         search_desc.append(f"**Drug**: {drug}")
 26 |     if reaction:
 27 |         search_desc.append(f"**Reaction**: {reaction}")
 28 |     if serious is not None:
 29 |         search_desc.append(f"**Serious Events**: {'Yes' if serious else 'No'}")
 30 | 
 31 |     if search_desc:
 32 |         output.append(" | ".join(search_desc))
 33 |     output.append(
 34 |         f"**Total Reports Found**: {format_count(total, 'report')}\n"
 35 |     )
 36 | 
 37 |     return output
 38 | 
 39 | 
 40 | def format_top_reactions(results: list[dict[str, Any]]) -> list[str]:
 41 |     """Format top reported reactions from search results."""
 42 |     output = []
 43 |     all_reactions = []
 44 | 
 45 |     for result in results:
 46 |         all_reactions.extend(extract_reactions(result))
 47 | 
 48 |     if all_reactions:
 49 |         reaction_counts = Counter(all_reactions)
 50 |         top_reactions = reaction_counts.most_common(10)
 51 | 
 52 |         output.append("### Top Reported Reactions:")
 53 |         for rxn, count in top_reactions:
 54 |             percentage = (count / len(results)) * 100
 55 |             output.append(f"- **{rxn}**: {count} reports ({percentage:.1f}%)")
 56 |         output.append("")
 57 | 
 58 |     return output
 59 | 
 60 | 
 61 | def format_report_summary(
 62 |     result: dict[str, Any], report_num: int
 63 | ) -> list[str]:
 64 |     """Format a single report summary."""
 65 |     output = [f"#### Report {report_num}"]
 66 | 
 67 |     # Extract key information
 68 |     drugs = extract_drug_names(result)
 69 |     reactions = extract_reactions(result)
 70 | 
 71 |     # Patient info
 72 |     patient = result.get("patient", {})
 73 |     age = patient.get("patientonsetage")
 74 |     sex_map = {0: "Unknown", 1: "Male", 2: "Female"}
 75 |     sex = sex_map.get(patient.get("patientsex"), "Unknown")
 76 | 
 77 |     # Serious outcomes
 78 |     serious_flag = result.get("serious", "0")
 79 |     outcomes = []
 80 |     for code in [
 81 |         "seriousnessdeath",
 82 |         "seriousnesslifethreatening",
 83 |         "seriousnesshospitalization",
 84 |         "seriousnessdisabling",
 85 |     ]:
 86 |         if result.get(code) == "1":
 87 |             outcomes.append(code.replace("seriousness", "").title())
 88 | 
 89 |     # Format output
 90 |     output.append(f"- **Drugs**: {format_drug_list(drugs)}")
 91 |     output.append(f"- **Reactions**: {', '.join(reactions[:5])}")
 92 |     if age:
 93 |         output.append(f"- **Patient**: {age} years, {sex}")
 94 |     if serious_flag == "1" and outcomes:
 95 |         output.append(f"- **Serious Outcome**: {', '.join(outcomes)}")
 96 | 
 97 |     # Dates
 98 |     receive_date = result.get("receivedate", "")
 99 |     if receive_date:
100 |         output.append(
101 |             f"- **Report Date**: {receive_date[:4]}-{receive_date[4:6]}-{receive_date[6:]}"
102 |         )
103 | 
104 |     output.append("")
105 |     return output
106 | 
107 | 
def format_drug_details(drugs: list[dict[str, Any]]) -> list[str]:
    """Format the per-drug section of a detailed adverse event report.

    Args:
        drugs: Drug records. The keys read here are ``medicinalproduct``,
            ``drugindication``, ``drugdosagetext``,
            ``drugadministrationroute`` and ``actiondrug``.

    Returns:
        Markdown lines: a section heading, a sub-heading and bullets for
        each drug, and a trailing empty string.
    """
    from .utils import clean_text

    # Built once, outside the loop, because it does not vary per drug.
    action_map = {
        1: "Drug withdrawn",
        2: "Dose reduced",
        3: "Dose increased",
        4: "Dose not changed",
        5: "Unknown",
        6: "Not applicable",
    }

    output = ["### Drug Information"]

    for i, drug in enumerate(drugs, 1):
        output.append(
            f"\n#### Drug {i}: {drug.get('medicinalproduct', 'Unknown')}"
        )

        if "drugindication" in drug:
            output.append(f"- **Indication**: {drug['drugindication']}")

        if "drugdosagetext" in drug:
            dosage = clean_text(drug["drugdosagetext"])
            output.append(f"- **Dosage**: {dosage}")

        if "drugadministrationroute" in drug:
            # The route is emitted as the raw value found in the record.
            output.append(f"- **Route**: {drug['drugadministrationroute']}")

        # The action code may arrive as a string ("4") or an integer (4).
        # Coerce it to int so both forms resolve; a missing or unparseable
        # code is reported as "Unknown".
        try:
            action_code = int(drug.get("actiondrug"))
        except (TypeError, ValueError):
            action_code = None
        action = action_map.get(action_code, "Unknown")
        output.append(f"- **Action Taken**: {action}")

    output.append("")
    return output
148 | 
149 | 
def format_reaction_details(reactions: list[dict[str, Any]]) -> list[str]:
    """Format the adverse reaction section of a detailed report.

    Args:
        reactions: Reaction records. The keys read here are
            ``reactionmeddrapt`` (the reaction name) and ``reactionoutcome``
            (a numeric outcome code, as a string or an integer).

    Returns:
        Markdown lines: a section heading, one ``- **name**: outcome`` bullet
        per reaction, and a trailing empty string.
    """
    # Built once, outside the loop, because it does not vary per reaction.
    outcome_map = {
        1: "Recovered/Resolved",
        2: "Recovering/Resolving",
        3: "Not recovered/Not resolved",
        4: "Recovered/Resolved with sequelae",
        5: "Fatal",
        6: "Unknown",
    }

    output = ["### Adverse Reactions"]

    for reaction in reactions:
        rxn_name = reaction.get("reactionmeddrapt", "Unknown")

        # The outcome code may arrive as a string ("1") or an integer (1).
        # Coerce it to int so both forms resolve; a missing or unparseable
        # code is reported as "Unknown".
        try:
            outcome_code = int(reaction.get("reactionoutcome"))
        except (TypeError, ValueError):
            outcome_code = None
        outcome = outcome_map.get(outcome_code, "Unknown")

        output.append(f"- **{rxn_name}**: {outcome}")

    output.append("")
    return output
174 | 
175 | 
def format_report_metadata(result: dict[str, Any]) -> list[str]:
    """Format the report-level metadata section of a detailed report.

    Args:
        result: One report record. The keys read here are ``receivedate``,
            ``reporttype``, ``serious`` and the ``seriousness*`` flags.

    Returns:
        Markdown lines: a section heading, an optional report date, the
        report type, and (for serious reports with at least one flagged
        outcome) a line listing those outcomes.
    """
    output = ["### Report Information"]

    # receivedate is sliced as YYYYMMDD into YYYY-MM-DD.
    receive_date = result.get("receivedate", "")
    if receive_date:
        formatted_date = (
            f"{receive_date[:4]}-{receive_date[4:6]}-{receive_date[6:]}"
        )
        output.append(f"- **Report Date**: {formatted_date}")

    report_type_map = {
        1: "Spontaneous",
        2: "Report from study",
        3: "Other",
        4: "Not available to sender",
    }
    # The type code may arrive as a string ("1") or an integer (1). Coerce
    # it to int so both forms resolve; a missing or unparseable code is
    # reported as "Unknown".
    try:
        report_type_code = int(result.get("reporttype"))
    except (TypeError, ValueError):
        report_type_code = None
    report_type = report_type_map.get(report_type_code, "Unknown")
    output.append(f"- **Report Type**: {report_type}")

    def flag_set(key: str) -> bool:
        """Return True when the flag is 1, given as a string or an integer."""
        return str(result.get(key)) == "1"

    # Seriousness: outcomes are listed only when the report is flagged
    # serious. Field names are used exactly as spelled in the record.
    if flag_set("serious"):
        seriousness_labels = (
            ("seriousnessdeath", "Death"),
            ("seriousnesslifethreatening", "Life-threatening"),
            ("seriousnesshospitalization", "Hospitalization"),
            ("seriousnessdisabling", "Disability"),
            ("seriousnesscongenitalanomali", "Congenital anomaly"),
            ("seriousnessother", "Other serious"),
        )
        outcomes = [
            label for key, label in seriousness_labels if flag_set(key)
        ]

        if outcomes:
            output.append(f"- **Serious Outcomes**: {', '.join(outcomes)}")

    return output
221 | 
```

--------------------------------------------------------------------------------
/docs/blog/researcher-persona-resource.md:
--------------------------------------------------------------------------------

```markdown
  1 | # BioMCP Deep Researcher Persona
  2 | 
  3 | With the release of BioMCP v0.1.2, users can now access a specialized
  4 | Researcher Persona that transforms Claude into a rigorous biomedical research
  5 | assistant using BioMCP's built-in sequential thinking capabilities.
  6 | 
  7 | This persona is designed to leverage BioMCP's suite of tools for accessing
  8 | PubMed articles, ClinicalTrials.gov data, and genomic variant information,
  9 | while incorporating Claude's web search capabilities to produce comprehensive,
 10 | thoroughly-researched reports.
 11 | 
 12 | ## How to Use the Researcher Persona
 13 | 
 14 | Getting started with the BioMCP Researcher Persona is straightforward:
 15 | 
 16 | 1. Configure Claude Desktop by updating your configuration JSON with:
 17 | 
 18 | ```json
 19 | {
 20 |   "mcpServers": {
 21 |     "biomcp": {
 22 |       "command": "uv",
 23 |       "args": ["run", "--with", "biomcp-python>=0.1.2", "biomcp", "run"]
 24 |     }
 25 |   }
 26 | }
 27 | ```
 28 | 
 29 | 2. Restart Claude Desktop (the `>=0.1.2` constraint ensures version 0.1.2 or later is installed, which includes the built-in think tool)
 30 | 
 31 | 3. Select the "Researcher" persona from the dropdown menu
 32 |    ![Select Researcher Persona](./images/researcher-drop-down.png)
 33 | 
 34 | 4. Ask your biomedical research question
 35 | 
 36 | The Researcher Persona will then work through its 10-step process, keeping you
 37 | updated on its progress and ultimately producing a comprehensive research
 38 | brief.
 39 | 
 40 | ## Video Demonstration
 41 | 
 42 | Below is a video demonstrating the Researcher Persona in action:
 43 | 
 44 | [![▶️ Watch the video](./images/deep-researcher-video.png)](https://youtu.be/tBGG53O-7Hg)
 45 | 
 46 | ## Sequential Thinking: A Rigorous 10-Step Research Process
 47 | 
 48 | What makes the Researcher Persona so powerful is its integration with BioMCP's
 49 | built-in 'think' tool, which guides the AI through a comprehensive
 50 | 10-step research methodology:
 51 | 
 52 | 1. **Topic Scoping & Domain Framework**: Creating a comprehensive structure to
 53 |    ensure complete coverage
 54 | 2. **Initial Information Gathering**: Establishing baseline terminology and
 55 |    recent developments
 56 | 3. **Focused & Frontier Retrieval**: Filling knowledge gaps and identifying
 57 |    cutting-edge developments
 58 | 4. **Primary Trials Analysis**: Identifying and analyzing key clinical trials
 59 | 5. **Primary Literature Analysis**: Identifying and analyzing pivotal
 60 |    publications
 61 | 6. **Initial Evidence Synthesis**: Creating a preliminary framework of findings
 62 | 7. **Integrated Gap-Filling**: Addressing identified knowledge gaps
 63 | 8. **Comprehensive Evidence Synthesis**: Creating a final integrated framework
 64 |    with quality assessment
 65 | 9. **Self-Critique and Verification**: Rigorously assessing the quality and
 66 |    comprehensiveness
 67 | 10. **Research Brief Creation**: Producing the final deliverable with all
 68 |     required elements
 69 | 
 70 | [![View Researcher Persona](./images/researcher-prompt.png)](https://github.com/genomoncology/biomcp/blob/main/src/biomcp/resources/researcher.md)
 71 | 
 72 | This structured approach ensures that no important aspects of the research
 73 | question are overlooked and that the final output is comprehensive,
 74 | well-organized, and backed by current evidence.
 75 | 
 76 | ## Put to the Test: Emerging Treatment Strategies for Head and Neck Cancer
 77 | 
 78 | To evaluate the effectiveness of the Researcher Persona, we conducted a
 79 | head-to-head comparison with other AI research approaches. We asked the same
 80 | question to five different systems: "What are the emerging treatment strategies
 81 | for head and neck cancer?"
 82 | 
 83 | The results were impressive. The BioMCP-powered Researcher Persona, combined
 84 | with Claude's web search capabilities and the built-in think tool,
 85 | produced the highest-rated research brief among all approaches tested.
 86 | 
 87 | [![Researcher Announcement](./images/researcher-announce.png)](https://github.com/genomoncology/biomcp-examples#researcher-announcement)
 88 | 
 89 | The research brief produced by the BioMCP Researcher Persona stood out for
 90 | several reasons:
 91 | 
 92 | 1. **Comprehensive domain coverage**: The report covered all relevant treatment
 93 |    modalities (immunotherapy, targeted therapy, radiation techniques, surgery,
 94 |    combination approaches)
 95 | 2. **Structured evidence categorization**: Findings were clearly organized by
 96 |    level of evidence (Established, Emerging, Experimental, Theoretical)
 97 | 3. **Evidence quality assessment**: The brief included critical evaluation of
 98 |    source quality and evidence strength
 99 | 4. **Thorough citation**: All claims were backed by specific references to
100 |    scientific literature or clinical trials
101 | 5. **Self-critique**: The report included transparent limitations and
102 |    identified areas requiring further research
103 | 
104 | ## Explore the Example and Evaluations
105 | 
106 | We've documented this comparison in detail in
107 | the [biomcp-examples repository](https://github.com/genomoncology/biomcp-examples),
108 | where you can find:
109 | 
110 | - The full research briefs produced by each approach
111 | - Independent evaluations by three different AI judges (Claude 3.7, Gemini 2.5
112 |   Pro, and OpenAI o3)
113 | - Detailed scoring against a rubric that prioritizes accuracy, clarity, and
114 |   comprehensiveness
115 | - Analysis of strengths and weaknesses of each approach
116 | 
117 | The consensus among the judges placed the BioMCP-powered brief at the top,
118 | highlighting its exceptional structure, evidence-based approach, and
119 | comprehensive coverage.
120 | 
121 | ## Beyond the Example: Wide-Ranging Applications
122 | 
123 | While our example focused on head and neck cancer treatments, the BioMCP
124 | Researcher Persona can tackle a wide range of biomedical research questions:
125 | 
126 | - **Therapeutic comparisons**: "Compare the efficacy and safety profiles of JAK
127 |   inhibitors versus biologics for treating rheumatoid arthritis"
128 | - **Disease mechanisms**: "What is the current understanding of gut microbiome
129 |   dysbiosis in inflammatory bowel disease?"
130 | - **Biomarker investigations**: "What emerging biomarkers show promise for
131 |   early detection of pancreatic cancer?"
132 | - **Treatment protocols**: "What are the latest guidelines for managing
133 |   anticoagulation in patients with atrial fibrillation and chronic kidney
134 |   disease?"
135 | 
136 | ## Join the BioMCP Community
137 | 
138 | The Researcher Persona is just one example of how BioMCP is transforming
139 | AI-assisted biomedical research. We invite you to:
140 | 
141 | 1. Try the Researcher Persona with your own research questions
142 | 2. Contribute to
143 |    the [biomcp-examples repository](https://github.com/genomoncology/biomcp-examples)
144 |    with your experiments
145 | 3. Share your feedback and suggestions for future improvements
146 | 
147 | By combining specialized biomedical data access with structured research
148 | methodologies, BioMCP is helping researchers produce more comprehensive,
149 | accurate, and useful biomedical research briefs than ever before.
150 | 
151 | Have a complex biomedical research question? Give the BioMCP Researcher Persona
152 | a try and experience the difference a structured, tool-powered approach can
153 | make!
154 | 
```

--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------

```yaml
  1 | site_name: BioMCP
  2 | repo_url: https://github.com/genomoncology/biomcp
  3 | site_url: https://biomcp.org/
  4 | site_description: Biomedical Model Context Protocol Server
  5 | site_author: Ian Maurer
  6 | edit_uri: edit/main/docs/
  7 | repo_name: genomoncology/biomcp
  8 | copyright: Maintained by <a href="https://genomoncology.com">genomoncology</a>.
  9 | 
 10 | nav:
 11 |   - Home: index.md
 12 | 
 13 |   - Getting Started:
 14 |       - Quick Start: getting-started/01-quickstart-cli.md
 15 |       - Claude Desktop: getting-started/02-claude-desktop-integration.md
 16 |       - API Keys: getting-started/03-authentication-and-api-keys.md
 17 |       - FAQ: faq-condensed.md
 18 |       - Troubleshooting: troubleshooting.md
 19 | 
 20 |   - User Guide:
 21 |       - Overview: concepts/01-what-is-biomcp.md
 22 |       - Finding Articles: how-to-guides/01-find-articles-and-cbioportal-data.md
 23 |       - Finding Trials: how-to-guides/02-find-trials-with-nci-and-biothings.md
 24 |       - Analyzing Variants: how-to-guides/03-get-comprehensive-variant-annotations.md
 25 |       - Predicting Effects: how-to-guides/04-predict-variant-effects-with-alphagenome.md
 26 |       - Searching Organizations: how-to-guides/06-search-nci-organizations-and-interventions.md
 27 |       - Research Workflows: workflows/all-workflows.md
 28 |       - Examples:
 29 |           - Pydantic AI Integration: tutorials/pydantic-ai-integration.md
 30 |           - Remote Connection: tutorials/remote-connection.md
 31 |           - BioThings Examples: tutorials/biothings-prompts.md
 32 |           - NCI Examples: tutorials/nci-prompts.md
 33 |           - AlphaGenome Tutorial: tutorials/claude-code-biomcp-alphagenome.md
 34 |           - OpenFDA Examples: tutorials/openfda-prompts.md
 35 |       - Concepts:
 36 |           - Deep Researcher: concepts/02-the-deep-researcher-persona.md
 37 |           - Sequential Thinking: concepts/03-sequential-thinking-with-the-think-tool.md
 38 | 
 39 |   - Reference:
 40 |       - Quick Reference: reference/quick-reference.md
 41 |       - CLI Commands: user-guides/01-command-line-interface.md
 42 |       - MCP Tools: user-guides/02-mcp-tools-reference.md
 43 |       - API Documentation:
 44 |           - API Overview: apis/overview.md
 45 |           - Python SDK: apis/python-sdk.md
 46 |           - Error Codes: apis/error-codes.md
 47 |       - IDE Integration: user-guides/03-integrating-with-ides-and-clients.md
 48 | 
 49 |   - Developer:
 50 |       - Architecture:
 51 |           - Overview: reference/quick-architecture.md
 52 |           - Visual Diagrams: reference/visual-architecture.md
 53 |           - Detailed Diagrams: reference/architecture-diagrams.md
 54 |       - Data Sources:
 55 |           - Overview: backend-services-reference/01-overview.md
 56 |           - PubTator3/PubMed: backend-services-reference/06-pubtator3.md
 57 |           - ClinicalTrials.gov: backend-services-reference/04-clinicaltrials-gov.md
 58 |           - NCI CTS API: backend-services-reference/05-nci-cts-api.md
 59 |           - BioThings Suite: backend-services-reference/02-biothings-suite.md
 60 |           - cBioPortal: backend-services-reference/03-cbioportal.md
 61 |           - AlphaGenome: backend-services-reference/07-alphagenome.md
 62 |           - OpenFDA: tutorials/openfda-integration.md
 63 |       - Development:
 64 |           - Contributing: developer-guides/02-contributing-and-testing.md
 65 |           - Deployment: developer-guides/01-server-deployment.md
 66 |           - BigQuery Monitoring: how-to-guides/05-logging-and-monitoring-with-bigquery.md
 67 |       - Technical Details:
 68 |           - Transport Protocol: developer-guides/04-transport-protocol.md
 69 |           - Error Handling: developer-guides/05-error-handling.md
 70 |           - HTTP Client: developer-guides/06-http-client-and-caching.md
 71 |           - Performance: developer-guides/07-performance-optimizations.md
 72 |           - Third-Party APIs: developer-guides/03-third-party-endpoints.md
 73 |       - Security:
 74 |           - FDA Integration Security: FDA_SECURITY.md
 75 | 
 76 |   - About:
 77 |       - Blog:
 78 |           - Clinical Trial Search: blog/ai-assisted-clinical-trial-search-analysis.md
 79 |           - Researcher Persona: blog/researcher-persona-resource.md
 80 |       - Project:
 81 |           - Changelog: changelog.md
 82 |           - Policies: policies.md
 83 |           - GenomOncology: genomoncology.md
 84 | 
 85 | plugins:
 86 |   - search:
 87 |       lang: en
 88 |       separator: '[\s\-\.]+'
 89 |   - mkdocstrings:
 90 |       handlers:
 91 |         python:
 92 |           paths: ["src/biomcp"]
 93 |   # Note: sitemap plugin requires additional installation
 94 |   # Uncomment after installing: pip install mkdocs-sitemap
 95 |   # - sitemap:
 96 |   #     changefreq: weekly
 97 |   #     priority: 0.5
 98 | theme:
 99 |   name: material
100 |   # custom_dir: overrides
101 |   favicon: assets/favicon.ico
102 |   logo: assets/icon.png
103 |   features:
104 |     - navigation.tabs
105 |     - navigation.tabs.sticky
106 |     - navigation.sections
107 |     - navigation.instant
108 |     - navigation.tracking
109 |     - navigation.top
110 |     - toc.follow
111 |     - search.suggest
112 |     - search.highlight
113 |   palette:
114 |     - media: "(prefers-color-scheme: light)"
115 |       scheme: default
116 |       primary: white
117 |       accent: deep orange
118 |       toggle:
119 |         icon: material/brightness-7
120 |         name: Switch to dark mode
121 |     - media: "(prefers-color-scheme: dark)"
122 |       scheme: slate
123 |       primary: black
124 |       accent: deep orange
125 |       toggle:
126 |         icon: material/brightness-4
127 |         name: Switch to light mode
128 |   icon:
129 |     repo: fontawesome/brands/github
130 | 
131 | extra:
132 |   social:
133 |     - icon: fontawesome/brands/github
134 |       link: https://github.com/genomoncology/biomcp
135 |     - icon: fontawesome/brands/python
136 |       link: https://pypi.org/project/biomcp-python
137 |   meta:
138 |     - property: og:type
139 |       content: website
140 |     - property: og:title
141 |       content: BioMCP - Biomedical Model Context Protocol Server
142 |     - property: og:description
143 |       content: AI-powered biomedical research tool integrating PubMed, ClinicalTrials.gov, and genomic databases
144 |     - property: og:image
145 |       content: https://biomcp.org/assets/icon.png
146 |     - property: og:url
147 |       content: https://biomcp.org/
148 |     - name: twitter:card
149 |       content: summary
150 |     - name: twitter:title
151 |       content: BioMCP - Biomedical Model Context Protocol
152 |     - name: twitter:description
153 |       content: AI-powered biomedical research tool for PubMed, clinical trials, and genomic data
154 |     - name: keywords
155 |       content: biomedical, MCP, AI, PubMed, clinical trials, genomics, bioinformatics, Claude Desktop
156 | 
157 | extra_css:
158 |   - stylesheets/extra.css
159 |   - stylesheets/announcement.css
160 | 
161 | # extra_javascript: (removed - no third-party dependencies)
162 | markdown_extensions:
163 |   - toc:
164 |       permalink: true
165 |   - pymdownx.arithmatex:
166 |       generic: true
167 |   - admonition # Nice looking note/warning boxes
168 |   - pymdownx.details # Collapsible sections
169 |   - pymdownx.highlight: # Code highlighting
170 |       anchor_linenums: true
171 |   - pymdownx.inlinehilite
172 |   - pymdownx.snippets # Include content from other files
173 |   - pymdownx.superfences # Nested code blocks
174 |   - pymdownx.tabbed: # Tabbed content
175 |       alternate_style: true
176 | 
```

--------------------------------------------------------------------------------
/tests/tdd/variants/test_getter.py:
--------------------------------------------------------------------------------

```python
  1 | """Tests for variant getter module."""
  2 | 
  3 | from unittest.mock import AsyncMock, patch
  4 | 
  5 | import pytest
  6 | 
  7 | from biomcp.constants import DEFAULT_ASSEMBLY
  8 | from biomcp.variants import getter
  9 | 
 10 | 
 11 | class TestGetVariant:
 12 |     """Test the get_variant function."""
 13 | 
 14 |     @pytest.mark.asyncio
 15 |     async def test_get_variant_default_assembly(self):
 16 |         """Test that get_variant defaults to hg19 assembly."""
 17 |         mock_response = {
 18 |             "_id": "rs113488022",
 19 |             "dbsnp": {"rsid": "rs113488022"},
 20 |         }
 21 | 
 22 |         with patch("biomcp.http_client.request_api") as mock_request:
 23 |             mock_request.return_value = (mock_response, None)
 24 | 
 25 |             await getter.get_variant("rs113488022")
 26 | 
 27 |             # Verify assembly parameter was passed with default value
 28 |             call_args = mock_request.call_args
 29 |             assert call_args[1]["request"]["assembly"] == "hg19"
 30 | 
 31 |     @pytest.mark.asyncio
 32 |     async def test_get_variant_hg38_assembly(self):
 33 |         """Test that get_variant accepts hg38 assembly parameter."""
 34 |         mock_response = {
 35 |             "_id": "rs113488022",
 36 |             "dbsnp": {"rsid": "rs113488022"},
 37 |         }
 38 | 
 39 |         with patch("biomcp.http_client.request_api") as mock_request:
 40 |             mock_request.return_value = (mock_response, None)
 41 | 
 42 |             await getter.get_variant("rs113488022", assembly="hg38")
 43 | 
 44 |             # Verify assembly parameter was passed correctly
 45 |             call_args = mock_request.call_args
 46 |             assert call_args[1]["request"]["assembly"] == "hg38"
 47 | 
 48 |     @pytest.mark.asyncio
 49 |     async def test_get_variant_hg19_assembly(self):
 50 |         """Test that get_variant accepts hg19 assembly parameter explicitly."""
 51 |         mock_response = {
 52 |             "_id": "rs113488022",
 53 |             "dbsnp": {"rsid": "rs113488022"},
 54 |         }
 55 | 
 56 |         with patch("biomcp.http_client.request_api") as mock_request:
 57 |             mock_request.return_value = (mock_response, None)
 58 | 
 59 |             await getter.get_variant("rs113488022", assembly="hg19")
 60 | 
 61 |             # Verify assembly parameter was passed correctly
 62 |             call_args = mock_request.call_args
 63 |             assert call_args[1]["request"]["assembly"] == "hg19"
 64 | 
 65 |     @pytest.mark.asyncio
 66 |     async def test_get_variant_includes_all_fields(self):
 67 |         """Test that request includes all required fields."""
 68 |         mock_response = {"_id": "rs113488022"}
 69 | 
 70 |         with patch("biomcp.http_client.request_api") as mock_request:
 71 |             mock_request.return_value = (mock_response, None)
 72 | 
 73 |             await getter.get_variant("rs113488022", assembly="hg38")
 74 | 
 75 |             # Verify both fields and assembly are in request
 76 |             call_args = mock_request.call_args
 77 |             request_params = call_args[1]["request"]
 78 |             assert "fields" in request_params
 79 |             assert request_params["fields"] == "all"
 80 |             assert "assembly" in request_params
 81 |             assert request_params["assembly"] == "hg38"
 82 | 
 83 |     @pytest.mark.asyncio
 84 |     async def test_get_variant_with_external_annotations(self):
 85 |         """Test that assembly parameter works with external annotations."""
 86 |         from biomcp.variants.external import EnhancedVariantAnnotation
 87 | 
 88 |         mock_response = {
 89 |             "_id": "rs113488022",
 90 |             "dbsnp": {"rsid": "rs113488022"},
 91 |             "dbnsfp": {"genename": "BRAF"},
 92 |         }
 93 | 
 94 |         with (
 95 |             patch("biomcp.http_client.request_api") as mock_request,
 96 |             patch(
 97 |                 "biomcp.variants.getter.ExternalVariantAggregator"
 98 |             ) as mock_aggregator,
 99 |         ):
100 |             mock_request.return_value = (mock_response, None)
101 | 
102 |             # Mock the aggregator with proper EnhancedVariantAnnotation
103 |             mock_enhanced = EnhancedVariantAnnotation(
104 |                 variant_id="rs113488022",
105 |                 tcga=None,
106 |                 thousand_genomes=None,
107 |                 cbioportal=None,
108 |                 error_sources=[],
109 |             )
110 | 
111 |             mock_agg_instance = AsyncMock()
112 |             mock_agg_instance.get_enhanced_annotations = AsyncMock(
113 |                 return_value=mock_enhanced
114 |             )
115 |             mock_aggregator.return_value = mock_agg_instance
116 | 
117 |             await getter.get_variant(
118 |                 "rs113488022",
119 |                 assembly="hg38",
120 |                 include_external=True,
121 |             )
122 | 
123 |             # Verify assembly was still passed correctly
124 |             call_args = mock_request.call_args
125 |             assert call_args[1]["request"]["assembly"] == "hg38"
126 | 
127 | 
128 | class TestVariantDetailsMCPTool:
129 |     """Test the _variant_details MCP tool."""
130 | 
131 |     @pytest.mark.asyncio
132 |     async def test_variant_details_default_assembly(self):
133 |         """Test that _variant_details defaults to hg19 assembly."""
134 |         with patch("biomcp.variants.getter.get_variant") as mock_get:
135 |             mock_get.return_value = "Variant details"
136 | 
137 |             await getter._variant_details(
138 |                 call_benefit="Testing default assembly",
139 |                 variant_id="rs113488022",
140 |             )
141 | 
142 |             # Verify get_variant was called with default assembly
143 |             mock_get.assert_called_once_with(
144 |                 "rs113488022",
145 |                 output_json=False,
146 |                 include_external=True,
147 |                 assembly=DEFAULT_ASSEMBLY,
148 |             )
149 | 
150 |     @pytest.mark.asyncio
151 |     async def test_variant_details_custom_assembly(self):
152 |         """Test that _variant_details accepts custom assembly parameter."""
153 |         with patch("biomcp.variants.getter.get_variant") as mock_get:
154 |             mock_get.return_value = "Variant details"
155 | 
156 |             await getter._variant_details(
157 |                 call_benefit="Testing hg38 assembly",
158 |                 variant_id="rs113488022",
159 |                 assembly="hg38",
160 |             )
161 | 
162 |             # Verify get_variant was called with hg38
163 |             mock_get.assert_called_once_with(
164 |                 "rs113488022",
165 |                 output_json=False,
166 |                 include_external=True,
167 |                 assembly="hg38",
168 |             )
169 | 
170 |     @pytest.mark.asyncio
171 |     async def test_variant_details_with_all_params(self):
172 |         """Test that all parameters are passed through correctly."""
173 |         with patch("biomcp.variants.getter.get_variant") as mock_get:
174 |             mock_get.return_value = "Variant details"
175 | 
176 |             await getter._variant_details(
177 |                 call_benefit="Testing all parameters",
178 |                 variant_id="chr7:g.140453136A>T",
179 |                 include_external=False,
180 |                 assembly="hg19",
181 |             )
182 | 
183 |             # Verify all params were passed
184 |             mock_get.assert_called_once_with(
185 |                 "chr7:g.140453136A>T",
186 |                 output_json=False,
187 |                 include_external=False,
188 |                 assembly="hg19",
189 |             )
190 | 
```

--------------------------------------------------------------------------------
/docs/developer-guides/04-transport-protocol.md:
--------------------------------------------------------------------------------

```markdown
  1 | # Transport Protocol Guide
  2 | 
  3 | This guide explains BioMCP's transport protocol options, with a focus on the new Streamable HTTP transport that provides better scalability and reliability for production deployments.
  4 | 
  5 | ## Overview
  6 | 
  7 | BioMCP supports multiple transport protocols to accommodate different deployment scenarios:
  8 | 
  9 | | Transport           | Use Case                                     | Endpoint | Protocol Version |
 10 | | ------------------- | -------------------------------------------- | -------- | ---------------- |
 11 | | **STDIO**           | Local development, direct Claude integration | N/A      | All              |
 12 | | **Worker/SSE**      | Legacy cloud deployments                     | `/sse`   | Pre-2025         |
 13 | | **Streamable HTTP** | Modern cloud deployments                     | `/mcp`   | 2025-03-26+      |
 14 | 
 15 | ## Streamable HTTP Transport
 16 | 
 17 | ### What is Streamable HTTP?
 18 | 
 19 | Streamable HTTP is the latest MCP transport protocol (specification version 2025-03-26) that provides:
 20 | 
 21 | - **Single endpoint** (`/mcp`) for all operations
 22 | - **Dynamic response modes**: JSON for quick operations, SSE for long-running tasks
 23 | - **Session management** via `session_id` query parameter
 24 | - **Better scalability**: No permanent connections required
 25 | - **Automatic reconnection** and session recovery
 26 | 
 27 | ### Architecture
 28 | 
 29 | The Streamable HTTP transport follows this flow:
 30 | 
 31 | 1. **MCP Client** sends POST request to `/mcp` endpoint
 32 | 2. **BioMCP Server** processes the request
 33 | 3. **Response Type** determined by operation:
 34 |    - Quick operations return JSON response
 35 |    - Long operations return SSE stream
 36 | 4. **Session Management** maintains state via the `session_id` parameter
 37 | 
 38 | ### Implementation Details
 39 | 
 40 | BioMCP leverages FastMCP's native streamable HTTP support:
 41 | 
 42 | ```python
 43 | # In core.py
 44 | mcp_app = FastMCP(
 45 |     name="BioMCP",
 46 |     stateless_http=True,  # Enables streamable HTTP
 47 | )
 48 | ```
 49 | 
 50 | The transport is automatically handled by FastMCP 1.12.3+, providing:
 51 | 
 52 | - Request routing
 53 | - Session management
 54 | - Response type negotiation
 55 | - Error handling
 56 | 
 57 | ## Migration Guide
 58 | 
 59 | ### From SSE to Streamable HTTP
 60 | 
 61 | If you're currently using the legacy SSE transport, migrate to streamable HTTP:
 62 | 
 63 | #### 1. Update Server Configuration
 64 | 
 65 | **Before (SSE/Worker mode):**
 66 | 
 67 | ```bash
 68 | biomcp run --mode worker
 69 | ```
 70 | 
 71 | **After (Streamable HTTP):**
 72 | 
 73 | ```bash
 74 | biomcp run --mode streamable_http
 75 | ```
 76 | 
 77 | #### 2. Update Client Configuration
 78 | 
 79 | **MCP Inspector:**
 80 | 
 81 | ```bash
 82 | npx @modelcontextprotocol/inspector uv run --with . biomcp run --mode streamable_http
 83 | ```
 84 | 
 85 | **Claude Desktop Configuration:**
 86 | 
 87 | ```json
 88 | {
 89 |   "mcpServers": {
 90 |     "biomcp": {
 91 |       "command": "docker",
 92 |       "args": [
 93 |         "run",
 94 |         "-p",
 95 |         "8000:8000",
 96 |         "biomcp:latest",
 97 |         "biomcp",
 98 |         "run",
 99 |         "--mode",
100 |         "streamable_http"
101 |       ]
102 |     }
103 |   }
104 | }
105 | ```
106 | 
107 | #### 3. Update Cloudflare Worker
108 | 
109 | The worker now supports both GET (legacy SSE) and POST (streamable HTTP) on the `/mcp` endpoint:
110 | 
111 | ```javascript
112 | // Automatically routes based on method
113 | .get("/mcp", async (c) => {
114 |   // Legacy SSE transport
115 | })
116 | .post("/mcp", async (c) => {
117 |   // Streamable HTTP transport
118 | })
119 | ```
120 | 
121 | ### Backward Compatibility
122 | 
123 | All legacy endpoints remain functional:
124 | 
125 | - `/sse` - Server-sent events transport
126 | - `/health` - Health check endpoint
127 | - `/events` - Event streaming endpoint
128 | 
129 | ## Configuration Options
130 | 
131 | ### Server Modes
132 | 
133 | ```bash
134 | # Local development (STDIO)
135 | biomcp run
136 | 
137 | # Legacy SSE transport
138 | biomcp run --mode worker
139 | 
140 | # Modern streamable HTTP
141 | biomcp run --mode streamable_http --host 0.0.0.0 --port 8000
142 | ```
143 | 
144 | ### Environment Variables
145 | 
146 | | Variable        | Description             | Default |
147 | | --------------- | ----------------------- | ------- |
148 | | `MCP_TRANSPORT` | Override transport mode | None    |
149 | | `MCP_HOST`      | Server bind address     | 0.0.0.0 |
150 | | `MCP_PORT`      | Server port             | 8000    |
151 | 
152 | ## Session Management
153 | 
154 | Streamable HTTP uses session IDs to maintain state across requests:
155 | 
156 | ```http
157 | POST /mcp?session_id=abc123 HTTP/1.1
158 | Content-Type: application/json
159 | 
160 | {
161 |   "jsonrpc": "2.0",
162 |   "method": "initialize",
163 |   "params": {...}
164 | }
165 | ```
166 | 
167 | Sessions are:
168 | 
169 | - Created automatically on first request
170 | - Maintained in server memory
171 | - Cleaned up after inactivity timeout
172 | - Isolated between different clients
173 | 
174 | ## Performance Considerations
175 | 
176 | ### Response Mode Selection
177 | 
178 | The server automatically selects the optimal response mode:
179 | 
180 | | Operation Type    | Response Mode | Example                |
181 | | ----------------- | ------------- | ---------------------- |
182 | | Quick queries     | JSON          | `search(limit=10)`     |
183 | | Large results     | SSE           | `search(limit=1000)`   |
184 | | Real-time updates | SSE           | Thinking tool progress |
185 | 
186 | ### Optimization Tips
187 | 
188 | 1. **Use session IDs** for related requests to avoid re-initialization
189 | 2. **Batch operations** when possible to reduce round trips
190 | 3. **Set appropriate timeouts** for long-running operations
191 | 4. **Monitor response times** to identify bottlenecks
192 | 
193 | ## Troubleshooting
194 | 
195 | ### Common Issues
196 | 
197 | #### 1. Connection Refused
198 | 
199 | ```
200 | Error: connect ECONNREFUSED 127.0.0.1:8000
201 | ```
202 | 
203 | **Solution**: Ensure the server is running. For Docker deployments, start it with `--host 0.0.0.0` so that it accepts connections from outside the container.
204 | 
205 | #### 2. Session Not Found
206 | 
207 | ```
208 | Error: Session 'xyz' not found
209 | ```
210 | 
211 | **Solution**: The session may have expired. Omit `session_id` to create a new session.
212 | 
213 | #### 3. Timeout on Large Results
214 | 
215 | ```
216 | Error: Request timeout after 30s
217 | ```
218 | 
219 | **Solution**: Increase client timeout or reduce result size with `limit` parameter.
220 | 
221 | ### Debug Mode
222 | 
223 | Enable debug logging to troubleshoot transport issues:
224 | 
225 | ```bash
226 | LOG_LEVEL=debug biomcp run --mode streamable_http
227 | ```
228 | 
229 | ## Security Considerations
230 | 
231 | ### Authentication
232 | 
233 | BioMCP does not implement authentication at the transport layer. Secure your deployment using:
234 | 
235 | - **API Gateway**: AWS API Gateway, Kong, etc.
236 | - **Reverse Proxy**: Nginx with auth modules
237 | - **Cloud IAM**: Platform-specific access controls
238 | 
239 | ### Rate Limiting
240 | 
241 | Implement rate limiting at the infrastructure layer:
242 | 
243 | ```nginx
244 | # Nginx example
245 | limit_req_zone $binary_remote_addr zone=mcp:10m rate=10r/s;
246 | 
247 | location /mcp {
248 |     limit_req zone=mcp burst=20;
249 |     proxy_pass http://biomcp:8000;
250 | }
251 | ```
252 | 
253 | ### CORS Configuration
254 | 
255 | For browser-based clients, configure CORS headers:
256 | 
257 | ```python
258 | # Handled automatically by FastMCP when stateless_http=True
259 | ```
260 | 
261 | ## Monitoring
262 | 
263 | ### Health Checks
264 | 
265 | ```bash
266 | # Check server health
267 | curl http://localhost:8000/health
268 | 
269 | # Response
270 | {"status": "ok", "transport": "streamable_http"}
271 | ```
272 | 
273 | ### Metrics
274 | 
275 | Monitor these key metrics:
276 | 
277 | - Request rate on `/mcp` endpoint
278 | - Response time percentiles (p50, p95, p99)
279 | - Session count and duration
280 | - Error rate by error type
281 | 
282 | ## Next Steps
283 | 
284 | - Review the [MCP Specification](https://spec.modelcontextprotocol.io) for protocol details
285 | 
286 | For questions or issues, please visit our [GitHub repository](https://github.com/genomoncology/biomcp).
287 | 
```

--------------------------------------------------------------------------------
/tests/tdd/test_europe_pmc_fetch.py:
--------------------------------------------------------------------------------

```python
  1 | """Tests for Europe PMC article fetching via DOI."""
  2 | 
  3 | import json
  4 | from unittest.mock import Mock, patch
  5 | 
  6 | import pytest
  7 | 
  8 | from biomcp.articles.fetch import _article_details, is_doi, is_pmid
  9 | from biomcp.articles.preprints import fetch_europe_pmc_article
 10 | 
 11 | 
 12 | class TestDOIDetection:
 13 |     """Test DOI and PMID detection functions."""
 14 | 
 15 |     def test_valid_dois(self):
 16 |         """Test that valid DOIs are correctly identified."""
 17 |         valid_dois = [
 18 |             "10.1101/2024.01.20.23288905",
 19 |             "10.1038/nature12373",
 20 |             "10.1016/j.cell.2023.05.001",
 21 |             "10.1126/science.abc1234",
 22 |         ]
 23 |         for doi in valid_dois:
 24 |             assert (
 25 |                 is_doi(doi) is True
 26 |             ), f"Expected {doi} to be identified as DOI"
 27 |             assert (
 28 |                 is_pmid(doi) is False
 29 |             ), f"Expected {doi} NOT to be identified as PMID"
 30 | 
 31 |     def test_valid_pmids(self):
 32 |         """Test that valid PMIDs are correctly identified."""
 33 |         valid_pmids = [
 34 |             "35271234",
 35 |             "12345678",
 36 |             "1",
 37 |             "999999999",
 38 |         ]
 39 |         for pmid in valid_pmids:
 40 |             assert (
 41 |                 is_pmid(pmid) is True
 42 |             ), f"Expected {pmid} to be identified as PMID"
 43 |             assert (
 44 |                 is_doi(pmid) is False
 45 |             ), f"Expected {pmid} NOT to be identified as DOI"
 46 | 
 47 |     def test_invalid_identifiers(self):
 48 |         """Test that invalid identifiers are rejected by both functions."""
 49 |         invalid_ids = [
 50 |             "PMC11193658",  # PMC ID
 51 |             "abc123",  # Random string
 52 |             "10.1101",  # Incomplete DOI
 53 |             "nature12373",  # DOI without prefix
 54 |             "",  # Empty string
 55 |         ]
 56 |         for identifier in invalid_ids:
 57 |             assert (
 58 |                 is_doi(identifier) is False
 59 |             ), f"Expected {identifier} NOT to be identified as DOI"
 60 |             assert (
 61 |                 is_pmid(identifier) is False
 62 |             ), f"Expected {identifier} NOT to be identified as PMID"
 63 | 
 64 | 
 65 | class TestEuropePMCFetch:
 66 |     """Test Europe PMC article fetching."""
 67 | 
 68 |     @pytest.mark.asyncio
 69 |     async def test_fetch_europe_pmc_article_success(self):
 70 |         """Test successful fetch from Europe PMC."""
 71 |         # Mock the response
 72 |         mock_response = Mock()
 73 |         mock_response.hitCount = 1
 74 |         mock_response.results = [
 75 |             Mock(
 76 |                 id="PPR790987",
 77 |                 source="PPR",
 78 |                 pmid=None,
 79 |                 pmcid=None,
 80 |                 doi="10.1101/2024.01.20.23288905",
 81 |                 title="Test Article Title",
 82 |                 authorString="Author A, Author B, Author C",
 83 |                 journalTitle=None,
 84 |                 pubYear="2024",
 85 |                 firstPublicationDate="2024-01-23",
 86 |                 abstractText="This is the abstract text.",
 87 |             )
 88 |         ]
 89 | 
 90 |         with patch(
 91 |             "biomcp.articles.preprints.http_client.request_api"
 92 |         ) as mock_request:
 93 |             mock_request.return_value = (mock_response, None)
 94 | 
 95 |             result = await fetch_europe_pmc_article(
 96 |                 "10.1101/2024.01.20.23288905", output_json=True
 97 |             )
 98 |             data = json.loads(result)
 99 | 
100 |             assert len(data) == 1
101 |             article = data[0]
102 |             assert article["doi"] == "10.1101/2024.01.20.23288905"
103 |             assert article["title"] == "Test Article Title"
104 |             assert article["journal"] == "Preprint Server (preprint)"
105 |             assert article["date"] == "2024-01-23"
106 |             assert article["authors"] == ["Author A", "Author B", "Author C"]
107 |             assert article["abstract"] == "This is the abstract text."
108 |             assert article["source"] == "Europe PMC"
109 |             assert article["pmid"] is None
110 |             assert "europepmc.org" in article["pmc_url"]
111 | 
112 |     @pytest.mark.asyncio
113 |     async def test_fetch_europe_pmc_article_not_found(self):
114 |         """Test fetch when article is not found in Europe PMC."""
115 |         mock_response = Mock()
116 |         mock_response.hitCount = 0
117 |         mock_response.results = []
118 | 
119 |         with patch(
120 |             "biomcp.articles.preprints.http_client.request_api"
121 |         ) as mock_request:
122 |             mock_request.return_value = (mock_response, None)
123 | 
124 |             result = await fetch_europe_pmc_article(
125 |                 "10.1101/invalid.doi", output_json=True
126 |             )
127 |             data = json.loads(result)
128 | 
129 |             assert len(data) == 1
130 |             assert data[0]["error"] == "Article not found in Europe PMC"
131 | 
132 |     @pytest.mark.asyncio
133 |     async def test_fetch_europe_pmc_article_error(self):
134 |         """Test fetch when Europe PMC API returns an error."""
135 |         mock_error = Mock()
136 |         mock_error.code = 500
137 |         mock_error.message = "Internal Server Error"
138 | 
139 |         with patch(
140 |             "biomcp.articles.preprints.http_client.request_api"
141 |         ) as mock_request:
142 |             mock_request.return_value = (None, mock_error)
143 | 
144 |             result = await fetch_europe_pmc_article(
145 |                 "10.1101/2024.01.20.23288905", output_json=True
146 |             )
147 |             data = json.loads(result)
148 | 
149 |             assert len(data) == 1
150 |             assert data[0]["error"] == "Error 500: Internal Server Error"
151 | 
152 | 
153 | class TestArticleDetailsRouting:
154 |     """Test that _article_details correctly routes DOIs to Europe PMC."""
155 | 
156 |     @pytest.mark.asyncio
157 |     async def test_doi_routes_to_europe_pmc(self):
158 |         """Test that DOIs are routed to fetch_europe_pmc_article."""
159 |         test_doi = "10.1101/2024.01.20.23288905"
160 | 
161 |         with patch(
162 |             "biomcp.articles.preprints.fetch_europe_pmc_article"
163 |         ) as mock_europe_pmc:
164 |             mock_europe_pmc.return_value = "Europe PMC result"
165 | 
166 |             result = await _article_details("Test", test_doi)
167 | 
168 |             mock_europe_pmc.assert_called_once_with(test_doi, output_json=True)
169 |             assert result == "Europe PMC result"
170 | 
171 |     @pytest.mark.asyncio
172 |     async def test_pmid_routes_to_pubtator(self):
173 |         """Test that PMIDs are routed to fetch_articles."""
174 |         test_pmid = "35271234"
175 | 
176 |         with patch(
177 |             "biomcp.articles.fetch.fetch_articles"
178 |         ) as mock_fetch_articles:
179 |             mock_fetch_articles.return_value = "PubTator result"
180 | 
181 |             result = await _article_details("Test", test_pmid)
182 | 
183 |             mock_fetch_articles.assert_called_once_with(
184 |                 [35271234], full=True, output_json=True
185 |             )
186 |             assert result == "PubTator result"
187 | 
188 |     @pytest.mark.asyncio
189 |     async def test_invalid_identifier_returns_error(self):
190 |         """Test that invalid identifiers return an error."""
191 |         invalid_id = "PMC12345"
192 | 
193 |         result = await _article_details("Test", invalid_id)
194 | 
195 |         data = json.loads(result)
196 |         assert len(data) == 1
197 |         assert "Invalid identifier format" in data[0]["error"]
198 |         assert "PMC12345" in data[0]["error"]
199 | 
```

--------------------------------------------------------------------------------
/src/biomcp/workers/worker_entry.js:
--------------------------------------------------------------------------------

```javascript
  1 | /**
  2 |  * BioMCP Worker – Auth‑less version (rev 1.8)
  3 |  *
  4 |  *  Fix: Added improved error handling and increased timeouts for list requests
  5 |  */
  6 | 
  7 | // Server URL will be configured from environment variables
  8 | let REMOTE_MCP_SERVER_URL = "http://localhost:8000"; // Default fallback
  9 | const DEBUG = true;
 10 | 
 11 | const log = (m) => DEBUG && console.log("[DEBUG]", m);
 12 | const CORS = {
 13 |   "Access-Control-Allow-Origin": "*",
 14 |   "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
 15 |   "Access-Control-Allow-Headers": "*",
 16 |   "Access-Control-Max-Age": "86400",
 17 | };
 18 | const json = (o, s = 200) =>
 19 |   new Response(JSON.stringify(o, null, 2), {
 20 |     status: s,
 21 |     headers: { "Content-Type": "application/json", ...CORS },
 22 |   });
 23 | 
 24 | let forwardPath = "/messages"; // for proxying JSON‑RPC POSTS (no query)
 25 | let resourceEndpoint = null; // full string we echo back (/messages/?session_id=…)
 26 | 
 27 | // Track active SSE connections to avoid duplicate connections
 28 | const activeConnections = new Map();
 29 | 
 30 | export default {
 31 |   async fetch(req, env, ctx) {
 32 |     // Use environment variable if available, otherwise use the default
 33 |     REMOTE_MCP_SERVER_URL = env.REMOTE_MCP_SERVER_URL || REMOTE_MCP_SERVER_URL;
 34 | 
 35 |     const url = new URL(req.url);
 36 |     log(`${req.method} ${url.pathname}${url.search}`);
 37 | 
 38 |     if (req.method === "OPTIONS")
 39 |       return new Response(null, { status: 204, headers: CORS });
 40 |     if (url.pathname === "/status" || url.pathname === "/debug")
 41 |       return json({
 42 |         worker: "BioMCP-authless",
 43 |         remote: REMOTE_MCP_SERVER_URL,
 44 |         forwardPath,
 45 |         resourceEndpoint,
 46 |       });
 47 |     if (url.pathname === "/sse" || url.pathname === "/events")
 48 |       return serveSSE(req, ctx);
 49 | 
 50 |     if (req.method === "POST") {
 51 |       const sid = url.searchParams.get("session_id");
 52 |       if (!sid) return new Response("Missing session_id", { status: 400 });
 53 |       return proxyPost(req, forwardPath, sid);
 54 |     }
 55 | 
 56 |     return new Response("Not found", { status: 404 });
 57 |   },
 58 | };
 59 | 
 60 | async function proxyPost(req, path, sid) {
 61 |   const body = await req.text();
 62 |   const target = `${REMOTE_MCP_SERVER_URL}${path}?session_id=${encodeURIComponent(
 63 |     sid,
 64 |   )}`;
 65 | 
 66 |   try {
 67 |     // Parse the request to check if it's a list request that might need a longer timeout
 68 |     let jsonBody;
 69 |     try {
 70 |       jsonBody = JSON.parse(body);
 71 |     } catch (e) {
 72 |       // Not valid JSON, proceed with normal request
 73 |       jsonBody = {};
 74 |     }
 75 | 
 76 |     // Set a longer timeout for list requests that tend to time out
 77 |     const timeout =
 78 |       jsonBody.method &&
 79 |       (jsonBody.method === "tools/list" || jsonBody.method === "resources/list")
 80 |         ? 30000
 81 |         : 10000;
 82 | 
 83 |     // Use AbortController to implement timeout
 84 |     const controller = new AbortController();
 85 |     const timeoutId = setTimeout(() => controller.abort(), timeout);
 86 | 
 87 |     log(`Proxying ${jsonBody.method || "request"} with timeout ${timeout}ms`);
 88 | 
 89 |     const resp = await fetch(target, {
 90 |       method: "POST",
 91 |       headers: { "Content-Type": "application/json" },
 92 |       body,
 93 |       signal: controller.signal,
 94 |     });
 95 | 
 96 |     clearTimeout(timeoutId);
 97 | 
 98 |     // If it's a list request, log that the response arrived (nothing is cached here)
 99 |     if (
100 |       jsonBody.method &&
101 |       (jsonBody.method === "tools/list" || jsonBody.method === "resources/list")
102 |     ) {
103 |       log(`Received response for ${jsonBody.method}`);
104 |     }
105 | 
106 |     return new Response(await resp.text(), {
107 |       status: resp.status,
108 |       headers: { "Content-Type": "application/json", ...CORS },
109 |     });
110 |   } catch (error) {
111 |     log(`POST error: ${error.message}`);
112 | 
113 |     // For timeout errors, provide a default empty response for list requests
114 |     if (error.name === "AbortError") {
115 |       try {
116 |         const jsonBody = JSON.parse(body);
117 |         if (jsonBody.method === "tools/list") {
118 |           log("Returning empty tools list due to timeout");
119 |           return new Response(
120 |             JSON.stringify({
121 |               jsonrpc: "2.0",
122 |               id: jsonBody.id,
123 |               result: { tools: [] },
124 |             }),
125 |             {
126 |               status: 200,
127 |               headers: { "Content-Type": "application/json", ...CORS },
128 |             },
129 |           );
130 |         } else if (jsonBody.method === "resources/list") {
131 |           log("Returning empty resources list due to timeout");
132 |           return new Response(
133 |             JSON.stringify({
134 |               jsonrpc: "2.0",
135 |               id: jsonBody.id,
136 |               result: { resources: [] },
137 |             }),
138 |             {
139 |               status: 200,
140 |               headers: { "Content-Type": "application/json", ...CORS },
141 |             },
142 |           );
143 |         }
144 |       } catch (e) {
145 |         // If parsing fails, fall through to default error response
146 |       }
147 |     }
148 | 
149 |     return new Response(JSON.stringify({ error: error.message }), {
150 |       status: 502,
151 |       headers: { "Content-Type": "application/json", ...CORS },
152 |     });
153 |   }
154 | }
155 | 
156 | function serveSSE(clientReq, ctx) {
157 |   const enc = new TextEncoder();
158 |   let keepalive;
159 |   const upstreamCtl = new AbortController();
160 | 
161 |   const stream = new ReadableStream({
162 |     async start(ctrl) {
163 |       ctrl.enqueue(enc.encode("event: ready\ndata: {}\n\n"));
164 | 
165 |       clientReq.signal.addEventListener("abort", () => {
166 |         clearInterval(keepalive);
167 |         upstreamCtl.abort();
168 |         ctrl.close();
169 |       });
170 | 
171 |       try {
172 |         const u = await fetch(`${REMOTE_MCP_SERVER_URL}/sse`, {
173 |           headers: { Accept: "text/event-stream" },
174 |           signal: upstreamCtl.signal,
175 |         });
176 | 
177 |         if (!u.ok || !u.body) throw new Error(`Upstream SSE ${u.status}`);
178 |         const r = u.body.getReader();
179 | 
180 |         while (true) {
181 |           const { value, done } = await r.read();
182 |           if (done) break;
183 |           if (value) {
184 |             const text = new TextDecoder().decode(value);
185 |             // capture first endpoint once
186 |             if (!resourceEndpoint) {
187 |               const m = text.match(
188 |                 /data:\s*(\/messages\/\?session_id=[A-Za-z0-9_-]+)/,
189 |               );
190 |               if (m) {
191 |                 resourceEndpoint = m[1];
192 |                 forwardPath = resourceEndpoint.split("?")[0];
193 |                 log(`Captured endpoint ${resourceEndpoint}`);
194 |                 ctrl.enqueue(
195 |                   enc.encode(`event: resource\ndata: ${resourceEndpoint}\n\n`),
196 |                 );
197 |               }
198 |             }
199 |             ctrl.enqueue(value);
200 |           }
201 |         }
202 |       } catch (e) {
203 |         if (e.name !== "AbortError") {
204 |           log(`SSE error: ${e.message}`);
205 |           ctrl.enqueue(enc.encode(`event: error\ndata: ${e.message}\n\n`));
206 |         }
207 |       }
208 | 
209 |       // Send a keepalive comment every 5 seconds; note this timer is only started once the upstream read loop above has ended
210 |       keepalive = setInterval(() => {
211 |         try {
212 |           ctrl.enqueue(enc.encode(":keepalive\n\n"));
213 |         } catch (_) {
214 |           clearInterval(keepalive);
215 |         }
216 |       }, 5000);
217 |     },
218 |   });
219 | 
220 |   return new Response(stream, {
221 |     headers: {
222 |       "Content-Type": "text/event-stream",
223 |       "Cache-Control": "no-cache",
224 |       Connection: "keep-alive",
225 |       ...CORS,
226 |     },
227 |   });
228 | }
229 | 
```
Page 4/19FirstPrevNextLast