This is page 5 of 15. Use http://codebase.md/genomoncology/biomcp?page={x} to view the full context.

# Directory Structure

```
├── .github
│   ├── actions
│   │   └── setup-python-env
│   │       └── action.yml
│   ├── dependabot.yml
│   └── workflows
│       ├── ci.yml
│       ├── deploy-docs.yml
│       ├── main.yml.disabled
│       ├── on-release-main.yml
│       └── validate-codecov-config.yml
├── .gitignore
├── .pre-commit-config.yaml
├── BIOMCP_DATA_FLOW.md
├── CHANGELOG.md
├── CNAME
├── codecov.yaml
├── docker-compose.yml
├── Dockerfile
├── docs
│   ├── apis
│   │   ├── error-codes.md
│   │   ├── overview.md
│   │   └── python-sdk.md
│   ├── assets
│   │   ├── biomcp-cursor-locations.png
│   │   ├── favicon.ico
│   │   ├── icon.png
│   │   ├── logo.png
│   │   ├── mcp_architecture.txt
│   │   └── remote-connection
│   │       ├── 00_connectors.png
│   │       ├── 01_add_custom_connector.png
│   │       ├── 02_connector_enabled.png
│   │       ├── 03_connect_to_biomcp.png
│   │       ├── 04_select_google_oauth.png
│   │       └── 05_success_connect.png
│   ├── backend-services-reference
│   │   ├── 01-overview.md
│   │   ├── 02-biothings-suite.md
│   │   ├── 03-cbioportal.md
│   │   ├── 04-clinicaltrials-gov.md
│   │   ├── 05-nci-cts-api.md
│   │   ├── 06-pubtator3.md
│   │   └── 07-alphagenome.md
│   ├── blog
│   │   ├── ai-assisted-clinical-trial-search-analysis.md
│   │   ├── images
│   │   │   ├── deep-researcher-video.png
│   │   │   ├── researcher-announce.png
│   │   │   ├── researcher-drop-down.png
│   │   │   ├── researcher-prompt.png
│   │   │   ├── trial-search-assistant.png
│   │   │   └── what_is_biomcp_thumbnail.png
│   │   └── researcher-persona-resource.md
│   ├── changelog.md
│   ├── CNAME
│   ├── concepts
│   │   ├── 01-what-is-biomcp.md
│   │   ├── 02-the-deep-researcher-persona.md
│   │   └── 03-sequential-thinking-with-the-think-tool.md
│   ├── developer-guides
│   │   ├── 01-server-deployment.md
│   │   ├── 02-contributing-and-testing.md
│   │   ├── 03-third-party-endpoints.md
│   │   ├── 04-transport-protocol.md
│   │   ├── 05-error-handling.md
│   │   ├── 06-http-client-and-caching.md
│   │   ├── 07-performance-optimizations.md
│   │   └── generate_endpoints.py
│   ├── faq-condensed.md
│   ├── FDA_SECURITY.md
│   ├── genomoncology.md
│   ├── getting-started
│   │   ├── 01-quickstart-cli.md
│   │   ├── 02-claude-desktop-integration.md
│   │   └── 03-authentication-and-api-keys.md
│   ├── how-to-guides
│   │   ├── 01-find-articles-and-cbioportal-data.md
│   │   ├── 02-find-trials-with-nci-and-biothings.md
│   │   ├── 03-get-comprehensive-variant-annotations.md
│   │   ├── 04-predict-variant-effects-with-alphagenome.md
│   │   ├── 05-logging-and-monitoring-with-bigquery.md
│   │   └── 06-search-nci-organizations-and-interventions.md
│   ├── index.md
│   ├── policies.md
│   ├── reference
│   │   ├── architecture-diagrams.md
│   │   ├── quick-architecture.md
│   │   ├── quick-reference.md
│   │   └── visual-architecture.md
│   ├── robots.txt
│   ├── stylesheets
│   │   ├── announcement.css
│   │   └── extra.css
│   ├── troubleshooting.md
│   ├── tutorials
│   │   ├── biothings-prompts.md
│   │   ├── claude-code-biomcp-alphagenome.md
│   │   ├── nci-prompts.md
│   │   ├── openfda-integration.md
│   │   ├── openfda-prompts.md
│   │   ├── pydantic-ai-integration.md
│   │   └── remote-connection.md
│   ├── user-guides
│   │   ├── 01-command-line-interface.md
│   │   ├── 02-mcp-tools-reference.md
│   │   └── 03-integrating-with-ides-and-clients.md
│   └── workflows
│       └── all-workflows.md
├── example_scripts
│   ├── mcp_integration.py
│   └── python_sdk.py
├── glama.json
├── LICENSE
├── lzyank.toml
├── Makefile
├── mkdocs.yml
├── package-lock.json
├── package.json
├── pyproject.toml
├── README.md
├── scripts
│   ├── check_docs_in_mkdocs.py
│   ├── check_http_imports.py
│   └── generate_endpoints_doc.py
├── smithery.yaml
├── src
│   └── biomcp
│       ├── __init__.py
│       ├── __main__.py
│       ├── articles
│       │   ├── __init__.py
│       │   ├── autocomplete.py
│       │   ├── fetch.py
│       │   ├── preprints.py
│       │   ├── search_optimized.py
│       │   ├── search.py
│       │   └── unified.py
│       ├── biomarkers
│       │   ├── __init__.py
│       │   └── search.py
│       ├── cbioportal_helper.py
│       ├── circuit_breaker.py
│       ├── cli
│       │   ├── __init__.py
│       │   ├── articles.py
│       │   ├── biomarkers.py
│       │   ├── diseases.py
│       │   ├── health.py
│       │   ├── interventions.py
│       │   ├── main.py
│       │   ├── openfda.py
│       │   ├── organizations.py
│       │   ├── server.py
│       │   ├── trials.py
│       │   └── variants.py
│       ├── connection_pool.py
│       ├── constants.py
│       ├── core.py
│       ├── diseases
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── domain_handlers.py
│       ├── drugs
│       │   ├── __init__.py
│       │   └── getter.py
│       ├── exceptions.py
│       ├── genes
│       │   ├── __init__.py
│       │   └── getter.py
│       ├── http_client_simple.py
│       ├── http_client.py
│       ├── individual_tools.py
│       ├── integrations
│       │   ├── __init__.py
│       │   ├── biothings_client.py
│       │   └── cts_api.py
│       ├── interventions
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── logging_filter.py
│       ├── metrics_handler.py
│       ├── metrics.py
│       ├── openfda
│       │   ├── __init__.py
│       │   ├── adverse_events_helpers.py
│       │   ├── adverse_events.py
│       │   ├── cache.py
│       │   ├── constants.py
│       │   ├── device_events_helpers.py
│       │   ├── device_events.py
│       │   ├── drug_approvals.py
│       │   ├── drug_labels_helpers.py
│       │   ├── drug_labels.py
│       │   ├── drug_recalls_helpers.py
│       │   ├── drug_recalls.py
│       │   ├── drug_shortages_detail_helpers.py
│       │   ├── drug_shortages_helpers.py
│       │   ├── drug_shortages.py
│       │   ├── exceptions.py
│       │   ├── input_validation.py
│       │   ├── rate_limiter.py
│       │   ├── utils.py
│       │   └── validation.py
│       ├── organizations
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── parameter_parser.py
│       ├── prefetch.py
│       ├── query_parser.py
│       ├── query_router.py
│       ├── rate_limiter.py
│       ├── render.py
│       ├── request_batcher.py
│       ├── resources
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   ├── instructions.md
│       │   └── researcher.md
│       ├── retry.py
│       ├── router_handlers.py
│       ├── router.py
│       ├── shared_context.py
│       ├── thinking
│       │   ├── __init__.py
│       │   ├── sequential.py
│       │   └── session.py
│       ├── thinking_tool.py
│       ├── thinking_tracker.py
│       ├── trials
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   ├── nci_getter.py
│       │   ├── nci_search.py
│       │   └── search.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── cancer_types_api.py
│       │   ├── cbio_http_adapter.py
│       │   ├── endpoint_registry.py
│       │   ├── gene_validator.py
│       │   ├── metrics.py
│       │   ├── mutation_filter.py
│       │   ├── query_utils.py
│       │   ├── rate_limiter.py
│       │   └── request_cache.py
│       ├── variants
│       │   ├── __init__.py
│       │   ├── alphagenome.py
│       │   ├── cancer_types.py
│       │   ├── cbio_external_client.py
│       │   ├── cbioportal_mutations.py
│       │   ├── cbioportal_search_helpers.py
│       │   ├── cbioportal_search.py
│       │   ├── constants.py
│       │   ├── external.py
│       │   ├── filters.py
│       │   ├── getter.py
│       │   ├── links.py
│       │   └── search.py
│       └── workers
│           ├── __init__.py
│           ├── worker_entry_stytch.js
│           ├── worker_entry.js
│           └── worker.py
├── tests
│   ├── bdd
│   │   ├── cli_help
│   │   │   ├── help.feature
│   │   │   └── test_help.py
│   │   ├── conftest.py
│   │   ├── features
│   │   │   └── alphagenome_integration.feature
│   │   ├── fetch_articles
│   │   │   ├── fetch.feature
│   │   │   └── test_fetch.py
│   │   ├── get_trials
│   │   │   ├── get.feature
│   │   │   └── test_get.py
│   │   ├── get_variants
│   │   │   ├── get.feature
│   │   │   └── test_get.py
│   │   ├── search_articles
│   │   │   ├── autocomplete.feature
│   │   │   ├── search.feature
│   │   │   ├── test_autocomplete.py
│   │   │   └── test_search.py
│   │   ├── search_trials
│   │   │   ├── search.feature
│   │   │   └── test_search.py
│   │   ├── search_variants
│   │   │   ├── search.feature
│   │   │   └── test_search.py
│   │   └── steps
│   │       └── test_alphagenome_steps.py
│   ├── config
│   │   └── test_smithery_config.py
│   ├── conftest.py
│   ├── data
│   │   ├── ct_gov
│   │   │   ├── clinical_trials_api_v2.yaml
│   │   │   ├── trials_NCT04280705.json
│   │   │   └── trials_NCT04280705.txt
│   │   ├── myvariant
│   │   │   ├── myvariant_api.yaml
│   │   │   ├── myvariant_field_descriptions.csv
│   │   │   ├── variants_full_braf_v600e.json
│   │   │   ├── variants_full_braf_v600e.txt
│   │   │   └── variants_part_braf_v600_multiple.json
│   │   ├── openfda
│   │   │   ├── drugsfda_detail.json
│   │   │   ├── drugsfda_search.json
│   │   │   ├── enforcement_detail.json
│   │   │   └── enforcement_search.json
│   │   └── pubtator
│   │       ├── pubtator_autocomplete.json
│   │       └── pubtator3_paper.txt
│   ├── integration
│   │   ├── test_openfda_integration.py
│   │   ├── test_preprints_integration.py
│   │   ├── test_simple.py
│   │   └── test_variants_integration.py
│   ├── tdd
│   │   ├── articles
│   │   │   ├── test_autocomplete.py
│   │   │   ├── test_cbioportal_integration.py
│   │   │   ├── test_fetch.py
│   │   │   ├── test_preprints.py
│   │   │   ├── test_search.py
│   │   │   └── test_unified.py
│   │   ├── conftest.py
│   │   ├── drugs
│   │   │   ├── __init__.py
│   │   │   └── test_drug_getter.py
│   │   ├── openfda
│   │   │   ├── __init__.py
│   │   │   ├── test_adverse_events.py
│   │   │   ├── test_device_events.py
│   │   │   ├── test_drug_approvals.py
│   │   │   ├── test_drug_labels.py
│   │   │   ├── test_drug_recalls.py
│   │   │   ├── test_drug_shortages.py
│   │   │   └── test_security.py
│   │   ├── test_biothings_integration_real.py
│   │   ├── test_biothings_integration.py
│   │   ├── test_circuit_breaker.py
│   │   ├── test_concurrent_requests.py
│   │   ├── test_connection_pool.py
│   │   ├── test_domain_handlers.py
│   │   ├── test_drug_approvals.py
│   │   ├── test_drug_recalls.py
│   │   ├── test_drug_shortages.py
│   │   ├── test_endpoint_documentation.py
│   │   ├── test_error_scenarios.py
│   │   ├── test_europe_pmc_fetch.py
│   │   ├── test_mcp_integration.py
│   │   ├── test_mcp_tools.py
│   │   ├── test_metrics.py
│   │   ├── test_nci_integration.py
│   │   ├── test_nci_mcp_tools.py
│   │   ├── test_network_policies.py
│   │   ├── test_offline_mode.py
│   │   ├── test_openfda_unified.py
│   │   ├── test_pten_r173_search.py
│   │   ├── test_render.py
│   │   ├── test_request_batcher.py.disabled
│   │   ├── test_retry.py
│   │   ├── test_router.py
│   │   ├── test_shared_context.py.disabled
│   │   ├── test_unified_biothings.py
│   │   ├── thinking
│   │   │   ├── __init__.py
│   │   │   └── test_sequential.py
│   │   ├── trials
│   │   │   ├── test_backward_compatibility.py
│   │   │   ├── test_getter.py
│   │   │   └── test_search.py
│   │   ├── utils
│   │   │   ├── test_gene_validator.py
│   │   │   ├── test_mutation_filter.py
│   │   │   ├── test_rate_limiter.py
│   │   │   └── test_request_cache.py
│   │   ├── variants
│   │   │   ├── constants.py
│   │   │   ├── test_alphagenome_api_key.py
│   │   │   ├── test_alphagenome_comprehensive.py
│   │   │   ├── test_alphagenome.py
│   │   │   ├── test_cbioportal_mutations.py
│   │   │   ├── test_cbioportal_search.py
│   │   │   ├── test_external_integration.py
│   │   │   ├── test_external.py
│   │   │   ├── test_extract_gene_aa_change.py
│   │   │   ├── test_filters.py
│   │   │   ├── test_getter.py
│   │   │   ├── test_links.py
│   │   │   └── test_search.py
│   │   └── workers
│   │       └── test_worker_sanitization.js
│   └── test_pydantic_ai_integration.py
├── THIRD_PARTY_ENDPOINTS.md
├── tox.ini
├── uv.lock
└── wrangler.toml
```

# Files

--------------------------------------------------------------------------------
/tests/tdd/test_drug_recalls.py:
--------------------------------------------------------------------------------

```python
"""Tests for FDA drug recalls module."""

import json
from pathlib import Path
from unittest.mock import AsyncMock, patch

import pytest

from biomcp.openfda.drug_recalls import (
    get_drug_recall,
    search_drug_recalls,
)

# Load mock data
MOCK_DIR = Path(__file__).parent.parent / "data" / "openfda"
MOCK_RECALLS_SEARCH = json.loads(
    (MOCK_DIR / "enforcement_search.json").read_text()
)
MOCK_RECALL_DETAIL = json.loads(
    (MOCK_DIR / "enforcement_detail.json").read_text()
)


class TestDrugRecalls:
    """Test drug recalls functionality."""

    @pytest.mark.asyncio
    async def test_search_drug_recalls_success(self):
        """Test successful drug recall search."""
        with patch(
            "biomcp.openfda.drug_recalls.make_openfda_request",
            new_callable=AsyncMock,
        ) as mock_request:
            mock_request.return_value = (MOCK_RECALLS_SEARCH, None)

            result = await search_drug_recalls(
                drug="valsartan",
                limit=10,
            )

            assert "Drug Recall" in result or "FDA Drug Recall" in result
            assert "valsartan" in result.lower()
            # Check for presence of key recall info
            assert "Recall" in result or "recall" in result.lower()
            mock_request.assert_called_once()

    @pytest.mark.asyncio
    async def test_search_drug_recalls_with_filters(self):
        """Test drug recall search with multiple filters."""
        with patch(
            "biomcp.openfda.drug_recalls.make_openfda_request",
            new_callable=AsyncMock,
        ) as mock_request:
            mock_request.return_value = (MOCK_RECALLS_SEARCH, None)

            result = await search_drug_recalls(
                drug="metformin",
                recall_class="2",
                status="ongoing",
                reason="contamination",
                since_date="20230101",
                limit=5,
                api_key="test-key",
            )

            assert "Drug Recall" in result or "FDA Drug Recall" in result
            # Verify API key was passed as the 4th positional argument
            call_args = mock_request.call_args
            assert (
                call_args[0][3] == "test-key"
            )  # api_key is 4th positional arg

    @pytest.mark.asyncio
    async def test_search_drug_recalls_no_results(self):
        """Test drug recall search with no results."""
        with patch(
            "biomcp.openfda.drug_recalls.make_openfda_request",
            new_callable=AsyncMock,
        ) as mock_request:
            mock_request.return_value = ({"results": []}, None)

            result = await search_drug_recalls(drug="nonexistent-drug")

            assert "No drug recall records found" in result

    @pytest.mark.asyncio
    async def test_search_drug_recalls_api_error(self):
        """Test drug recall search with API error."""
        with patch(
            "biomcp.openfda.drug_recalls.make_openfda_request",
            new_callable=AsyncMock,
        ) as mock_request:
            mock_request.return_value = (None, "API rate limit exceeded")

            result = await search_drug_recalls(drug="test")

            assert "Error searching drug recalls" in result
            assert "API rate limit exceeded" in result

    @pytest.mark.asyncio
    async def test_get_drug_recall_success(self):
        """Test getting specific drug recall details."""
        with patch(
            "biomcp.openfda.drug_recalls.make_openfda_request",
            new_callable=AsyncMock,
        ) as mock_request:
            mock_request.return_value = (MOCK_RECALL_DETAIL, None)

            result = await get_drug_recall("D-0001-2023")

            assert "Drug Recall" in result or "D-0001-2023" in result
            assert "D-0001-2023" in result
            # Check for key details in the output (formats may vary)
            assert "product" in result.lower() or "valsartan" in result.lower()

    @pytest.mark.asyncio
    async def test_get_drug_recall_not_found(self):
        """Test getting drug recall that doesn't exist."""
        with patch(
            "biomcp.openfda.drug_recalls.make_openfda_request",
            new_callable=AsyncMock,
        ) as mock_request:
            mock_request.return_value = ({"results": []}, None)

            result = await get_drug_recall("INVALID-RECALL")

            assert "No recall record found" in result
            assert "INVALID-RECALL" in result

    @pytest.mark.asyncio
    async def test_get_drug_recall_with_api_key(self):
        """Test getting drug recall with API key."""
        with patch(
            "biomcp.openfda.drug_recalls.make_openfda_request",
            new_callable=AsyncMock,
        ) as mock_request:
            mock_request.return_value = (MOCK_RECALL_DETAIL, None)

            result = await get_drug_recall(
                "D-0001-2023",
                api_key="test-api-key",
            )

            assert "Drug Recall" in result or "D-0001-2023" in result
            # Verify API key was passed as the 4th positional argument
            call_args = mock_request.call_args
            assert (
                call_args[0][3] == "test-api-key"
            )  # api_key is 4th positional arg

    @pytest.mark.asyncio
    async def test_recall_class_validation(self):
        """Test that recall class is validated."""
        with patch(
            "biomcp.openfda.drug_recalls.make_openfda_request",
            new_callable=AsyncMock,
        ) as mock_request:
            mock_request.return_value = (MOCK_RECALLS_SEARCH, None)

            # Valid recall classes
            for recall_class in ["1", "2", "3"]:
                result = await search_drug_recalls(recall_class=recall_class)
                assert "Drug Recall" in result or "FDA Drug Recall" in result

            # Test with Class I, II, III format
            result = await search_drug_recalls(recall_class="Class I")
            call_args = mock_request.call_args
            params = call_args[0][1]  # params is 2nd positional arg
            assert 'classification:"Class I"' in params["search"]

    @pytest.mark.asyncio
    async def test_recall_status_mapping(self):
        """Test that recall status is properly mapped."""
        with patch(
            "biomcp.openfda.drug_recalls.make_openfda_request",
            new_callable=AsyncMock,
        ) as mock_request:
            mock_request.return_value = (MOCK_RECALLS_SEARCH, None)

            # Test ongoing status
            await search_drug_recalls(status="ongoing")
            call_args = mock_request.call_args
            params = call_args[0][1]  # params is 2nd positional arg
            assert "Ongoing" in params["search"]

            # Test completed status
            await search_drug_recalls(status="completed")
            call_args = mock_request.call_args
            params = call_args[0][1]  # params is 2nd positional arg
            assert "Completed" in params["search"]

    @pytest.mark.asyncio
    async def test_search_drug_recalls_pagination(self):
        """Test drug recall search pagination."""
        with patch(
            "biomcp.openfda.drug_recalls.make_openfda_request",
            new_callable=AsyncMock,
        ) as mock_request:
            mock_response = {
                "meta": {"results": {"total": 150}},
                "results": MOCK_RECALLS_SEARCH["results"],
            }
            mock_request.return_value = (mock_response, None)

            result = await search_drug_recalls(
                drug="aspirin",
                limit=10,
                skip=30,
            )

            # Check for total count instead of specific pagination format
            assert "150" in result
            # Verify skip parameter was passed
            call_args = mock_request.call_args
            assert (
                call_args[0][1]["skip"] == "30"
            )  # params is 2nd positional arg, value is string

    @pytest.mark.asyncio
    async def test_date_filtering(self):
        """Test that date filtering works correctly."""
        with patch(
            "biomcp.openfda.drug_recalls.make_openfda_request",
            new_callable=AsyncMock,
        ) as mock_request:
            mock_request.return_value = (MOCK_RECALLS_SEARCH, None)

            await search_drug_recalls(
                since_date="20230615",
            )

            # Check that date was properly formatted in query
            call_args = mock_request.call_args
            params = call_args[0][1]  # params is 2nd positional arg
            assert "recall_initiation_date" in params["search"]
            assert "[2023-06-15 TO *]" in params["search"]

```

--------------------------------------------------------------------------------
/src/biomcp/openfda/validation.py:
--------------------------------------------------------------------------------

```python
"""Validation functions for OpenFDA API responses."""

import logging
from typing import Any

from .exceptions import OpenFDAValidationError

logger = logging.getLogger(__name__)


def validate_fda_response(
    response: dict[str, Any],
    required_fields: list[str] | None = None,
    response_type: str = "generic",
) -> bool:
    """
    Validate FDA API response structure.

    Args:
        response: The FDA API response dictionary
        required_fields: List of required top-level fields
        response_type: Type of response for specific validation

    Returns:
        True if valid

    Raises:
        OpenFDAValidationError: If validation fails
    """
    if not isinstance(response, dict):
        raise OpenFDAValidationError(
            f"Expected dict response, got {type(response).__name__}"
        )

    # Default required fields for most FDA responses
    if required_fields is None:
        required_fields = ["results"] if "results" in response else []

    # Check required fields
    missing_fields = [
        field for field in required_fields if field not in response
    ]
    if missing_fields:
        raise OpenFDAValidationError(
            f"Missing required fields in FDA response: {', '.join(missing_fields)}"
        )

    # Type-specific validation
    if response_type == "search":
        validate_search_response(response)
    elif response_type == "detail":
        validate_detail_response(response)

    return True


def validate_search_response(response: dict[str, Any]) -> bool:
    """
    Validate FDA search response structure.

    Args:
        response: FDA search response

    Returns:
        True if valid

    Raises:
        OpenFDAValidationError: If validation fails
    """
    # Search responses should have results array
    if "results" not in response:
        raise OpenFDAValidationError("Search response missing 'results' field")

    if not isinstance(response["results"], list):
        raise OpenFDAValidationError(
            f"Expected 'results' to be a list, got {type(response['results']).__name__}"
        )

    # If meta is present, validate it
    if "meta" in response:
        validate_meta_field(response["meta"])

    return True


def validate_detail_response(response: dict[str, Any]) -> bool:
    """
    Validate FDA detail response structure.

    Args:
        response: FDA detail response

    Returns:
        True if valid

    Raises:
        OpenFDAValidationError: If validation fails
    """
    # Detail responses usually have a single result
    if "results" in response:
        if not isinstance(response["results"], list):
            raise OpenFDAValidationError(
                f"Expected 'results' to be a list, got {type(response['results']).__name__}"
            )

        if len(response["results"]) == 0:
            # Empty results is valid (not found)
            return True

        if len(response["results"]) > 1:
            logger.warning(
                f"Detail response contains {len(response['results'])} results, expected 1"
            )

    return True


def validate_meta_field(meta: dict[str, Any]) -> bool:
    """
    Validate FDA response meta field.

    Args:
        meta: Meta field from FDA response

    Returns:
        True if valid

    Raises:
        OpenFDAValidationError: If validation fails
    """
    if not isinstance(meta, dict):
        raise OpenFDAValidationError(
            f"Expected 'meta' to be a dict, got {type(meta).__name__}"
        )

    # Check for results metadata
    if "results" in meta:
        results_meta = meta["results"]
        if not isinstance(results_meta, dict):
            raise OpenFDAValidationError(
                f"Expected 'meta.results' to be a dict, got {type(results_meta).__name__}"
            )

        # Validate pagination fields if present
        for field in ["skip", "limit", "total"]:
            if field in results_meta and not isinstance(
                results_meta[field], int | float
            ):
                raise OpenFDAValidationError(
                    f"Expected 'meta.results.{field}' to be numeric, "
                    f"got {type(results_meta[field]).__name__}"
                )

    return True


def validate_adverse_event(event: dict[str, Any]) -> bool:
    """
    Validate an adverse event record.

    Args:
        event: Adverse event record

    Returns:
        True if valid

    Raises:
        OpenFDAValidationError: If validation fails
    """
    if not isinstance(event, dict):
        raise OpenFDAValidationError(
            f"Expected adverse event to be a dict, got {type(event).__name__}"
        )

    # Key fields that should be present (but may be null)
    important_fields = ["patient", "safetyreportid"]

    for field in important_fields:
        if field not in event:
            logger.warning(f"Adverse event missing expected field: {field}")

    return True


def validate_drug_label(label: dict[str, Any]) -> bool:
    """
    Validate a drug label record.

    Args:
        label: Drug label record

    Returns:
        True if valid

    Raises:
        OpenFDAValidationError: If validation fails
    """
    if not isinstance(label, dict):
        raise OpenFDAValidationError(
            f"Expected drug label to be a dict, got {type(label).__name__}"
        )

    # Labels should have OpenFDA section
    if "openfda" not in label:
        logger.warning("Drug label missing 'openfda' section")

    # Should have at least one section
    label_sections = [
        "indications_and_usage",
        "contraindications",
        "warnings_and_precautions",
        "adverse_reactions",
        "dosage_and_administration",
    ]

    has_section = any(section in label for section in label_sections)
    if not has_section:
        logger.warning("Drug label has no standard sections")

    return True


def validate_device_event(event: dict[str, Any]) -> bool:
    """
    Validate a device event record.

    Args:
        event: Device event record

    Returns:
        True if valid

    Raises:
        OpenFDAValidationError: If validation fails
    """
    if not isinstance(event, dict):
        raise OpenFDAValidationError(
            f"Expected device event to be a dict, got {type(event).__name__}"
        )

    # Device events should have MDR report key
    if "mdr_report_key" not in event:
        logger.warning("Device event missing 'mdr_report_key'")

    # Should have device information
    if "device" not in event and "devices" not in event:
        logger.warning("Device event missing device information")

    return True


def validate_recall(recall: dict[str, Any]) -> bool:
    """
    Validate a recall record.

    Args:
        recall: Recall record

    Returns:
        True if valid

    Raises:
        OpenFDAValidationError: If validation fails
    """
    if not isinstance(recall, dict):
        raise OpenFDAValidationError(
            f"Expected recall to be a dict, got {type(recall).__name__}"
        )

    # Required fields for recalls
    required = ["recall_number", "classification", "product_description"]

    for field in required:
        if field not in recall:
            logger.warning(f"Recall missing required field: {field}")

    # Validate classification if present
    if "classification" in recall:
        valid_classes = ["Class I", "Class II", "Class III", "1", "2", "3"]
        if recall["classification"] not in valid_classes:
            logger.warning(
                f"Invalid recall classification: {recall['classification']}"
            )

    return True


def sanitize_response(response: dict[str, Any]) -> dict[str, Any]:
    """
    Sanitize FDA response to handle common issues.

    Args:
        response: Raw FDA response

    Returns:
        Sanitized response
    """
    if not response:
        return {}

    # Handle fields that can be string or list
    if "results" in response and isinstance(response["results"], list):
        for result in response["results"]:
            if isinstance(result, dict):
                # Fields that can be string or list
                polymorphic_fields = [
                    "source_type",
                    "remedial_action",
                    "medical_specialty_description",
                    "manufacturer_name",
                    "brand_name",
                    "generic_name",
                ]

                for field in polymorphic_fields:
                    if field in result:
                        value = result[field]
                        # Ensure consistent list format
                        if not isinstance(value, list):
                            result[field] = [value] if value else []

    return response

```
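
A minimal usage sketch of how the validators above compose, assuming a made-up openFDA-style payload (not real FDA data); the module path follows the file location shown above.

```python
# Hypothetical payload, illustrative only: "manufacturer_name" is a string here,
# which sanitize_response() normalizes to a list before validation.
from biomcp.openfda.validation import sanitize_response, validate_fda_response

raw = {
    "meta": {"results": {"skip": 0, "limit": 1, "total": 42}},
    "results": [
        {
            "recall_number": "D-0001-2023",
            "classification": "Class II",
            "product_description": "Example product",
            "manufacturer_name": "Acme Pharma",
        }
    ],
}

cleaned = sanitize_response(raw)  # polymorphic string fields become lists
assert cleaned["results"][0]["manufacturer_name"] == ["Acme Pharma"]
validate_fda_response(cleaned, response_type="search")  # raises OpenFDAValidationError on bad shape
```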

--------------------------------------------------------------------------------
/src/biomcp/openfda/input_validation.py:
--------------------------------------------------------------------------------

```python
"""
Input validation and sanitization for OpenFDA API requests.

This module provides security-focused input validation to prevent injection attacks
and ensure data integrity for all FDA API requests.
"""

import logging
import re
from typing import Any

logger = logging.getLogger(__name__)

# Maximum lengths for different input types
MAX_DRUG_NAME_LENGTH = 100
MAX_REACTION_LENGTH = 200
MAX_GENERAL_QUERY_LENGTH = 500
MAX_DATE_LENGTH = 10

# Patterns for validation
SAFE_CHARS_PATTERN = re.compile(r"^[a-zA-Z0-9\s\-\.\,\(\)\/\*]+$")
DATE_PATTERN = re.compile(r"^\d{4}-\d{2}-\d{2}$")
# Include SQL comment pattern -- and other injection patterns
INJECTION_CHARS = re.compile(r"[<>\"\';&|\\`${}]|--")


def sanitize_input(
    value: str | None, max_length: int = MAX_GENERAL_QUERY_LENGTH
) -> str | None:
    """
    Sanitize user input to prevent injection attacks.

    Args:
        value: Input string to sanitize
        max_length: Maximum allowed length

    Returns:
        Sanitized string or None if input is invalid
    """
    if not value:
        return None

    # Convert to string and strip whitespace
    value = str(value).strip()

    # Check length
    if len(value) > max_length:
        logger.warning(
            f"Input truncated from {len(value)} to {max_length} characters"
        )
        value = value[:max_length]

    # Remove potential injection characters
    cleaned = INJECTION_CHARS.sub("", value)

    # Warn if characters were removed
    if cleaned != value:
        logger.warning("Removed potentially dangerous characters from input")

    # Normalize whitespace
    cleaned = " ".join(cleaned.split())

    return cleaned if cleaned else None


def validate_drug_name(drug: str | None) -> str | None:
    """
    Validate and sanitize drug name input.

    Args:
        drug: Drug name to validate

    Returns:
        Validated drug name or None
    """
    if not drug:
        return None

    sanitized = sanitize_input(drug, MAX_DRUG_NAME_LENGTH)

    if not sanitized:
        return None

    # Drug names should only contain alphanumeric, spaces, hyphens, and slashes
    if not re.match(r"^[a-zA-Z0-9\s\-\/\(\)]+$", sanitized):
        logger.warning(f"Invalid drug name format: {sanitized[:20]}...")
        return None

    return sanitized


def validate_date(date_str: str | None) -> str | None:
    """
    Validate date string format.

    Args:
        date_str: Date string in YYYY-MM-DD format

    Returns:
        Validated date string or None
    """
    if not date_str:
        return None

    sanitized = sanitize_input(date_str, MAX_DATE_LENGTH)

    if not sanitized:
        return None

    # Check date format
    if not DATE_PATTERN.match(sanitized):
        logger.warning(f"Invalid date format: {sanitized}")
        return None

    # Basic date validation
    try:
        year, month, day = map(int, sanitized.split("-"))
        if not (1900 <= year <= 2100 and 1 <= month <= 12 and 1 <= day <= 31):
            logger.warning(f"Date out of valid range: {sanitized}")
            return None
    except (ValueError, IndexError):
        logger.warning(f"Cannot parse date: {sanitized}")
        return None

    return sanitized


def validate_limit(limit: int | None, max_limit: int = 100) -> int:
    """
    Validate and constrain limit parameter.

    Args:
        limit: Requested limit
        max_limit: Maximum allowed limit

    Returns:
        Valid limit value
    """
    if limit is None:
        return 25  # Default

    try:
        limit = int(limit)
    except (ValueError, TypeError):
        logger.warning(f"Invalid limit value: {limit}")
        return 25

    if limit < 1:
        return 1
    elif limit > max_limit:
        logger.warning(f"Limit {limit} exceeds maximum {max_limit}")
        return max_limit

    return limit


def validate_skip(skip: int | None, max_skip: int = 10000) -> int:
    """
    Validate and constrain skip/offset parameter.

    Args:
        skip: Requested skip/offset
        max_skip: Maximum allowed skip

    Returns:
        Valid skip value
    """
    if skip is None:
        return 0

    try:
        skip = int(skip)
    except (ValueError, TypeError):
        logger.warning(f"Invalid skip value: {skip}")
        return 0

    if skip < 0:
        return 0
    elif skip > max_skip:
        logger.warning(f"Skip {skip} exceeds maximum {max_skip}")
        return max_skip

    return skip


def validate_classification(classification: str | None) -> str | None:
    """
    Validate recall classification.

    Args:
        classification: Classification string (Class I, II, or III)

    Returns:
        Validated classification or None
    """
    if not classification:
        return None

    sanitized = sanitize_input(classification, 20)

    if not sanitized:
        return None

    # Normalize classification format
    sanitized = sanitized.upper()

    # Check valid classifications
    valid_classes = [
        "CLASS I",
        "CLASS II",
        "CLASS III",
        "I",
        "II",
        "III",
        "1",
        "2",
        "3",
    ]

    if sanitized not in valid_classes:
        logger.warning(f"Invalid classification: {sanitized}")
        return None

    # Normalize to standard format. The full "CLASS X" forms are handled
    # explicitly because str.title() would turn "CLASS II" into "Class Ii".
    if sanitized in ["I", "1", "CLASS I"]:
        return "Class I"
    elif sanitized in ["II", "2", "CLASS II"]:
        return "Class II"
    elif sanitized in ["III", "3", "CLASS III"]:
        return "Class III"

    return None  # Unreachable: every entry in valid_classes is handled above


def validate_status(status: str | None) -> str | None:
    """
    Validate status parameter.

    Args:
        status: Status string

    Returns:
        Validated status or None
    """
    if not status:
        return None

    sanitized = sanitize_input(status, 50)

    if not sanitized:
        return None

    # Normalize status
    sanitized = sanitized.lower()

    # Check valid statuses
    valid_statuses = [
        "ongoing",
        "terminated",
        "completed",
        "current",
        "resolved",
    ]

    if sanitized not in valid_statuses:
        logger.warning(f"Invalid status: {sanitized}")
        return None

    return sanitized.title()  # "ongoing" -> "Ongoing"


def validate_boolean(value: Any) -> bool | None:
    """
    Validate boolean parameter.

    Args:
        value: Boolean-like value

    Returns:
        Boolean value or None
    """
    if value is None:
        return None

    if isinstance(value, bool):
        return value

    if isinstance(value, str):
        value = value.lower().strip()
        if value in ["true", "1", "yes", "y"]:
            return True
        elif value in ["false", "0", "no", "n"]:
            return False

    return None


def validate_api_key(api_key: str | None) -> str | None:
    """
    Validate API key format.

    Args:
        api_key: API key string

    Returns:
        Validated API key or None
    """
    if not api_key:
        return None

    # API keys should be alphanumeric with possible hyphens
    if not re.match(r"^[a-zA-Z0-9\-_]+$", api_key):
        logger.warning("Invalid API key format")
        return None

    # Check reasonable length
    if len(api_key) < 10 or len(api_key) > 100:
        logger.warning("API key length out of expected range")
        return None

    return api_key


def _validate_parameter(key: str, value: Any) -> Any:
    """Validate a single parameter based on its key."""
    if key in ["drug", "brand", "generic"]:
        return validate_drug_name(value)
    elif key in ["limit"]:
        return validate_limit(value)
    elif key in ["skip", "offset"]:
        return validate_skip(value)
    elif key in ["classification"]:
        return validate_classification(value)
    elif key in ["status"]:
        return validate_status(value)
    elif key in ["serious", "death", "ongoing"]:
        return validate_boolean(value)
    elif key in ["api_key"]:
        return validate_api_key(value)
    elif "date" in key.lower():
        return validate_date(value)
    else:
        return sanitize_input(value)


def build_safe_query(params: dict[str, Any]) -> dict[str, Any]:
    """
    Build a safe query dictionary with validated parameters.

    Args:
        params: Raw parameters dictionary

    Returns:
        Dictionary with validated parameters
    """
    safe_params = {}

    for key, value in params.items():
        if value is None:
            continue

        # Validate key name
        if not re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", key):
            logger.warning(f"Skipping invalid parameter key: {key}")
            continue

        # Validate parameter value
        validated = _validate_parameter(key, value)

        if validated is not None:
            safe_params[key] = validated

    return safe_params

```
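
A minimal sketch of `build_safe_query` applied to a hypothetical set of raw parameters (the values below are illustrative, not from a real request), showing how each rule above is applied.

```python
from biomcp.openfda.input_validation import build_safe_query

raw_params = {
    "drug": "  valsartan; DROP TABLE--  ",  # injection characters are stripped
    "limit": "500",                         # clamped to the default max of 100
    "classification": "2",                  # normalized to "Class II"
    "status": "ongoing",                    # normalized to "Ongoing"
    "since_date": "2023-06-15",             # "date" in the key triggers date validation
    "bad key!": "ignored",                  # invalid key names are skipped
    "reason": None,                         # None values are dropped
}

safe = build_safe_query(raw_params)
# Per the rules above, safe should be:
# {"drug": "valsartan DROP TABLE", "limit": 100, "classification": "Class II",
#  "status": "Ongoing", "since_date": "2023-06-15"}
```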

--------------------------------------------------------------------------------
/tests/tdd/openfda/test_device_events.py:
--------------------------------------------------------------------------------

```python
"""
Unit tests for OpenFDA device events integration.
"""

from unittest.mock import patch

import pytest

from biomcp.openfda.device_events import get_device_event, search_device_events


@pytest.mark.asyncio
async def test_search_device_events_by_device():
    """Test searching device events by device name."""
    mock_response = {
        "meta": {"results": {"total": 3}},
        "results": [
            {
                "event_type": "M",
                "date_received": "2024-01-15",
                "device": [
                    {
                        "brand_name": "FoundationOne CDx",
                        "manufacturer_d_name": "Foundation Medicine",
                        "model_number": "F1CDX",
                        "device_problem_text": ["False negative result"],
                        "openfda": {
                            "device_class": "2",
                            "medical_specialty_description": ["Pathology"],
                            "product_code": "PQP",
                        },
                    }
                ],
                "event_description": "Device failed to detect known mutation",
                "mdr_report_key": "MDR123456",
            }
        ],
    }

    with patch(
        "biomcp.openfda.device_events.make_openfda_request"
    ) as mock_request:
        mock_request.return_value = (mock_response, None)

        result = await search_device_events(device="FoundationOne", limit=10)

        # Verify request
        mock_request.assert_called_once()
        call_args = mock_request.call_args
        assert "FoundationOne" in call_args[0][1]["search"]
        # When searching for a specific device, genomic filter is not needed
        # The device search itself is sufficient

        # Check output
        assert "FDA Device Adverse Event Reports" in result
        assert "FoundationOne CDx" in result
        assert "Foundation Medicine" in result
        assert "False negative result" in result
        assert "Malfunction" in result
        assert "MDR123456" in result


@pytest.mark.asyncio
async def test_search_device_events_genomics_filter():
    """Test that genomics filter is applied by default."""
    mock_response = {"meta": {"results": {"total": 5}}, "results": []}

    with patch(
        "biomcp.openfda.device_events.make_openfda_request"
    ) as mock_request:
        mock_request.return_value = (mock_response, None)

        await search_device_events(manufacturer="Illumina", genomics_only=True)

        # Verify genomic device codes are in search
        call_args = mock_request.call_args
        search_query = call_args[0][1]["search"]
        # Should contain at least one genomic product code
        assert any(
            code in search_query for code in ["OOI", "PQP", "OYD", "NYE"]
        )


@pytest.mark.asyncio
async def test_search_device_events_no_genomics_filter():
    """Test searching without genomics filter."""
    mock_response = {"meta": {"results": {"total": 10}}, "results": []}

    with patch(
        "biomcp.openfda.device_events.make_openfda_request"
    ) as mock_request:
        mock_request.return_value = (mock_response, None)

        await search_device_events(device="pacemaker", genomics_only=False)

        # Verify no genomic product codes in search
        call_args = mock_request.call_args
        search_query = call_args[0][1]["search"]
        # Should not contain genomic product codes
        assert not any(code in search_query for code in ["OOI", "PQP", "OYD"])


@pytest.mark.asyncio
async def test_search_device_events_by_problem():
    """Test searching device events by problem description."""
    mock_response = {
        "meta": {"results": {"total": 8}},
        "results": [
            {
                "event_type": "IN",
                "device": [
                    {
                        "brand_name": "Test Device",
                        "device_problem_text": [
                            "Software malfunction",
                            "Data loss",
                        ],
                    }
                ],
                "mdr_report_key": "MDR789",
            }
        ],
    }

    with patch(
        "biomcp.openfda.device_events.make_openfda_request"
    ) as mock_request:
        mock_request.return_value = (mock_response, None)

        result = await search_device_events(problem="software malfunction")

        # Verify request
        call_args = mock_request.call_args
        assert "software malfunction" in call_args[0][1]["search"].lower()

        # Check output
        assert "Software malfunction" in result
        assert "Data loss" in result
        assert "Injury" in result  # IN = Injury


@pytest.mark.asyncio
async def test_search_device_events_no_params():
    """Test that searching without parameters returns helpful message."""
    result = await search_device_events()

    assert "Please specify" in result
    assert "device name, manufacturer, or problem" in result
    assert "Examples:" in result


@pytest.mark.asyncio
async def test_get_device_event_detail():
    """Test getting detailed device event report."""
    mock_response = {
        "results": [
            {
                "mdr_report_key": "MDR999888",
                "event_type": "D",
                "date_received": "2024-02-01",
                "date_of_event": "2024-01-20",
                "source_type": "M",
                "device": [
                    {
                        "brand_name": "Genomic Sequencer X",
                        "manufacturer_d_name": "GenTech Corp",
                        "model_number": "GSX-2000",
                        "catalog_number": "CAT123",
                        "lot_number": "LOT456",
                        "expiration_date_of_device": "2025-12-31",
                        "device_problem_text": [
                            "Critical failure",
                            "Sample contamination",
                        ],
                        "device_evaluated_by_manufacturer": "Y",
                        "openfda": {
                            "device_class": "3",
                            "medical_specialty_description": [
                                "Clinical Chemistry"
                            ],
                            "product_code": "OOI",
                        },
                    }
                ],
                "event_description": "Device failure led to incorrect cancer diagnosis",
                "manufacturer_narrative": "Investigation revealed component failure",
                "patient": [
                    {
                        "patient_age": "65",
                        "patient_sex": "F",
                        "date_of_death": "2024-01-25",
                        "life_threatening": "Y",
                    }
                ],
                "remedial_action": "Device recall initiated",
            }
        ]
    }

    with patch(
        "biomcp.openfda.device_events.make_openfda_request"
    ) as mock_request:
        mock_request.return_value = (mock_response, None)

        result = await get_device_event("MDR999888")

        # Verify request
        mock_request.assert_called_once()
        call_args = mock_request.call_args
        assert "MDR999888" in call_args[0][1]["search"]

        # Check detailed output
        assert "MDR999888" in result
        assert "Death" in result
        assert "Genomic Sequencer X" in result
        assert "GenTech Corp" in result
        assert "GSX-2000" in result
        assert "Critical failure" in result
        assert "Sample contamination" in result
        assert "Class III" in result
        assert "65 years" in result
        assert "Female" in result
        assert "2024-01-25" in result
        assert "Life-threatening" in result
        assert "Device recall initiated" in result
        assert "Investigation revealed component failure" in result


@pytest.mark.asyncio
async def test_get_device_event_not_found():
    """Test handling when device event report is not found."""
    with patch(
        "biomcp.openfda.device_events.make_openfda_request"
    ) as mock_request:
        mock_request.return_value = ({"results": []}, None)

        result = await get_device_event("NOTFOUND789")

        assert "NOTFOUND789" in result
        assert "not found" in result


@pytest.mark.asyncio
async def test_search_device_events_error():
    """Test error handling in device event search."""
    with patch(
        "biomcp.openfda.device_events.make_openfda_request"
    ) as mock_request:
        mock_request.return_value = (None, "Network timeout")

        result = await search_device_events(device="test")

        assert "Error searching device events" in result
        assert "Network timeout" in result

```

--------------------------------------------------------------------------------
/docs/reference/quick-reference.md:
--------------------------------------------------------------------------------

```markdown
# BioMCP Quick Reference

## Command Cheat Sheet

### Installation

```bash
# Install BioMCP
uv tool install biomcp

# Update to latest version
uv tool install biomcp --force

# Check version
biomcp --version
```

### Article Search Commands

```bash
# Basic gene search
biomcp article search --gene BRAF

# Multiple filters
biomcp article search \
  --gene EGFR --disease "lung cancer" \
  --chemical erlotinib

# Exclude preprints
biomcp article search --gene TP53 --no-preprints

# OR logic in keywords
biomcp article search --gene PTEN \
  --keyword "R173|Arg173|p.R173"

# Get specific article
biomcp article get 38768446  # PMID
biomcp article get "10.1101/2024.01.20.23288905"  # DOI
```

### Trial Search Commands

```bash
# Basic disease search
biomcp trial search \
  --condition melanoma --status RECRUITING

# Location-based search (requires coordinates)
biomcp trial search --condition cancer \
  --latitude 40.7128 --longitude -74.0060 --distance 50

# Phase-specific search
biomcp trial search \
  --condition "breast cancer" --phase PHASE3

# Using NCI source (requires API key)
biomcp trial search --condition melanoma --source nci \
  --required-mutations "BRAF V600E" --api-key $NCI_API_KEY
```

### Variant Commands

```bash
# Search by gene
biomcp variant search \
  --gene BRCA1 --significance pathogenic

# Search by HGVS
biomcp variant search --hgvs "NM_007294.4:c.5266dupC"

# Search by frequency
biomcp variant search --gene TP53 \
  --max-frequency 0.01 --min-cadd 20

# Get variant details
biomcp variant get rs121913529
biomcp variant get "NM_007294.4:c.5266dupC"

# Predict effects (requires AlphaGenome key)
biomcp variant predict chr7 140753336 A T --tissue UBERON:0002367
```

### Gene/Drug/Disease Commands

```bash
# Get gene information
biomcp gene get TP53
biomcp gene get BRAF

# Get drug information
biomcp drug get imatinib
biomcp drug get pembrolizumab

# Get disease information
biomcp disease get melanoma
biomcp disease get "non-small cell lung cancer"
```

### NCI Commands (Require API Key)

```bash
# Search organizations
biomcp organization search --name "MD Anderson" \
  --city Houston --state TX --api-key $NCI_API_KEY

# Search interventions
biomcp intervention search --name pembrolizumab \
  --intervention-type Drug --api-key $NCI_API_KEY

# Search biomarkers
biomcp biomarker search --gene EGFR \
  --biomarker-type mutation --api-key $NCI_API_KEY
```

### Health Check

```bash
# Full health check
biomcp health check

# Check APIs only
biomcp health check --apis-only

# Verbose output
biomcp health check --verbose
```

## Common Parameter Reference

### Search Parameters

| Parameter  | Description   | Example         |
| ---------- | ------------- | --------------- |
| `--limit`  | Max results   | `--limit 20`    |
| `--page`   | Page number   | `--page 2`      |
| `--format` | Output format | `--format json` |

### Trial Status Values

| Status                  | Description            |
| ----------------------- | ---------------------- |
| `RECRUITING`            | Currently enrolling    |
| `ACTIVE_NOT_RECRUITING` | Ongoing, not enrolling |
| `NOT_YET_RECRUITING`    | Will start recruiting  |
| `COMPLETED`             | Trial has ended        |
| `SUSPENDED`             | Temporarily halted     |
| `TERMINATED`            | Stopped early          |

### Trial Phase Values

| Phase          | Description   |
| -------------- | ------------- |
| `EARLY_PHASE1` | Early Phase 1 |
| `PHASE1`       | Phase 1       |
| `PHASE2`       | Phase 2       |
| `PHASE3`       | Phase 3       |
| `PHASE4`       | Phase 4       |

### Clinical Significance

| Value                    | Description             |
| ------------------------ | ----------------------- |
| `pathogenic`             | Causes disease          |
| `likely_pathogenic`      | Probably causes disease |
| `uncertain_significance` | Unknown impact          |
| `likely_benign`          | Probably harmless       |
| `benign`                 | Does not cause disease  |

## Gene Symbol Quick Lookup

### Common Gene Aliases

| Common Name | Official Symbol |
| ----------- | --------------- |
| HER2        | ERBB2           |
| HER3        | ERBB3           |
| EGFR        | EGFR            |
| ALK         | ALK             |
| c-MET       | MET             |
| PD-1        | PDCD1           |
| PD-L1       | CD274           |
| CTLA-4      | CTLA4           |

## Location Coordinates

### Major US Cities

| City          | Latitude | Longitude |
| ------------- | -------- | --------- |
| New York      | 40.7128  | -74.0060  |
| Los Angeles   | 34.0522  | -118.2437 |
| Chicago       | 41.8781  | -87.6298  |
| Houston       | 29.7604  | -95.3698  |
| Philadelphia  | 39.9526  | -75.1652  |
| Boston        | 42.3601  | -71.0589  |
| Atlanta       | 33.7490  | -84.3880  |
| Miami         | 25.7617  | -80.1918  |
| Seattle       | 47.6062  | -122.3321 |
| San Francisco | 37.7749  | -122.4194 |

## Environment Variables

```bash
# API Keys
export NCI_API_KEY="your-nci-key"
export ALPHAGENOME_API_KEY="your-alphagenome-key"
export CBIO_TOKEN="your-cbioportal-token"

# Configuration
export BIOMCP_LOG_LEVEL="DEBUG"
export BIOMCP_CACHE_DIR="/path/to/cache"
export BIOMCP_TIMEOUT=300
export BIOMCP_MAX_CONCURRENT=5
```

## Output Format Examples

### JSON Output

```bash
biomcp article search --gene BRAF --format json | jq '.articles[0]'
```

### Extract Specific Fields

```bash
# Get PMIDs only
biomcp article search --gene TP53 --format json | \
  jq -r '.articles[].pmid'

# Get trial NCT IDs
biomcp trial search --condition melanoma --format json | \
  jq -r '.trials[].nct_id'
```

### Save to File

```bash
biomcp article search --gene BRCA1 --format json > results.json
```

## MCP Tool Names

### Core Tools

- `search` - Unified search
- `fetch` - Get details
- `think` - Sequential thinking

### Article Tools

- `article_searcher`
- `article_getter`

### Trial Tools

- `trial_searcher`
- `trial_getter`
- `trial_protocol_getter`
- `trial_references_getter`
- `trial_outcomes_getter`
- `trial_locations_getter`

### Variant Tools

- `variant_searcher`
- `variant_getter`
- `alphagenome_predictor`

### BioThings Tools

- `gene_getter`
- `disease_getter`
- `drug_getter`

### NCI Tools

- `nci_organization_searcher`
- `nci_organization_getter`
- `nci_intervention_searcher`
- `nci_intervention_getter`
- `nci_biomarker_searcher`
- `nci_disease_searcher`
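
### Calling a Tool (Sketch)

A rough sketch of invoking one of these tools through the MCP Python SDK over stdio. The server launch command (`biomcp run`) and the `article_searcher` argument names are assumptions; verify them against the MCP Tools Reference.

```python
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client


async def main():
    # Assumes the server starts with `biomcp run`; adjust to your install.
    server = StdioServerParameters(command="biomcp", args=["run"])
    async with stdio_client(server) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            await session.list_tools()  # discover available tool names and schemas
            result = await session.call_tool(
                "article_searcher",
                {"genes": ["BRAF"]},  # hypothetical arguments; check the tool schema
            )
            print(result)


asyncio.run(main())
```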

## Query Language Syntax

### Unified Search Examples

```
gene:BRAF AND disease:melanoma
gene:EGFR AND (mutation OR variant)
drugs.tradename:gleevec
diseases.name:"lung cancer"
chemicals.mesh:D000069439
```

### Field Prefixes

- `gene:` - Gene symbol
- `disease:` - Disease/condition
- `chemical:` - Drug/chemical
- `variant:` - Genetic variant
- `pmid:` - PubMed ID
- `doi:` - Digital Object Identifier (DOI)

## Common Workflows

### Find Articles About a Mutation

```bash
# Step 1: Search articles
biomcp article search --gene BRAF --keyword "V600E|p.V600E"

# Step 2: Get full article
biomcp article get [PMID]
```

### Check Trial Eligibility

```bash
# Step 1: Search trials
biomcp trial search --condition melanoma --status RECRUITING

# Step 2: Get trial details
biomcp trial get NCT03006926
```

### Variant Analysis

```bash
# Step 1: Search variant
biomcp variant search --gene BRCA1 --significance pathogenic

# Step 2: Get variant details
biomcp variant get rs80357906

# Step 3: Search related articles
biomcp article search --gene BRCA1 --variant rs80357906
```

## Error Code Quick Reference

### Common HTTP Codes

- `400` - Bad request (check parameters)
- `401` - Unauthorized (check API key)
- `404` - Not found (verify ID)
- `429` - Rate limited (wait and retry)
- `500` - Server error (retry later)

### BioMCP Error Patterns

- `1xxx` - Article errors
- `2xxx` - Trial errors
- `3xxx` - Variant errors
- `4xxx` - Gene/drug/disease errors
- `5xxx` - Authentication errors
- `6xxx` - Rate limit errors
- `7xxx` - Validation errors

## Tips and Tricks

### 1. Use Official Gene Symbols

```bash
# Wrong
biomcp article search --gene HER2  # ❌

# Right
biomcp article search --gene ERBB2  # ✅
```

### 2. Combine Multiple Searches

```bash
# Search multiple databases in parallel
(
  biomcp article search --gene BRAF --format json > articles.json &
  biomcp trial search --condition melanoma --format json > trials.json &
  biomcp variant search --gene BRAF --format json > variants.json &
  wait
)
```

### 3. Process Large Results

```bash
# Paginate through results
for page in {1..10}; do
  biomcp article search --gene TP53 --page $page --limit 100
done
```

### 4. Debug API Issues

```bash
# Enable debug logging
export BIOMCP_LOG_LEVEL=DEBUG
biomcp article search --gene BRAF --verbose
```

## Getting Help

```bash
# General help
biomcp --help

# Command help
biomcp article search --help

# Check documentation
open https://biomcp.org/

# Report issues
open https://github.com/genomoncology/biomcp/issues
```

```

--------------------------------------------------------------------------------
/tests/tdd/test_retry.py:
--------------------------------------------------------------------------------

```python
"""Tests for retry logic with exponential backoff."""

import asyncio
from unittest.mock import AsyncMock, MagicMock, patch

import httpx
import pytest

from biomcp.retry import (
    RetryableHTTPError,
    RetryConfig,
    calculate_delay,
    is_retryable_exception,
    is_retryable_status,
    retry_with_backoff,
    with_retry,
)


def test_calculate_delay_exponential_backoff():
    """Test that delay increases exponentially."""
    config = RetryConfig(initial_delay=1.0, exponential_base=2.0, jitter=False)

    # Test exponential increase
    assert calculate_delay(0, config) == 1.0  # 1 * 2^0
    assert calculate_delay(1, config) == 2.0  # 1 * 2^1
    assert calculate_delay(2, config) == 4.0  # 1 * 2^2
    assert calculate_delay(3, config) == 8.0  # 1 * 2^3


def test_calculate_delay_max_cap():
    """Test that delay is capped at max_delay."""
    config = RetryConfig(
        initial_delay=1.0, exponential_base=2.0, max_delay=5.0, jitter=False
    )

    # Test that delay is capped
    assert calculate_delay(0, config) == 1.0
    assert calculate_delay(1, config) == 2.0
    assert calculate_delay(2, config) == 4.0
    assert calculate_delay(3, config) == 5.0  # Capped at max_delay
    assert calculate_delay(10, config) == 5.0  # Still capped


def test_calculate_delay_with_jitter():
    """Test that jitter adds randomness to delay."""
    config = RetryConfig(initial_delay=10.0, jitter=True)

    # Generate multiple delays and check they're different
    delays = [calculate_delay(1, config) for _ in range(10)]

    # All should be around 20.0 (10 * 2^1) with jitter
    for delay in delays:
        assert 18.0 <= delay <= 22.0  # Within 10% jitter range

    # Should have some variation
    assert len(set(delays)) > 1


def test_is_retryable_exception():
    """Test exception retryability check."""
    config = RetryConfig(retryable_exceptions=(ConnectionError, TimeoutError))

    # Retryable exceptions
    assert is_retryable_exception(ConnectionError("test"), config)
    assert is_retryable_exception(TimeoutError("test"), config)

    # Non-retryable exceptions
    assert not is_retryable_exception(ValueError("test"), config)
    assert not is_retryable_exception(KeyError("test"), config)


def test_is_retryable_status():
    """Test HTTP status code retryability check."""
    config = RetryConfig(retryable_status_codes=(429, 502, 503, 504))

    # Retryable status codes
    assert is_retryable_status(429, config)
    assert is_retryable_status(502, config)
    assert is_retryable_status(503, config)
    assert is_retryable_status(504, config)

    # Non-retryable status codes
    assert not is_retryable_status(200, config)
    assert not is_retryable_status(404, config)
    assert not is_retryable_status(500, config)


@pytest.mark.asyncio
async def test_with_retry_decorator_success():
    """Test retry decorator with successful call."""
    call_count = 0

    @with_retry(RetryConfig(max_attempts=3))
    async def test_func():
        nonlocal call_count
        call_count += 1
        return "success"

    result = await test_func()
    assert result == "success"
    assert call_count == 1  # Should succeed on first try


@pytest.mark.asyncio
async def test_with_retry_decorator_eventual_success():
    """Test retry decorator with eventual success."""
    call_count = 0

    @with_retry(
        RetryConfig(
            max_attempts=3,
            initial_delay=0.01,  # Fast for testing
            retryable_exceptions=(ValueError,),
        )
    )
    async def test_func():
        nonlocal call_count
        call_count += 1
        if call_count < 3:
            raise ValueError("Transient error")
        return "success"

    result = await test_func()
    assert result == "success"
    assert call_count == 3


@pytest.mark.asyncio
async def test_with_retry_decorator_max_attempts_exceeded():
    """Test retry decorator when max attempts exceeded."""
    call_count = 0

    @with_retry(
        RetryConfig(
            max_attempts=3,
            initial_delay=0.01,
            retryable_exceptions=(ConnectionError,),
        )
    )
    async def test_func():
        nonlocal call_count
        call_count += 1
        raise ConnectionError("Persistent error")

    with pytest.raises(ConnectionError, match="Persistent error"):
        await test_func()

    assert call_count == 3


@pytest.mark.asyncio
async def test_with_retry_non_retryable_exception():
    """Test retry decorator with non-retryable exception."""
    call_count = 0

    @with_retry(
        RetryConfig(max_attempts=3, retryable_exceptions=(ConnectionError,))
    )
    async def test_func():
        nonlocal call_count
        call_count += 1
        raise ValueError("Non-retryable error")

    with pytest.raises(ValueError, match="Non-retryable error"):
        await test_func()

    assert call_count == 1  # Should not retry


@pytest.mark.asyncio
async def test_retry_with_backoff_function():
    """Test retry_with_backoff function."""
    call_count = 0

    async def test_func(value):
        nonlocal call_count
        call_count += 1
        if call_count < 2:
            raise TimeoutError("Timeout")
        return f"result: {value}"

    config = RetryConfig(
        max_attempts=3,
        initial_delay=0.01,
        retryable_exceptions=(TimeoutError,),
    )

    result = await retry_with_backoff(test_func, "test", config=config)
    assert result == "result: test"
    assert call_count == 2


def test_retryable_http_error():
    """Test RetryableHTTPError."""
    error = RetryableHTTPError(503, "Service Unavailable")
    assert error.status_code == 503
    assert error.message == "Service Unavailable"
    assert str(error) == "HTTP 503: Service Unavailable"


@pytest.mark.asyncio
async def test_retry_with_delay_progression():
    """Test that retries happen with correct delay progression."""
    call_times = []

    @with_retry(
        RetryConfig(
            max_attempts=3,
            initial_delay=0.1,
            exponential_base=2.0,
            jitter=False,
            retryable_exceptions=(ValueError,),
        )
    )
    async def test_func():
        call_times.append(asyncio.get_event_loop().time())
        if len(call_times) < 3:
            raise ValueError("Retry me")
        return "success"

    result = await test_func()

    assert result == "success"
    assert len(call_times) == 3

    # Check delays between attempts (allowing some tolerance)
    first_delay = call_times[1] - call_times[0]
    second_delay = call_times[2] - call_times[1]

    assert 0.08 <= first_delay <= 0.12  # ~0.1s
    assert 0.18 <= second_delay <= 0.22  # ~0.2s


@pytest.mark.asyncio
async def test_integration_with_http_client(monkeypatch):
    """Test retry integration with HTTP client."""
    from biomcp.http_client import call_http

    # Disable connection pooling for this test
    monkeypatch.setenv("BIOMCP_USE_CONNECTION_POOL", "false")

    # Test 1: Connection error retry
    with patch(
        "biomcp.http_client_simple.httpx.AsyncClient"
    ) as mock_client_class:
        mock_client = AsyncMock()
        mock_client_class.return_value = mock_client
        mock_client.aclose = AsyncMock()  # Mock aclose method

        # Simulate connection errors then success
        call_count = 0

        async def mock_get(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            if call_count < 3:
                raise httpx.ConnectError("Connection failed")
            # Return success on third try
            mock_response = MagicMock()
            mock_response.status_code = 200
            mock_response.text = '{"result": "success"}'
            return mock_response

        mock_client.get = mock_get

        config = RetryConfig(
            max_attempts=3,
            initial_delay=0.01,
        )

        status, content = await call_http(
            "GET", "https://api.example.com/test", {}, retry_config=config
        )

        assert status == 200
        assert content == '{"result": "success"}'
        assert call_count == 3

    # Test 2: Timeout error retry
    with patch(
        "biomcp.http_client_simple.httpx.AsyncClient"
    ) as mock_client_class:
        mock_client = AsyncMock()
        mock_client_class.return_value = mock_client
        mock_client.aclose = AsyncMock()  # Mock aclose method

        # Simulate timeout errors
        mock_client.get.side_effect = httpx.TimeoutException(
            "Request timed out"
        )

        config = RetryConfig(
            max_attempts=2,
            initial_delay=0.01,
        )

        # This should raise TimeoutError after retries fail
        with pytest.raises(TimeoutError):
            await call_http(
                "GET", "https://api.example.com/test", {}, retry_config=config
            )

        assert mock_client.get.call_count == 2

```

--------------------------------------------------------------------------------
/src/biomcp/circuit_breaker.py:
--------------------------------------------------------------------------------

```python
"""Circuit breaker pattern implementation for fault tolerance."""

import asyncio
import enum
import logging
from collections.abc import Callable
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any

logger = logging.getLogger(__name__)


class CircuitState(enum.Enum):
    """Circuit breaker states."""

    CLOSED = "closed"  # Normal operation, requests pass through
    OPEN = "open"  # Circuit tripped, requests fail fast
    HALF_OPEN = "half_open"  # Testing if service recovered


@dataclass
class CircuitBreakerConfig:
    """Configuration for circuit breaker behavior."""

    failure_threshold: int = 5
    """Number of failures before opening circuit"""

    recovery_timeout: float = 60.0
    """Seconds to wait before attempting recovery"""

    success_threshold: int = 2
    """Successes needed in half-open state to close circuit"""

    expected_exception: type[Exception] | tuple[type[Exception], ...] = (
        Exception
    )
    """Exception types that count as failures"""

    exclude_exceptions: tuple[type[Exception], ...] = ()
    """Exception types that don't count as failures"""


@dataclass
class CircuitBreakerState:
    """Mutable state for a circuit breaker."""

    state: CircuitState = CircuitState.CLOSED
    failure_count: int = 0
    success_count: int = 0
    last_failure_time: datetime | None = None
    last_state_change: datetime = field(default_factory=datetime.now)
    _lock: asyncio.Lock = field(default_factory=asyncio.Lock)


class CircuitBreakerError(Exception):
    """Raised when circuit breaker is open."""

    def __init__(
        self, message: str, last_failure_time: datetime | None = None
    ):
        super().__init__(message)
        self.last_failure_time = last_failure_time


class CircuitBreaker:
    """Circuit breaker implementation."""

    def __init__(
        self,
        name: str,
        config: CircuitBreakerConfig | None = None,
    ):
        """Initialize circuit breaker.

        Args:
            name: Circuit breaker name for logging
            config: Configuration (uses defaults if not provided)
        """
        self.name = name
        self.config = config or CircuitBreakerConfig()
        self._state = CircuitBreakerState()

    async def call(
        self,
        func: Callable[..., Any],
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Execute function through circuit breaker.

        Args:
            func: Async function to execute
            *args: Positional arguments for func
            **kwargs: Keyword arguments for func

        Returns:
            Result of function call

        Raises:
            CircuitBreakerError: If circuit is open
            Exception: If function raises exception
        """
        async with self._state._lock:
            # Check if we should transition from open to half-open
            if self._state.state == CircuitState.OPEN:
                if self._should_attempt_reset():
                    self._state.state = CircuitState.HALF_OPEN
                    self._state.success_count = 0
                    self._state.last_state_change = datetime.now()
                    logger.info(
                        f"Circuit breaker '{self.name}' entering half-open state"
                    )
                else:
                    raise CircuitBreakerError(
                        f"Circuit breaker '{self.name}' is open",
                        self._state.last_failure_time,
                    )

        # Execute the function
        try:
            result = await func(*args, **kwargs)
            await self._on_success()
            return result
        except Exception as exc:
            # Record the failure (if it counts toward the threshold), then
            # always re-raise so callers see the original exception.
            await self._on_failure(exc)
            raise

    async def _on_success(self) -> None:
        """Handle successful call."""
        async with self._state._lock:
            if self._state.state == CircuitState.HALF_OPEN:
                self._state.success_count += 1
                if self._state.success_count >= self.config.success_threshold:
                    self._state.state = CircuitState.CLOSED
                    self._state.failure_count = 0
                    self._state.success_count = 0
                    self._state.last_state_change = datetime.now()
                    logger.info(
                        f"Circuit breaker '{self.name}' closed after recovery"
                    )
            elif self._state.state == CircuitState.CLOSED:
                # Reset failure count on success
                self._state.failure_count = 0

    async def _on_failure(self, exc: Exception) -> bool:
        """Handle failed call.

        Args:
            exc: The exception that was raised

        Returns:
            True if exception counts as failure
        """
        # Check if exception should be counted
        if not self._is_counted_exception(exc):
            return False

        async with self._state._lock:
            self._state.failure_count += 1
            self._state.last_failure_time = datetime.now()

            if self._state.state == CircuitState.HALF_OPEN:
                # Single failure in half-open state reopens circuit
                self._state.state = CircuitState.OPEN
                self._state.last_state_change = datetime.now()
                logger.warning(
                    f"Circuit breaker '{self.name}' reopened due to failure in half-open state"
                )
            elif (
                self._state.state == CircuitState.CLOSED
                and self._state.failure_count >= self.config.failure_threshold
            ):
                # Threshold exceeded, open circuit
                self._state.state = CircuitState.OPEN
                self._state.last_state_change = datetime.now()
                logger.error(
                    f"Circuit breaker '{self.name}' opened after {self._state.failure_count} failures"
                )

        return True

    def _should_attempt_reset(self) -> bool:
        """Check if enough time has passed to attempt reset."""
        if self._state.last_failure_time is None:
            return True

        time_since_failure = datetime.now() - self._state.last_failure_time
        return (
            time_since_failure.total_seconds() >= self.config.recovery_timeout
        )

    def _is_counted_exception(self, exc: Exception) -> bool:
        """Check if exception should count as failure."""
        # Check excluded exceptions first
        if isinstance(exc, self.config.exclude_exceptions):
            return False

        # Check expected exceptions
        return isinstance(exc, self.config.expected_exception)

    @property
    def state(self) -> CircuitState:
        """Get current circuit state."""
        return self._state.state

    @property
    def is_open(self) -> bool:
        """Check if circuit is open."""
        return self._state.state == CircuitState.OPEN

    @property
    def is_closed(self) -> bool:
        """Check if circuit is closed."""
        return self._state.state == CircuitState.CLOSED

    async def reset(self) -> None:
        """Manually reset circuit to closed state."""
        async with self._state._lock:
            self._state.state = CircuitState.CLOSED
            self._state.failure_count = 0
            self._state.success_count = 0
            self._state.last_failure_time = None
            self._state.last_state_change = datetime.now()
            logger.info(f"Circuit breaker '{self.name}' manually reset")


# Global registry of circuit breakers
_circuit_breakers: dict[str, CircuitBreaker] = {}


def get_circuit_breaker(
    name: str,
    config: CircuitBreakerConfig | None = None,
) -> CircuitBreaker:
    """Get or create a circuit breaker.

    Args:
        name: Circuit breaker name
        config: Configuration (used only on creation)

    Returns:
        Circuit breaker instance
    """
    if name not in _circuit_breakers:
        _circuit_breakers[name] = CircuitBreaker(name, config)
    return _circuit_breakers[name]


def circuit_breaker(
    name: str | None = None,
    config: CircuitBreakerConfig | None = None,
):
    """Decorator to apply circuit breaker to function.

    Args:
        name: Circuit breaker name (defaults to function name)
        config: Circuit breaker configuration

    Returns:
        Decorated function
    """

    def decorator(func):
        breaker_name = name or f"{func.__module__}.{func.__name__}"
        breaker = get_circuit_breaker(breaker_name, config)

        async def wrapper(*args, **kwargs):
            return await breaker.call(func, *args, **kwargs)

        # Preserve function metadata
        wrapper.__name__ = func.__name__
        wrapper.__doc__ = func.__doc__
        wrapper._circuit_breaker = breaker  # Expose breaker for testing

        return wrapper

    return decorator

```

--------------------------------------------------------------------------------
/src/biomcp/articles/search.py:
--------------------------------------------------------------------------------

```python
import asyncio
import json
from collections.abc import Generator
from typing import Annotated, Any, get_args

from pydantic import BaseModel, Field, computed_field

from .. import http_client, render
from ..constants import PUBTATOR3_SEARCH_URL, SYSTEM_PAGE_SIZE
from ..core import PublicationState
from .autocomplete import Concept, EntityRequest, autocomplete
from .fetch import call_pubtator_api

concepts: list[Concept] = sorted(get_args(Concept))
fields: list[str] = [concept + "s" for concept in concepts]


class PubmedRequest(BaseModel):
    chemicals: list[str] = Field(
        default_factory=list,
        description="List of chemicals for filtering results.",
    )
    diseases: list[str] = Field(
        default_factory=list,
        description="Diseases such as Hypertension, Lung Adenocarcinoma, etc.",
    )
    genes: list[str] = Field(
        default_factory=list,
        description="List of genes for filtering results.",
    )
    keywords: list[str] = Field(
        default_factory=list,
        description="List of other keywords for filtering results.",
    )
    variants: list[str] = Field(
        default_factory=list,
        description="List of variants for filtering results.",
    )

    def iter_concepts(self) -> Generator[tuple[Concept, str], None, None]:
        for concept in concepts:
            field = concept + "s"
            values = getattr(self, field, []) or []
            for value in values:
                yield concept, value


class PubtatorRequest(BaseModel):
    text: str
    size: int = 50


class ResultItem(BaseModel):
    pmid: int | None = None
    pmcid: str | None = None
    title: str | None = None
    journal: str | None = None
    authors: list[str] | None = None
    date: str | None = None
    doi: str | None = None
    abstract: str | None = None
    publication_state: PublicationState = PublicationState.PEER_REVIEWED
    source: str | None = Field(
        None, description="Source database (e.g., PubMed, bioRxiv, Europe PMC)"
    )

    @computed_field
    def pubmed_url(self) -> str | None:
        url = None
        if self.pmid:
            url = f"https://pubmed.ncbi.nlm.nih.gov/{self.pmid}/"
        return url

    @computed_field
    def pmc_url(self) -> str | None:
        """Generates the PMC URL if PMCID exists."""
        url = None
        if self.pmcid:
            url = f"https://www.ncbi.nlm.nih.gov/pmc/articles/{self.pmcid}/"
        return url

    @computed_field
    def doi_url(self) -> str | None:
        """Generates the DOI URL if DOI exists."""
        url = None
        if self.doi:
            url = f"https://doi.org/{self.doi}"
        return url


class SearchResponse(BaseModel):
    results: list[ResultItem]
    page_size: int
    current: int
    count: int
    total_pages: int


async def convert_request(request: PubmedRequest) -> PubtatorRequest:
    query_parts = []

    # Process keywords with OR logic support
    for keyword in request.keywords:
        if "|" in keyword:
            # Handle OR within a keyword (e.g., "R173|Arg173|p.R173")
            or_terms = [term.strip() for term in keyword.split("|")]
            or_query = "(" + " OR ".join(or_terms) + ")"
            query_parts.append(or_query)
        else:
            query_parts.append(keyword)

    # Create all autocomplete tasks in parallel
    autocomplete_tasks = []
    concept_values = []

    for concept, value in request.iter_concepts():
        task = autocomplete(
            request=EntityRequest(concept=concept, query=value),
        )
        autocomplete_tasks.append(task)
        concept_values.append((concept, value))

    # Execute all autocomplete calls in parallel
    if autocomplete_tasks:
        entities = await asyncio.gather(*autocomplete_tasks)

        # Process results
        for (_concept, value), entity in zip(
            concept_values, entities, strict=False
        ):
            if entity:
                query_parts.append(entity.entity_id)
            else:
                query_parts.append(value)

    query_text = " AND ".join(query_parts)

    return PubtatorRequest(text=query_text, size=SYSTEM_PAGE_SIZE)


async def add_abstracts(response: SearchResponse) -> None:
    pmids = [pr.pmid for pr in response.results if pr.pmid]
    abstract_response, _ = await call_pubtator_api(pmids, full=False)

    if abstract_response:
        for result in response.results:
            result.abstract = abstract_response.get_abstract(result.pmid)


def clean_authors(record):
    """Keep only the first and last author if > 4 authors."""
    authors = record.get("authors")
    if authors and len(authors) > 4:
        record["authors"] = [authors[0], "...", authors[-1]]
    return record


async def search_articles(
    request: PubmedRequest,
    output_json: bool = False,
) -> str:
    pubtator_request = await convert_request(request)

    # Start the search request
    search_task = http_client.request_api(
        url=PUBTATOR3_SEARCH_URL,
        request=pubtator_request,
        response_model_type=SearchResponse,
        domain="article",
    )

    # Execute search first
    response, error = await search_task

    if response:
        # Now fetch abstracts (still sequential but could be parallelized with other operations)
        await add_abstracts(response)
        # Add source field to PubMed results
        for result in response.results:
            result.source = "PubMed"

    # noinspection DuplicatedCode
    if error:
        data: list[dict[str, Any]] = [
            {"error": f"Error {error.code}: {error.message}"}
        ]
    else:
        data = list(
            map(
                clean_authors,
                [
                    result.model_dump(mode="json", exclude_none=True)
                    for result in (response.results if response else [])
                ],
            )
        )

    if data and not output_json:
        return render.to_markdown(data)
    else:
        return json.dumps(data, indent=2)


async def _article_searcher(
    call_benefit: Annotated[
        str,
        "Define and summarize why this function is being called and the intended benefit",
    ],
    chemicals: Annotated[
        list[str] | str | None, "List of chemicals for filtering results"
    ] = None,
    diseases: Annotated[
        list[str] | str | None,
        "Diseases such as Hypertension, Lung Adenocarcinoma, etc.",
    ] = None,
    genes: Annotated[
        list[str] | str | None, "List of genes for filtering results"
    ] = None,
    keywords: Annotated[
        list[str] | str | None, "List of other keywords for filtering results"
    ] = None,
    variants: Annotated[
        list[str] | str | None, "List of variants for filtering results"
    ] = None,
    include_preprints: Annotated[
        bool, "Include preprint articles from bioRxiv/medRxiv and Europe PMC"
    ] = True,
    include_cbioportal: Annotated[
        bool,
        "Include cBioPortal cancer genomics summary when searching by gene",
    ] = True,
) -> str:
    """
    Searches for articles across PubMed and preprint servers.

    Parameters:
    - call_benefit: Define and summarize why this function is being called and the intended benefit
    - chemicals: List of chemicals for filtering results
    - diseases: Diseases such as Hypertension, Lung Adenocarcinoma, etc.
    - genes: List of genes for filtering results
    - keywords: List of other keywords for filtering results
    - variants: List of variants for filtering results
    - include_preprints: Include results from preprint servers (default: True)
    - include_cbioportal: Include cBioPortal summaries for gene searches (default: True)

    Notes:
    - Use full terms ("Non-small cell lung carcinoma") over abbreviations ("NSCLC")
    - Use keywords to specify terms that don't fit in disease, gene ("EGFR"),
      chemical ("Cisplatin"), or variant ("BRAF V600E") categories
    - Parameters can be provided as lists or comma-separated strings
    - Results include both peer-reviewed and preprint articles by default
    - Keywords support OR logic using the pipe (|) separator:
      - Example: "R173|Arg173|p.R173" finds articles with any of these notations
      - Multiple keywords are still combined with AND logic

    Returns:
    Markdown formatted list of matching articles, sorted by date (newest first),
    with peer-reviewed articles listed before preprints.
    Limited to max 20 results (10 from each source) by default to optimize token usage.
    """
    # Import here to avoid circular dependency
    from .search_optimized import article_searcher_optimized

    # Use the optimized version with caching
    return await article_searcher_optimized(
        call_benefit=call_benefit,
        chemicals=chemicals,
        diseases=diseases,
        genes=genes,
        keywords=keywords,
        variants=variants,
        include_preprints=include_preprints,
        include_cbioportal=include_cbioportal,
    )

```

--------------------------------------------------------------------------------
/docs/FDA_SECURITY.md:
--------------------------------------------------------------------------------

```markdown
# FDA Integration Security Documentation

## Overview

This document outlines the security measures implemented in the BioMCP FDA integration to ensure safe handling of medical data and protection against common vulnerabilities.

## Security Features

### 1. Input Validation & Sanitization

All user inputs are validated and sanitized before being sent to the FDA API:

- **Injection Prevention**: Removes characters that could be used for SQL injection, XSS, or command injection (`<>\"';&|\\`)
- **Length Limits**: Enforces maximum lengths on all input fields
- **Type Validation**: Ensures parameters match expected types (dates, numbers, etc.)
- **Format Validation**: Validates specific formats (e.g., YYYY-MM-DD for dates)

**Implementation**: `src/biomcp/openfda/input_validation.py`

```python
# Example usage
from biomcp.openfda.input_validation import sanitize_input, validate_drug_name

safe_drug = validate_drug_name("Aspirin<script>")  # Returns "Aspirin"
safe_input = sanitize_input("'; DROP TABLE;")  # SQL injection blocked
```

### 2. API Key Protection

API keys are protected at multiple levels:

- **Cache Key Exclusion**: API keys are removed before generating cache keys
- **No Logging**: API keys are never logged, even in debug mode
- **Environment Variables**: Keys stored in environment variables, not in code
- **Validation**: API key format is validated before use

**Implementation**: `src/biomcp/openfda/cache.py`, `src/biomcp/openfda/utils.py`
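
The cache-key exclusion works by stripping the key from the request parameters before hashing, so two requests that differ only by API key share one cache entry. A minimal sketch of the idea (the `make_cache_key` helper below is hypothetical, not the real function in `cache.py`):

```python
import hashlib
import json


def make_cache_key(endpoint: str, params: dict) -> str:
    # Drop the API key so it never appears in (or can be recovered from) cache keys.
    safe_params = {k: v for k, v in params.items() if k.lower() != "api_key"}
    payload = json.dumps({"endpoint": endpoint, "params": safe_params}, sort_keys=True)
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()


# Two calls that differ only by API key map to the same cache entry.
key_a = make_cache_key("/drug/event.json", {"search": "aspirin", "api_key": "secret-1"})
key_b = make_cache_key("/drug/event.json", {"search": "aspirin", "api_key": "secret-2"})
assert key_a == key_b
```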

### 3. Rate Limiting

Client-side rate limiting prevents API quota exhaustion:

- **Token Bucket Algorithm**: Allows bursts while maintaining average rate
- **Configurable Limits**: 40 requests/minute without key, 240 with key
- **Concurrent Request Limiting**: Maximum 10 concurrent requests via semaphore
- **Automatic Backoff**: Delays requests when approaching limits

**Implementation**: `src/biomcp/openfda/rate_limiter.py`
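
Conceptually, the bucket refills at the average allowed rate and each request spends one token, so short bursts are allowed up to the bucket size while the long-run rate stays capped. An illustrative sketch only (class and values below are assumptions, not the actual `rate_limiter.py` implementation):

```python
import asyncio
import time


class TokenBucket:
    """Illustrative token bucket: refills continuously, waits when empty."""

    def __init__(self, rate_per_minute: int, burst: int) -> None:
        self.rate = rate_per_minute / 60.0  # tokens added per second
        self.capacity = burst
        self.tokens = float(burst)
        self.updated = time.monotonic()

    async def acquire(self) -> None:
        while True:
            now = time.monotonic()
            self.tokens = min(self.capacity, self.tokens + (now - self.updated) * self.rate)
            self.updated = now
            if self.tokens >= 1:
                self.tokens -= 1
                return
            # Not enough tokens yet; sleep roughly until one becomes available.
            await asyncio.sleep((1 - self.tokens) / self.rate)


# 40 requests/minute (no API key), allowing small bursts.
bucket = TokenBucket(rate_per_minute=40, burst=4)
# Before each API call: await bucket.acquire()
```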

### 4. Circuit Breaker Pattern

Prevents cascading failures when FDA API is unavailable:

- **Failure Threshold**: Opens after 5 consecutive failures
- **Recovery Timeout**: Waits 60 seconds before retry attempts
- **Half-Open State**: Tests recovery with limited requests
- **Automatic Recovery**: Returns to normal operation when API recovers

**States**:

- **CLOSED**: Normal operation
- **OPEN**: Blocking all requests (API is down)
- **HALF_OPEN**: Testing if API has recovered
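
The breaker itself lives in `src/biomcp/circuit_breaker.py` (included elsewhere on this page). A usage sketch, with a hypothetical `fetch_fda_page` coroutine standing in for the real FDA call:

```python
import asyncio

from biomcp.circuit_breaker import (
    CircuitBreakerConfig,
    CircuitBreakerError,
    circuit_breaker,
)


@circuit_breaker(
    "openfda_api",
    CircuitBreakerConfig(
        failure_threshold=5,    # open after 5 consecutive failures
        recovery_timeout=60.0,  # wait 60s before probing recovery
        success_threshold=2,    # 2 successes in half-open close the circuit
    ),
)
async def fetch_fda_page(url: str) -> str:
    # Hypothetical stand-in for the real HTTP call.
    return f"payload from {url}"


async def main() -> None:
    try:
        await fetch_fda_page("https://api.fda.gov/drug/event.json")
    except CircuitBreakerError:
        # Circuit is open: fail fast instead of hammering a down API.
        pass


asyncio.run(main())
```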

### 5. Memory Protection

Prevents memory exhaustion from large responses:

- **Response Size Limits**: Maximum 1MB per cached response
- **Cache Size Limits**: Maximum 100 entries in cache
- **FIFO Eviction**: Oldest entries removed when cache is full
- **Size Validation**: Large responses rejected before caching

**Configuration**:

```bash
export BIOMCP_FDA_MAX_RESPONSE_SIZE=1048576  # 1MB
export BIOMCP_FDA_MAX_CACHE_SIZE=100
```
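
A sketch of the size check plus FIFO eviction using an `OrderedDict` (illustrative only; the real cache in `src/biomcp/openfda/cache.py` also applies TTLs):

```python
from collections import OrderedDict

MAX_CACHE_SIZE = 100
MAX_RESPONSE_SIZE = 1_048_576  # 1MB

_cache: OrderedDict[str, str] = OrderedDict()


def cache_response(key: str, body: str) -> None:
    # Reject oversized responses before they can exhaust memory.
    if len(body.encode("utf-8")) > MAX_RESPONSE_SIZE:
        return
    _cache[key] = body
    # FIFO eviction: drop the oldest entries once the cache is full.
    while len(_cache) > MAX_CACHE_SIZE:
        _cache.popitem(last=False)
```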

### 6. File Operation Security

Secure handling of cache files:

- **File Locking**: Uses `fcntl` for exclusive/shared locks
- **Atomic Operations**: Writes to temp files then renames
- **Race Condition Prevention**: Locks prevent concurrent modifications
- **Permission Control**: Files created without world-write permissions

**Implementation**: `src/biomcp/openfda/drug_shortages.py`
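
The pattern is an exclusive `fcntl` lock around a write to a temporary file in the same directory, followed by an atomic rename, so readers never see a partially written file. A sketch of that pattern (the helper name is illustrative, not the exact code in `drug_shortages.py`):

```python
import fcntl
import os
import tempfile


def atomic_write(path: str, data: str) -> None:
    directory = os.path.dirname(path) or "."
    # mkstemp creates the file with owner-only permissions (0600) in the
    # same directory, so the final os.replace stays atomic on the same filesystem.
    fd, tmp_path = tempfile.mkstemp(dir=directory)
    try:
        with os.fdopen(fd, "w") as tmp:
            fcntl.flock(tmp.fileno(), fcntl.LOCK_EX)  # exclusive lock while writing
            tmp.write(data)
            tmp.flush()
            os.fsync(tmp.fileno())
        os.replace(tmp_path, path)  # atomic rename over the old file
    except BaseException:
        os.unlink(tmp_path)
        raise
```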

## Security Best Practices

### For Developers

1. **Never Log Sensitive Data**

   ```python
   # BAD
   logger.debug(f"API key: {api_key}")

   # GOOD
   logger.debug("API key configured" if api_key else "No API key")
   ```

2. **Always Validate Input**

   ```python
   from biomcp.openfda.input_validation import validate_drug_name

   # Always validate before using
   safe_drug = validate_drug_name(user_input)
   if safe_drug:
       # Use safe_drug, not user_input
       await search_adverse_events(drug=safe_drug)
   ```

3. **Use Rate Limiting**

   ```python
   from biomcp.openfda.rate_limiter import rate_limited_request

   # Wrap API calls with rate limiting
   result = await rate_limited_request(make_api_call, params)
   ```

### For System Administrators

1. **API Key Management**

   - Store API keys in environment variables
   - Rotate keys regularly (recommended: every 90 days)
   - Use different keys for dev/staging/production
   - Monitor key usage for anomalies

2. **Monitoring**

   - Set up alerts for circuit breaker state changes
   - Monitor rate limit consumption
   - Track cache hit/miss ratios
   - Log validation failures (potential attacks)

3. **Resource Limits**
   ```bash
   # Configure limits based on your environment
   export BIOMCP_FDA_CACHE_TTL=15  # Minutes
   export BIOMCP_FDA_MAX_CACHE_SIZE=100
   export BIOMCP_FDA_MAX_RESPONSE_SIZE=1048576  # 1MB
   ```

## Threat Model

### Threats Addressed

| Threat              | Mitigation                  | Implementation         |
| ------------------- | --------------------------- | ---------------------- |
| SQL Injection       | Input sanitization          | `input_validation.py`  |
| XSS Attacks         | HTML/JS character removal   | `sanitize_input()`     |
| Command Injection   | Shell metacharacter removal | `sanitize_input()`     |
| API Key Exposure    | Exclusion from logs/cache   | `cache.py`, `utils.py` |
| DoS via Rate Limits | Client-side rate limiting   | `rate_limiter.py`      |
| Cascading Failures  | Circuit breaker pattern     | `CircuitBreaker` class |
| Memory Exhaustion   | Response size limits        | `MAX_RESPONSE_SIZE`    |
| Race Conditions     | File locking                | `fcntl` usage          |
| Cache Poisoning     | Input validation            | `build_safe_query()`   |

### Residual Risks

1. **API Key Compromise**: If environment is compromised, keys are accessible

   - **Mitigation**: Use secret management systems in production

2. **Zero-Day FDA API Vulnerabilities**: Unknown vulnerabilities in FDA API

   - **Mitigation**: Monitor FDA security advisories

3. **Distributed DoS**: Multiple clients could still overwhelm FDA API
   - **Mitigation**: Implement global rate limiting at gateway level

## Compliance Considerations

### HIPAA (If Applicable)

While FDA's public APIs don't contain PHI, the following safeguards would apply if the integration were ever extended to handle patient data:

1. **Encryption**: Use TLS for all API communications
2. **Audit Logging**: Log all data access (but not the data itself)
3. **Access Controls**: Implement user authentication/authorization
4. **Data Retention**: Define and enforce retention policies

### FDA Data Usage

1. **Attribution**: Always include FDA disclaimers in responses
2. **Data Currency**: Warn users that data may not be real-time
3. **Medical Decisions**: Explicitly state data is not for clinical decisions
4. **Rate Limits**: Respect FDA's terms of service

## Security Testing

### Automated Tests

Run security tests with:

```bash
pytest tests/tdd/openfda/test_security.py -v
```

Tests cover:

- Input validation
- Cache key security
- Rate limiting
- Circuit breaker
- File operations

### Manual Security Review

Checklist for security review:

- [ ] No sensitive data in logs
- [ ] All inputs validated
- [ ] Rate limiting functional
- [ ] Circuit breaker triggers correctly
- [ ] Cache size limited
- [ ] File operations are atomic
- [ ] API keys not in cache keys
- [ ] Error messages don't leak information

## Incident Response

### If API Key is Compromised

1. **Immediate**: Revoke compromised key at FDA portal
2. **Generate**: Create new API key
3. **Update**: Update environment variables
4. **Restart**: Restart services to load new key
5. **Audit**: Review logs for unauthorized usage

### If Rate Limits Exceeded

1. **Check**: Verify circuit breaker state
2. **Wait**: Allow circuit breaker recovery timeout
3. **Reduce**: Lower request rate if needed
4. **Monitor**: Check for abnormal usage patterns

### If Security Vulnerability Found

1. **Assess**: Determine severity and exploitability
2. **Patch**: Develop and test fix
3. **Deploy**: Roll out fix with monitoring
4. **Document**: Update this security documentation
5. **Notify**: Inform users if data was at risk

## Configuration Reference

### Environment Variables

| Variable                       | Default | Description                        |
| ------------------------------ | ------- | ---------------------------------- |
| `OPENFDA_API_KEY`              | None    | FDA API key for higher rate limits |
| `BIOMCP_FDA_CACHE_TTL`         | 15      | Cache TTL in minutes               |
| `BIOMCP_FDA_MAX_CACHE_SIZE`    | 100     | Maximum cache entries              |
| `BIOMCP_FDA_MAX_RESPONSE_SIZE` | 1048576 | Maximum response size in bytes     |
| `BIOMCP_SHORTAGE_CACHE_TTL`    | 24      | Drug shortage cache TTL in hours   |

### Security Headers

When deploying as a web service, add these headers:

```python
headers = {
    "X-Content-Type-Options": "nosniff",
    "X-Frame-Options": "DENY",
    "X-XSS-Protection": "1; mode=block",
    "Strict-Transport-Security": "max-age=31536000; includeSubDomains",
    "Content-Security-Policy": "default-src 'self'"
}
```

## Contact

For security issues, contact: [email protected] (create this address)

For FDA API issues, see: https://open.fda.gov/apis/

---

_Last Updated: 2025-08-07_
_Version: 1.0_

```

--------------------------------------------------------------------------------
/tests/tdd/variants/test_cbioportal_search.py:
--------------------------------------------------------------------------------

```python
"""Test cBioPortal search enhancements."""

import asyncio

import pytest

from biomcp.variants.cbioportal_search import (
    CBioPortalSearchClient,
    CBioPortalSearchSummary,
    format_cbioportal_search_summary,
)
from biomcp.variants.search import VariantQuery, search_variants

from .constants import API_RETRY_DELAY_SECONDS, DEFAULT_MAX_STUDIES


class TestCBioPortalSearch:
    """Test cBioPortal search functionality."""

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_gene_search_summary(self):
        """Test getting gene search summary from cBioPortal."""
        client = CBioPortalSearchClient()

        # Test with BRAF
        summary = await client.get_gene_search_summary("BRAF", max_studies=5)

        assert summary is not None
        assert summary.gene == "BRAF"

        # Handle case where cBioPortal API returns empty data
        if summary.total_mutations == 0:
            # API might be down or returning empty results
            # This is acceptable for integration tests
            assert summary.total_mutations == 0
            assert summary.total_samples_tested == 0
            assert summary.mutation_frequency == 0.0
            assert len(summary.hotspots) == 0
        else:
            # Normal case - data is available
            assert summary.total_mutations > 0
            assert summary.total_samples_tested > 0
            assert summary.mutation_frequency > 0
            assert len(summary.hotspots) > 0

            # Check that V600E is a top hotspot
            v600e_found = any(
                "V600E" in hs.amino_acid_change for hs in summary.hotspots
            )
            assert v600e_found, "BRAF V600E should be a top hotspot"

        # Check cancer distribution
        if summary.total_mutations > 0:
            assert len(summary.cancer_distribution) > 0
            assert any(
                "melanoma" in cancer.lower()
                for cancer in summary.cancer_distribution
            ), "BRAF should be found in melanoma"
        else:
            # When no mutations found, cancer distribution should be empty
            assert len(summary.cancer_distribution) == 0

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_format_search_summary(self):
        """Test formatting of search summary."""
        # Create a mock summary
        summary = CBioPortalSearchSummary(
            gene="BRAF",
            total_mutations=1000,
            total_samples_tested=10000,
            mutation_frequency=0.1,
            hotspots=[
                {
                    "position": 600,
                    "amino_acid_change": "V600E",
                    "count": 800,
                    "frequency": 0.8,
                    "cancer_types": ["Melanoma", "Colorectal Cancer"],
                }
            ],
            cancer_distribution={"Melanoma": 600, "Colorectal Cancer": 200},
            study_coverage={
                "total_studies": 50,
                "queried_studies": 10,
                "studies_with_data": 8,
            },
        )

        formatted = format_cbioportal_search_summary(summary)

        assert "BRAF" in formatted
        assert "10.0%" in formatted  # Mutation frequency
        assert "V600E" in formatted
        assert "Melanoma" in formatted
        assert "600 mutations" in formatted

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_search_with_cbioportal_summary(self):
        """Test variant search with cBioPortal summary included."""
        query = VariantQuery(gene="BRAF", size=5)

        result = await search_variants(query, include_cbioportal=True)

        # Should include cBioPortal summary section
        assert "cBioPortal Summary for BRAF" in result
        assert "Mutation Frequency" in result
        # Top Hotspots only appears when mutations are found
        # Check for either Top Hotspots or 0 mutations message
        assert "Top Hotspots" in result or "0 mutations" in result

        # Should still include variant results
        assert "# Record" in result or "No variants found" in result

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_search_without_gene(self):
        """Test that cBioPortal summary is not included without gene parameter."""
        query = VariantQuery(rsid="rs113488022", size=5)

        result = await search_variants(query, include_cbioportal=True)

        # Should not include cBioPortal summary
        assert "cBioPortal Summary" not in result

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_tp53_search_summary(self):
        """Test TP53 gene search summary."""
        client = CBioPortalSearchClient()

        # Clear any caches to ensure fresh data
        from biomcp.utils.request_cache import clear_cache

        await clear_cache()

        summary = await client.get_gene_search_summary("TP53", max_studies=5)

        assert summary is not None
        assert summary.gene == "TP53"

        # If we got no mutations, it might be a temporary API issue
        if summary.total_mutations == 0 and summary.total_samples_tested == 0:
            # Try one more time with a small delay
            await asyncio.sleep(API_RETRY_DELAY_SECONDS)
            summary = await client.get_gene_search_summary(
                "TP53", max_studies=5
            )

            # If still no data, skip the test rather than fail
            if summary.total_mutations == 0:
                pytest.skip(
                    "cBioPortal returned no mutation data for TP53 - possible API issue"
                )

        # Basic checks that should pass when data is available
        assert (
            summary.total_mutations > 0
        ), f"TP53 should have mutations. Got: {summary}"

        # More flexible checks
        if summary.hotspots:
            # Just verify structure if we have hotspots
            hotspot_changes = [hs.amino_acid_change for hs in summary.hotspots]
            print(f"TP53 hotspots found: {hotspot_changes[:5]}")
            assert (
                len(hotspot_changes) >= 1
            ), "Should find at least one TP53 hotspot"

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_kras_search_summary(self):
        """Test KRAS gene search summary.

        This test verifies basic functionality rather than specific hotspots,
        which can change as cBioPortal data is updated.
        """
        client = CBioPortalSearchClient()

        # Clear any caches to ensure fresh data
        from biomcp.utils.request_cache import clear_cache

        await clear_cache()

        summary = await client.get_gene_search_summary(
            "KRAS", max_studies=DEFAULT_MAX_STUDIES
        )

        assert summary is not None, "Failed to get summary for KRAS"
        assert summary.gene == "KRAS"

        # If we got no mutations, it might be a temporary API issue
        if summary.total_mutations == 0 and summary.total_samples_tested == 0:
            # Try one more time with a small delay
            await asyncio.sleep(API_RETRY_DELAY_SECONDS)
            summary = await client.get_gene_search_summary(
                "KRAS", max_studies=DEFAULT_MAX_STUDIES
            )

            # If still no data, skip the test rather than fail
            if summary.total_mutations == 0:
                pytest.skip(
                    "cBioPortal returned no mutation data for KRAS - possible API issue"
                )

        # Basic checks that should pass when data is available
        assert (
            summary.total_mutations > 0
        ), f"KRAS should have mutations. Got: {summary}"

        # More flexible checks
        if summary.hotspots:
            # Just verify structure if we have hotspots
            for hotspot in summary.hotspots[:3]:
                assert hasattr(hotspot, "amino_acid_change")
                assert hasattr(hotspot, "count")
            print(
                f"Top KRAS hotspots: {[hs.amino_acid_change for hs in summary.hotspots[:5]]}"
            )

        # Cancer distribution check - only if we have data
        if summary.total_mutations > 0:
            assert (
                len(summary.cancer_distribution) > 0
            ), "Should have cancer type distribution"

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_invalid_gene(self):
        """Test handling of invalid gene name."""
        client = CBioPortalSearchClient()

        summary = await client.get_gene_search_summary("INVALID_GENE")

        assert summary is None

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_json_output_with_cbioportal(self):
        """Test JSON output includes cBioPortal summary."""
        query = VariantQuery(gene="BRAF", size=2)

        result = await search_variants(
            query, output_json=True, include_cbioportal=True
        )

        # Parse JSON
        import json

        data = json.loads(result)

        # Should have both summary and variants
        assert "cbioportal_summary" in data
        assert "variants" in data
        assert "BRAF" in data["cbioportal_summary"]

```

--------------------------------------------------------------------------------
/tests/tdd/articles/test_unified.py:
--------------------------------------------------------------------------------

```python
"""Tests for unified article search functionality."""

import json
from unittest.mock import AsyncMock, patch

import pytest

from biomcp.articles.search import PubmedRequest
from biomcp.articles.unified import (
    _deduplicate_articles,
    _parse_search_results,
    search_articles_unified,
)


class TestUnifiedSearch:
    """Test unified search functionality."""

    @pytest.fixture
    def pubmed_results(self):
        """Sample PubMed results in JSON format."""
        return json.dumps([
            {
                "pmid": 12345,
                "title": "BRAF mutations in cancer",
                "doi": "10.1234/test1",
                "date": "2024-01-15",
                "publication_state": "peer_reviewed",
            },
            {
                "pmid": 12346,
                "title": "Another cancer study",
                "doi": "10.1234/test2",
                "date": "2024-01-10",
                "publication_state": "peer_reviewed",
            },
        ])

    @pytest.fixture
    def preprint_results(self):
        """Sample preprint results in JSON format."""
        return json.dumps([
            {
                "title": "BRAF preprint study",
                "doi": "10.1101/2024.01.20.123456",
                "date": "2024-01-20",
                "publication_state": "preprint",
                "source": "bioRxiv",
            },
            {
                "title": "Duplicate study",
                "doi": "10.1234/test1",  # Same DOI as PubMed result
                "date": "2024-01-14",
                "publication_state": "preprint",
                "source": "Europe PMC",
            },
        ])

    @pytest.mark.asyncio
    async def test_search_articles_unified_both_sources(
        self, pubmed_results, preprint_results
    ):
        """Test searching with both PubMed and preprints enabled."""
        request = PubmedRequest(genes=["BRAF"])

        mock_pubmed = AsyncMock(return_value=pubmed_results)
        mock_preprints = AsyncMock(return_value=preprint_results)

        with (
            patch("biomcp.articles.unified.search_articles", mock_pubmed),
            patch("biomcp.articles.unified.search_preprints", mock_preprints),
            patch(
                "biomcp.variants.cbioportal_search.CBioPortalSearchClient"
            ) as mock_cbio,
        ):
            # Mock cBioPortal client to return None (no summary)
            mock_cbio.return_value.get_gene_search_summary = AsyncMock(
                return_value=None
            )

            result = await search_articles_unified(
                request,
                include_pubmed=True,
                include_preprints=True,
                output_json=True,
            )

            # Parse result
            data = json.loads(result)

            # When gene is specified but cBioPortal returns no data,
            # we should just get the articles list
            if isinstance(data, dict):
                articles = data.get("articles", data)
            else:
                articles = data

            # Should have 3 articles (one duplicate removed)
            assert len(articles) == 3

            # Ordering is by (publication_state priority, date DESC). The
            # preprint in the test data has the newest date, so rather than
            # asserting a strict order, just check the expected mix of states.
            states = [a["publication_state"] for a in articles]
            assert states.count("peer_reviewed") == 2
            assert states.count("preprint") == 1

            # Check deduplication worked
            dois = [a.get("doi") for a in articles if a.get("doi")]
            assert len(dois) == len(set(dois))  # No duplicate DOIs

    @pytest.mark.asyncio
    async def test_search_articles_unified_pubmed_only(self, pubmed_results):
        """Test searching with only PubMed enabled."""
        request = PubmedRequest(
            keywords=["cancer"]
        )  # No gene, so no cBioPortal

        with (
            patch("biomcp.articles.unified.search_articles") as mock_pubmed,
            patch(
                "biomcp.articles.unified.search_preprints"
            ) as mock_preprints,
        ):
            mock_pubmed.return_value = pubmed_results

            result = await search_articles_unified(
                request,
                include_pubmed=True,
                include_preprints=False,
                output_json=True,
            )

            # Preprints should not be called
            mock_preprints.assert_not_called()

            # Parse result
            articles = json.loads(result)
            assert len(articles) == 2
            assert all(
                a["publication_state"] == "peer_reviewed" for a in articles
            )

    @pytest.mark.asyncio
    async def test_search_articles_unified_preprints_only(
        self, preprint_results
    ):
        """Test searching with only preprints enabled."""
        request = PubmedRequest(
            keywords=["cancer"]
        )  # No gene, so no cBioPortal

        with (
            patch("biomcp.articles.unified.search_articles") as mock_pubmed,
            patch(
                "biomcp.articles.unified.search_preprints"
            ) as mock_preprints,
        ):
            mock_preprints.return_value = preprint_results

            result = await search_articles_unified(
                request,
                include_pubmed=False,
                include_preprints=True,
                output_json=True,
            )

            # PubMed should not be called
            mock_pubmed.assert_not_called()

            # Parse result
            articles = json.loads(result)
            assert len(articles) == 2
            assert all(a["publication_state"] == "preprint" for a in articles)

    @pytest.mark.asyncio
    async def test_search_articles_unified_error_handling(self):
        """Test error handling when one source fails."""
        request = PubmedRequest(
            keywords=["cancer"]
        )  # No gene, so no cBioPortal

        with (
            patch("biomcp.articles.unified.search_articles") as mock_pubmed,
            patch(
                "biomcp.articles.unified.search_preprints"
            ) as mock_preprints,
        ):
            # PubMed succeeds
            mock_pubmed.return_value = json.dumps([{"title": "Success"}])
            # Preprints fails
            mock_preprints.side_effect = Exception("API Error")

            result = await search_articles_unified(
                request,
                include_pubmed=True,
                include_preprints=True,
                output_json=True,
            )

            # Should still get PubMed results
            articles = json.loads(result)
            assert len(articles) == 1
            assert articles[0]["title"] == "Success"

    @pytest.mark.asyncio
    async def test_search_articles_unified_markdown_output(
        self, pubmed_results
    ):
        """Test markdown output format."""
        request = PubmedRequest(genes=["BRAF"])

        mock_pubmed = AsyncMock(return_value=pubmed_results)

        with patch("biomcp.articles.unified.search_articles", mock_pubmed):
            result = await search_articles_unified(
                request,
                include_pubmed=True,
                include_preprints=False,
                output_json=False,
            )

            # Should return markdown
            assert isinstance(result, str)
            assert "BRAF mutations in cancer" in result
            assert "# Record" in result  # Markdown headers

    def test_deduplicate_articles(self):
        """Test article deduplication logic."""
        articles = [
            {"title": "Article 1", "doi": "10.1234/test1"},
            {"title": "Article 2", "doi": "10.1234/test2"},
            {"title": "Duplicate of 1", "doi": "10.1234/test1"},
            {"title": "No DOI article"},
            {"title": "Another no DOI"},
        ]

        deduped = _deduplicate_articles(articles)

        # Should have 4 articles (one duplicate removed)
        assert len(deduped) == 4

        # Check DOIs are unique
        dois = [a.get("doi") for a in deduped if a.get("doi")]
        assert len(dois) == len(set(dois))

        # Articles without DOI should be preserved
        no_doi_count = sum(1 for a in deduped if not a.get("doi"))
        assert no_doi_count == 2

    def test_parse_search_results(self):
        """Test parsing of search results from multiple sources."""
        results = [
            json.dumps([{"title": "Article 1"}, {"title": "Article 2"}]),
            json.dumps([{"title": "Article 3"}]),
            Exception("Failed source"),  # Should be skipped
            "[invalid json",  # Should be skipped
        ]

        parsed = _parse_search_results(results)

        # Should have 3 articles (2 + 1, skipping errors)
        assert len(parsed) == 3
        assert parsed[0]["title"] == "Article 1"
        assert parsed[1]["title"] == "Article 2"
        assert parsed[2]["title"] == "Article 3"

    def test_parse_search_results_empty(self):
        """Test parsing with all empty/failed results."""
        results = [
            Exception("Failed"),
            "[invalid",
            json.dumps([]),  # Empty list
        ]

        parsed = _parse_search_results(results)
        assert parsed == []

```

--------------------------------------------------------------------------------
/src/biomcp/openfda/device_events.py:
--------------------------------------------------------------------------------

```python
"""
OpenFDA Device Adverse Events (MAUDE) integration.

Focus on genomic/diagnostic devices relevant to precision oncology.
"""

import logging

from .constants import (
    GENOMIC_DEVICE_PRODUCT_CODES,
    OPENFDA_DEFAULT_LIMIT,
    OPENFDA_DEVICE_EVENTS_URL,
    OPENFDA_DISCLAIMER,
    OPENFDA_MAX_LIMIT,
)
from .device_events_helpers import (
    analyze_device_problems,
    format_detailed_device_info,
    format_device_detail_header,
    format_device_distribution,
    format_device_report_summary,
    format_patient_details,
    format_top_problems,
)
from .utils import clean_text, format_count, make_openfda_request

logger = logging.getLogger(__name__)


def _build_device_search_query(
    device: str | None,
    manufacturer: str | None,
    problem: str | None,
    product_code: str | None,
    genomics_only: bool,
) -> str:
    """Build the search query for device events."""
    search_parts = []

    if device:
        # Build flexible search queries
        device_queries = []

        # First try exact match
        device_queries.extend([
            f'device.brand_name:"{device}"',
            f'device.generic_name:"{device}"',
            f'device.openfda.device_name:"{device}"',
        ])

        # For multi-word terms, also search for key words with wildcards
        # This helps match "FoundationOne CDx" to "F1CDX" or similar variations
        words = device.split()

        # Add prefix-wildcard searches for significant words (short/common words are skipped)
        for word in words:
            # Skip common words and very short ones
            if len(word) > 3 and word.lower() not in [
                "test",
                "system",
                "device",
            ]:
                # Use prefix wildcard for better performance
                device_queries.append(f"device.brand_name:{word}*")
                device_queries.append(f"device.generic_name:{word}*")

        # Also try searching by removing spaces (e.g., "Foundation One" -> "FoundationOne")
        if len(words) > 1:
            combined = "".join(words)
            device_queries.append(f'device.brand_name:"{combined}"')
            device_queries.append(f'device.generic_name:"{combined}"')

        search_parts.append(f"({' OR '.join(device_queries)})")

    if manufacturer:
        # Search manufacturer field with both exact and wildcard matching
        mfr_queries = [
            f'device.manufacturer_d_name:"{manufacturer}"',
            f"device.manufacturer_d_name:*{manufacturer}*",
        ]
        search_parts.append(f"({' OR '.join(mfr_queries)})")

    if problem:
        search_parts.append(f'device.device_problem_text:"{problem}"')

    if product_code:
        search_parts.append(f'device.openfda.product_code:"{product_code}"')
    elif (
        genomics_only and not device
    ):  # Only apply genomics filter if no specific device is named
        # Filter to genomic device product codes
        code_parts = [
            f'device.openfda.product_code:"{code}"'
            for code in GENOMIC_DEVICE_PRODUCT_CODES
        ]
        if code_parts:
            search_parts.append(f"({' OR '.join(code_parts)})")

    return " AND ".join(search_parts)


def _format_search_summary(
    device: str | None,
    manufacturer: str | None,
    problem: str | None,
    genomics_only: bool,
    total: int,
) -> list[str]:
    """Format the search summary section."""
    output = []

    search_desc = []
    if device:
        search_desc.append(f"**Device**: {device}")
    if manufacturer:
        search_desc.append(f"**Manufacturer**: {manufacturer}")
    if problem:
        search_desc.append(f"**Problem**: {problem}")
    if genomics_only:
        search_desc.append("**Type**: Genomic/Diagnostic Devices")

    if search_desc:
        output.append(" | ".join(search_desc))
    output.append(
        f"**Total Reports Found**: {format_count(total, 'report')}\n"
    )

    return output


async def search_device_events(
    device: str | None = None,
    manufacturer: str | None = None,
    problem: str | None = None,
    product_code: str | None = None,
    genomics_only: bool = True,
    limit: int = OPENFDA_DEFAULT_LIMIT,
    skip: int = 0,
    api_key: str | None = None,
) -> str:
    """
    Search FDA device adverse event reports (MAUDE).

    Args:
        device: Device name to search for
        manufacturer: Manufacturer name
        problem: Device problem description
        product_code: FDA product code
        genomics_only: Filter to genomic/diagnostic devices only
        limit: Maximum number of results
        skip: Number of results to skip
        api_key: Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)

    Returns:
        Formatted string with device event information
    """
    if not device and not manufacturer and not product_code and not problem:
        return (
            "⚠️ Please specify a device name, manufacturer, or problem to search.\n\n"
            "Examples:\n"
            "- Search by device: --device 'FoundationOne'\n"
            "- Search by manufacturer: --manufacturer 'Illumina'\n"
            "- Search by problem: --problem 'false positive'"
        )

    # Build and execute search
    search_query = _build_device_search_query(
        device, manufacturer, problem, product_code, genomics_only
    )
    params = {
        "search": search_query,
        "limit": min(limit, OPENFDA_MAX_LIMIT),
        "skip": skip,
    }

    response, error = await make_openfda_request(
        OPENFDA_DEVICE_EVENTS_URL, params, "openfda_device_events", api_key
    )

    if error:
        return f"⚠️ Error searching device events: {error}"

    if not response or not response.get("results"):
        return _format_no_results(device, manufacturer, problem, genomics_only)

    results = response["results"]
    total = (
        response.get("meta", {}).get("results", {}).get("total", len(results))
    )

    # Build output
    output = ["## FDA Device Adverse Event Reports\n"]
    output.extend(
        _format_search_summary(
            device, manufacturer, problem, genomics_only, total
        )
    )

    # Analyze and format problems
    all_problems, all_device_names, _ = analyze_device_problems(results)
    output.extend(format_top_problems(all_problems, results))

    # Show device distribution if searching by problem
    if problem:
        output.extend(format_device_distribution(all_device_names, results))

    # Display sample reports
    output.append(
        f"### Sample Reports (showing {min(len(results), 3)} of {total}):\n"
    )
    for i, result in enumerate(results[:3], 1):
        output.extend(format_device_report_summary(result, i))

    # Add tips
    if genomics_only:
        output.append(
            "\n💡 **Note**: Results filtered to genomic/diagnostic devices. "
            "Use --no-genomics-only to search all medical devices."
        )

    output.append(f"\n{OPENFDA_DISCLAIMER}")
    return "\n".join(output)


def _format_no_results(
    device: str | None,
    manufacturer: str | None,
    problem: str | None,
    genomics_only: bool,
) -> str:
    """Format no results message."""
    search_desc = []
    if device:
        search_desc.append(f"device '{device}'")
    if manufacturer:
        search_desc.append(f"manufacturer '{manufacturer}'")
    if problem:
        search_desc.append(f"problem '{problem}'")

    desc = " and ".join(search_desc)
    if genomics_only:
        desc += " (filtered to genomic/diagnostic devices)"

    return f"No device adverse event reports found for {desc}."


async def get_device_event(
    mdr_report_key: str, api_key: str | None = None
) -> str:
    """
    Get detailed information for a specific device event report.

    Args:
        mdr_report_key: MDR report key
        api_key: Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)

    Returns:
        Formatted string with detailed report information
    """
    params = {
        "search": f'mdr_report_key:"{mdr_report_key}"',
        "limit": 1,
    }

    response, error = await make_openfda_request(
        OPENFDA_DEVICE_EVENTS_URL,
        params,
        "openfda_device_event_detail",
        api_key,
    )

    if error:
        return f"⚠️ Error retrieving device event report: {error}"

    if not response or not response.get("results"):
        return f"Device event report '{mdr_report_key}' not found."

    result = response["results"][0]

    # Build detailed output
    output = format_device_detail_header(result, mdr_report_key)

    # Device details
    if devices := result.get("device", []):
        output.extend(format_detailed_device_info(devices))

    # Event narrative
    if event_desc := result.get("event_description"):
        output.append("### Event Description")
        output.append(clean_text(event_desc))
        output.append("")

    # Manufacturer narrative
    if mfr_narrative := result.get("manufacturer_narrative"):
        output.append("### Manufacturer's Analysis")
        output.append(clean_text(mfr_narrative))
        output.append("")

    # Patient information
    if patient := result.get("patient", []):
        output.extend(format_patient_details(patient))

    # Remedial action
    if remedial := result.get("remedial_action"):
        output.append("### Remedial Action")
        if isinstance(remedial, list):
            output.append(", ".join(remedial))
        else:
            output.append(remedial)
        output.append("")

    output.append(f"\n{OPENFDA_DISCLAIMER}")
    return "\n".join(output)

```

--------------------------------------------------------------------------------
/docs/troubleshooting.md:
--------------------------------------------------------------------------------

```markdown
# Troubleshooting Guide

This guide helps you resolve common issues with BioMCP installation, configuration, and usage.

## Installation Issues

### Prerequisites Not Met

**macOS:**

```bash
# Install uv (recommended)
brew install uv

# Or using the official installer
curl -LsSf https://astral.sh/uv/install.sh | sh

# Install Node.js for npx (if needed)
brew install node
```

**Linux:**

```bash
# Install uv
curl -LsSf https://astral.sh/uv/install.sh | sh

# Install Node.js
curl -fsSL https://deb.nodesource.com/setup_lts.x | sudo -E bash -
sudo apt-get install -y nodejs
```

**Windows:**

```powershell
# Install uv
powershell -c "irm https://astral.sh/uv/install.ps1 | iex"

# Install Node.js from https://nodejs.org
```

### "Command not found" Error

After installing BioMCP, if you get "command not found":

1. **Restart your terminal** - PATH updates require a new session

2. **Check installation location:**

   ```bash
   # For uv tool install
   ls ~/.local/bin/biomcp

   # For pip install
   which biomcp
   ```

3. **Add to PATH manually:**

   ```bash
   # Add to ~/.bashrc or ~/.zshrc
   export PATH="$HOME/.local/bin:$PATH"
   ```

4. **Reinstall with force:**

   ```bash
   uv tool install biomcp --force
   ```

5. **Use full path:**
   ```bash
   ~/.local/bin/biomcp --version
   ```

### Python Version Issues

BioMCP requires Python 3.10 or higher:

```bash
# Check Python version
python --version

# If too old, install newer version
# macOS
brew install python@3.11

# Linux
sudo apt update
sudo apt install python3.11

# Use pyenv for version management
pyenv install 3.11.8
pyenv local 3.11.8
```

## Configuration Issues

### API Key Not Working

**Environment Variable Not Set:**

```bash
# Check if set
echo $NCI_API_KEY

# Set temporarily
export NCI_API_KEY="your-key-here"

# Set permanently in ~/.bashrc or ~/.zshrc
echo 'export NCI_API_KEY="your-key-here"' >> ~/.bashrc
source ~/.bashrc
```

**Wrong API Key Format:**

- NCI keys: Should be 36 characters (UUID format)
- AlphaGenome: Alphanumeric string
- cBioPortal: JWT token format

**API Key Permissions:**

```bash
# Test NCI API key
biomcp health check --verbose

# Test specific API
curl -H "X-API-KEY: $NCI_API_KEY" \
  "https://cts.nlm.nih.gov/api/v2/trials?size=1"
```

### SSL Certificate Errors

**Update certificates:**

```bash
# Python certificates
pip install --upgrade certifi

# System certificates (macOS)
brew install ca-certificates

# System certificates (Linux)
sudo apt-get update
sudo apt-get install ca-certificates
```

**Corporate proxy issues:**

```bash
# Set proxy environment variables
export HTTP_PROXY="http://proxy.company.com:8080"
export HTTPS_PROXY="http://proxy.company.com:8080"
export NO_PROXY="localhost,127.0.0.1"

# Configure pip for proxy
pip config set global.proxy http://proxy.company.com:8080
```

## Search Issues

### No Results Found

**1. Check gene symbol:**

```bash
# Wrong: common names
biomcp article search --gene HER2  # ❌

# Correct: official HGNC symbol
biomcp article search --gene ERBB2  # ✅

# Find correct symbol
biomcp gene get HER2  # Will suggest ERBB2
```

**2. Too restrictive filters:**

```bash
# Too specific - may return nothing
biomcp article search --gene BRAF --disease "stage IV melanoma" \
  --chemical "dabrafenib and trametinib combination"

# Better - broader search
biomcp article search --gene BRAF --disease melanoma \
  --keyword "dabrafenib trametinib"
```

**3. Check data availability:**

```bash
# Test if gene exists in database
biomcp gene get YOUR_GENE

# Test if disease term is recognized
biomcp disease get "your disease term"
```

### Location Search Not Working

Location searches require coordinates:

```bash
# Wrong - city name only
biomcp trial search --condition cancer --city "New York"  # ❌

# Correct - with coordinates
biomcp trial search --condition cancer \
  --latitude 40.7128 --longitude -74.0060 --distance 50  # ✅
```

Common coordinates:

- New York: 40.7128, -74.0060
- Los Angeles: 34.0522, -118.2437
- Chicago: 41.8781, -87.6298
- Houston: 29.7604, -95.3698
- Boston: 42.3601, -71.0589

### Preprint Search Issues

**Preprints not appearing:**

```bash
# Check if preprints are being excluded
biomcp article search --gene BRAF --no-preprints  # Excludes preprints

# Include preprints (default)
biomcp article search --gene BRAF  # Includes preprints
```

**DOI not found:**

```bash
# Ensure correct DOI format
biomcp article get "10.1101/2024.01.20.23288905"  # bioRxiv format

# Not all preprints are indexed immediately
# Try searching by title/keywords instead
```

## Performance Issues

### Slow Searches

**1. Reduce result count:**

```bash
# Default may be too high
biomcp article search --gene TP53 --limit 100  # Slow

# Reduce for faster results
biomcp article search --gene TP53 --limit 10   # Fast
```

**2. Use specific filters:**

```bash
# Broad search - slow
biomcp trial search --condition cancer

# Specific search - faster
biomcp trial search --condition "melanoma" --phase PHASE3 \
  --status RECRUITING --country "United States"
```

**3. Check API health:**

```bash
# See which APIs are slow
biomcp health check --verbose

# Check specific API
biomcp health check --apis-only
```

### Timeout Errors

**Increase timeout for slow networks:**

```bash
# Set environment variable
export BIOMCP_TIMEOUT=300  # 5 minutes

# Or use configuration file
echo "timeout: 300" > ~/.biomcp/config.yml
```

**Windows-specific asyncio hangs:**

```python
# On Windows, some async libraries hang under the default Proactor event loop;
# switching to the selector policy in your Python scripts can resolve this.
import asyncio

asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
```

### Memory Issues

**Large result sets:**

```bash
# Process in batches
for i in {1..10}; do
  biomcp article search --gene BRCA1 --page $i --limit 100
done

# Use streaming where available
biomcp article search --gene TP53 --format jsonl | \
  while read line; do
    echo "$line" | jq '.pmid'
  done
```

## MCP Server Issues

### Testing Server Connectivity

**1. Test with MCP Inspector:**

```bash
npx @modelcontextprotocol/inspector uv run --with biomcp-python biomcp run
```

Open http://127.0.0.1:6274 and verify:

- Tools list loads
- Can invoke a simple tool like `gene_getter`

**2. Test with curl (HTTP mode):**

```bash
# Start server in HTTP mode
biomcp run --mode http --port 8000

# Test health endpoint
curl http://localhost:8000/health

# Test MCP endpoint
curl -X POST http://localhost:8000/mcp \
  -H "Content-Type: application/json" \
  -d '{"method": "tools/list"}'
```

### Claude Desktop Integration Issues

**Server not appearing:**

1. Check configuration file location:

   - macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
   - Windows: `%APPDATA%\Claude\claude_desktop_config.json`

2. Validate JSON syntax:

   ```bash
   # macOS
   cat ~/Library/Application\ Support/Claude/claude_desktop_config.json | jq .
   ```

3. Check server starts correctly:
   ```bash
   # Test the exact command from config
   uv run --with biomcp-python biomcp run
   ```

**Server crashes:**
Check logs:

```bash
# Enable debug logging
export BIOMCP_LOG_LEVEL=DEBUG
uv run --with biomcp-python biomcp run
```

Common fixes:

- Update to latest version: `uv tool install biomcp --force`
- Clear cache: `rm -rf ~/.biomcp/cache`
- Check port conflicts: `lsof -i :8000`

## Data Quality Issues

### Outdated Results

**Check data freshness:**

```bash
# See when databases were last updated
biomcp health check --verbose | grep "Last updated"
```

**Clear cache if needed:**

```bash
# Remove cached results
rm -rf ~/.biomcp/cache

# Or set cache TTL
export BIOMCP_CACHE_TTL=900  # 15 minutes
```

### Missing Annotations

**PubTator3 annotations missing:**

- Some newer articles may not be fully annotated yet
- Try searching by PMID directly
- Check if the article is indexed by searching its title, or query PubTator3 directly (see the sketch below)
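
To check annotation coverage yourself, you can query the PubTator3 export endpoint that BioMCP uses (`PUBTATOR3_FULLTEXT_URL` in `src/biomcp/constants.py`). A minimal sketch, assuming the `requests` library and the standard `pmids` query parameter; the PMID shown is hypothetical:

```python
import requests

# PubTator3 BioC JSON export endpoint (matches PUBTATOR3_FULLTEXT_URL in src/biomcp/constants.py)
url = "https://www.ncbi.nlm.nih.gov/research/pubtator3-api/publications/export/biocjson"

# Hypothetical PMID -- replace with the article you are checking
resp = requests.get(url, params={"pmids": "29355051"}, timeout=30)

if resp.ok and resp.text.strip():
    print("PubTator3 annotations available")
else:
    print(f"No annotations yet (HTTP {resp.status_code})")
```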

**Variant annotations incomplete:**

- Not all variants have all annotation types
- Rare variants may lack population frequencies
- Novel variants won't have ClinVar data

## Error Messages

### Common Error Codes

**HTTP 429 - Rate Limit Exceeded:**

```bash
# Add delay between requests
biomcp article search --gene BRAF --delay 1000  # 1 second

# Or reduce parallel requests
export BIOMCP_MAX_CONCURRENT=2
```

**HTTP 404 - Not Found:**

- Check identifier format (PMID, NCT ID, etc.)
- Verify record exists in source database
- Try alternative identifiers

**HTTP 500 - Server Error:**

- External API may be down
- Check status: `biomcp health check`
- Try again later

### Debugging

**Enable verbose logging:**

```bash
# Set log level
export BIOMCP_LOG_LEVEL=DEBUG

# Run with verbose output
biomcp article search --gene BRAF --verbose

# Check log files
tail -f ~/.biomcp/logs/biomcp.log
```

**Report bugs:**
Include when reporting issues:

1. BioMCP version: `biomcp --version`
2. Full error message and stack trace
3. Command that caused the error
4. Operating system and Python version
5. Relevant environment variables

Report at: https://github.com/genomoncology/biomcp/issues

## Getting Help

### Quick Checks

1. **Check FAQ first**: [Frequently Asked Questions](faq-condensed.md)
2. **Search existing issues**: [GitHub Issues](https://github.com/genomoncology/biomcp/issues)
3. **Check examples**: [How-to Guides](how-to-guides/01-find-articles-and-cbioportal-data.md)

### Community Support

- Issue Tracker: Report bugs, request features
- Documentation: PRs welcome for improvements

### Professional Support

For commercial support, contact: [email protected]

---

_Still having issues? [Open a GitHub issue](https://github.com/genomoncology/biomcp/issues/new) with details._

```

--------------------------------------------------------------------------------
/tests/tdd/variants/test_external_integration.py:
--------------------------------------------------------------------------------

```python
"""Integration tests for external variant data sources with real API calls."""

import pytest

from biomcp.variants.cbio_external_client import CBioPortalExternalClient
from biomcp.variants.external import (
    ExternalVariantAggregator,
    TCGAClient,
    ThousandGenomesClient,
)


class TestTCGAIntegration:
    """Integration tests for TCGA/GDC API."""

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_braf_v600e_variant(self):
        """Test fetching BRAF V600E data from TCGA."""
        client = TCGAClient()

        # Try different formats
        variants_to_test = [
            "BRAF V600E",  # Gene AA change format that TCGA supports
            "chr7:g.140453136A>T",
            "7:g.140453136A>T",
        ]

        found_data = False
        for variant in variants_to_test:
            result = await client.get_variant_data(variant)
            if result:
                found_data = True
                # BRAF V600E is common in melanoma and thyroid cancer
                assert result.tumor_types is not None
                assert len(result.tumor_types) > 0
                # Should have affected cases if data found
                if result.affected_cases:
                    assert result.affected_cases > 0
                break

        # Note: TCGA might not have data for all variants
        if not found_data:
            pytest.skip("TCGA API did not return data for BRAF V600E variants")

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_tp53_variant(self):
        """Test fetching TP53 variant data from TCGA."""
        client = TCGAClient()

        # TP53 R273H - common tumor suppressor mutation
        result = await client.get_variant_data("chr17:g.7577120G>A")

        # TP53 mutations are very common in cancer
        if result:
            assert result.tumor_types is not None
            assert len(result.tumor_types) > 0

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_nonexistent_variant(self):
        """Test TCGA response for non-existent variant."""
        client = TCGAClient()

        # Made-up variant that shouldn't exist
        result = await client.get_variant_data("chr99:g.999999999A>T")

        assert result is None


class TestThousandGenomesIntegration:
    """Integration tests for 1000 Genomes via Ensembl REST API."""

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_common_variant_with_rsid(self):
        """Test fetching common variant data by rsID."""
        client = ThousandGenomesClient()

        # rs113488022 is BRAF V600E
        result = await client.get_variant_data("rs113488022")

        if result:
            # This is a rare variant, so MAF should be low or None
            if result.global_maf is not None:
                assert result.global_maf < 0.01  # Less than 1%

            # Consequence information might not be available for all variants
            # Just verify the data structure is correct
            assert hasattr(result, "most_severe_consequence")

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_variant_population_frequencies(self):
        """Test population frequency data retrieval."""
        client = ThousandGenomesClient()

        # Use a more common variant for testing population frequencies
        # rs1800734 - common variant in MLH1 promoter
        result = await client.get_variant_data("rs1800734")

        if result:
            # Should have at least global MAF
            assert result.global_maf is not None
            assert 0 <= result.global_maf <= 1

            # Check that we get population-specific frequencies
            pop_freqs = [
                result.afr_maf,
                result.amr_maf,
                result.eas_maf,
                result.eur_maf,
                result.sas_maf,
            ]

            # At least some populations should have data
            non_null_freqs = [f for f in pop_freqs if f is not None]
            assert len(non_null_freqs) > 0

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_invalid_variant_id(self):
        """Test 1000 Genomes response for invalid variant."""
        client = ThousandGenomesClient()

        # Invalid rsID
        result = await client.get_variant_data("rs999999999999")

        assert result is None


class TestCBioPortalIntegration:
    """Integration tests for cBioPortal API."""

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_braf_v600e_variant(self):
        """Test fetching BRAF V600E data from cBioPortal."""
        client = CBioPortalExternalClient()

        result = await client.get_variant_data("BRAF V600E")

        if result:
            # BRAF V600E is common in melanoma and other cancers
            assert result.total_cases is not None
            assert result.total_cases > 0
            assert len(result.studies) > 0
            # Should have data from various studies
            print(
                f"Found {result.total_cases} cases in {len(result.studies)} studies: {result.studies}"
            )

            # Check enhanced fields
            assert result.cancer_type_distribution is not None
            assert len(result.cancer_type_distribution) > 0
            print(
                f"Cancer types: {list(result.cancer_type_distribution.keys())}"
            )

            assert result.mutation_types is not None
            assert "Missense_Mutation" in result.mutation_types

            assert result.mean_vaf is not None
            print(f"Mean VAF: {result.mean_vaf}")
        else:
            pytest.skip("cBioPortal API did not return data for BRAF V600E")

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_kras_g12d_variant(self):
        """Test fetching KRAS G12D data from cBioPortal."""
        client = CBioPortalExternalClient()

        result = await client.get_variant_data("KRAS G12D")

        if result:
            # KRAS G12D is a common mutation in multiple cancer types
            assert result.total_cases is not None
            assert result.total_cases > 0
            assert len(result.studies) > 0
        else:
            pytest.skip("cBioPortal API did not return data for KRAS G12D")

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_invalid_variant(self):
        """Test cBioPortal response for invalid variant."""
        client = CBioPortalExternalClient()

        # Invalid gene name
        result = await client.get_variant_data("FAKEGENE V600E")

        assert result is None


class TestExternalVariantAggregatorIntegration:
    """Integration tests for the external variant aggregator."""

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_aggregate_all_sources(self):
        """Test aggregating data from all available sources."""
        aggregator = ExternalVariantAggregator()

        # Use rs1045642 which is a common variant that should have 1000 Genomes data
        # Also provide variant data for cBioPortal
        variant_data = {
            "cadd": {"gene": {"genename": "ABCB1"}},
            "docm": {"aa_change": "p.I1145I"},
        }

        result = await aggregator.get_enhanced_annotations(
            "rs1045642",
            include_tcga=True,
            include_1000g=True,
            include_cbioportal=True,
            variant_data=variant_data,
        )

        assert result.variant_id == "rs1045642"

        # Check which sources returned data
        sources_with_data = []
        if result.tcga:
            sources_with_data.append("tcga")
        if result.thousand_genomes:
            sources_with_data.append("1000g")
        if result.cbioportal:
            sources_with_data.append("cbioportal")

        # This common variant should have at least 1000 Genomes data
        assert len(sources_with_data) > 0
        # Specifically, it should have 1000 Genomes data
        assert result.thousand_genomes is not None

        # No errors should be reported for successful queries
        # (though some sources might not have data, which is different from errors)
        assert len(result.error_sources) == 0

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_selective_source_inclusion(self):
        """Test including only specific sources."""
        aggregator = ExternalVariantAggregator()

        # Only request 1000 Genomes data
        result = await aggregator.get_enhanced_annotations(
            "rs1800734",  # Common variant
            include_tcga=False,
            include_1000g=True,
        )

        # Should only attempt to fetch 1000 Genomes data
        assert result.tcga is None
        # 1000 Genomes might have data for this common variant
        # (but it's okay if it doesn't)

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_error_handling_resilience(self):
        """Test that aggregator handles individual source failures gracefully."""
        aggregator = ExternalVariantAggregator()

        # Use an invalid variant format that might cause errors
        result = await aggregator.get_enhanced_annotations(
            "INVALID_VARIANT_FORMAT_12345",
            include_tcga=True,
            include_1000g=True,
        )

        # Should still return a result even if all sources fail
        assert result is not None
        assert result.variant_id == "INVALID_VARIANT_FORMAT_12345"

        # Sources should return None or be in error_sources
        assert result.tcga is None
        assert result.thousand_genomes is None

```

--------------------------------------------------------------------------------
/docs/tutorials/biothings-prompts.md:
--------------------------------------------------------------------------------

```markdown
# BioThings Integration Example Prompts

This guide provides example prompts for AI assistants to effectively use the BioThings suite integration in BioMCP.

## Overview of BioThings Suite

BioMCP integrates with the complete BioThings suite of APIs:

- **MyGene.info** - Gene information and annotations
- **MyDisease.info** - Disease ontology and synonyms
- **MyVariant.info** - Genetic variant annotations (pre-existing integration, enhanced with BioThings client)
- **MyChem.info** - Drug/chemical information and annotations

All four services share common infrastructure through the BioThings client module, providing consistent error handling, rate limiting, and response parsing.

## Gene Information Retrieval

### Basic Gene Lookup

```
"What is the TP53 gene?"
"Tell me about BRAF"
"Get information on the EGFR gene"
"What does the BRCA1 gene do?"
```

**Expected tool usage**: `gene_getter("TP53")` → Returns official name, summary, aliases

### Gene by ID

```
"Look up gene with Entrez ID 7157"
"What is gene 673?"
```

**Expected tool usage**: `gene_getter("7157")` → Returns TP53 information

### Gene Context for Research

```
"I need to understand the KRAS gene before searching for mutations"
"What type of protein does BRAF encode?"
"Give me the official name and aliases for MYC"
```

## Disease Information Retrieval

### Basic Disease Lookup

```
"What is GIST?"
"Tell me about melanoma"
"Define non-small cell lung cancer"
"What is Erdheim-Chester disease?"
```

**Expected tool usage**: `disease_getter("GIST")` → Returns definition, synonyms, ontology IDs

### Disease by Ontology ID

```
"Look up disease MONDO:0018076"
"What is DOID:1909?"
```

**Expected tool usage**: `disease_getter("MONDO:0018076")` → Returns disease information

### Disease Synonyms for Research

```
"What are all the names for gastrointestinal stromal tumor?"
"Find synonyms for NSCLC"
"What other terms are used for melanoma?"
```

## Variant Information Retrieval (MyVariant.info)

MyVariant.info is part of the BioThings suite and provides comprehensive variant annotations. BioMCP has extensive integration with specialized features:

### Basic Variant Lookup

```
"Get information about rs7412"
"What is the BRAF V600E variant?"
"Look up variant chr7:140453136-140453136"
```

**Expected tool usage**: `variant_getter("rs7412")` → Returns variant annotations with external database links

### Variant Search with Filters

```
"Find pathogenic variants in TP53"
"Search for BRCA1 variants with high impact"
"Get all loss-of-function variants in KRAS"
```

**Expected tool usage**: `variant_searcher(gene="TP53", significance="pathogenic")` → Returns filtered variant list

### Variant with Cancer Context

```
"What cancer types have BRAF V600E mutations?"
"Get TCGA data for TP53 R273H"
```

**Expected tool usage**: Variant tools automatically integrate cBioPortal, TCGA, and 1000 Genomes data when available

## Drug Information Retrieval (MyChem.info)

MyChem.info is part of the BioThings suite and provides comprehensive drug/chemical information.

### Basic Drug Lookup

```
"What is imatinib?"
"Tell me about aspirin"
"Get information on pembrolizumab"
"What does metformin do?"
```

**Expected tool usage**: `drug_getter("imatinib")` → Returns drug information with database links

### Drug by ID

```
"Look up DrugBank ID DB00619"
"What is CHEMBL941?"
"Get details for PubChem CID 5291"
```

**Expected tool usage**: `drug_getter("DB00619")` → Returns drug details by identifier

### Drug Properties and Mechanism

```
"What is the mechanism of action of imatinib?"
"Find the chemical formula for aspirin"
"What are the trade names for adalimumab?"
"How does pembrolizumab work?"
```

**Expected tool usage**: `drug_getter("pembrolizumab")` → Returns mechanism, indications, and properties

## Integrated Research Workflows

### Variant Analysis with Gene Context

```
"Analyze the BRAF V600E mutation - first tell me about the gene, then find pathogenic variants"
```

**Expected tool sequence**:

1. `think(thought="Analyzing BRAF V600E mutation", thoughtNumber=1)`
2. `gene_getter("BRAF")` → Gene context
3. `variant_searcher(gene="BRAF", hgvsp="V600E", significance="pathogenic")` → Variant details

### Clinical Trial Search with Disease Expansion

```
"Find clinical trials for GIST patients"
"Search for trials treating gastrointestinal stromal tumors"
```

**Expected tool usage**:

- `trial_searcher(conditions=["GIST"], expand_synonyms=True)`
- Automatically searches for: GIST OR "gastrointestinal stromal tumor" OR "GI stromal tumor"

### Comprehensive Gene-Disease Research

```
"I'm researching EGFR mutations in lung cancer. Start with the gene, then the disease, then find relevant trials"
```

**Expected tool sequence**:

1. `think(thought="Researching EGFR in lung cancer", thoughtNumber=1)`
2. `gene_getter("EGFR")` → Gene information
3. `disease_getter("lung cancer")` → Disease context and synonyms
4. `trial_searcher(conditions=["lung cancer"], interventions=["EGFR inhibitor"])` → Trials with synonym expansion

### Multi-Gene Analysis

```
"Compare TP53, BRAF, and KRAS genes"
"Tell me about the RAS family genes: KRAS, NRAS, HRAS"
```

**Expected tool usage**: Multiple `gene_getter()` calls for each gene

## Advanced Use Cases

### Gene Alias Resolution

```
"What is the official name for the p53 gene?"
"Is TRP53 the same as TP53?"
```

**Expected tool usage**: `gene_getter("p53")` → Will resolve to TP53

### Disease Name Disambiguation

```
"Is GIST the same as gastrointestinal stromal tumor?"
"What's the MONDO ID for melanoma?"
```

**Expected tool usage**: `disease_getter("GIST")` → Shows all synonyms and IDs

### Trial Search Without Synonym Expansion

```
"Find trials specifically mentioning 'GIST' not other names"
```

**Expected tool usage**: `trial_searcher(conditions=["GIST"], expand_synonyms=False)`

### Integrated Literature and Gene Search

```
"Find recent papers about TP53 mutations - first tell me about the gene"
```

**Expected tool sequence**:

1. `gene_getter("TP53")` → Gene context
2. `article_searcher(genes=["TP53"], keywords=["mutation"])` → Literature

### Drug-Target Research

```
"I'm researching imatinib for CML treatment. Get drug info, then find trials"
"What targets does pembrolizumab hit? Then find related articles"
```

**Expected tool sequence**:

1. `think(thought="Researching imatinib for CML", thoughtNumber=1)`
2. `drug_getter("imatinib")` → Drug information and mechanism
3. `trial_searcher(interventions=["imatinib"], conditions=["chronic myeloid leukemia"])`

## Tips for AI Assistants

1. **Always use think() first** for complex biomedical queries
2. **Gene context helps interpretation**: Get gene info before analyzing variants
3. **Disease synonyms improve search**: Use expand_synonyms=True (default) for comprehensive results
4. **Drug mechanisms matter**: Get drug info before searching trials to understand targets
5. **Real-time data**: All BioThings data is fetched live, ensuring current information
6. **Combine tools**: Gene + disease + variant + drug tools work together for comprehensive analysis

## Common Patterns

### Pattern 1: Gene → Variant → Clinical Impact

```
gene_getter("BRAF") →
variant_searcher(gene="BRAF", significance="pathogenic") →
article_searcher(genes=["BRAF"], diseases=["melanoma"])
```

### Pattern 2: Disease → Trials → Locations

```
disease_getter("NSCLC") →
trial_searcher(conditions=["NSCLC"], expand_synonyms=True) →
trial_locations_getter(nct_id="NCT...")
```

### Pattern 3: Multi-Gene Pathway Analysis

```
gene_getter("EGFR") →
gene_getter("KRAS") →
gene_getter("BRAF") →
article_searcher(genes=["EGFR", "KRAS", "BRAF"], keywords=["pathway"])
```

## Unified Search with BioThings Domains

BioMCP's unified search now supports gene, drug, and disease domains alongside articles, trials, and variants:

### Domain-Specific Search

```
"Search for BRAF in the gene domain"
"Find imatinib in drugs"
"Look up melanoma in diseases"
```

**Expected tool usage**:

- `search(domain="gene", keywords=["BRAF"])`
- `search(domain="drug", keywords=["imatinib"])`
- `search(domain="disease", keywords=["melanoma"])`

### Unified Query Language with BioThings

```
"genes.symbol:BRAF AND genes.type:protein-coding"
"drugs.tradename:gleevec"
"diseases.name:melanoma OR diseases.synonym:malignant melanoma"
```

**Expected tool usage**: Query parser automatically routes to appropriate domains

### Cross-Domain Gene Searches

```
"gene:BRAF"  # Searches articles, variants, genes, and trials
"Search everything about TP53"
```

**Expected behavior**:

- Gene queries trigger searches across multiple domains
- Results include gene info, variants, articles, and related trials

### Cross-Domain Disease Searches

```
"disease:melanoma"  # Searches articles, trials, and diseases
"Find all information about NSCLC"
```

**Expected behavior**:

- Disease queries search articles, trials, and disease databases
- Disease synonyms are automatically expanded in trial searches

### Combined Domain Queries

```
"gene:BRAF AND disease:melanoma"
"drugs.indication:leukemia AND trials.phase:3"
"genes.symbol:EGFR AND articles.year:>2023"
```

### Unified Fetch

```
"Fetch BRAF from gene domain"
"Get imatinib details from drugs"
"Retrieve melanoma information from diseases"
```

**Expected tool usage**:

- `fetch(id="BRAF", domain="gene")`
- `fetch(id="imatinib", domain="drug")`
- `fetch(id="melanoma", domain="disease")`

## Error Handling

If a gene/disease is not found:

- Check for typos or alternative names
- Try searching with partial names
- Use official symbols for genes (e.g., "TP53" not "p53 gene")
- For diseases, try both common and medical names

```

--------------------------------------------------------------------------------
/src/biomcp/constants.py:
--------------------------------------------------------------------------------

```python
"""
Central constants file for BioMCP.

This module contains all constants used throughout the BioMCP codebase,
including API URLs, default values, limits, and domain configurations.
"""

# ============================================================================
# API Base URLs
# ============================================================================

# PubTator3 API
# https://www.ncbi.nlm.nih.gov/research/pubtator3/api
PUBTATOR3_BASE_URL = "https://www.ncbi.nlm.nih.gov/research/pubtator3-api"
PUBTATOR3_SEARCH_URL = f"{PUBTATOR3_BASE_URL}/search/"
PUBTATOR3_FULLTEXT_URL = f"{PUBTATOR3_BASE_URL}/publications/export/biocjson"
PUBTATOR3_AUTOCOMPLETE_URL = f"{PUBTATOR3_BASE_URL}/entity/autocomplete/"

# ClinicalTrials.gov API
# https://clinicaltrials.gov/data-api/api
CLINICAL_TRIALS_BASE_URL = "https://clinicaltrials.gov/api/v2/studies"
CLINICAL_TRIALS_STUDY_URL = "https://clinicaltrials.gov/study/"

# NCI Clinical Trials Search API
# https://clinicaltrialsapi.cancer.gov/api/v2
NCI_CTS_BASE_URL = "https://clinicaltrialsapi.cancer.gov/api/v2"
NCI_TRIALS_URL = f"{NCI_CTS_BASE_URL}/trials"
NCI_ORGANIZATIONS_URL = f"{NCI_CTS_BASE_URL}/organizations"
NCI_DISEASES_URL = f"{NCI_CTS_BASE_URL}/diseases"
NCI_INTERVENTIONS_URL = f"{NCI_CTS_BASE_URL}/interventions"
NCI_BIOMARKERS_URL = f"{NCI_CTS_BASE_URL}/biomarkers"
NCI_API_KEY_ENV = "NCI_API_KEY"

# MyVariant.info API
# https://docs.myvariant.info/
MYVARIANT_BASE_URL = "https://myvariant.info/v1"
MYVARIANT_QUERY_URL = f"{MYVARIANT_BASE_URL}/query"
MYVARIANT_GET_URL = f"{MYVARIANT_BASE_URL}/variant"

# Preprint Server APIs
BIORXIV_BASE_URL = "https://api.biorxiv.org/details/biorxiv"
MEDRXIV_BASE_URL = "https://api.biorxiv.org/details/medrxiv"
EUROPE_PMC_BASE_URL = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"

# External Variant APIs
GDC_BASE_URL = "https://api.gdc.cancer.gov"
GDC_SSMS_ENDPOINT_URL = f"{GDC_BASE_URL}/ssms"  # Simple Somatic Mutations
GDC_SSM_OCCURRENCES_URL = f"{GDC_BASE_URL}/ssm_occurrences"
ENSEMBL_REST_BASE_URL = "https://rest.ensembl.org"
ENSEMBL_VARIATION_URL = f"{ENSEMBL_REST_BASE_URL}/variation/human"
CBIOPORTAL_BASE_URL = "https://www.cbioportal.org/api"

# External Resource URLs
PUBMED_BASE_URL = "https://pubmed.ncbi.nlm.nih.gov/"
PMC_BASE_URL = "https://www.ncbi.nlm.nih.gov/pmc/articles/"
DOI_BASE_URL = "https://doi.org/"
DBSNP_BASE_URL = "https://www.ncbi.nlm.nih.gov/snp/"
CLINVAR_BASE_URL = "https://www.ncbi.nlm.nih.gov/clinvar/variation/"
COSMIC_BASE_URL = "https://cancer.sanger.ac.uk/cosmic/mutation/overview?id="
CIVIC_BASE_URL = "https://civicdb.org/variants/"
ENSEMBL_VARIANT_BASE_URL = (
    "https://ensembl.org/Homo_sapiens/Variation/Explore?v="
)
GENENAMES_BASE_URL = (
    "https://www.genenames.org/data/gene-symbol-report/#!/symbol/"
)
UCSC_GENOME_BROWSER_URL = "https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&"

# ============================================================================
# Default Values and Limits
# ============================================================================

# Caching
DEFAULT_CACHE_TIMEOUT = 60 * 60 * 24 * 7  # 1 week in seconds

# Pagination
SYSTEM_PAGE_SIZE = (
    10  # Default page size for all searches (reduced for token efficiency)
)
DEFAULT_PAGE_SIZE = 10  # Default page size for unified search
MIN_PAGE_SIZE = 1
MAX_PAGE_SIZE = 100
DEFAULT_PAGE_NUMBER = 1

# Search limits
MAX_RESULTS_PER_DOMAIN_DEFAULT = (
    10  # Default max results per domain in unified search
)
ESTIMATED_ADDITIONAL_RESULTS = (
    100  # Estimate for additional results when full page returned
)
DEFAULT_AUTOCOMPLETE_LIMIT = 1
MAX_AUTOCOMPLETE_LIMIT = 100

# Text display
MAX_WIDTH = 72  # Maximum width for text wrapping in console output
SNIPPET_LENGTH = 200  # Maximum length for text snippets in search results

# Genome Assembly
DEFAULT_ASSEMBLY = "hg19"  # Default genome assembly for MyVariant.info API

# Rate Limiting
DEFAULT_RATE_LIMIT_PER_SECOND = 10.0
DEFAULT_BURST_SIZE = 20
SLIDING_WINDOW_MINUTE_LIMIT = 60
SLIDING_WINDOW_HOUR_LIMIT = 1000

# Retry Configuration
DEFAULT_MAX_RETRY_ATTEMPTS = 3
DEFAULT_INITIAL_RETRY_DELAY = 1.0
DEFAULT_MAX_RETRY_DELAY = 60.0
DEFAULT_EXPONENTIAL_BASE = 2.0
AGGRESSIVE_MAX_RETRY_ATTEMPTS = 5
AGGRESSIVE_INITIAL_RETRY_DELAY = 2.0
AGGRESSIVE_MAX_RETRY_DELAY = 30.0

# Circuit Breaker Configuration
DEFAULT_FAILURE_THRESHOLD = 10
DEFAULT_RECOVERY_TIMEOUT = 30.0
DEFAULT_SUCCESS_THRESHOLD = 3

# Metrics Configuration
MAX_METRIC_SAMPLES = 1000
METRIC_PERCENTILE_50 = 0.50
METRIC_PERCENTILE_95 = 0.95
METRIC_PERCENTILE_99 = 0.99
METRIC_JITTER_RANGE = 0.1  # 10% jitter

# HTTP Client Configuration
HTTP_TIMEOUT_SECONDS = 120.0
HTTP_ERROR_CODE_NETWORK = 599
HTTP_ERROR_CODE_UNSUPPORTED_METHOD = 405

# Batching and Pagination Configuration
DEFAULT_BATCH_SIZE = 10
DEFAULT_BATCH_TIMEOUT = 0.1
CBIOPORTAL_BATCH_SIZE = 5
EUROPE_PMC_PAGE_SIZE = 25
BIORXIV_MAX_PAGES = 3
BIORXIV_RESULTS_PER_PAGE = 30
BIORXIV_DEFAULT_DAYS_BACK = 365

# Prefetching Configuration
PREFETCH_TOP_GENES = 5
PREFETCH_TOP_DISEASES = 3
PREFETCH_TOP_CHEMICALS = 3
PREFETCH_TIMEOUT = 2.0

# Cache Configuration
REQUEST_CACHE_MAX_SIZE = 1000
CACHE_KEY_SAMPLE_SIZE = 100

# Connection Pool Configuration
CONNECTION_POOL_MAX_KEEPALIVE = 20
CONNECTION_POOL_MAX_CONNECTIONS = 100
CONNECTION_POOL_KEEPALIVE_EXPIRY = 30

# ============================================================================
# Domain Configuration
# ============================================================================

# Valid domains for search
VALID_DOMAINS = [
    "article",
    "trial",
    "variant",
    "gene",
    "drug",
    "disease",
    "nci_organization",
    "nci_intervention",
    "nci_biomarker",
    "nci_disease",
    # OpenFDA domains
    "fda_adverse",
    "fda_label",
    "fda_device",
    "fda_approval",
    "fda_recall",
    "fda_shortage",
]
VALID_DOMAINS_PLURAL = [
    "articles",
    "trials",
    "variants",
    "genes",
    "drugs",
    "diseases",
    "nci_organizations",
    "nci_interventions",
    "nci_biomarkers",
    "nci_diseases",
    # OpenFDA domains
    "fda_adverse_events",
    "fda_labels",
    "fda_device_events",
    "fda_approvals",
    "fda_recalls",
    "fda_shortages",
]

# Domain mappings for unified search
DOMAIN_TO_PLURAL = {
    "article": "articles",
    "trial": "trials",
    "variant": "variants",
    "gene": "genes",
    "drug": "drugs",
    "disease": "diseases",
    "nci_organization": "nci_organizations",
    "nci_intervention": "nci_interventions",
    "nci_biomarker": "nci_biomarkers",
    "nci_disease": "nci_diseases",
    # OpenFDA domains
    "fda_adverse": "fda_adverse_events",
    "fda_label": "fda_labels",
    "fda_device": "fda_device_events",
    "fda_approval": "fda_approvals",
    "fda_recall": "fda_recalls",
    "fda_shortage": "fda_shortages",
}

PLURAL_TO_DOMAIN = {
    "articles": "article",
    "trials": "trial",
    "variants": "variant",
    "genes": "gene",
    "drugs": "drug",
    "diseases": "disease",
    "nci_organizations": "nci_organization",
    "nci_interventions": "nci_intervention",
    "nci_biomarkers": "nci_biomarker",
    "nci_diseases": "nci_disease",
    # OpenFDA domains
    "fda_adverse_events": "fda_adverse",
    "fda_labels": "fda_label",
    "fda_device_events": "fda_device",
    "fda_approvals": "fda_approval",
    "fda_recalls": "fda_recall",
    "fda_shortages": "fda_shortage",
}

# Trial detail sections
TRIAL_DETAIL_SECTIONS = [
    "protocol",
    "locations",
    "outcomes",
    "references",
    "all",
    "full",
]

# ============================================================================
# Field Names and Enums
# ============================================================================

# Autocomplete concept types
AUTOCOMPLETE_CONCEPTS = ["variant", "chemical", "disease", "gene"]

# HTTP methods
VALID_HTTP_METHODS = ["GET", "POST"]

# Trial search defaults
DEFAULT_TRIAL_FORMAT = "csv"
DEFAULT_TRIAL_MARKUP = "markdown"

# ============================================================================
# Error Messages
# ============================================================================

ERROR_THOUGHT_NUMBER_MIN = "Error: thoughtNumber must be >= 1"
ERROR_TOTAL_THOUGHTS_MIN = "Error: totalThoughts must be >= 1"
ERROR_DOMAIN_REQUIRED = "Either 'query' or 'domain' parameter must be provided"
ERROR_THOUGHT_REQUIRED = (
    "'thought' parameter is required when domain='thinking'"
)
ERROR_THOUGHT_NUMBER_REQUIRED = (
    "'thoughtNumber' parameter is required when domain='thinking'"
)
ERROR_TOTAL_THOUGHTS_REQUIRED = (
    "'totalThoughts' parameter is required when domain='thinking'"
)
ERROR_NEXT_THOUGHT_REQUIRED = (
    "'nextThoughtNeeded' parameter is required when domain='thinking'"
)

# ============================================================================
# API Response Formatting
# ============================================================================

# Default values for missing data
DEFAULT_TITLE = "Untitled"
DEFAULT_GENE = "Unknown"
DEFAULT_SIGNIFICANCE = "Unknown"

# Metadata field names
METADATA_YEAR = "year"
METADATA_JOURNAL = "journal"
METADATA_AUTHORS = "authors"
METADATA_STATUS = "status"
METADATA_PHASE = "phase"
METADATA_START_DATE = "start_date"
METADATA_COMPLETION_DATE = "primary_completion_date"
METADATA_GENE = "gene"
METADATA_RSID = "rsid"
METADATA_SIGNIFICANCE = "clinical_significance"
METADATA_CONSEQUENCE = "consequence"
METADATA_SOURCE = "source"

# Result field names
RESULT_ID = "id"
RESULT_TITLE = "title"
RESULT_SNIPPET = "snippet"  # Internal use for domain handlers
RESULT_TEXT = "text"  # OpenAI MCP compliant field name
RESULT_URL = "url"
RESULT_METADATA = "metadata"
RESULT_DATA = "data"
RESULT_PAGE = "page"
RESULT_PAGE_SIZE = "page_size"
RESULT_TOTAL = "total"
RESULT_NEXT_PAGE = "next_page"

```

--------------------------------------------------------------------------------
/docs/backend-services-reference/05-nci-cts-api.md:
--------------------------------------------------------------------------------

```markdown
# NCI Clinical Trials Search API Reference

The National Cancer Institute's Clinical Trials Search (CTS) API provides advanced search capabilities for cancer clinical trials with enhanced filtering options beyond ClinicalTrials.gov.

## Overview

The NCI CTS API offers:

- Advanced biomarker and mutation filtering
- Comprehensive organization database
- Intervention and drug vocabularies
- Disease terminology with NCI Thesaurus integration
- Prior therapy and eligibility criteria

**Base URL:** `https://clinicaltrialsapi.cancer.gov/api/v2/`

## Authentication

An API key is required for all endpoints.

### Obtaining an API Key

1. Visit [https://clinicaltrialsapi.cancer.gov/](https://clinicaltrialsapi.cancer.gov/)
2. Click "Get API Key"
3. Complete registration
4. Key is emailed immediately

### Using the API Key

Include in request headers:

```
X-API-KEY: your-api-key-here
```

Or as query parameter:

```
?api_key=your-api-key-here
```

## Core Endpoints

### 1. Trial Search

```
GET /trials
```

Search for clinical trials with advanced filtering.

#### Parameters

**Basic Search:**

- `keyword`: General text search
- `nct_id`: Specific NCT identifiers
- `diseases`: Disease/condition names
- `interventions`: Treatment names

**Advanced Filters:**

- `biomarkers`: Required biomarkers/mutations
- `prior_therapy_required`: true/false
- `accepts_brain_mets`: true/false
- `min_age`: Minimum age in years
- `max_age`: Maximum age in years

**Pagination:**

- `size`: Results per page (max 50)
- `from`: Starting index (offset)

#### Example Request

```bash
curl -X GET "https://clinicaltrialsapi.cancer.gov/api/v2/trials" \
  -H "X-API-KEY: your-key" \
  -d "diseases=melanoma" \
  -d "biomarkers=BRAF V600E" \
  -d "accepts_brain_mets=true" \
  -d "size=10"
```

#### Response Format

```json
{
  "total": 42,
  "trials": [
    {
      "nct_id": "NCT04280705",
      "brief_title": "BRAF/MEK Inhibitor Combination",
      "current_trial_status": "Active",
      "phase": "Phase II",
      "biomarker_eligibility": [
        {
          "gene": "BRAF",
          "variant": "V600E",
          "required": true
        }
      ],
      "sites": [...]
    }
  ]
}
```

### 2. Trial Details

```
GET /trials/{nct_id}
```

Get comprehensive information about a specific trial.

#### Example Request

```bash
curl -X GET "https://clinicaltrialsapi.cancer.gov/api/v2/trials/NCT04280705" \
  -H "X-API-KEY: your-key"
```

### 3. Organization Search

```
GET /organizations
```

Search for cancer research organizations and treatment centers.

#### Parameters

- `name`: Organization name
- `org_city`: City location
- `org_state_or_province`: State/province
- `org_country`: Country
- `org_type`: Type (e.g., "NCI-designated", "academic")

**Important:** Always use city AND state together to avoid Elasticsearch errors.

#### Example Request

```bash
curl -X GET "https://clinicaltrialsapi.cancer.gov/api/v2/organizations" \
  -H "X-API-KEY: your-key" \
  -d "org_city=Houston" \
  -d "org_state_or_province=TX"
```

### 4. Organization Details

```
GET /organizations/{org_id}
```

Get details about a specific organization.
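
#### Example Request

A minimal Python sketch; the `requests` library is assumed, and the `org_id` value is taken from the example organization object later in this page:

```python
import requests

api_key = "your-api-key-here"
org_id = "NCI-2021-00123"  # e.g. an org_id returned by the /organizations search above

resp = requests.get(
    f"https://clinicaltrialsapi.cancer.gov/api/v2/organizations/{org_id}",
    headers={"X-API-KEY": api_key},
)
resp.raise_for_status()
print(resp.json())
```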

### 5. Intervention Search

```
GET /interventions
```

Search for drugs, devices, and procedures used in trials.

#### Parameters

- `name`: Intervention name
- `type`: Drug, Device, Procedure, etc.
- `synonyms`: Include synonym matches (default: true)

#### Example Request

```bash
curl -X GET "https://clinicaltrialsapi.cancer.gov/api/v2/interventions" \
  -H "X-API-KEY: your-key" \
  -d "name=pembrolizumab" \
  -d "type=Drug"
```

### 6. Intervention Details

```
GET /interventions/{intervention_id}
```

### 7. Biomarker Search

```
GET /biomarkers
```

Search for biomarkers used in trial eligibility criteria.

#### Parameters

- `name`: Biomarker name
- `type`: mutation, expression, etc.
- `gene`: Associated gene symbol
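
#### Example Request

A minimal Python sketch using the parameters above; the `requests` library and the `biomarkers` response key are assumptions based on the pipeline example later in this page:

```python
import requests

api_key = "your-api-key-here"

resp = requests.get(
    "https://clinicaltrialsapi.cancer.gov/api/v2/biomarkers",
    headers={"X-API-KEY": api_key},
    params={"gene": "BRAF", "name": "V600E"},
)
resp.raise_for_status()
for biomarker in resp.json().get("biomarkers", []):
    print(biomarker.get("name"))
```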

### 8. Disease Search

```
GET /diseases
```

Search NCI's controlled vocabulary of cancer conditions.

#### Parameters

- `name`: Disease name
- `include_synonyms`: Include synonym matches
- `category`: Disease category
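
#### Example Request

A minimal Python sketch using the parameters above; the `requests` library is assumed, and the response shape is not documented here, so the raw JSON is printed:

```python
import requests

api_key = "your-api-key-here"

resp = requests.get(
    "https://clinicaltrialsapi.cancer.gov/api/v2/diseases",
    headers={"X-API-KEY": api_key},
    params={"name": "melanoma", "include_synonyms": "true"},
)
resp.raise_for_status()
print(resp.json())
```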

## Advanced Features

### Biomarker-Based Trial Search

Find trials requiring specific mutations:

```python
params = {
    "diseases": "non-small cell lung cancer",
    "biomarkers": ["EGFR L858R", "EGFR exon 19 deletion"],
    "prior_therapy_required": False,
    "accepts_brain_mets": True
}

response = requests.get(
    "https://clinicaltrialsapi.cancer.gov/api/v2/trials",
    headers={"X-API-KEY": api_key},
    params=params
)
```

### Complex Eligibility Queries

```python
# Find trials with specific eligibility
params = {
    "diseases": "melanoma",
    "biomarkers": "BRAF V600E",
    "min_age": 18,
    "max_age": 75,
    "prior_therapy": "vemurafenib",  # Exclude if prior vemurafenib
    "performance_status": "0-1"       # ECOG 0 or 1
}
```

### Organization Network Analysis

```python
# Find all NCI-designated centers in a region
params = {
    "org_type": "NCI-designated",
    "org_state_or_province": ["CA", "OR", "WA"]  # West Coast
}

orgs = requests.get(
    "https://clinicaltrialsapi.cancer.gov/api/v2/organizations",
    headers={"X-API-KEY": api_key},
    params=params
)

# Get trials at each center
for org in orgs.json()["organizations"]:
    trials = requests.get(
        f"https://clinicaltrialsapi.cancer.gov/api/v2/trials",
        headers={"X-API-KEY": api_key},
        params={"site_org_id": org["id"]}
    )
```

## Data Models

### Trial Object

```json
{
  "nct_id": "NCT04280705",
  "brief_title": "Study Title",
  "official_title": "Full Protocol Title",
  "current_trial_status": "Active",
  "phase": "Phase II",
  "study_type": "Interventional",
  "primary_purpose": "Treatment",
  "diseases": [
    {
      "name": "Melanoma",
      "nci_thesaurus_id": "C0025202"
    }
  ],
  "biomarker_eligibility": [
    {
      "gene": "BRAF",
      "variant": "V600E",
      "required": true,
      "inclusion": true
    }
  ],
  "arms": [...],
  "sites": [...]
}
```

### Organization Object

```json
{
  "org_id": "NCI-2021-00123",
  "name": "MD Anderson Cancer Center",
  "type": "NCI-designated",
  "address": {
    "city": "Houston",
    "state": "TX",
    "country": "United States",
    "postal_code": "77030"
  },
  "contact": {
    "name": "Clinical Trials Office",
    "phone": "1-800-392-1611",
    "email": "[email protected]"
  },
  "active_trials_count": 1250
}
```

## Error Handling

### Common Errors

#### 401 Unauthorized

```json
{
  "error": "Invalid or missing API key"
}
```

#### 400 Bad Request

```json
{
  "error": "Invalid parameter combination",
  "details": "Must specify both city AND state for location search"
}
```

#### 429 Rate Limited

```json
{
  "error": "Rate limit exceeded",
  "retry_after": 3600
}
```

### Best Practices

1. **Always use city AND state together** for location searches
2. **Handle missing totals** - the API may not return total counts with size parameter
3. **Use specific searches** - broad queries may timeout
4. **Implement retry logic** for rate limits (a sketch follows below)
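
A minimal retry sketch honoring the `retry_after` hint shown in the 429 response above; the helper name `get_with_retry` and the use of the `requests` library are illustrative, not part of BioMCP:

```python
import time

import requests


def get_with_retry(url, headers, params, max_attempts=3):
    """GET with simple backoff when the API answers HTTP 429."""
    for attempt in range(max_attempts):
        resp = requests.get(url, headers=headers, params=params)
        if resp.status_code != 429:
            resp.raise_for_status()
            return resp.json()
        # Honor the retry_after hint from the 429 body when present,
        # otherwise back off exponentially (1s, 2s, 4s, ...)
        try:
            wait = int(resp.json().get("retry_after", 2**attempt))
        except ValueError:
            wait = 2**attempt
        time.sleep(wait)
    raise RuntimeError("Rate limit persisted after retries")
```

For example, `get_with_retry("https://clinicaltrialsapi.cancer.gov/api/v2/trials", {"X-API-KEY": api_key}, {"diseases": "melanoma"})` waits out a transient 429 before giving up.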

## Rate Limits

- **With API Key**: 1,000 requests/day
- **Burst Rate**: 10 requests/second
- **Without Key**: Not supported

## Differences from ClinicalTrials.gov

### Enhanced Features

- **Biomarker search**: Mutation-specific queries
- **Prior therapy**: Exclude based on previous treatments
- **Brain metastases**: Specific acceptance criteria
- **Performance status**: ECOG/Karnofsky filtering

### Limitations

- **Cancer trials only**: Limited to oncology studies
- **No offset pagination**: Must use size parameter carefully
- **Location parameters**: Different naming (`org_` prefix)

## Integration Examples

### Example 1: Precision Medicine Search

```python
async def find_precision_trials(mutation, cancer_type, location):
    """Find trials for specific mutation in cancer type near location"""

    # Search for trials
    trial_params = {
        "diseases": cancer_type,
        "biomarkers": mutation,
        "accepts_brain_mets": True,
        "size": 50
    }

    trials = await fetch_nci_api("trials", trial_params)

    # Filter by location if provided
    if location:
        nearby_trials = []
        for trial in trials["trials"]:
            for site in trial.get("sites", []):
                distance = calculate_distance(location, site["coordinates"])
                if distance < 100:  # 100 miles
                    nearby_trials.append(trial)
                    break

        return nearby_trials

    return trials["trials"]
```

### Example 2: Biomarker-Driven Pipeline

```python
def biomarker_trial_pipeline(gene, variant):
    """Complete pipeline from variant to trials"""

    # 1. Search biomarkers
    biomarkers = requests.get(
        "https://clinicaltrialsapi.cancer.gov/api/v2/biomarkers",
        headers={"X-API-KEY": api_key},
        params={"gene": gene, "name": variant}
    ).json()

    # 2. Get associated trials
    all_trials = []
    for biomarker in biomarkers.get("biomarkers", []):
        trials = requests.get(
            "https://clinicaltrialsapi.cancer.gov/api/v2/trials",
            headers={"X-API-KEY": api_key},
            params={"biomarker_id": biomarker["id"]}
        ).json()
        all_trials.extend(trials.get("trials", []))

    # 3. Deduplicate and sort by phase
    unique_trials = {t["nct_id"]: t for t in all_trials}.values()
    return sorted(unique_trials, key=lambda x: x.get("phase", ""))
```
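
For example, `biomarker_trial_pipeline("BRAF", "V600E")` would return deduplicated trials sorted by phase, assuming the `biomarkers` and `trials` response fields shown above.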

## Support Resources

- **API Documentation**: [https://clinicaltrialsapi.cancer.gov/](https://clinicaltrialsapi.cancer.gov/)
- **Support Email**: [email protected]
- **Status Page**: [https://status.cancer.gov/](https://status.cancer.gov/)
- **Terms of Use**: [https://clinicaltrialsapi.cancer.gov/terms](https://clinicaltrialsapi.cancer.gov/terms)

```

--------------------------------------------------------------------------------
/src/biomcp/openfda/drug_approvals.py:
--------------------------------------------------------------------------------

```python
"""
OpenFDA drug approvals (Drugs@FDA) integration.
"""

import logging
from typing import Any

from .constants import (
    OPENFDA_DEFAULT_LIMIT,
    OPENFDA_DISCLAIMER,
    OPENFDA_DRUGSFDA_URL,
)
from .utils import (
    format_count,
    make_openfda_request,
)

logger = logging.getLogger(__name__)


async def search_drug_approvals(
    drug: str | None = None,
    application_number: str | None = None,
    approval_year: str | None = None,
    limit: int = OPENFDA_DEFAULT_LIMIT,
    skip: int = 0,
    api_key: str | None = None,
) -> str:
    """
    Search FDA drug approval records from Drugs@FDA.

    Args:
        drug: Drug name (brand or generic) to search for
        application_number: NDA or BLA application number
        approval_year: Year of approval (YYYY format)
        limit: Maximum number of results to return
        skip: Number of results to skip (for pagination)

        api_key: Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)

    Returns:
        Formatted string with drug approval information
    """
    # Build search query
    search_params = {}

    if drug:
        # Search both brand and generic names
        search_params["search"] = (
            f'(openfda.brand_name:"{drug}" OR '
            f'openfda.generic_name:"{drug}" OR '
            f'openfda.substance_name:"{drug}")'
        )
    elif application_number:
        search_params["search"] = f'application_number:"{application_number}"'
    elif approval_year:
        # Search for approvals in a specific year
        search_params["search"] = (
            f"products.marketing_status_date:[{approval_year}-01-01 TO {approval_year}-12-31]"
        )

    # Add pagination
    search_params["limit"] = str(min(limit, 100))
    search_params["skip"] = str(skip)

    # Sort by submission date (most recent first)
    search_params["sort"] = "submissions.submission_status_date:desc"

    # Make the request
    response, error = await make_openfda_request(
        OPENFDA_DRUGSFDA_URL, search_params, "openfda_approvals", api_key
    )

    if error:
        return f"⚠️ Error searching drug approvals: {error}"

    if not response or not response.get("results"):
        return "No drug approval records found matching your criteria."

    # Format the results
    results = response["results"]
    total = (
        response.get("meta", {}).get("results", {}).get("total", len(results))
    )

    output = ["## FDA Drug Approval Records\n"]

    if drug:
        output.append(f"**Drug**: {drug}")
    if application_number:
        output.append(f"**Application**: {application_number}")
    if approval_year:
        output.append(f"**Approval Year**: {approval_year}")

    output.append(
        f"**Total Records Found**: {format_count(total, 'record')}\n"
    )

    # Show results
    output.append(f"### Results (showing {len(results)} of {total}):\n")

    for i, record in enumerate(results, 1):
        output.extend(_format_approval_summary(record, i))

    output.append(f"\n{OPENFDA_DISCLAIMER}")

    return "\n".join(output)


async def get_drug_approval(
    application_number: str,
    api_key: str | None = None,
) -> str:
    """
    Get detailed drug approval information for a specific application.

    Args:
        application_number: NDA or BLA application number

        api_key: Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)

    Returns:
        Formatted string with detailed approval information
    """
    # Search for the specific application
    search_params = {
        "search": f'application_number:"{application_number}"',
        "limit": 1,
    }

    response, error = await make_openfda_request(
        OPENFDA_DRUGSFDA_URL, search_params, "openfda_approvals", api_key
    )

    if error:
        return f"⚠️ Error retrieving drug approval: {error}"

    if not response or not response.get("results"):
        return f"No approval record found for application {application_number}"

    record = response["results"][0]

    # Format detailed approval information
    output = [f"## Drug Approval Details: {application_number}\n"]

    # Basic information
    output.extend(_format_approval_header(record))

    # Products
    if products := record.get("products"):
        output.extend(_format_products(products))

    # Submissions history
    if submissions := record.get("submissions"):
        output.extend(_format_submissions(submissions))

    # OpenFDA metadata
    if openfda := record.get("openfda"):
        output.extend(_format_openfda_metadata(openfda))

    output.append(f"\n{OPENFDA_DISCLAIMER}")

    return "\n".join(output)


def _format_approval_summary(record: dict[str, Any], num: int) -> list[str]:
    """Format a single approval record summary."""
    output = [
        f"#### {num}. Application {record.get('application_number', 'Unknown')}"
    ]

    # Get sponsor/applicant
    if sponsor := record.get("sponsor_name"):
        output.append(f"**Sponsor**: {sponsor}")

    # Get drug names from OpenFDA data
    openfda = record.get("openfda", {})
    if brand_names := openfda.get("brand_name"):
        output.append(f"**Brand Name(s)**: {', '.join(brand_names[:3])}")
    if generic_names := openfda.get("generic_name"):
        output.append(f"**Generic Name(s)**: {', '.join(generic_names[:3])}")

    # Get products and their approval dates
    if products := record.get("products"):
        output.append("\n**Products**:")
        for prod in products[:3]:
            prod_num = prod.get("product_number", "?")
            dosage = prod.get("dosage_form", "")
            strength = prod.get("strength", "")
            status = prod.get("marketing_status", "")

            prod_line = f"- Product {prod_num}: {dosage}"
            if strength:
                prod_line += f" ({strength})"
            if status:
                prod_line += f" - {status}"
            output.append(prod_line)

    # Get most recent submission
    if submissions := record.get("submissions"):
        # Use the first submission entry as the most recent activity
        recent = submissions[0]
        sub_type = recent.get("submission_type", "")
        sub_status = recent.get("submission_status", "")
        sub_date = recent.get("submission_status_date", "")

        if sub_date:
            output.append(
                f"\n**Latest Activity**: {sub_type} - {sub_status} ({sub_date})"
            )

    output.append("")
    return output


def _format_approval_header(record: dict[str, Any]) -> list[str]:
    """Format the header section of detailed approval."""
    output = ["### Application Information"]

    output.append(
        f"**Application Number**: {record.get('application_number', 'Unknown')}"
    )

    if sponsor := record.get("sponsor_name"):
        output.append(f"**Sponsor**: {sponsor}")

    # OpenFDA names
    openfda = record.get("openfda", {})
    if brand_names := openfda.get("brand_name"):
        output.append(f"**Brand Names**: {', '.join(brand_names)}")
    if generic_names := openfda.get("generic_name"):
        output.append(f"**Generic Names**: {', '.join(generic_names)}")
    if substances := openfda.get("substance_name"):
        output.append(f"**Active Substances**: {', '.join(substances)}")

    output.append("")
    return output


def _format_products(products: list[dict[str, Any]]) -> list[str]:
    """Format product information."""
    output = ["### Products"]

    for prod in products:
        prod_num = prod.get("product_number", "Unknown")
        output.append(f"\n#### Product {prod_num}")

        if dosage := prod.get("dosage_form"):
            output.append(f"**Dosage Form**: {dosage}")
        if strength := prod.get("strength"):
            output.append(f"**Strength**: {strength}")
        if route := prod.get("route"):
            output.append(f"**Route**: {route}")
        if status := prod.get("marketing_status"):
            output.append(f"**Marketing Status**: {status}")
        if status_date := prod.get("marketing_status_date"):
            output.append(f"**Status Date**: {status_date}")
        if te_code := prod.get("te_code"):
            output.append(f"**Therapeutic Equivalence**: {te_code}")

    output.append("")
    return output


def _format_submissions(submissions: list[dict[str, Any]]) -> list[str]:
    """Format submission history."""
    output = ["### Submission History"]

    # Show most recent 5 submissions
    for sub in submissions[:5]:
        sub_num = sub.get("submission_number", "?")
        sub_type = sub.get("submission_type", "Unknown")
        sub_status = sub.get("submission_status", "")
        sub_date = sub.get("submission_status_date", "")

        output.append(f"\n**Submission {sub_num}**: {sub_type}")
        if sub_status:
            output.append(f"- Status: {sub_status}")
        if sub_date:
            output.append(f"- Date: {sub_date}")

        # Review priority if present
        if priority := sub.get("review_priority"):
            output.append(f"- Review Priority: {priority}")

        # Submission class if present
        if sub_class := sub.get("submission_class_code"):
            class_desc = sub.get("submission_class_code_description", "")
            output.append(f"- Class: {sub_class} - {class_desc}")

    output.append("")
    return output


def _format_openfda_metadata(openfda: dict[str, Any]) -> list[str]:
    """Format OpenFDA metadata."""
    output = ["### Additional Information"]

    if nui := openfda.get("nui"):
        output.append(f"**NUI Codes**: {', '.join(nui[:5])}")

    if pharm_class := openfda.get("pharm_class_epc"):
        output.append(f"**Pharmacologic Class**: {', '.join(pharm_class[:3])}")

    if moa := openfda.get("pharm_class_moa"):
        output.append(f"**Mechanism of Action**: {', '.join(moa[:3])}")

    if unii := openfda.get("unii"):
        output.append(f"**UNII Codes**: {', '.join(unii[:5])}")

    output.append("")
    return output

```

--------------------------------------------------------------------------------
/tests/tdd/variants/test_alphagenome_comprehensive.py:
--------------------------------------------------------------------------------

```python
"""Comprehensive tests for AlphaGenome integration."""

from unittest.mock import MagicMock, patch

import pandas as pd
import pytest

from biomcp.variants.alphagenome import (
    _validate_inputs,
    predict_variant_effects,
)


class TestInputValidation:
    """Test input validation for AlphaGenome."""

    def test_valid_chromosomes(self):
        """Test validation accepts valid chromosome formats."""
        valid_chroms = ["chr1", "chr22", "chrX", "chrY", "chrM", "chrMT"]
        for chrom in valid_chroms:
            # Should not raise
            _validate_inputs(chrom, 100, "A", "T")

    def test_invalid_chromosomes(self):
        """Test validation rejects invalid chromosome formats."""
        invalid_chroms = ["1", "chr23", "chrZ", "chromosome1", "Chr1", ""]
        for chrom in invalid_chroms:
            with pytest.raises(ValueError, match="Invalid chromosome format"):
                _validate_inputs(chrom, 100, "A", "T")

    def test_invalid_position(self):
        """Test validation rejects invalid positions."""
        with pytest.raises(ValueError, match="Position must be >= 1"):
            _validate_inputs("chr1", 0, "A", "T")
        with pytest.raises(ValueError, match="Position must be >= 1"):
            _validate_inputs("chr1", -10, "A", "T")

    def test_valid_nucleotides(self):
        """Test validation accepts valid nucleotides."""
        valid_cases = [
            ("A", "T"),
            ("C", "G"),
            ("ACGT", "TGCA"),
            ("a", "t"),
            ("acgt", "tgca"),  # lowercase should work
        ]
        for ref, alt in valid_cases:
            # Should not raise
            _validate_inputs("chr1", 100, ref, alt)

    def test_invalid_nucleotides(self):
        """Test validation rejects invalid nucleotides."""
        invalid_cases = [("N", "A"), ("A", "U"), ("AXG", "T"), ("A", "123")]
        for ref, alt in invalid_cases:
            with pytest.raises(ValueError, match="Invalid nucleotides"):
                _validate_inputs("chr1", 100, ref, alt)

    def test_empty_alleles(self):
        """Test validation rejects empty alleles."""
        with pytest.raises(
            ValueError, match="Reference allele cannot be empty"
        ):
            _validate_inputs("chr1", 100, "", "A")
        with pytest.raises(
            ValueError, match="Alternate allele cannot be empty"
        ):
            _validate_inputs("chr1", 100, "A", "")


class TestIntervalSizeCalculation:
    """Test interval size selection logic."""

    @pytest.mark.asyncio
    async def test_interval_size_edge_cases(self):
        """Test interval size selection for edge cases."""
        with patch.dict("os.environ", {}, clear=True):
            # Without API key, we should get early return
            result = await predict_variant_effects(
                chromosome="chr1",
                position=100,
                reference="A",
                alternate="T",
                interval_size=2000000,  # Larger than max
            )
            assert "AlphaGenome API key required" in result


class TestCaching:
    """Test caching behavior."""

    @pytest.mark.asyncio
    async def test_skip_cache_parameter(self):
        """Test that skip_cache parameter works."""
        with patch.dict("os.environ", {}, clear=True):
            # First call
            result1 = await predict_variant_effects(
                chromosome="chr1",
                position=100,
                reference="A",
                alternate="T",
                skip_cache=True,
            )

            # Second call with skip_cache
            result2 = await predict_variant_effects(
                chromosome="chr1",
                position=100,
                reference="A",
                alternate="T",
                skip_cache=True,
            )

            # Both should show API key error
            assert "AlphaGenome API key required" in result1
            assert "AlphaGenome API key required" in result2


class TestErrorHandling:
    """Test error handling and context."""

    @pytest.mark.asyncio
    async def test_error_context_with_api_key(self):
        """Test that errors include proper context."""
        with patch.dict("os.environ", {"ALPHAGENOME_API_KEY": "test-key"}):
            result = await predict_variant_effects(
                chromosome="chr1",
                position=100,
                reference="A",
                alternate="T",
                tissue_types=["UBERON:0002367"],
                skip_cache=True,
            )

            # Should either get import error or API error with context
            if "AlphaGenome prediction failed" in result:
                assert "Context:" in result
                assert "chr1:100 A>T" in result
                assert "Tissue types:" in result

    @pytest.mark.asyncio
    async def test_input_validation_errors(self):
        """Test that input validation errors are raised."""
        with patch.dict("os.environ", {"ALPHAGENOME_API_KEY": "test-key"}):
            # Invalid chromosome
            with pytest.raises(ValueError, match="Invalid chromosome format"):
                await predict_variant_effects(
                    chromosome="invalid",
                    position=100,
                    reference="A",
                    alternate="T",
                )

            # Invalid nucleotides
            with pytest.raises(ValueError, match="Invalid nucleotides"):
                await predict_variant_effects(
                    chromosome="chr1",
                    position=100,
                    reference="X",
                    alternate="T",
                )


class TestThresholdParameter:
    """Test significance threshold parameter."""

    @pytest.mark.asyncio
    async def test_custom_threshold(self):
        """Test that custom threshold is accepted."""
        with patch.dict("os.environ", {}, clear=True):
            # Test with custom threshold
            result = await predict_variant_effects(
                chromosome="chr1",
                position=100,
                reference="A",
                alternate="T",
                significance_threshold=0.8,
            )

            # Should work (get API key error, not parameter error)
            assert "AlphaGenome API key required" in result

    @pytest.mark.asyncio
    async def test_default_threshold(self):
        """Test that default threshold is used."""
        with patch.dict("os.environ", {}, clear=True):
            # Test without threshold parameter
            result = await predict_variant_effects(
                chromosome="chr1",
                position=100,
                reference="A",
                alternate="T",
            )

            # Should work with default
            assert "AlphaGenome API key required" in result


class TestIntegration:
    """Integration tests with mocked AlphaGenome."""

    @pytest.mark.asyncio
    async def test_successful_prediction_mock(self):
        """Test successful prediction with mocked AlphaGenome."""
        with patch.dict("os.environ", {"ALPHAGENOME_API_KEY": "test-key"}):
            # Mock the AlphaGenome imports
            mock_genome = MagicMock()
            mock_dna_client = MagicMock()
            mock_variant_scorers = MagicMock()

            # Mock the model
            mock_model = MagicMock()
            mock_dna_client.create.return_value = mock_model

            # Mock scorers
            mock_variant_scorers.get_recommended_scorers.return_value = [
                "scorer1"
            ]

            # Mock scores DataFrame
            mock_df = pd.DataFrame({
                "output_type": ["RNA_SEQ"],
                "raw_score": [1.0],
                "gene_name": ["GENE1"],
                "track_name": ["tissue1"],
            })
            mock_variant_scorers.tidy_scores.return_value = mock_df

            # Mock score_variant to return mock scores
            mock_model.score_variant.return_value = [MagicMock()]

            # Patch the imports
            with patch.dict(
                "sys.modules",
                {
                    "alphagenome.data.genome": mock_genome,
                    "alphagenome.models.dna_client": mock_dna_client,
                    "alphagenome.models.variant_scorers": mock_variant_scorers,
                    "alphagenome.data": MagicMock(genome=mock_genome),
                    "alphagenome.models": MagicMock(
                        dna_client=mock_dna_client,
                        variant_scorers=mock_variant_scorers,
                    ),
                },
            ):
                result = await predict_variant_effects(
                    chromosome="chr7",
                    position=140753336,
                    reference="A",
                    alternate="T",
                    interval_size=131072,
                    skip_cache=True,
                )

                # Check model was created with API key
                mock_dna_client.create.assert_called_once_with("test-key")

                # Check interval was created correctly
                mock_genome.Interval.assert_called_once()
                call_args = mock_genome.Interval.call_args
                assert (
                    call_args[1]["start"] == 140753336 - 65536 - 1
                )  # 0-based
                assert call_args[1]["end"] == call_args[1]["start"] + 131072

                # Check variant was created
                mock_genome.Variant.assert_called_once_with(
                    chromosome="chr7",
                    position=140753336,
                    reference_bases="A",
                    alternate_bases="T",
                )

                # Check result contains expected formatting
                assert "AlphaGenome Variant Effect Predictions" in result
                assert "Gene Expression" in result
                assert "GENE1" in result

```

--------------------------------------------------------------------------------
/src/biomcp/trials/getter.py:
--------------------------------------------------------------------------------

```python
import json
import logging
from ssl import TLSVersion
from typing import Annotated, Any

from .. import StrEnum, http_client, render
from ..constants import CLINICAL_TRIALS_BASE_URL

logger = logging.getLogger(__name__)


class Module(StrEnum):
    PROTOCOL = "Protocol"
    LOCATIONS = "Locations"
    REFERENCES = "References"
    OUTCOMES = "Outcomes"
    ALL = "All"


modules: dict[Module, list[str]] = {
    Module.PROTOCOL: [
        "IdentificationModule",
        "StatusModule",
        "SponsorCollaboratorsModule",
        "OversightModule",
        "DescriptionModule",
        "ConditionsModule",
        "DesignModule",
        "ArmsInterventionsModule",
        "EligibilityModule",
    ],
    Module.LOCATIONS: ["ContactsLocationsModule"],
    Module.REFERENCES: ["ReferencesModule"],
    Module.OUTCOMES: ["OutcomesModule", "ResultsSection"],
    Module.ALL: [
        "IdentificationModule",
        "StatusModule",
        "SponsorCollaboratorsModule",
        "OversightModule",
        "DescriptionModule",
        "ConditionsModule",
        "DesignModule",
        "ArmsInterventionsModule",
        "EligibilityModule",
        "ContactsLocationsModule",
        "ReferencesModule",
        "OutcomesModule",
        "ResultsSection",
    ],
}


async def get_trial(
    nct_id: str,
    module: Module = Module.PROTOCOL,
    output_json: bool = False,
) -> str:
    """Get details of a clinical trial by module."""
    fields = ",".join(modules[module])
    params = {"fields": fields}
    url = f"{CLINICAL_TRIALS_BASE_URL}/{nct_id}"

    logger.debug(f"Fetching trial {nct_id} with module {module.value}")
    logger.debug(f"URL: {url}, Params: {params}")

    parsed_data: dict[str, Any] | None
    error_obj: http_client.RequestError | None
    parsed_data, error_obj = await http_client.request_api(
        url=url,
        request=params,
        method="GET",
        tls_version=TLSVersion.TLSv1_2,
        response_model_type=None,
        domain="clinicaltrials",
    )

    data_to_return: dict[str, Any]

    if error_obj:
        logger.error(
            f"API Error for {nct_id}: {error_obj.code} - {error_obj.message}"
        )
        data_to_return = {
            "error": f"API Error {error_obj.code}",
            "details": error_obj.message,
        }
    elif parsed_data:
        # ClinicalTrials.gov API returns data wrapped in a "studies" array
        # Extract the first study if it exists
        if isinstance(parsed_data, dict) and "studies" in parsed_data:
            studies = parsed_data.get("studies", [])
            if studies and len(studies) > 0:
                data_to_return = studies[0]
                data_to_return["URL"] = (
                    f"https://clinicaltrials.gov/study/{nct_id}"
                )
            else:
                logger.warning(f"No studies found in response for {nct_id}")
                data_to_return = {
                    "error": f"No studies found for {nct_id}",
                    "details": "API returned empty studies array",
                }
        else:
            # Handle case where API returns data in unexpected format
            logger.debug(
                f"Unexpected response format for {nct_id}: {type(parsed_data)}"
            )
            data_to_return = parsed_data
            data_to_return["URL"] = (
                f"https://clinicaltrials.gov/study/{nct_id}"
            )
    else:
        logger.warning(
            f"No data received for {nct_id} with module {module.value}"
        )
        data_to_return = {
            "error": f"No data found for {nct_id} with module {module.value}",
            "details": "API returned no data",
        }

    if output_json:
        return json.dumps(data_to_return, indent=2)
    else:
        return render.to_markdown(data_to_return)


async def _trial_protocol(
    call_benefit: Annotated[
        str,
        "Define and summarize why this function is being called and the intended benefit",
    ],
    nct_id: str,
):
    """
    Retrieves core protocol information for a single clinical
    trial identified by its NCT ID.

    Parameters:
    - call_benefit: Define and summarize why this function is being called and the intended benefit
    - nct_id: A single NCT ID (string, e.g., "NCT04280705")

    Process: Fetches standard "Protocol" view modules (like ID,
             Status, Sponsor, Design, Eligibility) from the
             ClinicalTrials.gov v2 API.
    Output: A Markdown formatted string detailing title, status,
            sponsor, purpose, study design, phase, interventions,
            eligibility criteria, etc. Returns error if invalid.
    """
    return await get_trial(nct_id, Module.PROTOCOL)


async def _trial_locations(
    call_benefit: Annotated[
        str,
        "Define and summarize why this function is being called and the intended benefit",
    ],
    nct_id: str,
) -> str:
    """
    Retrieves contact and location details for a single
    clinical trial identified by its NCT ID.

    Parameters:
    - call_benefit: Define and summarize why this function is being called and the intended benefit
    - nct_id: A single NCT ID (string, e.g., "NCT04280705")

    Process: Fetches the `ContactsLocationsModule` from the
             ClinicalTrials.gov v2 API for the given NCT ID.
    Output: A Markdown formatted string detailing facility names,
            addresses (city, state, country), and contact info.
            Returns an error message if the NCT ID is invalid.
    """
    return await get_trial(nct_id, Module.LOCATIONS)


async def _trial_outcomes(
    call_benefit: Annotated[
        str,
        "Define and summarize why this function is being called and the intended benefit",
    ],
    nct_id: str,
) -> str:
    """
    Retrieves outcome measures, results (if available), and
    adverse event data for a single clinical trial.

    Parameters:
    - call_benefit: Define and summarize why this function is being called and the intended benefit
    - nct_id: A single NCT ID (string, e.g., "NCT04280705")

    Process: Fetches the `OutcomesModule` and `ResultsSection`
             from the ClinicalTrials.gov v2 API for the NCT ID.
    Output: A Markdown formatted string detailing primary/secondary
            outcomes, participant flow, results tables (if posted),
            and adverse event summaries. Returns an error if invalid.
    """
    return await get_trial(nct_id, Module.OUTCOMES)


async def _trial_references(
    call_benefit: Annotated[
        str,
        "Define and summarize why this function is being called and the intended benefit",
    ],
    nct_id: str,
):
    """
    Retrieves publications and other references associated with
    a single clinical trial identified by its NCT ID.

    Parameters:
    - call_benefit: Define and summarize why this function is being called and the intended benefit
    - nct_id: A single NCT ID (string, e.g., "NCT04280705")

    Process: Fetches the `ReferencesModule` from the
             ClinicalTrials.gov v2 API for the NCT ID.
    Output: A Markdown formatted string listing citations,
            associated PubMed IDs (PMIDs), and reference types
            (e.g., result publication). Returns error if invalid.
    """
    return await get_trial(nct_id, Module.REFERENCES)


async def get_trial_unified(
    nct_id: str,
    source: str = "clinicaltrials",
    api_key: str | None = None,
    sections: list[str] | None = None,
) -> str:
    """
    Get trial details from either ClinicalTrials.gov or NCI CTS API.

    Args:
        nct_id: NCT identifier (e.g., "NCT04280705")
        source: Data source - "clinicaltrials" (default) or "nci"
        api_key: API key for NCI (required if source="nci")
        sections: List of sections to include (for clinicaltrials.gov)
                 Options: ["protocol", "locations", "outcomes", "references", "all"]

    Returns:
        Formatted markdown string with trial details
    """
    if source == "nci":
        # Import here to avoid circular imports
        from .nci_getter import format_nci_trial_details, get_trial_nci

        trial_data = await get_trial_nci(nct_id, api_key)
        return await format_nci_trial_details(trial_data, api_key)
    else:
        # Default to ClinicalTrials.gov
        if sections and "all" in sections:
            return await get_trial(nct_id, Module.ALL)
        elif sections:
            # Get specific sections
            results = []
            for section in sections:
                if section == "protocol":
                    results.append(
                        await _trial_protocol(
                            call_benefit=f"Getting protocol information for trial {nct_id}",
                            nct_id=nct_id,
                        )
                    )
                elif section == "locations":
                    results.append(
                        await _trial_locations(
                            call_benefit=f"Getting locations for trial {nct_id}",
                            nct_id=nct_id,
                        )
                    )
                elif section == "outcomes":
                    results.append(
                        await _trial_outcomes(
                            call_benefit=f"Getting outcomes for trial {nct_id}",
                            nct_id=nct_id,
                        )
                    )
                elif section == "references":
                    results.append(
                        await _trial_references(
                            call_benefit=f"Getting references for trial {nct_id}",
                            nct_id=nct_id,
                        )
                    )
            return "\n\n---\n\n".join(results)
        else:
            # Default to protocol only
            return await _trial_protocol(
                call_benefit=f"Getting trial protocol details for {nct_id}",
                nct_id=nct_id,
            )

```