genomoncology/biomcp # codebase.md

This is page 6 of 15. Use http://codebase.md/genomoncology/biomcp?lines=false&page={x} to view the full context.

# Directory Structure

```
├── .github
│   ├── actions
│   │   └── setup-python-env
│   │       └── action.yml
│   ├── dependabot.yml
│   └── workflows
│       ├── ci.yml
│       ├── deploy-docs.yml
│       ├── main.yml.disabled
│       ├── on-release-main.yml
│       └── validate-codecov-config.yml
├── .gitignore
├── .pre-commit-config.yaml
├── BIOMCP_DATA_FLOW.md
├── CHANGELOG.md
├── CNAME
├── codecov.yaml
├── docker-compose.yml
├── Dockerfile
├── docs
│   ├── apis
│   │   ├── error-codes.md
│   │   ├── overview.md
│   │   └── python-sdk.md
│   ├── assets
│   │   ├── biomcp-cursor-locations.png
│   │   ├── favicon.ico
│   │   ├── icon.png
│   │   ├── logo.png
│   │   ├── mcp_architecture.txt
│   │   └── remote-connection
│   │       ├── 00_connectors.png
│   │       ├── 01_add_custom_connector.png
│   │       ├── 02_connector_enabled.png
│   │       ├── 03_connect_to_biomcp.png
│   │       ├── 04_select_google_oauth.png
│   │       └── 05_success_connect.png
│   ├── backend-services-reference
│   │   ├── 01-overview.md
│   │   ├── 02-biothings-suite.md
│   │   ├── 03-cbioportal.md
│   │   ├── 04-clinicaltrials-gov.md
│   │   ├── 05-nci-cts-api.md
│   │   ├── 06-pubtator3.md
│   │   └── 07-alphagenome.md
│   ├── blog
│   │   ├── ai-assisted-clinical-trial-search-analysis.md
│   │   ├── images
│   │   │   ├── deep-researcher-video.png
│   │   │   ├── researcher-announce.png
│   │   │   ├── researcher-drop-down.png
│   │   │   ├── researcher-prompt.png
│   │   │   ├── trial-search-assistant.png
│   │   │   └── what_is_biomcp_thumbnail.png
│   │   └── researcher-persona-resource.md
│   ├── changelog.md
│   ├── CNAME
│   ├── concepts
│   │   ├── 01-what-is-biomcp.md
│   │   ├── 02-the-deep-researcher-persona.md
│   │   └── 03-sequential-thinking-with-the-think-tool.md
│   ├── developer-guides
│   │   ├── 01-server-deployment.md
│   │   ├── 02-contributing-and-testing.md
│   │   ├── 03-third-party-endpoints.md
│   │   ├── 04-transport-protocol.md
│   │   ├── 05-error-handling.md
│   │   ├── 06-http-client-and-caching.md
│   │   ├── 07-performance-optimizations.md
│   │   └── generate_endpoints.py
│   ├── faq-condensed.md
│   ├── FDA_SECURITY.md
│   ├── genomoncology.md
│   ├── getting-started
│   │   ├── 01-quickstart-cli.md
│   │   ├── 02-claude-desktop-integration.md
│   │   └── 03-authentication-and-api-keys.md
│   ├── how-to-guides
│   │   ├── 01-find-articles-and-cbioportal-data.md
│   │   ├── 02-find-trials-with-nci-and-biothings.md
│   │   ├── 03-get-comprehensive-variant-annotations.md
│   │   ├── 04-predict-variant-effects-with-alphagenome.md
│   │   ├── 05-logging-and-monitoring-with-bigquery.md
│   │   └── 06-search-nci-organizations-and-interventions.md
│   ├── index.md
│   ├── policies.md
│   ├── reference
│   │   ├── architecture-diagrams.md
│   │   ├── quick-architecture.md
│   │   ├── quick-reference.md
│   │   └── visual-architecture.md
│   ├── robots.txt
│   ├── stylesheets
│   │   ├── announcement.css
│   │   └── extra.css
│   ├── troubleshooting.md
│   ├── tutorials
│   │   ├── biothings-prompts.md
│   │   ├── claude-code-biomcp-alphagenome.md
│   │   ├── nci-prompts.md
│   │   ├── openfda-integration.md
│   │   ├── openfda-prompts.md
│   │   ├── pydantic-ai-integration.md
│   │   └── remote-connection.md
│   ├── user-guides
│   │   ├── 01-command-line-interface.md
│   │   ├── 02-mcp-tools-reference.md
│   │   └── 03-integrating-with-ides-and-clients.md
│   └── workflows
│       └── all-workflows.md
├── example_scripts
│   ├── mcp_integration.py
│   └── python_sdk.py
├── glama.json
├── LICENSE
├── lzyank.toml
├── Makefile
├── mkdocs.yml
├── package-lock.json
├── package.json
├── pyproject.toml
├── README.md
├── scripts
│   ├── check_docs_in_mkdocs.py
│   ├── check_http_imports.py
│   └── generate_endpoints_doc.py
├── smithery.yaml
├── src
│   └── biomcp
│       ├── __init__.py
│       ├── __main__.py
│       ├── articles
│       │   ├── __init__.py
│       │   ├── autocomplete.py
│       │   ├── fetch.py
│       │   ├── preprints.py
│       │   ├── search_optimized.py
│       │   ├── search.py
│       │   └── unified.py
│       ├── biomarkers
│       │   ├── __init__.py
│       │   └── search.py
│       ├── cbioportal_helper.py
│       ├── circuit_breaker.py
│       ├── cli
│       │   ├── __init__.py
│       │   ├── articles.py
│       │   ├── biomarkers.py
│       │   ├── diseases.py
│       │   ├── health.py
│       │   ├── interventions.py
│       │   ├── main.py
│       │   ├── openfda.py
│       │   ├── organizations.py
│       │   ├── server.py
│       │   ├── trials.py
│       │   └── variants.py
│       ├── connection_pool.py
│       ├── constants.py
│       ├── core.py
│       ├── diseases
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── domain_handlers.py
│       ├── drugs
│       │   ├── __init__.py
│       │   └── getter.py
│       ├── exceptions.py
│       ├── genes
│       │   ├── __init__.py
│       │   └── getter.py
│       ├── http_client_simple.py
│       ├── http_client.py
│       ├── individual_tools.py
│       ├── integrations
│       │   ├── __init__.py
│       │   ├── biothings_client.py
│       │   └── cts_api.py
│       ├── interventions
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── logging_filter.py
│       ├── metrics_handler.py
│       ├── metrics.py
│       ├── openfda
│       │   ├── __init__.py
│       │   ├── adverse_events_helpers.py
│       │   ├── adverse_events.py
│       │   ├── cache.py
│       │   ├── constants.py
│       │   ├── device_events_helpers.py
│       │   ├── device_events.py
│       │   ├── drug_approvals.py
│       │   ├── drug_labels_helpers.py
│       │   ├── drug_labels.py
│       │   ├── drug_recalls_helpers.py
│       │   ├── drug_recalls.py
│       │   ├── drug_shortages_detail_helpers.py
│       │   ├── drug_shortages_helpers.py
│       │   ├── drug_shortages.py
│       │   ├── exceptions.py
│       │   ├── input_validation.py
│       │   ├── rate_limiter.py
│       │   ├── utils.py
│       │   └── validation.py
│       ├── organizations
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── parameter_parser.py
│       ├── prefetch.py
│       ├── query_parser.py
│       ├── query_router.py
│       ├── rate_limiter.py
│       ├── render.py
│       ├── request_batcher.py
│       ├── resources
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   ├── instructions.md
│       │   └── researcher.md
│       ├── retry.py
│       ├── router_handlers.py
│       ├── router.py
│       ├── shared_context.py
│       ├── thinking
│       │   ├── __init__.py
│       │   ├── sequential.py
│       │   └── session.py
│       ├── thinking_tool.py
│       ├── thinking_tracker.py
│       ├── trials
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   ├── nci_getter.py
│       │   ├── nci_search.py
│       │   └── search.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── cancer_types_api.py
│       │   ├── cbio_http_adapter.py
│       │   ├── endpoint_registry.py
│       │   ├── gene_validator.py
│       │   ├── metrics.py
│       │   ├── mutation_filter.py
│       │   ├── query_utils.py
│       │   ├── rate_limiter.py
│       │   └── request_cache.py
│       ├── variants
│       │   ├── __init__.py
│       │   ├── alphagenome.py
│       │   ├── cancer_types.py
│       │   ├── cbio_external_client.py
│       │   ├── cbioportal_mutations.py
│       │   ├── cbioportal_search_helpers.py
│       │   ├── cbioportal_search.py
│       │   ├── constants.py
│       │   ├── external.py
│       │   ├── filters.py
│       │   ├── getter.py
│       │   ├── links.py
│       │   └── search.py
│       └── workers
│           ├── __init__.py
│           ├── worker_entry_stytch.js
│           ├── worker_entry.js
│           └── worker.py
├── tests
│   ├── bdd
│   │   ├── cli_help
│   │   │   ├── help.feature
│   │   │   └── test_help.py
│   │   ├── conftest.py
│   │   ├── features
│   │   │   └── alphagenome_integration.feature
│   │   ├── fetch_articles
│   │   │   ├── fetch.feature
│   │   │   └── test_fetch.py
│   │   ├── get_trials
│   │   │   ├── get.feature
│   │   │   └── test_get.py
│   │   ├── get_variants
│   │   │   ├── get.feature
│   │   │   └── test_get.py
│   │   ├── search_articles
│   │   │   ├── autocomplete.feature
│   │   │   ├── search.feature
│   │   │   ├── test_autocomplete.py
│   │   │   └── test_search.py
│   │   ├── search_trials
│   │   │   ├── search.feature
│   │   │   └── test_search.py
│   │   ├── search_variants
│   │   │   ├── search.feature
│   │   │   └── test_search.py
│   │   └── steps
│   │       └── test_alphagenome_steps.py
│   ├── config
│   │   └── test_smithery_config.py
│   ├── conftest.py
│   ├── data
│   │   ├── ct_gov
│   │   │   ├── clinical_trials_api_v2.yaml
│   │   │   ├── trials_NCT04280705.json
│   │   │   └── trials_NCT04280705.txt
│   │   ├── myvariant
│   │   │   ├── myvariant_api.yaml
│   │   │   ├── myvariant_field_descriptions.csv
│   │   │   ├── variants_full_braf_v600e.json
│   │   │   ├── variants_full_braf_v600e.txt
│   │   │   └── variants_part_braf_v600_multiple.json
│   │   ├── openfda
│   │   │   ├── drugsfda_detail.json
│   │   │   ├── drugsfda_search.json
│   │   │   ├── enforcement_detail.json
│   │   │   └── enforcement_search.json
│   │   └── pubtator
│   │       ├── pubtator_autocomplete.json
│   │       └── pubtator3_paper.txt
│   ├── integration
│   │   ├── test_openfda_integration.py
│   │   ├── test_preprints_integration.py
│   │   ├── test_simple.py
│   │   └── test_variants_integration.py
│   ├── tdd
│   │   ├── articles
│   │   │   ├── test_autocomplete.py
│   │   │   ├── test_cbioportal_integration.py
│   │   │   ├── test_fetch.py
│   │   │   ├── test_preprints.py
│   │   │   ├── test_search.py
│   │   │   └── test_unified.py
│   │   ├── conftest.py
│   │   ├── drugs
│   │   │   ├── __init__.py
│   │   │   └── test_drug_getter.py
│   │   ├── openfda
│   │   │   ├── __init__.py
│   │   │   ├── test_adverse_events.py
│   │   │   ├── test_device_events.py
│   │   │   ├── test_drug_approvals.py
│   │   │   ├── test_drug_labels.py
│   │   │   ├── test_drug_recalls.py
│   │   │   ├── test_drug_shortages.py
│   │   │   └── test_security.py
│   │   ├── test_biothings_integration_real.py
│   │   ├── test_biothings_integration.py
│   │   ├── test_circuit_breaker.py
│   │   ├── test_concurrent_requests.py
│   │   ├── test_connection_pool.py
│   │   ├── test_domain_handlers.py
│   │   ├── test_drug_approvals.py
│   │   ├── test_drug_recalls.py
│   │   ├── test_drug_shortages.py
│   │   ├── test_endpoint_documentation.py
│   │   ├── test_error_scenarios.py
│   │   ├── test_europe_pmc_fetch.py
│   │   ├── test_mcp_integration.py
│   │   ├── test_mcp_tools.py
│   │   ├── test_metrics.py
│   │   ├── test_nci_integration.py
│   │   ├── test_nci_mcp_tools.py
│   │   ├── test_network_policies.py
│   │   ├── test_offline_mode.py
│   │   ├── test_openfda_unified.py
│   │   ├── test_pten_r173_search.py
│   │   ├── test_render.py
│   │   ├── test_request_batcher.py.disabled
│   │   ├── test_retry.py
│   │   ├── test_router.py
│   │   ├── test_shared_context.py.disabled
│   │   ├── test_unified_biothings.py
│   │   ├── thinking
│   │   │   ├── __init__.py
│   │   │   └── test_sequential.py
│   │   ├── trials
│   │   │   ├── test_backward_compatibility.py
│   │   │   ├── test_getter.py
│   │   │   └── test_search.py
│   │   ├── utils
│   │   │   ├── test_gene_validator.py
│   │   │   ├── test_mutation_filter.py
│   │   │   ├── test_rate_limiter.py
│   │   │   └── test_request_cache.py
│   │   ├── variants
│   │   │   ├── constants.py
│   │   │   ├── test_alphagenome_api_key.py
│   │   │   ├── test_alphagenome_comprehensive.py
│   │   │   ├── test_alphagenome.py
│   │   │   ├── test_cbioportal_mutations.py
│   │   │   ├── test_cbioportal_search.py
│   │   │   ├── test_external_integration.py
│   │   │   ├── test_external.py
│   │   │   ├── test_extract_gene_aa_change.py
│   │   │   ├── test_filters.py
│   │   │   ├── test_getter.py
│   │   │   ├── test_links.py
│   │   │   └── test_search.py
│   │   └── workers
│   │       └── test_worker_sanitization.js
│   └── test_pydantic_ai_integration.py
├── THIRD_PARTY_ENDPOINTS.md
├── tox.ini
├── uv.lock
└── wrangler.toml
```

# Files

--------------------------------------------------------------------------------
/src/biomcp/biomarkers/search.py:
--------------------------------------------------------------------------------

```python
"""Search functionality for biomarkers via NCI CTS API.

Note: Biomarker data availability may be limited in CTRP.
This module focuses on biomarkers used in trial eligibility criteria.
"""

import logging
from typing import Any

from ..constants import NCI_BIOMARKERS_URL
from ..integrations.cts_api import CTSAPIError, make_cts_request
from ..utils import parse_or_query

logger = logging.getLogger(__name__)


def _build_biomarker_params(
    name: str | None,
    eligibility_criterion: str | None,
    biomarker_type: str | None,
    codes: list[str] | None,
    assay_purpose: str | None,
    include: list[str] | None,
    sort: str | None,
    order: str | None,
    page_size: int,
) -> dict[str, Any]:
    """Build query parameters for biomarker search."""
    params: dict[str, Any] = {"size": page_size}

    # Add search filters with correct API parameter names
    if name:
        params["name"] = name
    if eligibility_criterion:
        params["eligibility_criterion"] = eligibility_criterion
    if biomarker_type:
        params["type"] = biomarker_type
    if codes:
        params["codes"] = ",".join(codes) if isinstance(codes, list) else codes
    if assay_purpose:
        params["assay_purpose"] = assay_purpose
    if include:
        params["include"] = (
            ",".join(include) if isinstance(include, list) else include
        )
    if sort:
        params["sort"] = sort
        if order:
            params["order"] = order.lower()

    return params


def _process_biomarker_response(
    response: dict[str, Any],
    page: int,
    page_size: int,
) -> dict[str, Any]:
    """Process biomarker API response."""
    biomarkers = response.get("data", response.get("biomarkers", []))
    total = response.get("total", len(biomarkers))

    result = {
        "biomarkers": biomarkers,
        "total": total,
        "page": page,
        "page_size": page_size,
    }

    # Add note about data limitations if response indicates it
    if response.get("limited_data") or not biomarkers:
        result["note"] = (
            "Biomarker data availability is limited in CTRP. "
            "Results show biomarkers referenced in trial eligibility criteria. "
            "For detailed variant annotations, use variant_searcher with MyVariant.info."
        )

    return result


async def search_biomarkers(
    name: str | None = None,
    eligibility_criterion: str | None = None,
    biomarker_type: str | None = None,
    codes: list[str] | None = None,
    assay_purpose: str | None = None,
    include: list[str] | None = None,
    sort: str | None = None,
    order: str | None = None,
    page_size: int = 20,
    page: int = 1,
    api_key: str | None = None,
) -> dict[str, Any]:
    """
    Search for biomarkers in the NCI CTS database.

    Note: Biomarker data availability may be limited per CTRP documentation.
    Results focus on biomarkers used in clinical trial eligibility criteria.

    Args:
        name: Biomarker name to search for (e.g., "PD-L1", "EGFR mutation")
        eligibility_criterion: Eligibility criterion text
        biomarker_type: Type of biomarker ("reference_gene" or "branch")
        codes: List of biomarker codes
        assay_purpose: Purpose of the assay
        include: Fields to include in response
        sort: Sort field
        order: Sort order ('asc' or 'desc')
        page_size: Number of results per page
        page: Page number
        api_key: Optional API key (if not provided, uses NCI_API_KEY env var)

    Returns:
        Dictionary with search results containing:
        - biomarkers: List of biomarker records
        - total: Total number of results
        - page: Current page
        - page_size: Results per page
        - note: Any limitations about the data

    Raises:
        CTSAPIError: If the API request fails
    """
    # Build query parameters
    params = _build_biomarker_params(
        name,
        eligibility_criterion,
        biomarker_type,
        codes,
        assay_purpose,
        include,
        sort,
        order,
        page_size,
    )

    try:
        # Make API request
        response = await make_cts_request(
            url=NCI_BIOMARKERS_URL,
            params=params,
            api_key=api_key,
        )

        # Process response
        return _process_biomarker_response(response, page, page_size)

    except CTSAPIError:
        raise
    except Exception as e:
        logger.error(f"Failed to search biomarkers: {e}")
        raise CTSAPIError(f"Biomarker search failed: {e!s}") from e


def _format_biomarker_header(total: int, note: str) -> list[str]:
    """Format the header section of biomarker results."""
    lines = [
        f"## Biomarker Search Results ({total} found)",
        "",
    ]

    if note:
        lines.extend([
            f"*Note: {note}*",
            "",
        ])

    return lines


def _format_single_biomarker(biomarker: dict[str, Any]) -> list[str]:
    """Format a single biomarker record."""
    bio_id = biomarker.get("id", biomarker.get("biomarker_id", "Unknown"))
    name = biomarker.get("name", "Unknown Biomarker")
    gene = biomarker.get("gene", biomarker.get("gene_symbol", ""))
    bio_type = biomarker.get("type", biomarker.get("category", ""))

    lines = [
        f"### {name}",
        f"- **ID**: {bio_id}",
    ]

    if gene:
        lines.append(f"- **Gene**: {gene}")
    if bio_type:
        lines.append(f"- **Type**: {bio_type}")

    # Add assay information if available
    if biomarker.get("assay_type"):
        lines.append(f"- **Assay**: {biomarker['assay_type']}")

    # Add criteria examples if available
    if biomarker.get("criteria_examples"):
        examples = biomarker["criteria_examples"]
        if isinstance(examples, list) and examples:
            lines.append("- **Example Criteria**:")
            for ex in examples[:3]:  # Show up to 3 examples
                lines.append(f"  - {ex}")
            if len(examples) > 3:
                lines.append(f"  *(and {len(examples) - 3} more)*")

    # Add trial count if available
    if biomarker.get("trial_count"):
        lines.append(
            f"- **Trials Using This Biomarker**: {biomarker['trial_count']}"
        )

    lines.append("")
    return lines


async def search_biomarkers_with_or(
    name_query: str,
    eligibility_criterion: str | None = None,
    biomarker_type: str | None = None,
    codes: list[str] | None = None,
    assay_purpose: str | None = None,
    include: list[str] | None = None,
    sort: str | None = None,
    order: str | None = None,
    page_size: int = 20,
    page: int = 1,
    api_key: str | None = None,
) -> dict[str, Any]:
    """
    Search for biomarkers with OR query support.

    This function handles OR queries by making multiple API calls and combining results.
    For example: "PD-L1 OR CD274 OR programmed death ligand 1" will search for each term.

    Args:
        name_query: Name query that may contain OR operators
        Other args same as search_biomarkers

    Returns:
        Combined results from all searches with duplicates removed
    """
    # Check if this is an OR query
    if " OR " in name_query or " or " in name_query:
        search_terms = parse_or_query(name_query)
        logger.info(f"Parsed OR query into terms: {search_terms}")
    else:
        # Single term search
        search_terms = [name_query]

    # Collect all unique biomarkers
    all_biomarkers = {}
    total_found = 0

    # Search for each term
    for term in search_terms:
        logger.info(f"Searching biomarkers for term: {term}")
        try:
            results = await search_biomarkers(
                name=term,
                eligibility_criterion=eligibility_criterion,
                biomarker_type=biomarker_type,
                codes=codes,
                assay_purpose=assay_purpose,
                include=include,
                sort=sort,
                order=order,
                page_size=page_size,  # Get full page size for each term
                page=page,
                api_key=api_key,
            )

            # Add unique biomarkers (deduplicate by ID)
            for biomarker in results.get("biomarkers", []):
                bio_id = biomarker.get("id", biomarker.get("biomarker_id"))
                if bio_id and bio_id not in all_biomarkers:
                    all_biomarkers[bio_id] = biomarker

            total_found += results.get("total", 0)

        except Exception as e:
            logger.warning(f"Failed to search for term '{term}': {e}")
            # Continue with other terms

    # Convert back to list and apply pagination
    unique_biomarkers = list(all_biomarkers.values())

    # Sort if requested (by name by default for consistent results)
    if sort == "name" or sort is None:
        unique_biomarkers.sort(key=lambda x: x.get("name", "").lower())

    # Apply pagination to combined results
    start_idx = (page - 1) * page_size
    end_idx = start_idx + page_size
    paginated_biomarkers = unique_biomarkers[start_idx:end_idx]

    return {
        "biomarkers": paginated_biomarkers,
        "total": len(unique_biomarkers),
        "page": page,
        "page_size": page_size,
        "search_terms": search_terms,  # Include what we searched for
        "total_found_across_terms": total_found,  # Total before deduplication
    }


def format_biomarker_results(results: dict[str, Any]) -> str:
    """
    Render a biomarker search result dictionary as markdown text.

    Args:
        results: Search results dictionary; ``biomarkers``, ``total`` and
            ``note`` are read, each with an empty default.

    Returns:
        Markdown string: a header followed by one section per biomarker,
        or a "no biomarkers found" message (with the note appended, if
        any) when the result list is empty.
    """
    records = results.get("biomarkers", [])
    total = results.get("total", 0)
    note = results.get("note", "")

    if records:
        rendered = _format_biomarker_header(total, note)
        for record in records:
            rendered.extend(_format_single_biomarker(record))
        return "\n".join(rendered)

    empty_message = "No biomarkers found matching the search criteria."
    if note:
        empty_message += f"\n\n*Note: {note}*"
    return empty_message

```

--------------------------------------------------------------------------------
/docs/tutorials/nci-prompts.md:
--------------------------------------------------------------------------------

```markdown
# NCI Tools Example Prompts

This guide provides example prompts for AI assistants to effectively use the NCI (National Cancer Institute) Clinical Trials Search API tools in BioMCP.

## Overview of NCI Tools

BioMCP integrates with the NCI Clinical Trials Search API to provide:

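- **Organization Search & Lookup** - Find cancer research centers, hospitals, and trial sponsors
- **Intervention Search & Lookup** - Search for drugs, devices, procedures, and other interventions
- **Biomarker Search** - Find biomarkers used in clinical trial eligibility criteria
- **Disease Search** - Look up cancer terms and their synonyms in the NCI vocabulary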

These tools require an NCI API key from: https://clinicaltrialsapi.cancer.gov/

## Best Practices

### API Key Required

All example prompts in this guide should include your NCI API key. Add this to the end of each prompt:

```
"... my NCI API key is YOUR_API_KEY"
```

### Location Searches

**ALWAYS use city AND state together** when searching organizations by location. The NCI API has Elasticsearch limitations that cause errors with broad searches.

✅ **Good**: `nci_organization_searcher(city="Cleveland", state="OH")`
❌ **Bad**: `nci_organization_searcher(city="Cleveland")` or `nci_organization_searcher(state="OH")`

### API Parameter Notes

- The NCI APIs do not support offset-based pagination (`from` parameter)
- Organization location parameters use `org_` prefix (e.g., `org_city`, `org_state_or_province`)
- When the `size` parameter is used, the API may not return a `total` count

### Avoiding API Errors

- Use specific organization names when possible
- Combine multiple filters (name + type, city + state)
- Start with more specific searches, then broaden if needed

## Organization Tools

### Organization Search

#### Basic Organization Search

```
"Find cancer centers in California, my NCI API key is YOUR_API_KEY"
"Search for MD Anderson Cancer Center, my NCI API key is YOUR_API_KEY"
"List academic cancer research centers in New York, my NCI API key is YOUR_API_KEY"
"Find all NCI-designated cancer centers, my NCI API key is YOUR_API_KEY"
```

**Expected tool usage**: `nci_organization_searcher(state="CA", organization_type="Academic")`

#### Organization by Location

**IMPORTANT**: Always use city AND state together to avoid API errors!

```
"Show me cancer treatment centers in Boston, MA, my NCI API key is YOUR_API_KEY"
"Find clinical trial sites in Houston, Texas, my NCI API key is YOUR_API_KEY"
"List all cancer research organizations in Cleveland, OH, my NCI API key is YOUR_API_KEY"
"Search for industry sponsors in San Francisco, CA, my NCI API key is YOUR_API_KEY"
```

**Expected tool usage**: `nci_organization_searcher(city="Boston", state="MA")` ✓
**Never use**: `nci_organization_searcher(city="Boston")` ✗ or `nci_organization_searcher(state="MA")` ✗

#### Organization by Type

```
"Find all government cancer research facilities, my NCI API key is YOUR_API_KEY"
"List pharmaceutical companies running cancer trials, my NCI API key is YOUR_API_KEY"
"Show me academic medical centers conducting trials, my NCI API key is YOUR_API_KEY"
"Find community hospitals participating in cancer research, my NCI API key is YOUR_API_KEY"
```

**Expected tool usage**: `nci_organization_searcher(organization_type="Industry")`

### Organization Details

```
"Get details about organization NCI-2011-03337, my NCI API key is YOUR_API_KEY"
"Show me contact information for this cancer center, my NCI API key is YOUR_API_KEY"
"What trials is this organization conducting? My NCI API key is YOUR_API_KEY"
"Give me the full profile of this research institution, my NCI API key is YOUR_API_KEY"
```

**Expected tool usage**: `nci_organization_getter(organization_id="NCI-2011-03337")`

## Intervention Tools

### Intervention Search

#### Drug Search

```
"Find all trials using pembrolizumab, my NCI API key is YOUR_API_KEY"
"Search for PD-1 inhibitor drugs in trials, my NCI API key is YOUR_API_KEY"
"List all immunotherapy drugs being tested, my NCI API key is YOUR_API_KEY"
"Find trials using Keytruda or similar drugs, my NCI API key is YOUR_API_KEY"
```

**Expected tool usage**: `nci_intervention_searcher(name="pembrolizumab", intervention_type="Drug")`

#### Device Search

```
"Search for medical devices in cancer trials, my NCI API key is YOUR_API_KEY"
"Find trials using surgical robots, my NCI API key is YOUR_API_KEY"
"List radiation therapy devices being tested, my NCI API key is YOUR_API_KEY"
"Show me trials with diagnostic devices, my NCI API key is YOUR_API_KEY"
```

**Expected tool usage**: `nci_intervention_searcher(intervention_type="Device")`

#### Procedure Search

```
"Find surgical procedures in cancer trials, my NCI API key is YOUR_API_KEY"
"Search for minimally invasive surgery trials, my NCI API key is YOUR_API_KEY"
"List trials with radiation therapy procedures, my NCI API key is YOUR_API_KEY"
"Show me trials testing new biopsy techniques, my NCI API key is YOUR_API_KEY"
```

**Expected tool usage**: `nci_intervention_searcher(intervention_type="Procedure")`

#### Other Interventions

```
"Find behavioral interventions for cancer patients, my NCI API key is YOUR_API_KEY"
"Search for dietary interventions in trials, my NCI API key is YOUR_API_KEY"
"List genetic therapy trials, my NCI API key is YOUR_API_KEY"
"Show me trials with exercise interventions, my NCI API key is YOUR_API_KEY"
```

**Expected tool usage**: `nci_intervention_searcher(intervention_type="Behavioral")`

### Intervention Details

```
"Get full details about intervention INT123456, my NCI API key is YOUR_API_KEY"
"Show me the mechanism of action for this drug, my NCI API key is YOUR_API_KEY"
"Is this intervention FDA approved? My NCI API key is YOUR_API_KEY"
"What trials are using this intervention? My NCI API key is YOUR_API_KEY"
```

**Expected tool usage**: `nci_intervention_getter(intervention_id="INT123456")`

## Biomarker Tools

### Biomarker Search

#### Basic Biomarker Search

```
"Find PD-L1 expression biomarkers, my NCI API key is YOUR_API_KEY"
"Search for EGFR mutations used in trials, my NCI API key is YOUR_API_KEY"
"List biomarkers tested by IHC, my NCI API key is YOUR_API_KEY"
"Find HER2 positive biomarkers, my NCI API key is YOUR_API_KEY"
```

**Expected tool usage**: `nci_biomarker_searcher(name="PD-L1")`

#### Biomarker by Type

```
"Show me all reference gene biomarkers, my NCI API key is YOUR_API_KEY"
"Find branch biomarkers, my NCI API key is YOUR_API_KEY"
"List all biomarkers of type reference_gene, my NCI API key is YOUR_API_KEY"
```

**Expected tool usage**: `nci_biomarker_searcher(biomarker_type="reference_gene")`

#### Important Note on Biomarker Types

The NCI API only supports two biomarker types:

- `reference_gene`: Gene-based biomarkers
- `branch`: Branch/pathway biomarkers

Note: The API does NOT support searching by gene symbol or assay type directly.

## NCI Disease Tools

### Disease Search

#### Basic Disease Search

```
"Find melanoma in NCI vocabulary, my NCI API key is YOUR_API_KEY"
"Search for lung cancer types, my NCI API key is YOUR_API_KEY"
"List breast cancer subtypes, my NCI API key is YOUR_API_KEY"
"Find official name for GIST, my NCI API key is YOUR_API_KEY"
```

**Expected tool usage**: `nci_disease_searcher(name="melanoma")`

#### Disease with Synonyms

```
"Find all names for gastrointestinal stromal tumor, my NCI API key is YOUR_API_KEY"
"Search for NSCLC and all its synonyms, my NCI API key is YOUR_API_KEY"
"List all terms for triple-negative breast cancer, my NCI API key is YOUR_API_KEY"
"Find alternative names for melanoma, my NCI API key is YOUR_API_KEY"
```

**Expected tool usage**: `nci_disease_searcher(name="GIST", include_synonyms=True)`

## Combined Workflows

### Finding Trials at Specific Centers

```
"First find cancer centers in California, then show me their trials, my NCI API key is YOUR_API_KEY"
```

**Expected workflow**:

1. `nci_organization_searcher(state="CA")`
2. For each organization, search trials with that sponsor

### Drug Development Pipeline

```
"Search for CAR-T cell therapies and show me which organizations are developing them, my NCI API key is YOUR_API_KEY"
```

**Expected workflow**:

1. `nci_intervention_searcher(name="CAR-T", intervention_type="Biological")`
2. For each intervention, get details to see associated trials
3. Extract organization information from trial data

### Regional Cancer Research

```
"What cancer drugs are being tested in Boston area hospitals? My NCI API key is YOUR_API_KEY"
```

**Expected workflow**:

1. `nci_organization_searcher(city="Boston", state="MA")`
2. `trial_searcher(location="Boston, MA", source="nci")` with organization filters
3. Extract intervention information from trials

## Important Notes

### API Key Handling

All NCI tools require an API key. The tools will check for:

1. API key provided in the function call
2. `NCI_API_KEY` environment variable
3. User-provided key in their message (e.g., "my NCI API key is...")

### Synonym Support

The intervention searcher includes a `synonyms` parameter (default: True) that will search for:

- Drug trade names (e.g., "Keytruda" finds "pembrolizumab")
- Alternative spellings
- Related terms

### Pagination

The NCI search tools accept these pagination parameters:

- `page`: Page number (1-based)
- `page_size`: Results per page (max 100)

### Organization Types

Valid organization types include:

- Academic
- Industry
- Government
- Community
- Network
- Other

### Intervention Types

Valid intervention types include:

- Drug
- Device
- Biological
- Procedure
- Radiation
- Behavioral
- Genetic
- Dietary
- Diagnostic Test
- Other

## Error Handling

Common errors and solutions:

1. **"NCI API key required"**: User needs to provide an API key
2. **"No results found"**: Try broader search terms or remove filters
3. **"Invalid organization/intervention ID"**: Verify the ID format
4. **Rate limiting**: The API has rate limits; wait before retrying
5. **"Search Too Broad" (Elasticsearch error)**: The search returns too many results
   - This happens when searching with broad criteria
   - **Prevention**: Always use city AND state together for location searches
   - Add organization name (even partial) to narrow results
   - Avoid searching by state alone or organization type alone

```

--------------------------------------------------------------------------------
/src/biomcp/interventions/search.py:
--------------------------------------------------------------------------------

```python
"""Search functionality for interventions via NCI CTS API."""

import logging
from typing import Any

from ..constants import NCI_INTERVENTIONS_URL
from ..integrations.cts_api import CTSAPIError, make_cts_request
from ..utils import parse_or_query

logger = logging.getLogger(__name__)


# Intervention types based on ClinicalTrials.gov categories.
# Reference list only: nothing in this module validates input against it;
# _build_intervention_params lowercases whatever type the caller supplies
# and forwards it as the "type" query parameter.
INTERVENTION_TYPES = [
    "Drug",
    "Device",
    "Biological",
    "Procedure",
    "Radiation",
    "Behavioral",
    "Genetic",
    "Dietary",
    "Diagnostic Test",
    "Other",
]


def _build_intervention_params(
    name: str | None,
    intervention_type: str | None,
    category: str | None,
    codes: list[str] | None,
    include: list[str] | None,
    sort: str | None,
    order: str | None,
    page_size: int | None,
) -> dict[str, Any]:
    """Build query parameters for intervention search."""
    params: dict[str, Any] = {}

    if name:
        params["name"] = name

    if intervention_type:
        params["type"] = intervention_type.lower()

    if category:
        params["category"] = category

    if codes:
        params["codes"] = ",".join(codes) if isinstance(codes, list) else codes

    if include:
        params["include"] = (
            ",".join(include) if isinstance(include, list) else include
        )

    if sort:
        params["sort"] = sort
        if order:
            params["order"] = order.lower()

    # Only add size if explicitly requested and > 0
    if page_size and page_size > 0:
        params["size"] = page_size

    return params


def _process_intervention_response(
    response: Any,
    page: int,
    page_size: int | None,
) -> dict[str, Any]:
    """Process intervention search response."""
    if isinstance(response, dict):
        # Standard response format from the API
        interventions = response.get("data", [])
        # When size parameter is used, API doesn't return 'total'
        total = response.get("total", len(interventions))
    elif isinstance(response, list):
        # Direct list of interventions
        interventions = response
        total = len(interventions)
    else:
        # Unexpected response format
        logger.warning(f"Unexpected response type: {type(response)}")
        interventions = []
        total = 0

    return {
        "interventions": interventions,
        "total": total,
        "page": page,
        "page_size": page_size,
    }


async def search_interventions(
    name: str | None = None,
    intervention_type: str | None = None,
    category: str | None = None,
    codes: list[str] | None = None,
    include: list[str] | None = None,
    sort: str | None = None,
    order: str | None = None,
    synonyms: bool = True,  # Kept for backward compatibility but ignored
    page_size: int | None = None,
    page: int = 1,
    api_key: str | None = None,
) -> dict[str, Any]:
    """
    Search for interventions in the NCI CTS database.

    Args:
        name: Intervention name to search for (partial match)
        intervention_type: Type of intervention (Drug, Device, Procedure, etc.)
        category: Category filter (agent, agent category, other)
        codes: List of intervention codes to search for (e.g., ["C82416", "C171257"])
        include: Fields to include in response (all fields, name, category, codes, etc.)
        sort: Sort field (default: 'name', also supports 'count')
        order: Sort order ('asc' or 'desc', required when using sort)
        synonyms: [Deprecated] Kept for backward compatibility but ignored
        page_size: Number of results per page (when used, 'total' field not returned)
        page: Page number (Note: API doesn't support offset pagination)
        api_key: Optional API key (if not provided, uses NCI_API_KEY env var)

    Returns:
        Dictionary with search results containing:
        - interventions: List of intervention records
        - total: Total number of results (only when size not specified)
        - page: Current page
        - page_size: Results per page

    Raises:
        CTSAPIError: If the API request fails
    """
    # Build query parameters
    params = _build_intervention_params(
        name,
        intervention_type,
        category,
        codes,
        include,
        sort,
        order,
        page_size,
    )

    logger.info(
        f"Searching interventions at {NCI_INTERVENTIONS_URL} with params: {params}"
    )

    try:
        # Make API request
        response = await make_cts_request(
            url=NCI_INTERVENTIONS_URL,
            params=params,
            api_key=api_key,
        )

        # Log response info
        logger.debug(f"Response type: {type(response)}")

        # Process response
        return _process_intervention_response(response, page, page_size)

    except CTSAPIError:
        raise
    except Exception as e:
        logger.error(f"Failed to search interventions: {e}")
        raise CTSAPIError(f"Intervention search failed: {e!s}") from e


def format_intervention_results(results: dict[str, Any]) -> str:
    """
    Render intervention search results as a markdown document.

    Args:
        results: Search results dictionary; reads the ``interventions``
            list and the ``total`` count.

    Returns:
        Markdown text with a heading followed by one section per
        intervention, or a fixed "no results" sentence when the list is
        empty. At most five synonyms are listed per intervention and
        descriptions longer than 200 characters are cut to 197 plus "...".
    """
    records = results.get("interventions", [])
    if not records:
        return "No interventions found matching the search criteria."

    reported_total = results.get("total", 0)
    shown = len(records)

    # Mention the partial count only when fewer records are shown than exist.
    heading = (
        f"## Intervention Search Results (showing {shown} of {reported_total} found)"
        if shown < reported_total
        else f"## Intervention Search Results ({reported_total} found)"
    )
    out = [heading, ""]

    for record in records:
        identifier = record.get("id", record.get("intervention_id", "Unknown"))
        title = record.get("name", "Unknown Intervention")
        kind = record.get("type", record.get("category", "Unknown"))

        out += [
            f"### {title}",
            f"- **ID**: {identifier}",
            f"- **Type**: {kind}",
        ]

        # Synonyms may arrive as a list or as a single string.
        aliases = record.get("synonyms", [])
        if aliases and isinstance(aliases, list):
            out.append(f"- **Synonyms**: {', '.join(aliases[:5])}")
            hidden = len(aliases) - 5
            if hidden > 0:
                out.append(f"  *(and {hidden} more)*")
        elif aliases and isinstance(aliases, str):
            out.append(f"- **Synonyms**: {aliases}")

        summary = record.get("description")
        if summary:
            if len(summary) > 200:
                summary = summary[:197] + "..."
            out.append(f"- **Description**: {summary}")

        # Blank line separates consecutive intervention sections.
        out.append("")

    return "\n".join(out)


async def search_interventions_with_or(
    name_query: str,
    intervention_type: str | None = None,
    category: str | None = None,
    codes: list[str] | None = None,
    include: list[str] | None = None,
    sort: str | None = None,
    order: str | None = None,
    synonyms: bool = True,
    page_size: int | None = None,
    page: int = 1,
    api_key: str | None = None,
) -> dict[str, Any]:
    """
    Search for interventions with OR query support.

    This function handles OR queries by making multiple API calls and combining results.
    For example: "pembrolizumab OR nivolumab" will search for each term.

    Args:
        name_query: Name query that may contain OR operators
        Other args same as search_interventions

    Returns:
        Combined results from all searches with duplicates removed
    """
    # Check if this is an OR query
    if " OR " in name_query or " or " in name_query:
        search_terms = parse_or_query(name_query)
        logger.info(f"Parsed OR query into terms: {search_terms}")
    else:
        # Single term search
        search_terms = [name_query]

    # Collect all unique interventions
    all_interventions = {}
    total_found = 0

    # Search for each term
    for term in search_terms:
        logger.info(f"Searching interventions for term: {term}")
        try:
            results = await search_interventions(
                name=term,
                intervention_type=intervention_type,
                category=category,
                codes=codes,
                include=include,
                sort=sort,
                order=order,
                synonyms=synonyms,
                page_size=page_size,
                page=page,
                api_key=api_key,
            )

            # Add unique interventions (deduplicate by ID)
            for intervention in results.get("interventions", []):
                int_id = intervention.get(
                    "id", intervention.get("intervention_id")
                )
                if int_id and int_id not in all_interventions:
                    all_interventions[int_id] = intervention

            total_found += results.get("total", 0)

        except Exception as e:
            logger.warning(f"Failed to search for term '{term}': {e}")
            # Continue with other terms

    # Convert back to list and apply pagination
    unique_interventions = list(all_interventions.values())

    # Sort by name for consistent results
    unique_interventions.sort(key=lambda x: x.get("name", "").lower())

    # Apply pagination to combined results
    if page_size:
        start_idx = (page - 1) * page_size
        end_idx = start_idx + page_size
        paginated_interventions = unique_interventions[start_idx:end_idx]
    else:
        paginated_interventions = unique_interventions

    return {
        "interventions": paginated_interventions,
        "total": len(unique_interventions),
        "page": page,
        "page_size": page_size,
        "search_terms": search_terms,  # Include what we searched for
        "total_found_across_terms": total_found,  # Total before deduplication
    }

```

--------------------------------------------------------------------------------
/docs/developer-guides/01-server-deployment.md:
--------------------------------------------------------------------------------

```markdown
# Server Deployment Guide

This guide covers various deployment options for BioMCP, from local development to production cloud deployments with authentication.

## Deployment Options Overview

| Mode                  | Use Case      | Transport       | Authentication | Scalability |
| --------------------- | ------------- | --------------- | -------------- | ----------- |
| **Local STDIO**       | Development   | STDIO           | None           | Single user |
| **HTTP Server**       | Small teams   | Streamable HTTP | Optional       | Moderate    |
| **Docker**            | Containerized | Streamable HTTP | Optional       | Moderate    |
| **Cloudflare Worker** | Production    | SSE/HTTP        | OAuth optional | High        |

## Local Development (STDIO)

The simplest deployment for development and testing.

### Setup

```bash
# Install BioMCP
uv tool install biomcp

# Run in STDIO mode (default)
biomcp run
```

### Configuration

For Claude Desktop integration:

```json
{
  "mcpServers": {
    "biomcp": {
      "command": "biomcp",
      "args": ["run"]
    }
  }
}
```

### Use Cases

- Local development
- Single-user research
- Testing new features

## HTTP Server Deployment

Modern deployment using Streamable HTTP transport.

### Basic Setup

```bash
# Run HTTP server
biomcp run --mode http --host 0.0.0.0 --port 8000
```

### With Environment Variables

```bash
# Create .env file
cat > .env << EOF
BIOMCP_HOST=0.0.0.0
BIOMCP_PORT=8000
NCI_API_KEY=your-key
ALPHAGENOME_API_KEY=your-key
EOF

# Run with env file
biomcp run --mode http
```

### Systemd Service (Linux)

Create `/etc/systemd/system/biomcp.service`:

```ini
[Unit]
Description=BioMCP Server
After=network.target

[Service]
Type=simple
User=biomcp
WorkingDirectory=/opt/biomcp
Environment="PATH=/usr/local/bin:/usr/bin"
EnvironmentFile=/opt/biomcp/.env
ExecStart=/usr/local/bin/biomcp run --mode http
Restart=always
RestartSec=10

[Install]
WantedBy=multi-user.target
```

Enable and start:

```bash
sudo systemctl enable biomcp
sudo systemctl start biomcp
```

### Nginx Reverse Proxy

```nginx
server {
    listen 443 ssl;
    server_name biomcp.example.com;

    ssl_certificate /etc/ssl/certs/biomcp.crt;
    ssl_certificate_key /etc/ssl/private/biomcp.key;

    location /mcp {
        proxy_pass http://localhost:8000;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_buffering off;
    }
}
```

## Docker Deployment

Containerized deployment for consistency and portability.

### Basic Dockerfile

```dockerfile
FROM python:3.11-slim

# Install BioMCP
RUN pip install biomcp-python

# Add API keys (use secrets in production!)
ENV NCI_API_KEY=""
ENV ALPHAGENOME_API_KEY=""

# Expose port
EXPOSE 8000

# Run server
CMD ["biomcp", "run", "--mode", "http", "--host", "0.0.0.0"]
```

### With AlphaGenome Support

```dockerfile
FROM python:3.11-slim

# Install system dependencies
RUN apt-get update && apt-get install -y git

# Install BioMCP
RUN pip install biomcp-python

# Install AlphaGenome
RUN git clone https://github.com/google-deepmind/alphagenome.git && \
    cd alphagenome && \
    pip install .

# Configure
ENV MCP_MODE=http
ENV BIOMCP_HOST=0.0.0.0
ENV BIOMCP_PORT=8000

EXPOSE 8000

CMD ["biomcp", "run"]
```

### Docker Compose

```yaml
version: "3.8"

services:
  biomcp:
    build: .
    ports:
      - "8000:8000"
    environment:
      - MCP_MODE=http
      - NCI_API_KEY=${NCI_API_KEY}
      - ALPHAGENOME_API_KEY=${ALPHAGENOME_API_KEY}
    volumes:
      - ./logs:/app/logs
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
```

### Running

```bash
# Build and run
docker-compose up -d

# View logs
docker-compose logs -f

# Scale horizontally
docker-compose up -d --scale biomcp=3
```

## Cloudflare Worker Deployment

Enterprise-grade deployment with global edge distribution.

### Prerequisites

1. Cloudflare account
2. Wrangler CLI installed
3. Remote BioMCP server running

### Architecture

```
Claude Desktop → Cloudflare Worker (Edge) → BioMCP Server (Origin)
```

### Setup Worker

1. **Install dependencies:**

```bash
npm install @modelcontextprotocol/sdk itty-router
```

2. **Create `wrangler.toml`:**

```toml
name = "biomcp-worker"
main = "src/index.js"
compatibility_date = "2024-01-01"

[vars]
REMOTE_MCP_SERVER_URL = "https://your-biomcp-server.com/mcp"
MCP_SERVER_API_KEY = "your-secret-key"

[[kv_namespaces]]
binding = "AUTH_TOKENS"
id = "your-kv-namespace-id"
```

3. **Deploy:**

```bash
wrangler deploy
```

### With OAuth Authentication (Stytch)

1. **Configure Stytch:**

```toml
[vars]
STYTCH_PROJECT_ID = "project-test-..."
STYTCH_SECRET = "secret-test-..."
STYTCH_PUBLIC_TOKEN = "public-token-test-..."
JWT_SECRET = "your-jwt-secret"
```

2. **OAuth Endpoints:**
   The worker automatically provides:

- `/.well-known/oauth-authorization-server`
- `/authorize`
- `/callback`
- `/token`

3. **Client Configuration:**

```json
{
  "mcpServers": {
    "biomcp": {
      "transport": {
        "type": "sse",
        "url": "https://your-worker.workers.dev"
      },
      "auth": {
        "type": "oauth",
        "client_id": "mcp-client",
        "authorization_endpoint": "https://your-worker.workers.dev/authorize",
        "token_endpoint": "https://your-worker.workers.dev/token",
        "scope": "mcp:access"
      }
    }
  }
}
```

## Production Considerations

### Security

1. **API Key Management:**

```bash
# Use environment variables
export NCI_API_KEY="$(vault kv get -field=key secret/biomcp/nci)"

# Or use secrets management
docker run --secret biomcp_keys biomcp:latest
```

2. **Network Security:**

- Use HTTPS everywhere
- Implement rate limiting
- Set up CORS properly
- Use authentication for public endpoints

3. **Access Control:**

```python
# Example middleware
async def auth_middleware(request, call_next):
    token = request.headers.get("Authorization")
    if not validate_token(token):
        return JSONResponse({"error": "Unauthorized"}, status_code=401)
    return await call_next(request)
```

### Monitoring

1. **Health Checks:**

```python
# Built-in health endpoint
GET /health

# Custom health check
@app.get("/health/detailed")
async def health_detailed():
    return {
        "status": "healthy",
        "version": __version__,
        "apis": check_api_status(),
        "timestamp": datetime.utcnow()
    }
```

2. **Metrics:**

```python
# Prometheus metrics
from prometheus_client import Counter, Histogram

request_count = Counter('biomcp_requests_total', 'Total requests')
request_duration = Histogram('biomcp_request_duration_seconds', 'Request duration')
```

3. **Logging:**

```python
# Structured logging
import structlog

logger = structlog.get_logger()
logger.info("request_processed",
    tool="article_searcher",
    duration=0.234,
    user_id="user123"
)
```

### Scaling

1. **Horizontal Scaling:**

```yaml
# Kubernetes deployment
apiVersion: apps/v1
kind: Deployment
metadata:
  name: biomcp
spec:
  replicas: 3
  selector:
    matchLabels:
      app: biomcp
  template:
    metadata:
      labels:
        app: biomcp
    spec:
      containers:
        - name: biomcp
          image: biomcp:latest
          ports:
            - containerPort: 8000
          resources:
            requests:
              memory: "512Mi"
              cpu: "500m"
            limits:
              memory: "1Gi"
              cpu: "1000m"
```

2. **Caching:**

```python
# Redis caching
import json

import redis
from functools import wraps

redis_client = redis.Redis()

def cache_result(ttl=3600):
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            key = f"{func.__name__}:{str(args)}:{str(kwargs)}"
            cached = redis_client.get(key)
            if cached:
                return json.loads(cached)
            result = await func(*args, **kwargs)
            redis_client.setex(key, ttl, json.dumps(result))
            return result
        return wrapper
    return decorator
```

### Performance Optimization

1. **Connection Pooling:**

```python
# Reuse HTTP connections
import httpx

client = httpx.AsyncClient(
    limits=httpx.Limits(max_keepalive_connections=20),
    timeout=httpx.Timeout(30.0)
)
```

2. **Async Processing:**

```python
# Process requests concurrently
async def handle_batch(requests):
    tasks = [process_request(req) for req in requests]
    return await asyncio.gather(*tasks)
```

3. **Response Compression:**

```python
# Enable gzip compression
from fastapi.middleware.gzip import GZipMiddleware

app.add_middleware(GZipMiddleware, minimum_size=1000)
```

## Migration Path

### From STDIO to HTTP

1. Update server startup:

```bash
# Old
biomcp run

# New
biomcp run --mode http
```

2. Update client configuration:

```json
{
  "mcpServers": {
    "biomcp": {
      "url": "http://localhost:8000/mcp"
    }
  }
}
```

### From SSE to Streamable HTTP

1. Update worker code to use `/mcp` endpoint
2. Update client to use new transport:

```json
{
  "transport": {
    "type": "http",
    "url": "https://biomcp.example.com/mcp"
  }
}
```

## Troubleshooting

### Common Issues

1. **Port Already in Use:**

```bash
# Find process using port
lsof -i :8000

# Kill process
kill -9 <PID>
```

2. **API Key Errors:**

```bash
# Verify environment variables
env | grep -E "(NCI|ALPHAGENOME|CBIO)"

# Test API key
curl -H "X-API-KEY: $NCI_API_KEY" https://api.cancer.gov/v2/trials
```

3. **Connection Timeouts:**

- Increase timeout values
- Check firewall rules
- Verify network connectivity

### Debug Mode

```bash
# Enable debug logging
BIOMCP_LOG_LEVEL=DEBUG biomcp run --mode http

# Or in Docker
docker run -e BIOMCP_LOG_LEVEL=DEBUG biomcp:latest
```

## Next Steps

- Set up [monitoring](../how-to-guides/05-logging-and-monitoring-with-bigquery.md)
- Configure [authentication](../getting-started/03-authentication-and-api-keys.md)
- Review [security policies](../policies.md)
- Implement [CI/CD pipeline](02-contributing-and-testing.md)

```

--------------------------------------------------------------------------------
/src/biomcp/openfda/utils.py:
--------------------------------------------------------------------------------

```python
"""
Utility functions for OpenFDA API integration.
"""

import asyncio
import logging
import os
from typing import Any

from ..http_client import request_api
from .cache import (
    get_cached_response,
    is_cacheable_request,
    set_cached_response,
)
from .exceptions import (
    OpenFDAConnectionError,
    OpenFDARateLimitError,
    OpenFDATimeoutError,
    OpenFDAValidationError,
)
from .input_validation import build_safe_query
from .rate_limiter import FDA_CIRCUIT_BREAKER, FDA_RATE_LIMITER, FDA_SEMAPHORE
from .validation import sanitize_response, validate_fda_response

logger = logging.getLogger(__name__)


def get_api_key() -> str | None:
    """Get OpenFDA API key from environment variable."""
    api_key = os.environ.get("OPENFDA_API_KEY")
    if not api_key:
        logger.debug("No OPENFDA_API_KEY found in environment")
    return api_key


async def make_openfda_request(  # noqa: C901
    endpoint: str,
    params: dict[str, Any],
    domain: str = "openfda",
    api_key: str | None = None,
    max_retries: int = 3,
    initial_delay: float = 1.0,
) -> tuple[dict[str, Any] | None, str | None]:
    """
    Make a request to the OpenFDA API with retry logic and caching.

    Args:
        endpoint: Full URL to the OpenFDA endpoint
        params: Query parameters
        domain: Domain name for metrics tracking
        api_key: Optional API key (overrides environment variable)
        max_retries: Maximum number of retry attempts (default 3)
        initial_delay: Initial delay in seconds for exponential backoff (default 1.0)

    Returns:
        Tuple of (response_data, error_message)
    """
    # Validate and sanitize input parameters
    safe_params = build_safe_query(params)

    # Check cache first (with safe params)
    if is_cacheable_request(endpoint, safe_params):
        cached_response = get_cached_response(endpoint, safe_params)
        if cached_response:
            return cached_response, None

    # Use provided API key or get from environment
    if not api_key:
        api_key = get_api_key()
    if api_key:
        safe_params["api_key"] = api_key

    last_error = None
    delay = initial_delay

    for attempt in range(max_retries + 1):
        try:
            # Apply rate limiting and circuit breaker
            async with FDA_SEMAPHORE:
                await FDA_RATE_LIMITER.acquire()

                # Check circuit breaker state
                if FDA_CIRCUIT_BREAKER.is_open:
                    state = FDA_CIRCUIT_BREAKER.get_state()
                    return None, f"FDA API circuit breaker is open: {state}"

                response, error = await request_api(
                    url=endpoint,
                    request=safe_params,
                    method="GET",
                    domain=domain,
                )

            if error:
                error_msg = (
                    error.message if hasattr(error, "message") else str(error)
                )

                # Check for specific error types
                if "429" in error_msg or "rate limit" in error_msg.lower():
                    if attempt < max_retries:
                        logger.warning(
                            f"Rate limit hit (attempt {attempt + 1}/{max_retries + 1}). "
                            f"Retrying in {delay:.1f} seconds..."
                        )
                        await asyncio.sleep(delay)
                        delay *= 2  # Exponential backoff
                        continue
                    else:
                        raise OpenFDARateLimitError(error_msg)

                # Check if error is retryable
                if _is_retryable_error(error_msg) and attempt < max_retries:
                    logger.warning(
                        f"OpenFDA API error (attempt {attempt + 1}/{max_retries + 1}): {error_msg}. "
                        f"Retrying in {delay:.1f} seconds..."
                    )
                    await asyncio.sleep(delay)
                    delay *= 2  # Exponential backoff
                    continue

                logger.error(f"OpenFDA API error: {error_msg}")
                return None, error_msg

            # Validate and sanitize response
            if response:
                try:
                    validate_fda_response(response, response_type="search")
                    response = sanitize_response(response)
                except OpenFDAValidationError as e:
                    logger.error(f"Invalid FDA response: {e}")
                    return None, str(e)

                # Cache successful response
                if is_cacheable_request(endpoint, safe_params):
                    set_cached_response(endpoint, safe_params, response)

            return response, None

        except asyncio.TimeoutError:
            last_error = "Request timeout"
            if attempt < max_retries:
                logger.warning(
                    f"OpenFDA request timeout (attempt {attempt + 1}/{max_retries + 1}). "
                    f"Retrying in {delay:.1f} seconds..."
                )
                await asyncio.sleep(delay)
                delay *= 2
                continue
            logger.error(
                f"OpenFDA request failed after {max_retries + 1} attempts: {last_error}"
            )
            raise OpenFDATimeoutError(last_error) from None

        except ConnectionError as e:
            last_error = f"Connection error: {e}"
            if attempt < max_retries:
                logger.warning(
                    f"OpenFDA connection error (attempt {attempt + 1}/{max_retries + 1}): {e}. "
                    f"Retrying in {delay:.1f} seconds..."
                )
                await asyncio.sleep(delay)
                delay *= 2
                continue
            logger.error(
                f"OpenFDA request failed after {max_retries + 1} attempts: {last_error}"
            )
            raise OpenFDAConnectionError(last_error) from None

        except (
            OpenFDARateLimitError,
            OpenFDATimeoutError,
            OpenFDAConnectionError,
        ):
            # Re-raise our custom exceptions
            raise
        except Exception as e:
            # Handle unexpected errors gracefully
            logger.error(f"Unexpected OpenFDA request error: {e}")
            return None, str(e)

    return None, last_error


def _is_retryable_error(error_msg: str) -> bool:
    """
    Check if an error is retryable.

    Args:
        error_msg: Error message string

    Returns:
        True if the error is retryable
    """
    retryable_patterns = [
        "rate limit",
        "timeout",
        "connection",
        "503",  # Service unavailable
        "502",  # Bad gateway
        "504",  # Gateway timeout
        "429",  # Too many requests
        "temporary",
        "try again",
    ]

    error_lower = error_msg.lower()
    return any(pattern in error_lower for pattern in retryable_patterns)


def format_count(count: int, label: str) -> str:
    """
    Render ``count`` next to ``label``, pluralizing by appending "s".

    A count of exactly 1 yields "1 <label>"; every other count is shown
    with thousands separators followed by "<label>s".
    """
    return f"1 {label}" if count == 1 else f"{count:,} {label}s"


def truncate_text(text: str, max_length: int = 500) -> str:
    """
    Truncate text to a maximum length with ellipsis.

    Args:
        text: Text to shorten
        max_length: Maximum length of the returned string (default 500)

    Returns:
        ``text`` unchanged when it already fits. Otherwise the text is cut
        so that the result, including a trailing "...", is exactly
        ``max_length`` characters long. When ``max_length`` is below 3 there
        is no room for the ellipsis, so the text is hard-cut to
        ``max_length`` characters (empty string for zero or negative values).
    """
    if len(text) <= max_length:
        return text
    if max_length < 3:
        # A negative slice bound here would keep almost the whole text and
        # return something longer than max_length, so cut without ellipsis.
        return text[: max(max_length, 0)]
    return text[: max_length - 3] + "..."


def clean_text(text: str | None) -> str:
    """Clean and normalize text from FDA data."""
    if not text:
        return ""

    # Remove extra whitespace and newlines
    text = " ".join(text.split())

    # Remove common FDA formatting artifacts
    text = text.replace("\\n", " ")
    text = text.replace("\\r", " ")
    text = text.replace("\\t", " ")

    return text.strip()


def build_search_query(
    field_map: dict[str, str], operator: str = "AND"
) -> str:
    """
    Assemble an OpenFDA search expression from field/value pairs.

    Args:
        field_map: Dictionary mapping field names to search values; entries
            with falsy values are skipped
        operator: Logical operator (AND/OR) placed between the field terms

    Returns:
        Terms of the form ``field:value`` joined by `` <operator> ``.
        Double quotes inside a value are backslash-escaped, and a value that
        contains a space is wrapped in double quotes. Empty string when no
        field has a value.
    """

    def _term(field: str, value: str) -> str:
        # Escape embedded quotes before deciding whether to quote the value
        rendered = value.replace('"', '\\"')
        if " " in rendered:
            rendered = f'"{rendered}"'
        return f"{field}:{rendered}"

    separator = f" {operator} "
    return separator.join(
        _term(field, value) for field, value in field_map.items() if value
    )


def extract_drug_names(result: dict[str, Any]) -> list[str]:
    """
    Collect the distinct drug names mentioned in an OpenFDA result.

    Names are gathered from each ``patient.drug`` entry (its
    ``medicinalproduct`` plus the ``brand_name``/``generic_name`` values of
    its ``openfda`` section) and from a top-level ``openfda`` section.

    Returns:
        Sorted list of unique names.
    """
    found = set()

    def _collect(section: dict[str, Any]) -> None:
        # brand_name / generic_name are iterated, so each element is added
        for key in ("brand_name", "generic_name"):
            if key in section:
                found.update(section[key])

    # Patient drug info (for adverse events)
    if "patient" in result:
        for entry in result.get("patient", {}).get("drug", []):
            if "medicinalproduct" in entry:
                found.add(entry["medicinalproduct"])
            _collect(entry.get("openfda", {}))

    # Direct OpenFDA fields (for labels)
    if "openfda" in result:
        _collect(result["openfda"])

    return sorted(found)


def extract_reactions(result: dict[str, Any]) -> list[str]:
    """
    List the ``reactionmeddrapt`` values found under ``patient.reaction``.

    Entries without that key are skipped; order and duplicates are kept.
    Returns an empty list when the result has no patient or reaction data.
    """
    entries = result.get("patient", {}).get("reaction", [])
    return [
        entry["reactionmeddrapt"]
        for entry in entries
        if "reactionmeddrapt" in entry
    ]


def format_drug_list(drugs: list[str], max_items: int = 5) -> str:
    """
    Join drug names into a comma-separated display string.

    An empty list yields "None specified". When the list is longer than
    ``max_items`` only the first ``max_items`` names are shown, followed by
    a "(+N more)" suffix giving the number of names left out.
    """
    if not drugs:
        return "None specified"

    overflow = len(drugs) - max_items
    if overflow <= 0:
        return ", ".join(drugs)

    visible = ", ".join(drugs[:max_items])
    return f"{visible} (+{overflow} more)"

```

--------------------------------------------------------------------------------
/src/biomcp/openfda/drug_recalls.py:
--------------------------------------------------------------------------------

```python
"""
OpenFDA drug recalls (Enforcement) integration.
"""

import logging
from typing import Any

from .constants import (
    OPENFDA_DEFAULT_LIMIT,
    OPENFDA_DISCLAIMER,
    OPENFDA_DRUG_ENFORCEMENT_URL,
)
from .drug_recalls_helpers import (
    build_recall_search_params,
)
from .utils import (
    clean_text,
    format_count,
    make_openfda_request,
    truncate_text,
)

logger = logging.getLogger(__name__)


async def search_drug_recalls(
    drug: str | None = None,
    recall_class: str | None = None,
    status: str | None = None,
    reason: str | None = None,
    since_date: str | None = None,
    limit: int = OPENFDA_DEFAULT_LIMIT,
    skip: int = 0,
    api_key: str | None = None,
) -> str:
    """
    Query the OpenFDA Enforcement endpoint for drug recalls and render markdown.

    Args:
        drug: Drug name (brand or generic) to search for
        recall_class: Classification (1, 2, or 3)
        status: Recall status (ongoing, completed, terminated)
        reason: Search text in recall reason (passed to the query builder;
            not echoed in the report header)
        since_date: Only show recalls after this date (YYYYMMDD format)
        limit: Maximum number of results to return
        skip: Number of results to skip (for pagination)
        api_key: Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)

    Returns:
        A markdown report of the matching recalls, a "no records" message
        when nothing matched, or an error message when the request failed.
    """
    query = build_recall_search_params(
        drug, recall_class, status, reason, since_date, limit, skip
    )

    payload, failure = await make_openfda_request(
        OPENFDA_DRUG_ENFORCEMENT_URL, query, "openfda_recalls", api_key
    )

    if failure:
        return f"⚠️ Error searching drug recalls: {failure}"

    records = payload.get("results") if payload else None
    if not records:
        return "No drug recall records found matching your criteria."

    # Prefer the API-reported total; fall back to what this page contains.
    meta_results = payload.get("meta", {}).get("results", {})
    total = meta_results.get("total", len(records))

    lines = ["## FDA Drug Recall Records\n"]

    # Echo back the filters the caller supplied.
    for supplied, line in (
        (drug, f"**Drug**: {drug}"),
        (recall_class, f"**Classification**: Class {recall_class}"),
        (status, f"**Status**: {status}"),
        (since_date, f"**Since**: {since_date}"),
    ):
        if supplied:
            lines.append(line)

    lines.append(f"**Total Recalls Found**: {format_count(total, 'recall')}\n")

    # A per-class breakdown is only added when there is more than one record.
    if len(records) > 1:
        lines.extend(_format_recall_class_summary(records))

    lines.append(f"### Recalls (showing {len(records)} of {total}):\n")
    for position, record in enumerate(records, start=1):
        lines.extend(_format_recall_summary(record, position))

    lines.append(f"\n{OPENFDA_DISCLAIMER}")
    return "\n".join(lines)


async def get_drug_recall(
    recall_number: str,
    api_key: str | None = None,
) -> str:
    """
    Get detailed drug recall information for a specific recall.

    Args:
        recall_number: FDA recall number

        api_key: Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)

    Returns:
        Formatted string with detailed recall information
    """
    # Search for the specific recall
    search_params = {"search": f'recall_number:"{recall_number}"', "limit": 1}

    response, error = await make_openfda_request(
        OPENFDA_DRUG_ENFORCEMENT_URL, search_params, "openfda_recalls", api_key
    )

    if error:
        return f"⚠️ Error retrieving drug recall: {error}"

    if not response or not response.get("results"):
        return f"No recall record found for {recall_number}"

    recall = response["results"][0]

    # Format detailed recall information
    output = [f"## Drug Recall Details: {recall_number}\n"]

    # Basic information
    output.extend(_format_recall_header(recall))

    # Reason and details
    output.extend(_format_recall_details(recall))

    # Distribution information
    output.extend(_format_distribution_info(recall))

    # OpenFDA metadata
    if openfda := recall.get("openfda"):
        output.extend(_format_recall_openfda(openfda))

    output.append(f"\n{OPENFDA_DISCLAIMER}")

    return "\n".join(output)


def _format_recall_class_summary(results: list[dict[str, Any]]) -> list[str]:
    """Format summary of recall classifications."""
    output = []

    # Count by classification
    class_counts = {"Class I": 0, "Class II": 0, "Class III": 0}
    for recall in results:
        classification = recall.get("classification", "")
        if classification in class_counts:
            class_counts[classification] += 1

    if any(class_counts.values()):
        output.append("### Classification Summary:")
        if class_counts["Class I"]:
            output.append(
                f"- **Class I** (most serious): {class_counts['Class I']} recalls"
            )
        if class_counts["Class II"]:
            output.append(
                f"- **Class II** (moderate): {class_counts['Class II']} recalls"
            )
        if class_counts["Class III"]:
            output.append(
                f"- **Class III** (least serious): {class_counts['Class III']} recalls"
            )
        output.append("")

    return output


def _format_recall_summary(recall: dict[str, Any], num: int) -> list[str]:
    """Format a single recall summary."""
    output = [f"#### {num}. Recall {recall.get('recall_number', 'Unknown')}"]

    # Classification and status
    classification = recall.get("classification", "Unknown")
    status = recall.get("status", "Unknown")

    # Add severity indicator
    severity_emoji = {
        "Class I": "🔴",  # Most serious
        "Class II": "🟡",  # Moderate
        "Class III": "🟢",  # Least serious
    }.get(classification, "⚪")

    output.append(f"{severity_emoji} **{classification}** - {status}")

    # Date
    if init_date := recall.get("recall_initiation_date"):
        formatted_date = f"{init_date[:4]}-{init_date[4:6]}-{init_date[6:]}"
        output.append(f"**Initiated**: {formatted_date}")

    # Product description
    if product_desc := recall.get("product_description"):
        cleaned = truncate_text(clean_text(product_desc), 200)
        output.append(f"**Product**: {cleaned}")

    # OpenFDA names
    openfda = recall.get("openfda", {})
    if brand_names := openfda.get("brand_name"):
        output.append(f"**Brand**: {', '.join(brand_names[:3])}")

    # Reason for recall
    if reason := recall.get("reason_for_recall"):
        cleaned_reason = truncate_text(clean_text(reason), 300)
        output.append(f"\n**Reason**: {cleaned_reason}")

    # Firm name
    if firm := recall.get("recalling_firm"):
        output.append(f"\n**Recalling Firm**: {firm}")

    output.append("")
    return output


def _format_recall_header(recall: dict[str, Any]) -> list[str]:
    """Format the header section of detailed recall."""
    output = ["### Recall Information"]

    output.append(
        f"**Recall Number**: {recall.get('recall_number', 'Unknown')}"
    )
    output.append(
        f"**Classification**: {recall.get('classification', 'Unknown')}"
    )
    output.append(f"**Status**: {recall.get('status', 'Unknown')}")

    if event_id := recall.get("event_id"):
        output.append(f"**Event ID**: {event_id}")

    # Dates
    if init_date := recall.get("recall_initiation_date"):
        formatted = f"{init_date[:4]}-{init_date[4:6]}-{init_date[6:]}"
        output.append(f"**Initiation Date**: {formatted}")

    if report_date := recall.get("report_date"):
        formatted = f"{report_date[:4]}-{report_date[4:6]}-{report_date[6:]}"
        output.append(f"**Report Date**: {formatted}")

    if term_date := recall.get("termination_date"):
        formatted = f"{term_date[:4]}-{term_date[4:6]}-{term_date[6:]}"
        output.append(f"**Termination Date**: {formatted}")

    output.append("")
    return output


def _format_recall_details(recall: dict[str, Any]) -> list[str]:
    """Format recall details and reason."""
    output = ["### Product and Reason"]

    if product_desc := recall.get("product_description"):
        output.append(f"**Product Description**:\n{clean_text(product_desc)}")

    if reason := recall.get("reason_for_recall"):
        output.append(f"\n**Reason for Recall**:\n{clean_text(reason)}")

    if quantity := recall.get("product_quantity"):
        output.append(f"\n**Product Quantity**: {quantity}")

    if code_info := recall.get("code_info"):
        output.append(f"\n**Code Information**:\n{clean_text(code_info)}")

    output.append("")
    return output


def _format_distribution_info(recall: dict[str, Any]) -> list[str]:
    """Format distribution information."""
    output = ["### Distribution Information"]

    if firm := recall.get("recalling_firm"):
        output.append(f"**Recalling Firm**: {firm}")

    if city := recall.get("city"):
        state = recall.get("state", "")
        country = recall.get("country", "")
        location = city
        if state:
            location += f", {state}"
        if country:
            location += f", {country}"
        output.append(f"**Location**: {location}")

    if dist_pattern := recall.get("distribution_pattern"):
        output.append(
            f"\n**Distribution Pattern**:\n{clean_text(dist_pattern)}"
        )

    if action := recall.get("voluntary_mandated"):
        output.append(f"\n**Action Type**: {action}")

    output.append("")
    return output


def _format_recall_openfda(openfda: dict[str, Any]) -> list[str]:
    """Format OpenFDA metadata for recall."""
    output = ["### Drug Information"]

    if brand_names := openfda.get("brand_name"):
        output.append(f"**Brand Names**: {', '.join(brand_names)}")

    if generic_names := openfda.get("generic_name"):
        output.append(f"**Generic Names**: {', '.join(generic_names)}")

    if manufacturers := openfda.get("manufacturer_name"):
        output.append(f"**Manufacturers**: {', '.join(manufacturers[:3])}")

    if ndas := openfda.get("application_number"):
        output.append(f"**Application Numbers**: {', '.join(ndas[:5])}")

    if routes := openfda.get("route"):
        output.append(f"**Routes**: {', '.join(routes)}")

    if pharm_class := openfda.get("pharm_class_epc"):
        output.append(f"**Pharmacologic Class**: {', '.join(pharm_class[:3])}")

    output.append("")
    return output

```

--------------------------------------------------------------------------------
/docs/workflows/all-workflows.md:
--------------------------------------------------------------------------------

```markdown
# BioMCP Research Workflows

Quick, practical workflows for common biomedical research tasks.

## 1. Literature Review Workflow

### Quick Start

```bash
# Find key papers on BRAF V600E melanoma therapy
biomcp article search --gene BRAF --disease melanoma \
  --keyword "V600E|therapy|treatment" --limit 50 \
  --format json > braf_papers.json
```

### Full Workflow Script

```python
import asyncio
from biomcp import BioMCPClient

async def literature_review(gene, disease, focus_terms):
    """Run one article search per focus term and print a short summary.

    Returns a dict mapping each focus term to the articles found for it.
    """
    async with BioMCPClient() as client:
        # 1. Get gene context
        gene_info = await client.genes.get(gene)

        # 2. Search by topic
        by_topic = {}
        for topic in focus_terms:
            response = await client.articles.search(
                genes=[gene],
                diseases=[disease],
                keywords=[topic],
                limit=30
            )
            by_topic[topic] = response.articles

        # 3. Generate summary
        total_found = sum(len(found) for found in by_topic.values())
        print(f"\n{gene} in {disease}: Found {total_found} articles")
        for topic, found in by_topic.items():
            print(f"\n{topic}: {len(found)} articles")
            # Show the first three titles per topic, clipped to 80 characters
            for paper in found[:3]:
                print(f"  - {paper.title[:80]}... ({paper.year})")

        return by_topic

# Run it
asyncio.run(literature_review(
    "BRAF",
    "melanoma",
    ["resistance", "combination therapy", "immunotherapy"]
))
```

### Key Points

- Start broad, then narrow by topic
- Use OR syntax (`term1|term2`, as in the `--keyword` example above) to cover alternative variant notations
- Export results for citation management
- Set up weekly searches for updates

---

## 2. Clinical Trial Matching Workflow

### Quick Start

```bash
# Find trials for EGFR-mutant lung cancer near Boston
biomcp trial search --condition "lung cancer" \
  --term "EGFR mutation" --status RECRUITING \
  --latitude 42.3601 --longitude -71.0589 --distance 100
```

### Patient Matching Script

```python
async def match_patient_to_trials(patient_profile):
    """Search recruiting trials near the patient and return the five best-scoring.

    Returns a list of (score, trial) tuples, highest score first.
    """
    async with BioMCPClient() as client:
        # 1. Search trials with location
        found = await client.trials.search(
            conditions=[patient_profile['diagnosis']],
            other_terms=patient_profile['mutations'],
            lat=patient_profile['lat'],
            long=patient_profile['long'],
            distance=patient_profile['max_distance'],
            status="RECRUITING"
        )

        # 2. Score trials (only the first 20 are considered)
        ranked = []
        for trial in found.trials[:20]:
            # Location score
            points = 25 if trial.distance < 50 else 0

            # Phase score
            if trial.phase == "PHASE3":
                points += 20
            elif trial.phase == "PHASE2":
                points += 15

            # Mutation match: any patient mutation mentioned in the eligibility text
            mentioned = any(
                mutation in str(trial.eligibility)
                for mutation in patient_profile['mutations']
            )
            if mentioned:
                points += 30

            ranked.append((points, trial))

        # 3. Return top matches
        ranked = sorted(ranked, key=lambda pair: pair[0], reverse=True)
        return ranked[:5]

# Example patient
patient = {
    'diagnosis': 'non-small cell lung cancer',
    'mutations': ['EGFR L858R'],
    'lat': 42.3601,
    'long': -71.0589,
    'max_distance': 100
}

matches = asyncio.run(match_patient_to_trials(patient))
```

### Key Points

- Always use coordinates for location search
- Check both ClinicalTrials.gov and NCI sources
- Contact trial sites directly for pre-screening
- Consider travel burden in recommendations

---

## 3. Variant Interpretation Workflow

### Quick Start

```bash
# Get variant annotations
biomcp variant get rs121913529  # By rsID
biomcp variant get "NM_007294.4:c.5266dupC"  # By HGVS

# Search pathogenic variants
biomcp variant search --gene BRCA1 --significance pathogenic
```

### Variant Analysis Script

```python
async def interpret_variant(gene, variant_notation, cancer_type):
    """Look up a variant, related literature and recruiting trials, then print a summary.

    Returns a dict with the variant's significance and frequency plus the
    number of articles and trials found. A failed variant lookup is reported
    as significance "Not found" rather than aborting the workflow.
    """
    async with BioMCPClient() as client:
        # 1. Get variant details
        try:
            variant = await client.variants.get(variant_notation)
            significance = variant.clinical_significance
            frequency = variant.frequencies.gnomad if hasattr(variant, 'frequencies') else None
        except Exception:
            # Catch Exception, not a bare except: a bare except would also
            # swallow task cancellation and Ctrl-C.
            significance = "Not found"
            frequency = None

        # 2. Search literature
        articles = await client.articles.search(
            genes=[gene],
            variants=[variant_notation],
            diseases=[cancer_type],
            limit=10
        )

        # 3. Find trials
        trials = await client.trials.search(
            conditions=[cancer_type],
            other_terms=[f"{gene} mutation"],
            status="RECRUITING",
            limit=5
        )

        # 4. Generate interpretation
        print(f"\nVariant: {gene} {variant_notation}")
        print(f"Significance: {significance}")
        print(f"Population Frequency: {frequency or 'Unknown'}")
        print(f"Literature: {len(articles.articles)} relevant papers")
        print(f"Clinical Trials: {len(trials.trials)} active trials")

        # Actionability assessment
        if significance in ["Pathogenic", "Likely pathogenic"]:
            if trials.trials:
                print("✓ ACTIONABLE - Clinical trials available")
            else:
                print("⚠ Pathogenic but no targeted trials")

        return {
            'significance': significance,
            'frequency': frequency,
            'articles': len(articles.articles),
            'trials': len(trials.trials)
        }

# Run it
asyncio.run(interpret_variant("BRAF", "p.V600E", "melanoma"))
```

### Key Points

- Check multiple databases (MyVariant, ClinVar via articles)
- Consider cancer type for interpretation
- Look for FDA-approved therapies
- Document tier classification

---

## 4. Quick Integration Patterns

### Batch Processing

```python
# Process multiple queries efficiently
async def batch_analysis(items):
    """Fetch every gene/variant item concurrently.

    Items whose 'type' is neither 'gene' nor 'variant' are skipped. Failures
    are returned in the result list as exception objects instead of raising.
    """
    async with BioMCPClient() as client:
        pending = []
        for entry in items:
            kind = entry['type']
            if kind == 'gene':
                fetch = client.genes.get
            elif kind == 'variant':
                fetch = client.variants.get
            else:
                continue
            pending.append(fetch(entry['id']))

        return await asyncio.gather(*pending, return_exceptions=True)
```

### Error Handling

```python
from biomcp.exceptions import NotFoundError, RateLimitError
import asyncio

async def robust_search(search_func, **params):
    """Call ``search_func(**params)``, retrying on rate limits.

    Makes up to three attempts. After a rate-limit error it waits 1s, then 2s,
    before retrying; the error from the final attempt is re-raised. A
    not-found error returns None instead of raising.
    """
    retries = 3
    for attempt in range(retries):
        try:
            return await search_func(**params)
        except RateLimitError:
            if attempt < retries - 1:
                # Exponential backoff. Use asyncio.sleep, not time.sleep:
                # a blocking sleep would stall every other task on the loop.
                await asyncio.sleep(2 ** attempt)
            else:
                raise
        except NotFoundError:
            return None
```

### Caching Results

```python
from functools import wraps
import json

# Simple file-based cache, keyed by the call arguments
def cache_results(filename):
    """Cache an async function's JSON-serializable results in ``filename``.

    The file holds one JSON object mapping an argument key to the cached
    result, so calls with different arguments do not return each other's data.
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            # Stable key for this exact call; non-JSON arguments fall back to str()
            key = json.dumps([args, kwargs], sort_keys=True, default=str)

            # Check cache (a missing or unreadable file means an empty cache)
            try:
                with open(filename, 'r') as f:
                    cache = json.load(f)
            except (FileNotFoundError, json.JSONDecodeError):
                cache = {}
            if not isinstance(cache, dict):
                cache = {}

            if key in cache:
                return cache[key]

            # Fetch and cache
            result = await func(*args, **kwargs)
            cache[key] = result
            with open(filename, 'w') as f:
                json.dump(cache, f)
            return result
        return wrapper
    return decorator

@cache_results('gene_cache.json')
async def get_gene_info(gene):
    # NOTE: the result must be JSON-serializable for the cache write to succeed
    async with BioMCPClient() as client:
        return await client.genes.get(gene)
```

---

## Complete Example: Precision Medicine Report

```python
from datetime import datetime

async def generate_precision_medicine_report(patient):
    """Generate comprehensive report for molecular tumor board.

    For every variant in ``patient['variants']`` this looks up annotations,
    related literature and recruiting trials, prints a short summary, and
    returns the assembled report dict.
    """

    async with BioMCPClient() as client:
        report = {
            'patient_id': patient['id'],
            'date': datetime.now().isoformat(),
            'variants': [],
            'trials': [],
            'therapies': []
        }

        # Analyze each variant
        for variant in patient['variants']:
            # Get annotations (with retry handling from robust_search)
            var_info = await robust_search(
                client.variants.search,
                gene=variant['gene'],
                hgvs=variant['hgvs']
            )

            # Search literature
            articles = await client.articles.search(
                genes=[variant['gene']],
                diseases=[patient['cancer_type']],
                keywords=['therapy', 'treatment'],
                limit=5
            )

            # Find trials
            trials = await client.trials.search(
                conditions=[patient['cancer_type']],
                other_terms=[f"{variant['gene']} mutation"],
                status="RECRUITING",
                limit=3
            )

            report['variants'].append({
                'variant': variant,
                'annotation': var_info,
                'relevant_articles': len(articles.articles),
                'available_trials': len(trials.trials)
            })

            report['trials'].extend(trials.trials)

        # Generate summary
        print(f"\nPrecision Medicine Report - {patient['id']}")
        print(f"Cancer Type: {patient['cancer_type']}")
        print(f"Variants Analyzed: {len(report['variants'])}")
        print(f"Clinical Trials Found: {len(report['trials'])}")

        # Prioritize actionable findings
        actionable = [v for v in report['variants']
                     if v['available_trials'] > 0]

        if actionable:
            print(f"\n✓ {len(actionable)} ACTIONABLE variants with trial options")

        return report

# Example usage
patient = {
    'id': 'PT001',
    'cancer_type': 'lung adenocarcinoma',
    'variants': [
        {'gene': 'EGFR', 'hgvs': 'p.L858R'},
        {'gene': 'TP53', 'hgvs': 'p.R273H'}
    ]
}

report = asyncio.run(generate_precision_medicine_report(patient))
```

---

## Tips for All Workflows

1. **Always start with the think tool** (for AI assistants)
2. **Use official gene symbols** - check genenames.org
3. **Batch API calls** when possible
4. **Handle errors gracefully** - APIs can be unavailable
5. **Cache frequently accessed data** - respect rate limits
6. **Document your process** - for reproducibility

## Next Steps

- [Command Reference](../reference/quick-reference.md)
- [API Documentation](../apis/python-sdk.md)
- [Troubleshooting](../troubleshooting.md)

```

--------------------------------------------------------------------------------
/src/biomcp/trials/nci_search.py:
--------------------------------------------------------------------------------

```python
"""NCI Clinical Trials Search API integration for trial searches."""

import logging
from typing import Any

from ..constants import NCI_TRIALS_URL
from ..diseases.search import search_diseases
from ..integrations.cts_api import CTSAPIError, make_cts_request
from ..interventions.search import search_interventions
from .search import TrialQuery

logger = logging.getLogger(__name__)


async def _expand_disease_terms(
    conditions: list[str],
    expand_synonyms: bool,
) -> list[str]:
    """Expand disease terms with synonyms if requested."""
    if not expand_synonyms:
        return conditions

    disease_terms = []
    for condition in conditions:
        try:
            results = await search_diseases(
                name=condition,
                include_synonyms=True,
                page_size=5,
            )
            # Add the original term plus any exact matches
            disease_terms.append(condition)
            for disease in results.get("diseases", [])[:3]:
                if disease.get("name"):
                    disease_terms.append(disease["name"])
                # Add top synonyms
                synonyms = disease.get("synonyms", [])
                if isinstance(synonyms, list):
                    disease_terms.extend(synonyms[:2])
        except Exception as e:
            logger.warning(f"Failed to expand disease term {condition}: {e}")
            disease_terms.append(condition)

    # Remove duplicates while preserving order
    seen = set()
    unique_diseases = []
    for term in disease_terms:
        if term.lower() not in seen:
            seen.add(term.lower())
            unique_diseases.append(term)

    return unique_diseases


async def _normalize_interventions(interventions: list[str]) -> list[str]:
    """Normalize intervention names to IDs where possible."""
    intervention_ids = []
    for intervention in interventions:
        try:
            results = await search_interventions(
                name=intervention,
                page_size=1,
            )
            interventions_data = results.get("interventions", [])
            if interventions_data:
                # Use the ID if available, otherwise the name
                int_id = interventions_data[0].get("id", intervention)
                intervention_ids.append(int_id)
            else:
                intervention_ids.append(intervention)
        except Exception:
            intervention_ids.append(intervention)

    return intervention_ids


def _map_phase_to_nci(phase: Any) -> str | None:
    """Map TrialPhase enum to NCI phase values."""
    if not phase:
        return None

    phase_map = {
        "EARLY_PHASE1": "I",
        "PHASE1": "I",
        "PHASE2": "II",
        "PHASE3": "III",
        "PHASE4": "IV",
        "NOT_APPLICABLE": "NA",
    }
    return phase_map.get(phase.value, phase.value)


def _map_status_to_nci(recruiting_status: Any) -> list[str] | None:
    """Map RecruitingStatus enum to NCI status values."""
    if not recruiting_status:
        return None

    status_map = {
        "OPEN": ["recruiting", "enrolling_by_invitation"],
        "CLOSED": ["active_not_recruiting", "completed", "terminated"],
        "ANY": None,
    }
    return status_map.get(recruiting_status.value)


def _map_sort_to_nci(sort: Any) -> str | None:
    """Map SortOrder enum to NCI sort values."""
    if not sort:
        return None

    sort_map = {
        "RELEVANCE": "relevance",
        "LAST_UPDATE": "last_update_date",
        "START_DATE": "start_date",
        "COMPLETION_DATE": "completion_date",
    }
    return sort_map.get(sort.value)


def _add_location_params(params: dict[str, Any], query: TrialQuery) -> None:
    """Write latitude/longitude/distance into ``params`` when a point is given.

    Nothing is added unless both ``query.lat`` and ``query.long`` are set.
    The distance falls back to 50 when ``query.distance`` is falsy.
    """
    if query.lat is None or query.long is None:
        return

    params["latitude"] = query.lat
    params["longitude"] = query.long
    params["distance"] = query.distance or 50


def _add_eligibility_params(params: dict[str, Any], query: TrialQuery) -> None:
    """Copy the query's eligibility filters into ``params`` when they are set.

    Prior therapies and required mutations are added when non-empty; the
    brain-metastases flag is added whenever it is not None, so an explicit
    False is kept.
    """
    therapies = query.prior_therapies
    if therapies:
        params["prior_therapy"] = therapies

    mutations = query.required_mutations
    if mutations:
        params["biomarkers"] = mutations

    brain_mets = query.allow_brain_mets
    if brain_mets is not None:
        params["accepts_brain_mets"] = brain_mets


async def convert_query_to_nci(query: TrialQuery) -> dict[str, Any]:
    """
    Build the NCI CTS API parameter dict for a TrialQuery.

    Only fields that are set on the query produce a parameter, except
    ``size``, which is always present (20 when the query has no page size).
    Conditions may be expanded with synonyms and interventions are
    normalized through their respective lookups.
    """
    nci_params: dict[str, Any] = {}

    # Free-text terms are joined into a single full-text query
    if query.terms:
        nci_params["_fulltext"] = " ".join(query.terms)

    # Conditions/diseases, optionally expanded with synonyms
    if query.conditions:
        expanded = await _expand_disease_terms(
            query.conditions,
            query.expand_synonyms,
        )
        if expanded:
            nci_params["diseases"] = expanded

    # Interventions
    if query.interventions:
        normalized = await _normalize_interventions(query.interventions)
        nci_params["interventions"] = normalized

    # NCT IDs
    if query.nct_ids:
        nci_params["nct_ids"] = query.nct_ids

    # Phase and recruitment status, translated to NCI vocabulary
    phase_code = _map_phase_to_nci(query.phase)
    if phase_code:
        nci_params["phase"] = phase_code

    status_values = _map_status_to_nci(query.recruiting_status)
    if status_values:
        nci_params["recruitment_status"] = status_values

    # Location and eligibility filters are added in place
    _add_location_params(nci_params, query)
    _add_eligibility_params(nci_params, query)

    # Pagination
    nci_params["size"] = query.page_size or 20

    # Sort order
    sort_field = _map_sort_to_nci(query.sort)
    if sort_field:
        nci_params["sort"] = sort_field

    return nci_params


async def search_trials_nci(
    query: TrialQuery,
    api_key: str | None = None,
) -> dict[str, Any]:
    """
    Run a trial search against the NCI CTS API.

    Args:
        query: The search criteria to translate into NCI parameters.
        api_key: Optional key forwarded to the request helper.

    Returns:
        Dictionary with:
        - trials: List of trial records
        - total: Total number of results
        - next_page: Token for next page (if available)
        - source: "nci" to indicate data source

    Raises:
        CTSAPIError: Propagated as-is from the request; any other failure is
            logged and wrapped in a CTSAPIError.
    """
    try:
        nci_params = await convert_query_to_nci(query)

        payload = await make_cts_request(
            url=NCI_TRIALS_URL,
            params=nci_params,
            api_key=api_key,
        )

        # Records are read from "data", falling back to "trials".
        fallback_records = payload.get("trials", [])
        records = payload.get("data", fallback_records)

        return {
            "trials": records,
            "total": payload.get("total", len(records)),
            "next_page": payload.get("next_page_token"),
            "source": "nci",
        }

    except CTSAPIError:
        raise
    except Exception as exc:
        logger.error(f"NCI trial search failed: {exc}")
        raise CTSAPIError(f"Trial search failed: {exc!s}") from exc


def _format_trial_header(trial: dict[str, Any]) -> list[str]:
    """Format trial header with basic info."""
    nct_id = trial.get("nct_id", trial.get("protocol_id", "Unknown"))
    title = trial.get("title", trial.get("brief_title", "Untitled"))
    phase = trial.get("phase", "Not specified")
    status = trial.get("overall_status", trial.get("status", "Unknown"))

    return [
        f"### [{nct_id}] {title}",
        f"- **Phase**: {phase}",
        f"- **Status**: {status}",
    ]


def _format_trial_summary_text(trial: dict[str, Any]) -> list[str]:
    """Format trial summary text if available."""
    summary = trial.get("brief_summary", trial.get("description", ""))
    if not summary:
        return []

    if len(summary) > 200:
        summary = summary[:197] + "..."
    return [f"- **Summary**: {summary}"]


def _format_trial_conditions(trial: dict[str, Any]) -> list[str]:
    """Format trial conditions/diseases."""
    conditions = trial.get("diseases", trial.get("conditions", []))
    if not conditions:
        return []

    lines = []
    if isinstance(conditions, list):
        lines.append(f"- **Conditions**: {', '.join(conditions[:3])}")
        if len(conditions) > 3:
            lines.append(f"  *(and {len(conditions) - 3} more)*")
    else:
        lines.append(f"- **Conditions**: {conditions}")

    return lines


def _format_trial_interventions(trial: dict[str, Any]) -> list[str]:
    """Format trial interventions."""
    interventions = trial.get("interventions", [])
    if not interventions:
        return []

    int_names = []
    for intervention in interventions[:3]:
        if isinstance(intervention, dict):
            int_names.append(intervention.get("name", "Unknown"))
        else:
            int_names.append(str(intervention))

    if not int_names:
        return []

    lines = [f"- **Interventions**: {', '.join(int_names)}"]
    if len(interventions) > 3:
        lines.append(f"  *(and {len(interventions) - 3} more)*")

    return lines


def _format_trial_metadata(trial: dict[str, Any]) -> list[str]:
    """Format trial metadata (sponsor, eligibility notes)."""
    lines = []

    lead_org = trial.get("lead_org", trial.get("sponsor", ""))
    if lead_org:
        lines.append(f"- **Lead Organization**: {lead_org}")

    if trial.get("accepts_brain_mets"):
        lines.append("- **Note**: Accepts patients with brain metastases")

    return lines


def _format_trial_summary(trial: dict[str, Any]) -> list[str]:
    """Format a single trial summary."""
    lines = []

    # Add header info
    lines.extend(_format_trial_header(trial))

    # Add summary text
    lines.extend(_format_trial_summary_text(trial))

    # Add conditions
    lines.extend(_format_trial_conditions(trial))

    # Add interventions
    lines.extend(_format_trial_interventions(trial))

    # Add metadata
    lines.extend(_format_trial_metadata(trial))

    lines.append("")
    return lines


def format_nci_trial_results(results: dict[str, Any]) -> str:
    """
    Render NCI trial search results as a markdown document.

    Args:
        results: Dict with a ``trials`` list and a ``total`` count; both are
            optional (no trials / 0).

    Returns:
        A heading with the total, a source line, then one entry per trial —
        or a "no trials found" message when the list is empty.
    """
    trial_records = results.get("trials", [])
    reported_total = results.get("total", 0)

    if not trial_records:
        return "No trials found matching the search criteria in NCI database."

    document = [
        f"## NCI Clinical Trials Search Results ({reported_total} found)",
        "",
        "*Source: NCI Clinical Trials Search API*",
        "",
    ]
    for record in trial_records:
        document += _format_trial_summary(record)

    return "\n".join(document)

```

--------------------------------------------------------------------------------
/src/biomcp/variants/alphagenome.py:
--------------------------------------------------------------------------------

```python
"""AlphaGenome integration for variant effect prediction."""

import logging
import os
import re
from typing import Any, TypedDict

from ..utils.request_cache import request_cache

logger = logging.getLogger(__name__)

# Cut-off on the absolute raw score above which a track is counted as a
# substantial change in the report summary
DEFAULT_SIGNIFICANCE_THRESHOLD = 0.5

# Accepted chromosome names: chr1-chr22, chrX, chrY, chrM and chrMT
CHROMOSOME_PATTERN = re.compile(r"^chr([1-9]|1[0-9]|2[0-2]|X|Y|M|MT)$")

# Bases permitted in reference/alternate allele strings
VALID_NUCLEOTIDES = {"A", "C", "G", "T"}


class VariantPrediction(TypedDict):
    """Typed dictionary shape for structured variant prediction results.

    NOTE(review): none of the functions visible in this module build or
    return this type (``predict_variant_effects`` returns a markdown string);
    confirm where it is consumed before relying on it.
    """

    # Field meanings below are inferred from the names only — TODO confirm.
    gene_expression: dict[str, float]  # presumably label -> expression score
    chromatin_accessibility: dict[str, float]  # presumably label -> accessibility score
    splicing_effects: list[str]  # presumably free-text splicing findings
    summary_stats: dict[str, int]  # presumably named counters


@request_cache(ttl=1800)  # results are cached for 30 minutes
async def predict_variant_effects(
    chromosome: str,
    position: int,
    reference: str,
    alternate: str,
    interval_size: int = 131_072,
    tissue_types: list[str] | None = None,
    significance_threshold: float = DEFAULT_SIGNIFICANCE_THRESHOLD,
    api_key: str | None = None,
) -> str:
    """
    Score one variant with AlphaGenome and report the outcome as markdown.

    Args:
        chromosome: Chromosome name (e.g., 'chr7')
        position: 1-based genomic position
        reference: Reference allele(s)
        alternate: Alternate allele(s)
        interval_size: Requested context window in bp. It is rounded up to
            the nearest supported size (2048, 16384, 131072, 524288 or
            1048576); anything larger uses 1048576.
        tissue_types: Optional UBERON ontology terms. In this function they
            are only echoed in the failure message.
        significance_threshold: Threshold for significant changes (default 0.5)
        api_key: Optional API key; when falsy the ALPHAGENOME_API_KEY
            environment variable is used instead

    Returns:
        Markdown text: the formatted predictions, or an explanatory message
        when the API key is missing, the ``alphagenome`` package cannot be
        imported, or the prediction itself fails.

    Raises:
        ValueError: If input parameters are invalid
    """
    _validate_inputs(chromosome, position, reference, alternate)

    # An explicitly passed key wins; otherwise consult the environment
    api_key = api_key or os.getenv("ALPHAGENOME_API_KEY")
    if not api_key:
        missing_key_help = (
            "❌ **AlphaGenome API key required**\n\n"
            "I need an API key to use AlphaGenome. Please provide it by either:\n\n"
            "**Option 1: Include your key in your request**\n"
            'Say: "My AlphaGenome API key is YOUR_KEY_HERE" and I\'ll use it for this prediction.\n\n'
            "**Option 2: Set it as an environment variable (for persistent use)**\n"
            "```bash\n"
            "export ALPHAGENOME_API_KEY='your-key'\n"
            "```\n\n"
            "Get a free API key at: https://deepmind.google.com/science/alphagenome\n\n"
            "**ACTION REQUIRED**: Please provide your API key using Option 1 above to continue."
        )
        return missing_key_help

    # AlphaGenome is imported lazily so a missing install yields guidance
    # instead of an import-time failure
    try:
        import warnings

        # Silence protobuf runtime-version warnings
        warnings.filterwarnings(
            "ignore",
            category=UserWarning,
            module="google.protobuf.runtime_version",
        )

        from alphagenome.data import genome
        from alphagenome.models import dna_client, variant_scorers
    except ImportError:
        install_help = (
            "❌ **AlphaGenome not installed**\n\n"
            "To install:\n"
            "```bash\n"
            "git clone https://github.com/google-deepmind/alphagenome.git\n"
            "cd alphagenome && pip install .\n"
            "```\n\n"
            "Standard variant annotations are still available via `variant_searcher`."
        )
        return install_help

    try:
        client = dna_client.create(api_key)

        # Snap the request to the smallest supported window that covers it,
        # or to the largest window when the request exceeds every option
        window_sizes = [2048, 16384, 131072, 524288, 1048576]
        requested_size = interval_size
        interval_size = next(
            (size for size in window_sizes if size >= requested_size),
            window_sizes[-1],
        )

        # Centre the window on the variant; the extra -1 converts the
        # 1-based position to a 0-based start, clamped at the chromosome start
        window_start = max(0, position - interval_size // 2 - 1)
        window = genome.Interval(
            chromosome=chromosome,
            start=window_start,
            end=window_start + interval_size,
        )

        variant = genome.Variant(
            chromosome=chromosome,
            position=position,
            reference_bases=reference,
            alternate_bases=alternate,
        )

        # Recommended scorer set for human
        scorers = variant_scorers.get_recommended_scorers(organism="human")

        scores = client.score_variant(
            interval=window, variant=variant, variant_scorers=scorers
        )

        return _format_predictions(
            variant, scores, interval_size, significance_threshold
        )

    except Exception as exc:
        logger.error(f"AlphaGenome prediction failed: {exc}", exc_info=True)
        return (
            f"❌ **AlphaGenome prediction failed**\n\n"
            f"Error: {exc!s}\n\n"
            f"**Context:**\n"
            f"- Variant: {chromosome}:{position} {reference}>{alternate}\n"
            f"- Interval size: {interval_size:,} bp\n"
            f"- Tissue types: {tissue_types or 'None specified'}"
        )


def _format_predictions(
    variant: Any,
    scores: list[Any],
    interval_size: int,
    significance_threshold: float = DEFAULT_SIGNIFICANCE_THRESHOLD,
) -> str:
    """Render AlphaGenome scores as a markdown report.

    Args:
        variant: The variant object from AlphaGenome
        scores: Prediction scores as returned by ``score_variant``
        interval_size: Size of the genomic context window in bp
        significance_threshold: Absolute raw-score cut-off used for the
            "substantial changes" count

    Returns:
        Markdown report, or a short "formatting failed" message if anything
        raises while the report is being built.
    """
    try:
        from alphagenome.models import variant_scorers

        # One row per scored track
        tidy = variant_scorers.tidy_scores(scores)

        report = [
            "## AlphaGenome Variant Effect Predictions\n",
            f"**Variant**: {variant.chromosome}:{variant.position} {variant.reference_bases}>{variant.alternate_bases}",
            f"**Analysis window**: {interval_size:,} bp\n",
        ]

        if tidy.empty:
            report.append("\n*No significant regulatory effects predicted*")
            return "\n".join(report)

        def rows_matching(pattern: str) -> Any:
            # Rows whose output type matches the (regex) pattern
            return tidy[tidy["output_type"].str.contains(pattern, na=False)]

        def strongest(rows: Any) -> Any:
            # Row with the largest absolute raw score
            return rows.loc[rows["raw_score"].abs().idxmax()]

        def trend(value: Any) -> str:
            return "↓ decreases" if value < 0 else "↑ increases"

        # Gene expression: report only the single strongest track
        expression_rows = rows_matching("RNA_SEQ")
        if not expression_rows.empty:
            peak = strongest(expression_rows)
            gene_label = peak.get("gene_name", "Unknown")
            effect = peak["raw_score"]
            direction = trend(effect)
            report.append("\n### Gene Expression")
            report.append(
                f"- **{gene_label}**: {effect:+.2f} log₂ fold change ({direction} expression)"
            )

        # Chromatin accessibility: again only the strongest track
        accessibility_rows = rows_matching("ATAC|DNASE")
        if not accessibility_rows.empty:
            peak = strongest(accessibility_rows)
            effect = peak["raw_score"]
            track_label = peak.get("track_name", "tissue")
            direction = trend(effect)
            report.append("\n### Chromatin Accessibility")
            report.append(
                f"- **{track_label}**: {effect:+.2f} log₂ change ({direction} accessibility)"
            )

        # Splicing: presence of any splice track is reported, not its score
        if not rows_matching("SPLICE").empty:
            report.append("\n### Splicing")
            report.append("- Potential splicing alterations detected")

        # Summary over every track, whatever its output type
        track_count = len(tidy)
        above_threshold = tidy["raw_score"].abs() > significance_threshold
        substantial_count = len(tidy[above_threshold])
        report.append("\n### Summary")
        report.append(f"- Analyzed {track_count} regulatory tracks")
        report.append(
            f"- {substantial_count} tracks show substantial changes (|log₂| > {significance_threshold})"
        )

        return "\n".join(report)

    except Exception as exc:
        logger.error(f"Failed to format predictions: {exc}")
        return f"## AlphaGenome Results\n\nPrediction completed but formatting failed: {exc!s}"


def _validate_inputs(
    chromosome: str, position: int, reference: str, alternate: str
) -> None:
    """Validate input parameters for variant prediction.

    Args:
        chromosome: Chromosome identifier
        position: Genomic position
        reference: Reference allele(s)
        alternate: Alternate allele(s)

    Raises:
        ValueError: If any input is invalid
    """
    # Validate chromosome format
    if not CHROMOSOME_PATTERN.match(chromosome):
        raise ValueError(
            f"Invalid chromosome format: {chromosome}. "
            "Expected format: chr1-22, chrX, chrY, chrM, or chrMT"
        )

    # Validate position
    if position < 1:
        raise ValueError(f"Position must be >= 1, got {position}")

    # Validate nucleotides
    ref_upper = reference.upper()
    alt_upper = alternate.upper()

    if not ref_upper:
        raise ValueError("Reference allele cannot be empty")

    if not alt_upper:
        raise ValueError("Alternate allele cannot be empty")

    invalid_ref = set(ref_upper) - VALID_NUCLEOTIDES
    if invalid_ref:
        raise ValueError(
            f"Invalid nucleotides in reference allele: {invalid_ref}. "
            f"Only A, C, G, T are allowed"
        )

    invalid_alt = set(alt_upper) - VALID_NUCLEOTIDES
    if invalid_alt:
        raise ValueError(
            f"Invalid nucleotides in alternate allele: {invalid_alt}. "
            f"Only A, C, G, T are allowed"
        )

```

--------------------------------------------------------------------------------
/docs/backend-services-reference/02-biothings-suite.md:
--------------------------------------------------------------------------------

```markdown
# BioThings Suite API Reference

The BioThings Suite provides unified access to biomedical annotations across genes, variants, diseases, and drugs through a consistent API interface.

## Usage Examples

For practical examples using the BioThings APIs, see:

- [How to Find Trials with NCI and BioThings](../how-to-guides/02-find-trials-with-nci-and-biothings.md#biothings-integration-for-enhanced-search)
- [Get Comprehensive Variant Annotations](../how-to-guides/03-get-comprehensive-variant-annotations.md#integration-with-other-biomcp-tools)

## Overview

BioMCP integrates with four BioThings APIs:

- **MyGene.info**: Gene annotations and functional information
- **MyVariant.info**: Genetic variant annotations and clinical significance
- **MyDisease.info**: Disease ontology and terminology mappings
- **MyChem.info**: Drug/chemical properties and mechanisms

All APIs share:

- RESTful JSON interface
- No authentication required
- Elasticsearch-based queries
- Comprehensive data aggregation

## MyGene.info

### Base URL

`https://mygene.info/v1/`

### Key Endpoints

#### Gene Query

```
GET /query?q={query}
```

**Parameters:**

- `q`: Query string (gene symbol, name, or ID)
- `fields`: Specific fields to return
- `species`: Limit to species (default: human, mouse, rat)
- `size`: Number of results (default: 10)

**Example:**

```bash
curl "https://mygene.info/v1/query?q=BRAF&fields=symbol,name,summary,type_of_gene"
```

#### Gene Annotation

```
GET /gene/{geneid}
```

**Gene ID formats:**

- Entrez Gene ID: `673`
- Ensembl ID: `ENSG00000157764`
- Gene Symbol: `BRAF`

**Example:**

```bash
curl "https://mygene.info/v1/gene/673?fields=symbol,name,summary,genomic_pos,pathway,go"
```

### Important Fields

| Field         | Description            | Example                                 |
| ------------- | ---------------------- | --------------------------------------- |
| `symbol`      | Official gene symbol   | "BRAF"                                  |
| `name`        | Full gene name         | "B-Raf proto-oncogene"                  |
| `entrezgene`  | NCBI Entrez ID         | 673                                     |
| `summary`     | Functional description | "This gene encodes..."                  |
| `genomic_pos` | Chromosomal location   | {"chr": "7", "start": 140433812}        |
| `pathway`     | Pathway memberships    | {"kegg": [...], "reactome": [...]}      |
| `go`          | Gene Ontology terms    | {"BP": [...], "MF": [...], "CC": [...]} |

## MyVariant.info

### Base URL

`https://myvariant.info/v1/`

### Key Endpoints

#### Variant Query

```
GET /query?q={query}
```

**Query syntax:**

- Gene + variant: `dbnsfp.genename:BRAF AND dbnsfp.hgvsp:p.V600E`
- rsID: `dbsnp.rsid:rs121913529`
- Genomic: `_id:chr7:g.140453136A>T`

**Example:**

```bash
curl "https://myvariant.info/v1/query?q=dbnsfp.genename:TP53&fields=_id,clinvar,gnomad_exome"
```

#### Variant Annotation

```
GET /variant/{variant_id}
```

**ID formats:**

- HGVS genomic: `chr7:g.140453136A>T`
- dbSNP: `rs121913529`

### Important Fields

| Field          | Description            | Example                                 |
| -------------- | ---------------------- | --------------------------------------- |
| `clinvar`      | Clinical significance  | {"clinical_significance": "Pathogenic"} |
| `dbsnp`        | dbSNP annotations      | {"rsid": "rs121913529"}                 |
| `cadd`         | CADD scores            | {"phred": 35}                           |
| `gnomad_exome` | Population frequency   | {"af": {"af": 0.00001}}                 |
| `dbnsfp`       | Functional predictions | {"polyphen2": "probably_damaging"}      |

### Query Filters

```python
# Clinical significance
q = "clinvar.clinical_significance:pathogenic"

# Frequency filters
q = "gnomad_exome.af.af:<0.01"  # Rare variants

# Gene-specific
q = "dbnsfp.genename:BRCA1 AND cadd.phred:>20"
```

## MyDisease.info

### Base URL

`https://mydisease.info/v1/`

### Key Endpoints

#### Disease Query

```
GET /query?q={query}
```

**Example:**

```bash
curl "https://mydisease.info/v1/query?q=melanoma&fields=mondo,disease_ontology,synonyms"
```

#### Disease Annotation

```
GET /disease/{disease_id}
```

**ID formats:**

- MONDO: `MONDO:0007254`
- DOID: `DOID:1909`
- OMIM: `OMIM:155600`

### Important Fields

| Field              | Description       | Example                                      |
| ------------------ | ----------------- | -------------------------------------------- |
| `mondo`            | MONDO ontology    | {"id": "MONDO:0007254", "label": "melanoma"} |
| `disease_ontology` | Disease Ontology  | {"id": "DOID:1909"}                          |
| `synonyms`         | Alternative names | ["malignant melanoma", "MM"]                 |
| `xrefs`            | Cross-references  | {"omim": ["155600"], "mesh": ["D008545"]}    |
| `phenotypes`       | HPO terms         | [{"hpo_id": "HP:0002861"}]                   |

## MyChem.info

### Base URL

`https://mychem.info/v1/`

### Key Endpoints

#### Drug Query

```
GET /query?q={query}
```

**Example:**

```bash
curl "https://mychem.info/v1/query?q=imatinib&fields=drugbank,chembl,chebi"
```

#### Drug Annotation

```
GET /drug/{drug_id}
```

**ID formats:**

- DrugBank: `DB00619`
- ChEMBL: `CHEMBL941`
- Name: `imatinib`

### Important Fields

| Field          | Description    | Example                                      |
| -------------- | -------------- | -------------------------------------------- |
| `drugbank`     | DrugBank data  | {"id": "DB00619", "name": "Imatinib"}        |
| `chembl`       | ChEMBL data    | {"molecule_chembl_id": "CHEMBL941"}          |
| `chebi`        | ChEBI ontology | {"id": "CHEBI:45783"}                        |
| `drugcentral`  | Indications    | {"indications": [...]}                       |
| `pharmacology` | Mechanism      | {"mechanism_of_action": "BCR-ABL inhibitor"} |

## Common Query Patterns

### 1. Gene to Variant Pipeline

```python
# Step 1: Get gene info
gene_response = requests.get(
    "https://mygene.info/v1/gene/BRAF",
    params={"fields": "symbol,genomic_pos"}
)

# Step 2: Find variants in gene
variant_response = requests.get(
    "https://myvariant.info/v1/query",
    params={
        "q": "dbnsfp.genename:BRAF",
        "fields": "clinvar.clinical_significance,gnomad_exome.af",
        "size": 100
    }
)
```

### 2. Disease Synonym Expansion

```python
# Get all synonyms for a disease
disease_response = requests.get(
    "https://mydisease.info/v1/query",
    params={
        "q": "melanoma",
        "fields": "mondo,synonyms,xrefs"
    }
)

# Extract all names
all_names = ["melanoma"]
for hit in disease_response.json()["hits"]:
    if "synonyms" in hit:
        all_names.extend(hit["synonyms"])
```

### 3. Drug Target Lookup

```python
# Find drugs targeting a gene
drug_response = requests.get(
    "https://mychem.info/v1/query",
    params={
        "q": "drugcentral.targets.gene_symbol:BRAF",
        "fields": "drugbank.name,chembl.pref_name",
        "size": 50
    }
)
```

## Rate Limits and Best Practices

### Rate Limits

- **Default**: 1,000 requests/hour per IP
- **Batch queries**: Up to 1,000 IDs per request
- **No authentication**: Public access

### Best Practices

#### 1. Use Field Filtering

```python
# Good - only request needed fields
params = {"fields": "symbol,name,summary"}

# Bad - returns all fields
params = {}
```

#### 2. Batch Requests

```python
# Good - single request for multiple genes
response = requests.post(
    "https://mygene.info/v1/gene",
    json={"ids": ["BRAF", "KRAS", "EGFR"]}
)

# Bad - multiple individual requests
for gene in ["BRAF", "KRAS", "EGFR"]:
    requests.get(f"https://mygene.info/v1/gene/{gene}")
```

#### 3. Handle Missing Data

```python
# Check for field existence
if "clinvar" in variant and "clinical_significance" in variant["clinvar"]:
    significance = variant["clinvar"]["clinical_significance"]
else:
    significance = "Not available"
```

## Error Handling

### Common Errors

#### 404 Not Found

```json
{
  "success": false,
  "error": "ID not found"
}
```

#### 400 Bad Request

```json
{
  "success": false,
  "error": "Invalid query syntax"
}
```

#### 429 Rate Limited

```json
{
  "success": false,
  "error": "Rate limit exceeded"
}
```

### Error Handling Code

```python
def query_biothings(api_url, query_params):
    try:
        response = requests.get(api_url, params=query_params)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as e:
        if e.response.status_code == 404:
            return {"error": "Not found", "query": query_params}
        elif e.response.status_code == 429:
            # Rate limited: wait a fixed 60 s, then retry (production code should cap retries and back off exponentially)
            time.sleep(60)
            return query_biothings(api_url, query_params)
        else:
            raise
```

## Data Sources

Each BioThings API aggregates data from multiple sources:

### MyGene.info Sources

- NCBI Entrez Gene
- Ensembl
- UniProt
- KEGG, Reactome, WikiPathways
- Gene Ontology

### MyVariant.info Sources

- dbSNP
- ClinVar
- gnomAD
- CADD
- PolyPhen-2, SIFT
- COSMIC

### MyDisease.info Sources

- MONDO
- Disease Ontology
- OMIM
- MeSH
- HPO

### MyChem.info Sources

- DrugBank
- ChEMBL
- ChEBI
- PubChem
- DrugCentral

## Advanced Features

### Full-Text Search

```python
# Search across all fields
params = {
    "q": "lung cancer EGFR",  # Searches all text fields
    "fields": "symbol,name,summary"
}
```

### Faceted Search

```python
# Get aggregations
params = {
    "q": "clinvar.clinical_significance:pathogenic",
    "facets": "dbnsfp.genename",
    "size": 0  # Only return facets
}
```

### Scrolling Large Results

```python
# For results > 10,000
params = {
    "q": "dbnsfp.genename:TP53",
    "fetch_all": True,
    "fields": "_id"
}
```

## Integration Tips

### 1. Caching Strategy

- Cache gene/drug/disease lookups (stable)
- Don't cache variant queries (frequently updated)
- Use ETags for conditional requests

### 2. Parallel Requests

```python
import asyncio
import aiohttp

async def fetch_all(session, urls):
    tasks = []
    for url in urls:
        tasks.append(session.get(url))
    return await asyncio.gather(*tasks)
```

### 3. Data Normalization

```python
def normalize_gene_symbol(symbol):
    # Query MyGene to get official symbol
    response = requests.get(
        f"https://mygene.info/v1/query?q={symbol}"
    )
    if response.json()["hits"]:
        return response.json()["hits"][0]["symbol"]
    return symbol
```

```

--------------------------------------------------------------------------------
/tests/tdd/test_biothings_integration.py:
--------------------------------------------------------------------------------

```python
"""Unit tests for BioThings API integration."""

from unittest.mock import AsyncMock, patch

import pytest

from biomcp.integrations import BioThingsClient, DiseaseInfo, GeneInfo


@pytest.fixture
def mock_http_client():
    """Patch ``http_client`` inside the BioThings client module.

    Yields the replacement mock so a test can assign ``request_api`` on it;
    the patch is undone when the fixture is torn down.
    """
    target = "biomcp.integrations.biothings_client.http_client"
    with patch(target) as patched_http_client:
        yield patched_http_client


@pytest.fixture
def biothings_client():
    """Provide a newly constructed ``BioThingsClient`` to each test."""
    client = BioThingsClient()
    return client


class TestGeneInfo:
    """Gene lookups through ``BioThingsClient`` with the HTTP layer mocked."""

    @pytest.mark.asyncio
    async def test_get_gene_by_symbol(
        self, biothings_client, mock_http_client
    ):
        """A symbol lookup yields a populated ``GeneInfo``."""
        query_payload = {
            "hits": [
                {
                    "_id": "7157",
                    "symbol": "TP53",
                    "name": "tumor protein p53",
                    "taxid": 9606,
                }
            ]
        }
        record_payload = {
            "_id": "7157",
            "symbol": "TP53",
            "name": "tumor protein p53",
            "summary": "This gene encodes a tumor suppressor protein...",
            "alias": ["p53", "LFS1"],
            "type_of_gene": "protein-coding",
            "entrezgene": 7157,
        }
        # side_effect hands out one entry per call: the query response
        # first, then the get response
        mock_http_client.request_api = AsyncMock(
            side_effect=[(query_payload, None), (record_payload, None)]
        )

        gene = await biothings_client.get_gene_info("TP53")

        assert gene is not None
        assert isinstance(gene, GeneInfo)
        assert gene.symbol == "TP53"
        assert gene.name == "tumor protein p53"
        assert gene.gene_id == "7157"
        assert "p53" in gene.alias

    @pytest.mark.asyncio
    async def test_get_gene_by_id(self, biothings_client, mock_http_client):
        """An Entrez ID lookup is served from a single mocked get response."""
        record_payload = {
            "_id": "7157",
            "symbol": "TP53",
            "name": "tumor protein p53",
            "summary": "This gene encodes a tumor suppressor protein...",
        }
        mock_http_client.request_api = AsyncMock(
            return_value=(record_payload, None)
        )

        gene = await biothings_client.get_gene_info("7157")

        assert gene is not None
        assert gene.symbol == "TP53"
        assert gene.gene_id == "7157"

    @pytest.mark.asyncio
    async def test_gene_not_found(self, biothings_client, mock_http_client):
        """An empty hit list results in ``None``."""
        no_hits = {"hits": []}
        mock_http_client.request_api = AsyncMock(return_value=(no_hits, None))

        gene = await biothings_client.get_gene_info("INVALID_GENE")
        assert gene is None

    @pytest.mark.asyncio
    async def test_batch_get_genes(self, biothings_client, mock_http_client):
        """A batch request returns one result per record, in order."""
        batch_payload = [
            {
                "_id": "7157",
                "symbol": "TP53",
                "name": "tumor protein p53",
            },
            {
                "_id": "673",
                "symbol": "BRAF",
                "name": "B-Raf proto-oncogene",
            },
        ]
        mock_http_client.request_api = AsyncMock(
            return_value=(batch_payload, None)
        )

        genes = await biothings_client.batch_get_genes(["TP53", "BRAF"])

        assert len(genes) == 2
        assert genes[0].symbol == "TP53"
        assert genes[1].symbol == "BRAF"


class TestDiseaseInfo:
    """Disease lookups through ``BioThingsClient`` with the HTTP layer mocked."""

    @pytest.mark.asyncio
    async def test_get_disease_by_name(
        self, biothings_client, mock_http_client
    ):
        """A name lookup yields a ``DiseaseInfo`` carrying exact synonyms."""
        query_payload = {
            "hits": [
                {
                    "_id": "MONDO:0007959",
                    "name": "melanoma",
                    "mondo": {"mondo": "MONDO:0007959"},
                }
            ]
        }
        record_payload = {
            "_id": "MONDO:0007959",
            "name": "melanoma",
            "mondo": {
                "definition": "A malignant neoplasm composed of melanocytes.",
                "synonym": {
                    "exact": [
                        "malignant melanoma",
                        "naevocarcinoma",
                    ]
                },
            },
        }
        # Query response is consumed first, then the get response
        mock_http_client.request_api = AsyncMock(
            side_effect=[(query_payload, None), (record_payload, None)]
        )

        disease = await biothings_client.get_disease_info("melanoma")

        assert disease is not None
        assert isinstance(disease, DiseaseInfo)
        assert disease.name == "melanoma"
        assert disease.disease_id == "MONDO:0007959"
        assert "malignant melanoma" in disease.synonyms

    @pytest.mark.asyncio
    async def test_get_disease_by_id(self, biothings_client, mock_http_client):
        """A MONDO ID lookup is served from a single mocked response."""
        record_payload = {
            "_id": "MONDO:0016575",
            "name": "GIST",
            "mondo": {
                "definition": "Gastrointestinal stromal tumor...",
            },
        }
        mock_http_client.request_api = AsyncMock(
            return_value=(record_payload, None)
        )

        disease = await biothings_client.get_disease_info("MONDO:0016575")

        assert disease is not None
        assert disease.name == "GIST"
        assert disease.disease_id == "MONDO:0016575"

    @pytest.mark.asyncio
    async def test_get_disease_synonyms(
        self, biothings_client, mock_http_client
    ):
        """Synonyms used for query expansion include the name and stay capped."""
        query_payload = {
            "hits": [
                {
                    "_id": "MONDO:0018076",
                    "name": "GIST",
                }
            ]
        }
        record_payload = {
            "_id": "MONDO:0018076",
            "name": "gastrointestinal stromal tumor",
            "mondo": {
                "synonym": {
                    "exact": [
                        "GIST",
                        "gastrointestinal stromal tumour",
                        "GI stromal tumor",
                    ]
                }
            },
        }
        mock_http_client.request_api = AsyncMock(
            side_effect=[(query_payload, None), (record_payload, None)]
        )

        expanded = await biothings_client.get_disease_synonyms("GIST")

        assert "GIST" in expanded
        assert "gastrointestinal stromal tumor" in expanded
        assert len(expanded) <= 5  # Limited to 5


class TestTrialSynonymExpansion:
    """Disease synonym expansion when converting trial queries."""

    @pytest.mark.asyncio
    async def test_trial_search_with_synonym_expansion(self):
        """With expansion on, the condition term includes the synonyms."""
        from biomcp.trials.search import TrialQuery, convert_query

        gist_synonyms = [
            "GIST",
            "gastrointestinal stromal tumor",
            "GI stromal tumor",
        ]

        with patch("biomcp.trials.search.BioThingsClient") as client_cls:
            # The instance the code under test creates returns fixed synonyms
            client_cls.return_value.get_disease_synonyms = AsyncMock(
                return_value=gist_synonyms
            )

            query = TrialQuery(
                conditions=["GIST"],
                expand_synonyms=True,
            )

            converted = await convert_query(query)

            # The first condition value carries the original and a synonym
            assert "query.cond" in converted
            expanded_condition = converted["query.cond"][0]
            assert "GIST" in expanded_condition
            assert "gastrointestinal stromal tumor" in expanded_condition

    @pytest.mark.asyncio
    async def test_trial_search_without_synonym_expansion(self):
        """With expansion off, the condition is passed through unchanged."""
        from biomcp.trials.search import TrialQuery, convert_query

        query = TrialQuery(
            conditions=["GIST"],
            expand_synonyms=False,
        )

        converted = await convert_query(query)

        assert "query.cond" in converted
        assert converted["query.cond"] == ["GIST"]


class TestErrorHandling:
    """Failure paths of the BioThings integration."""

    @pytest.mark.asyncio
    async def test_api_error_handling(
        self, biothings_client, mock_http_client
    ):
        """A request error from the HTTP layer results in ``None``."""
        from biomcp.http_client import RequestError

        server_error = RequestError(code=500, message="Internal server error")
        mock_http_client.request_api = AsyncMock(
            return_value=(None, server_error)
        )

        gene = await biothings_client.get_gene_info("TP53")
        assert gene is None

    @pytest.mark.asyncio
    async def test_invalid_response_format(
        self, biothings_client, mock_http_client
    ):
        """A payload without the expected keys results in ``None``."""
        unexpected_payload = {"invalid": "response"}
        mock_http_client.request_api = AsyncMock(
            return_value=(unexpected_payload, None)
        )

        gene = await biothings_client.get_gene_info("TP53")
        assert gene is None

```

--------------------------------------------------------------------------------
/src/biomcp/http_client.py:
--------------------------------------------------------------------------------

```python
import csv
import json
import os
import ssl
from io import StringIO
from ssl import PROTOCOL_TLS_CLIENT, SSLContext, TLSVersion
from typing import Literal, TypeVar

import certifi
from diskcache import Cache
from platformdirs import user_cache_dir
from pydantic import BaseModel

from .circuit_breaker import CircuitBreakerConfig, circuit_breaker
from .constants import (
    AGGRESSIVE_INITIAL_RETRY_DELAY,
    AGGRESSIVE_MAX_RETRY_ATTEMPTS,
    AGGRESSIVE_MAX_RETRY_DELAY,
    DEFAULT_CACHE_TIMEOUT,
    DEFAULT_FAILURE_THRESHOLD,
    DEFAULT_RECOVERY_TIMEOUT,
    DEFAULT_SUCCESS_THRESHOLD,
)
from .http_client_simple import execute_http_request
from .metrics import Timer
from .rate_limiter import domain_limiter
from .retry import (
    RetryableHTTPError,
    RetryConfig,
    is_retryable_status,
    with_retry,
)
from .utils.endpoint_registry import get_registry

# Type variable for the pydantic model class a caller wants responses parsed into.
T = TypeVar("T", bound=BaseModel)


class RequestError(BaseModel):
    """Error value returned as the second element of a ``(result, error)`` tuple.

    Functions in this module report failures by returning an instance of this
    model instead of a parsed result.
    """

    # Status code of the failure: the HTTP status of a non-200 response, or a
    # locally assigned code (500 for parse failures, 503 for offline mode).
    code: int
    # Human-readable description; for non-200 responses this is the raw body.
    message: str


_cache: Cache | None = None


def get_cache() -> Cache:
    """Return the module-wide ``Cache`` instance, creating it on first use.

    The cache lives in the ``http_cache`` sub-directory of the ``biomcp``
    user cache directory.
    """
    global _cache
    if _cache is not None:
        return _cache

    cache_dir = os.path.join(user_cache_dir("biomcp"), "http_cache")
    _cache = Cache(cache_dir)
    return _cache


def generate_cache_key(method: str, url: str, params: dict) -> str:
    """Build a cache key that is stable across processes and restarts.

    The key feeds a persistent on-disk cache, so it must be identical every
    time the same request is made. Python's built-in ``hash()`` is unsuitable
    for that: string hashes are salted per interpreter process, so a key
    computed in one run would never match the key computed in the next.
    A SHA-256 digest of the canonical request description is used instead.

    Args:
        method: HTTP method; upper-cased before use so case does not matter.
        url: Target URL.
        params: Request parameters; serialized as JSON with sorted keys so
            that key order does not affect the result.

    Returns:
        ``"METHOD:url"`` when ``params`` is empty, otherwise the hexadecimal
        SHA-256 digest of ``"METHOD:url:<canonical params JSON>"``.
    """
    # Local import keeps this block self-contained; hashlib is stdlib.
    import hashlib

    # Requests without parameters get a readable key.
    if not params:
        return f"{method.upper()}:{url}"

    # Canonical serialization: sorted keys and compact separators.
    params_str = json.dumps(params, sort_keys=True, separators=(",", ":"))
    key_source = f"{method.upper()}:{url}:{params_str}"

    return hashlib.sha256(key_source.encode("utf-8")).hexdigest()


def cache_response(cache_key: str, content: str, ttl: int):
    """Store ``content`` under ``cache_key`` in the shared cache.

    A ``ttl`` of -1 is passed to the cache as ``expire=None``; any other
    value is passed through unchanged as the expiry.
    """
    expiry = ttl if ttl != -1 else None
    get_cache().set(cache_key, content, expire=expiry)


def get_cached_response(cache_key: str) -> str | None:
    """Return whatever the shared cache holds for ``cache_key``."""
    return get_cache().get(cache_key)


def get_ssl_context(tls_version: TLSVersion) -> SSLContext:
    """Build a client ``SSLContext`` restricted to a single TLS version.

    Both the minimum and maximum protocol versions are set to
    ``tls_version``, and the certifi CA bundle is loaded for verification.
    """
    pinned_context = SSLContext(PROTOCOL_TLS_CLIENT)
    pinned_context.minimum_version = tls_version
    pinned_context.maximum_version = tls_version

    ca_bundle_path = certifi.where()
    pinned_context.load_verify_locations(cafile=ca_bundle_path)
    return pinned_context


async def call_http(
    method: str,
    url: str,
    params: dict,
    verify: ssl.SSLContext | str | bool = True,
    retry_config: RetryConfig | None = None,
    headers: dict[str, str] | None = None,
) -> tuple[int, str]:
    """Send one HTTP request, optionally wrapped in retry handling.

    Args:
        method: HTTP method (GET or POST)
        url: Target URL
        params: Request parameters
        verify: SSL verification settings
        retry_config: Retry configuration (if None, no retry)
        headers: Optional headers forwarded to ``execute_http_request``

    Returns:
        Tuple of (status_code, response_text). When retrying is enabled and
        the retry wrapper ends by raising ``RetryableHTTPError``, that error's
        status code and message are returned instead of propagating it.
    """

    async def _make_request() -> tuple[int, str]:
        # The host name labels both the circuit breaker and the timer tags.
        from urllib.parse import urlparse

        host = urlparse(url).hostname or "unknown"

        breaker_config = CircuitBreakerConfig(
            failure_threshold=DEFAULT_FAILURE_THRESHOLD,
            recovery_timeout=DEFAULT_RECOVERY_TIMEOUT,
            success_threshold=DEFAULT_SUCCESS_THRESHOLD,
            expected_exception=(ConnectionError, TimeoutError),
        )

        @circuit_breaker(f"http_{host}", breaker_config)
        async def _execute_with_breaker():
            timer_tags = {"method": method, "host": host}
            async with Timer("http_request", tags=timer_tags):
                return await execute_http_request(
                    method, url, params, verify, headers
                )

        status_code, body = await _execute_with_breaker()

        # A retryable status is signalled by raising so the retry wrapper sees it.
        if retry_config and is_retryable_status(status_code, retry_config):
            raise RetryableHTTPError(status_code, body)

        return status_code, body

    # Without a retry configuration the request is attempted exactly once.
    if not retry_config:
        return await _make_request()

    retrying_request = with_retry(retry_config)(_make_request)
    try:
        return await retrying_request()
    except RetryableHTTPError as retry_error:
        # Hand the last HTTP status/body back to the caller as a normal result;
        # every other exception propagates untouched.
        return retry_error.status_code, retry_error.message


def _handle_offline_mode(
    url: str,
    method: str,
    request: BaseModel | dict,
    cache_ttl: int,
    response_model_type: type[T] | None,
) -> tuple[T | None, RequestError | None] | None:
    """Serve a request from cache when offline mode is enabled.

    Offline mode is active when the ``BIOMCP_OFFLINE`` environment variable
    is ``true``, ``1`` or ``yes`` (case-insensitive).

    Args:
        url: Target URL of the request.
        method: HTTP method of the request.
        request: Request payload (pydantic model or dict).
        cache_ttl: Cache lifetime requested by the caller; 0 disables caching.
        response_model_type: Model to parse cached content into, if any.

    Returns:
        ``None`` when offline mode is not active (the caller should proceed
        normally). Otherwise a ``(result, error)`` tuple: the parsed cached
        response on a cache hit, or ``(None, RequestError(code=503, ...))``.
    """
    if os.getenv("BIOMCP_OFFLINE", "").lower() not in ("true", "1", "yes"):
        return None

    # In offline mode, only return cached responses. Only cache_ttl == 0
    # disables caching; -1 means "never expire" and must still be looked up.
    if cache_ttl != 0:
        # Build the key from the same prepared params the online path uses
        # (with "_headers" stripped), otherwise the lookup can never match.
        params, _ = _prepare_request_params(request)
        cache_key = generate_cache_key(method, url, params)
        cached_content = get_cached_response(cache_key)
        if cached_content:
            return parse_response(200, cached_content, response_model_type)

    return None, RequestError(
        code=503,
        message=f"Offline mode enabled (BIOMCP_OFFLINE=true). Cannot fetch from {url}",
    )


def _validate_endpoint(endpoint_key: str | None) -> None:
    """Raise ``ValueError`` if a supplied endpoint key is not in the registry.

    A missing or empty key is accepted without consulting the registry.
    """
    if not endpoint_key:
        return

    known_endpoints = get_registry().get_all_endpoints()
    if endpoint_key in known_endpoints:
        return

    raise ValueError(
        f"Unknown endpoint key: {endpoint_key}. Please register in endpoint_registry.py"
    )


def _prepare_request_params(
    request: BaseModel | dict,
) -> tuple[dict, dict | None]:
    """Turn a request into a params dict plus optional headers.

    Models are dumped (aliases applied, ``None`` values dropped); dicts are
    shallow-copied so the caller's dict is not modified. A ``"_headers"``
    entry, if present, is removed from the params and decoded from JSON;
    if decoding fails the headers are dropped.
    """
    if isinstance(request, BaseModel):
        params = request.model_dump(exclude_none=True, by_alias=True)
    elif isinstance(request, dict):
        params = request.copy()
    else:
        params = request

    headers = None
    if isinstance(params, dict) and "_headers" in params:
        # The entry is removed even when it turns out not to be valid JSON.
        raw_headers = params.pop("_headers")
        try:
            headers = json.loads(raw_headers)
        except (json.JSONDecodeError, TypeError):
            headers = None  # Ignore invalid headers

    return params, headers


def _get_retry_config(
    enable_retry: bool, domain: str | None
) -> RetryConfig | None:
    """Pick the retry configuration for a request.

    Returns ``None`` when retrying is disabled, the aggressive settings for
    the ``clinicaltrials``, ``pubmed`` and ``myvariant`` domains, and the
    default ``RetryConfig`` otherwise.
    """
    if not enable_retry:
        return None

    aggressive_domains = ("clinicaltrials", "pubmed", "myvariant")
    if domain not in aggressive_domains:
        return RetryConfig()  # Default settings

    return RetryConfig(
        max_attempts=AGGRESSIVE_MAX_RETRY_ATTEMPTS,
        initial_delay=AGGRESSIVE_INITIAL_RETRY_DELAY,
        max_delay=AGGRESSIVE_MAX_RETRY_DELAY,
    )


async def request_api(
    url: str,
    request: BaseModel | dict,
    response_model_type: type[T] | None = None,
    method: Literal["GET", "POST"] = "GET",
    cache_ttl: int = DEFAULT_CACHE_TIMEOUT,
    tls_version: TLSVersion | None = None,
    domain: str | None = None,
    enable_retry: bool = True,
    endpoint_key: str | None = None,
) -> tuple[T | None, RequestError | None]:
    """Call an API endpoint with caching, rate limiting and optional retry.

    Steps, in order: offline-mode handling, endpoint-key validation, rate
    limiting (when ``domain`` is given), request preparation, cache lookup
    (unless ``cache_ttl`` is 0), the HTTP call, and caching of 200 responses.

    Args:
        url: Target URL.
        request: Request payload (pydantic model or dict).
        response_model_type: Model to parse the response into, if any.
        method: HTTP method.
        cache_ttl: Cache lifetime; 0 bypasses the cache entirely.
        tls_version: When given, requests use an SSL context pinned to it.
        domain: Name used for rate limiting and retry-profile selection.
        enable_retry: Whether a retry configuration is applied.
        endpoint_key: Optional registry key to validate before the call.

    Returns:
        ``(result, error)`` as produced by ``parse_response`` or by the
        offline-mode handler.

    Raises:
        ValueError: From endpoint validation when ``endpoint_key`` is unknown.
    """
    # Offline mode answers the request by itself when active.
    offline_outcome = _handle_offline_mode(
        url, method, request, cache_ttl, response_model_type
    )
    if offline_outcome is not None:
        return offline_outcome

    _validate_endpoint(endpoint_key)

    # Enter and leave the limiter context before sending anything.
    if domain:
        async with domain_limiter.limit(domain):
            pass  # Rate limit acquired

    verify = True if not tls_version else get_ssl_context(tls_version)
    params, headers = _prepare_request_params(request)
    retry_config = _get_retry_config(enable_retry, domain)

    # A TTL of 0 disables both the cache lookup and the cache write.
    caching_enabled = cache_ttl != 0
    cache_key = None
    if caching_enabled:
        cache_key = generate_cache_key(method, url, params)
        cached_body = get_cached_response(cache_key)
        if cached_body:
            return parse_response(200, cached_body, response_model_type)

    status, content = await call_http(
        method,
        url,
        params,
        verify=verify,
        retry_config=retry_config,
        headers=headers,
    )
    outcome = parse_response(status, content, response_model_type)

    # Only successful responses are stored.
    if caching_enabled and status == 200:
        cache_response(cache_key, content, cache_ttl)

    return outcome


def parse_response(
    status_code: int,
    content: str,
    response_model_type: type[T] | None = None,
) -> tuple[T | None, RequestError | None]:
    """Convert a raw HTTP response into a ``(result, error)`` tuple.

    Args:
        status_code: HTTP status of the response.
        content: Response body as text.
        response_model_type: Pydantic model to validate the body against.
            When ``None`` the body is decoded heuristically: JSON if it
            starts (after leading whitespace) with ``{`` or ``[``, CSV rows
            (list of dicts) if it contains a comma, otherwise
            ``{"text": content}``.

    Returns:
        ``(parsed, None)`` on success. Note that without a model the parsed
        value is a plain dict or list rather than a model instance.
        ``(None, RequestError)`` for a non-200 status (code is the status,
        message is the body), for an empty body, or for a parse failure
        (code 500 in both of the latter cases).
    """
    if status_code != 200:
        return None, RequestError(code=status_code, message=content)

    # Handle empty content
    if not content or content.strip() == "":
        return None, RequestError(
            code=500,
            message="Empty response received from API",
        )

    try:
        if response_model_type is None:
            # Sniff on the left-stripped body: JSON preceded by whitespace or
            # a newline is still JSON and must not fall through to the
            # CSV/text branches. json.loads itself accepts leading whitespace.
            sniffed = content.lstrip()
            if sniffed.startswith(("{", "[")):
                response_dict = json.loads(content)
            elif "," in content:
                io = StringIO(content)
                response_dict = list(csv.DictReader(io))
            else:
                response_dict = {"text": content}
            return response_dict, None

        parsed: T = response_model_type.model_validate_json(content)
        return parsed, None

    except json.JSONDecodeError as exc:
        # Provide more detailed error message for JSON parsing issues
        return None, RequestError(
            code=500,
            message=f"Invalid JSON response: {exc}. Content preview: {content[:100]}...",
        )
    except Exception as exc:
        return None, RequestError(
            code=500,
            message=f"Failed to parse response: {exc}",
        )

```

--------------------------------------------------------------------------------
/src/biomcp/diseases/search.py:
--------------------------------------------------------------------------------

```python
"""Search functionality for diseases via NCI CTS API."""

import logging
from typing import Any

from ..constants import NCI_DISEASES_URL
from ..integrations.cts_api import CTSAPIError, make_cts_request
from ..utils import parse_or_query

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


def _build_disease_params(
    name: str | None,
    disease_type: str | None,
    category: str | None,
    codes: list[str] | None,
    parent_ids: list[str] | None,
    ancestor_ids: list[str] | None,
    include: list[str] | None,
    sort: str | None,
    order: str | None,
    page_size: int,
) -> dict[str, Any]:
    """Build query parameters for disease search."""
    params: dict[str, Any] = {"size": page_size}

    if name:
        params["name"] = name

    # Use 'type' parameter instead of 'category'
    if disease_type:
        params["type"] = disease_type
    elif category:  # Backward compatibility
        params["type"] = category

    if codes:
        params["codes"] = ",".join(codes) if isinstance(codes, list) else codes

    if parent_ids:
        params["parent_ids"] = (
            ",".join(parent_ids)
            if isinstance(parent_ids, list)
            else parent_ids
        )

    if ancestor_ids:
        params["ancestor_ids"] = (
            ",".join(ancestor_ids)
            if isinstance(ancestor_ids, list)
            else ancestor_ids
        )

    if include:
        params["include"] = (
            ",".join(include) if isinstance(include, list) else include
        )

    if sort:
        params["sort"] = sort
        if order:
            params["order"] = order.lower()

    return params


async def search_diseases(
    name: str | None = None,
    include_synonyms: bool = True,  # Deprecated - kept for backward compatibility
    category: str | None = None,
    disease_type: str | None = None,
    codes: list[str] | None = None,
    parent_ids: list[str] | None = None,
    ancestor_ids: list[str] | None = None,
    include: list[str] | None = None,
    sort: str | None = None,
    order: str | None = None,
    page_size: int = 20,
    page: int = 1,
    api_key: str | None = None,
) -> dict[str, Any]:
    """
    Query the NCI CTS disease vocabulary.

    Gives access to NCI's controlled vocabulary of cancer conditions used in
    clinical trials, including official terms and synonyms.

    Args:
        name: Disease name to search for (partial match, searches synonyms automatically)
        include_synonyms: [Deprecated] Ignored by this function; it is not sent to the API
        category: Disease category/type filter (deprecated - use disease_type)
        disease_type: Type of disease (e.g., 'maintype', 'subtype', 'stage')
        codes: List of disease codes (e.g., ['C3868', 'C5806'])
        parent_ids: List of parent disease IDs
        ancestor_ids: List of ancestor disease IDs
        include: Fields to include in response
        sort: Sort field
        order: Sort order ('asc' or 'desc')
        page_size: Number of results per page
        page: Page number; echoed in the returned dictionary but not included
            in the request parameters built here
        api_key: Optional API key (if not provided, uses NCI_API_KEY env var)

    Returns:
        Dictionary with:
        - diseases: List of disease records
        - total: Total reported by the API, or the number of records returned
        - page: The ``page`` argument
        - page_size: The ``page_size`` argument

    Raises:
        CTSAPIError: If the API request fails; any other exception is
            logged and re-raised wrapped in CTSAPIError
    """
    params = _build_disease_params(
        name,
        disease_type,
        category,
        codes,
        parent_ids,
        ancestor_ids,
        include,
        sort,
        order,
        page_size,
    )

    try:
        payload = await make_cts_request(
            url=NCI_DISEASES_URL,
            params=params,
            api_key=api_key,
        )

        # Records are read from "data", falling back to "diseases".
        legacy_records = payload.get("diseases", [])
        records = payload.get("data", legacy_records)

        # Fall back to the number of returned records when no total is given.
        returned_count = len(records)
        reported_total = payload.get("total", returned_count)

        return {
            "diseases": records,
            "total": reported_total,
            "page": page,
            "page_size": page_size,
        }

    except CTSAPIError:
        # Already the documented error type; pass it through unchanged.
        raise
    except Exception as e:
        logger.error(f"Failed to search diseases: {e}")
        raise CTSAPIError(f"Disease search failed: {e!s}") from e


async def get_disease_by_id(
    disease_id: str,
    api_key: str | None = None,
) -> dict[str, Any]:
    """
    Fetch a single disease record from NCI CTS by its ID.

    Args:
        disease_id: Disease ID from NCI CTS
        api_key: Optional API key (if not provided, uses NCI_API_KEY env var)

    Returns:
        The value under "data" if the response has that key, else the value
        under "disease", else the whole response

    Raises:
        CTSAPIError: If the API request fails; any other exception is
            logged and re-raised wrapped in CTSAPIError
    """
    try:
        payload = await make_cts_request(
            url=f"{NCI_DISEASES_URL}/{disease_id}",
            api_key=api_key,
        )

        # Unwrap the record from the first wrapper key that is present.
        for wrapper_key in ("data", "disease"):
            if wrapper_key in payload:
                return payload[wrapper_key]
        return payload

    except CTSAPIError:
        raise
    except Exception as e:
        logger.error(f"Failed to get disease {disease_id}: {e}")
        raise CTSAPIError(f"Failed to retrieve disease: {e!s}") from e


def _format_disease_synonyms(synonyms: Any) -> list[str]:
    """Format disease synonyms section."""
    lines: list[str] = []
    if not synonyms:
        return lines

    if isinstance(synonyms, list) and synonyms:
        lines.append("- **Synonyms**:")
        for syn in synonyms[:5]:  # Show up to 5 synonyms
            lines.append(f"  - {syn}")
        if len(synonyms) > 5:
            lines.append(f"  *(and {len(synonyms) - 5} more)*")
    elif isinstance(synonyms, str):
        lines.append(f"- **Synonyms**: {synonyms}")

    return lines


def _format_disease_codes(codes: Any) -> list[str]:
    """Format disease code mappings."""
    if not codes or not isinstance(codes, dict):
        return []

    code_items = []
    for system, code in codes.items():
        code_items.append(f"{system}: {code}")

    if code_items:
        return [f"- **Codes**: {', '.join(code_items)}"]
    return []


def _format_single_disease(disease: dict[str, Any]) -> list[str]:
    """Render one disease record as a block of markdown lines.

    The block has a heading with the name, the ID, an optional category,
    the synonyms and code mappings, and a trailing empty line.
    """
    # Each field accepts two alternative keys, with a placeholder as last resort.
    fallback_id = disease.get("disease_id", "Unknown")
    identifier = disease.get("id", fallback_id)

    fallback_name = disease.get("preferred_name", "Unknown Disease")
    title = disease.get("name", fallback_name)

    fallback_category = disease.get("type", "")
    category = disease.get("category", fallback_category)

    rendered = [f"### {title}", f"- **ID**: {identifier}"]

    if category:
        rendered.append(f"- **Category**: {category}")

    rendered += _format_disease_synonyms(disease.get("synonyms", []))
    rendered += _format_disease_codes(disease.get("codes"))

    # Blank separator line after each record.
    rendered.append("")
    return rendered


def format_disease_results(results: dict[str, Any]) -> str:
    """
    Render disease search results as a markdown document.

    Args:
        results: Search results dictionary; its "diseases" and "total"
            entries are read

    Returns:
        Markdown string, or a fixed "no results" message when the
        "diseases" entry is missing or empty
    """
    records = results.get("diseases", [])
    if not records:
        return "No diseases found matching the search criteria."

    total = results.get("total", 0)
    sections = [f"## Disease Search Results ({total} found)", ""]

    for record in records:
        sections += _format_single_disease(record)

    return "\n".join(sections)


async def search_diseases_with_or(
    name_query: str,
    include_synonyms: bool = True,
    category: str | None = None,
    disease_type: str | None = None,
    codes: list[str] | None = None,
    parent_ids: list[str] | None = None,
    ancestor_ids: list[str] | None = None,
    include: list[str] | None = None,
    sort: str | None = None,
    order: str | None = None,
    page_size: int = 20,
    page: int = 1,
    api_key: str | None = None,
) -> dict[str, Any]:
    """
    Search for diseases, supporting OR between name terms.

    A query such as "melanoma OR lung cancer" is split into terms; each term
    is searched separately and the results are merged. A term whose search
    fails is logged and skipped.

    Args:
        name_query: Name query that may contain OR operators
        Other args same as search_diseases

    Returns:
        Dictionary with the merged, de-duplicated (by ID), name-sorted and
        paginated diseases, plus the terms searched and the sum of the
        per-term totals. Records without an ID are not included.
    """
    # Only " OR " and " or " are treated as the operator.
    is_or_query = " OR " in name_query or " or " in name_query
    if is_or_query:
        search_terms = parse_or_query(name_query)
        logger.info(f"Parsed OR query into terms: {search_terms}")
    else:
        search_terms = [name_query]

    # Filters that are identical for every per-term search.
    shared_filters = {
        "include_synonyms": include_synonyms,
        "category": category,
        "disease_type": disease_type,
        "codes": codes,
        "parent_ids": parent_ids,
        "ancestor_ids": ancestor_ids,
        "include": include,
        "sort": sort,
        "order": order,
        "page_size": page_size,
        "page": page,
        "api_key": api_key,
    }

    diseases_by_id = {}
    total_found = 0

    for term in search_terms:
        logger.info(f"Searching diseases for term: {term}")
        try:
            term_results = await search_diseases(name=term, **shared_filters)

            # First occurrence of each ID wins.
            for record in term_results.get("diseases", []):
                record_id = record.get("id", record.get("disease_id"))
                if record_id:
                    diseases_by_id.setdefault(record_id, record)

            total_found += term_results.get("total", 0)

        except Exception as e:
            logger.warning(f"Failed to search for term '{term}': {e}")
            # Continue with other terms

    def _sort_name(record):
        # Case-insensitive ordering on name, falling back to preferred_name.
        return record.get("name", record.get("preferred_name", "")).lower()

    unique_diseases = sorted(diseases_by_id.values(), key=_sort_name)

    # Pagination is applied to the combined list.
    first = (page - 1) * page_size
    page_slice = unique_diseases[first : first + page_size]

    return {
        "diseases": page_slice,
        "total": len(unique_diseases),
        "page": page,
        "page_size": page_size,
        "search_terms": search_terms,  # Include what we searched for
        "total_found_across_terms": total_found,  # Total before deduplication
    }

```

--------------------------------------------------------------------------------
/docs/tutorials/openfda-integration.md:
--------------------------------------------------------------------------------

```markdown
# OpenFDA Integration Guide

## Overview

BioMCP now integrates with the FDA's openFDA API to provide access to critical drug safety and regulatory information. This integration adds three major data sources to BioMCP's capabilities:

1. **Drug Adverse Events (FAERS)** - FDA Adverse Event Reporting System data
2. **Drug Labels (SPL)** - Official FDA drug product labeling
3. **Device Events (MAUDE)** - Medical device adverse event reports

This guide covers how to use these new tools effectively for precision oncology research.

## Quick Start

### Installation & Setup

The OpenFDA integration is included in the standard BioMCP installation:

```bash
# Install BioMCP
pip install biomcp-python

# Optional: Set API key for higher rate limits
export OPENFDA_API_KEY="your-api-key-here"
```

> **Note**: An API key is optional but recommended. Without one, you're limited to 40 requests/minute. With a key, you get 240 requests/minute. [Get a free API key here](https://open.fda.gov/apis/authentication/).

### Basic Usage Examples

#### Search for drug adverse events

```bash
# Find adverse events for a specific drug
biomcp openfda adverse search --drug imatinib

# Search for specific reactions
biomcp openfda adverse search --reaction nausea --serious

# Get detailed report
biomcp openfda adverse get REPORT123456
```

#### Search drug labels

```bash
# Find drugs for specific indications
biomcp openfda label search --indication melanoma

# Search for drugs with boxed warnings
biomcp openfda label search --boxed-warning

# Get complete label
biomcp openfda label get SET_ID_HERE
```

#### Search device events

```bash
# Search for genomic test device issues
biomcp openfda device search --device "FoundationOne"

# Search by manufacturer
biomcp openfda device search --manufacturer Illumina

# Get detailed device event
biomcp openfda device get MDR123456
```

## MCP Tool Usage

### For AI Agents

The OpenFDA tools are available as MCP tools for AI agents. Each tool includes built-in reminders to use the `think` tool first for complex queries.

#### Available Tools

- `openfda_adverse_searcher` - Search drug adverse events
- `openfda_adverse_getter` - Get specific adverse event report
- `openfda_label_searcher` - Search drug labels
- `openfda_label_getter` - Get complete drug label
- `openfda_device_searcher` - Search device adverse events
- `openfda_device_getter` - Get specific device event report

#### Example Tool Usage

```python
# Search for adverse events
result = await openfda_adverse_searcher(
    drug="pembrolizumab",
    serious=True,
    limit=25
)

# Get drug label
label = await openfda_label_getter(
    set_id="abc-123-def",
    sections=["indications_and_usage", "warnings_and_precautions"]
)

# Search genomic devices
devices = await openfda_device_searcher(
    device="sequencer",
    genomics_only=True,  # Filter to genomic/diagnostic devices
    problem="false positive"
)
```

## Data Sources Explained

### Drug Adverse Events (FAERS)

The FDA Adverse Event Reporting System (FAERS) contains reports of adverse events and medication errors submitted to the FDA. Key features:

- **Voluntary reporting**: Reports come from healthcare professionals, patients, and manufacturers
- **No causation proof**: Reports don't establish that a drug caused the event
- **Rich detail**: Includes patient demographics, drug information, reactions, and outcomes
- **Real-world data**: Captures post-market safety signals

**Best for**: Understanding potential side effects, safety signals, drug interactions

### Drug Labels (SPL)

Structured Product Labeling (SPL) contains the official FDA-approved prescribing information. It includes:

- **Indications and usage**: FDA-approved uses
- **Dosage and administration**: How to prescribe
- **Contraindications**: When not to use
- **Warnings and precautions**: Safety information
- **Drug interactions**: Known interactions
- **Clinical studies**: Trial data supporting approval

**Best for**: Official prescribing guidelines, approved indications, contraindications

### Device Events (MAUDE)

The Manufacturer and User Facility Device Experience (MAUDE) database contains medical device adverse event reports. For BioMCP, we focus on genomic/diagnostic devices:

- **Genomic test devices**: Issues with sequencing platforms, diagnostic panels
- **In vitro diagnostics**: Problems with biomarker tests
- **Device malfunctions**: Technical failures affecting test results
- **Patient impact**: How device issues affected patient care

**Best for**: Understanding reliability of genomic tests, device-related diagnostic issues

## Advanced Features

### Genomic Device Filtering

By default, device searches filter to genomic/diagnostic devices relevant to precision oncology:

```bash
# Search only genomic devices (default)
biomcp openfda device search --device test

# Search ALL medical devices
biomcp openfda device search --device test --all-devices
```

The genomic filter includes FDA product codes for:

- Next Generation Sequencing panels
- Gene mutation detection systems
- Tumor profiling tests
- Hereditary variant detection systems

### Pagination Support

All search tools support pagination for large result sets:

```bash
# Get second page of results
biomcp openfda adverse search --drug aspirin --page 2 --limit 50
```

### Section-Specific Label Retrieval

When retrieving drug labels, you can specify which sections to include:

```bash
# Get only specific sections
biomcp openfda label get SET_ID --sections "indications_and_usage,adverse_reactions"
```

## Integration with Other BioMCP Tools

### Complementary Data Sources

OpenFDA data complements existing BioMCP tools:

| Tool                       | Data Source        | Best For                          |
| -------------------------- | ------------------ | --------------------------------- |
| `drug_getter`              | MyChem.info        | Chemical properties, mechanisms   |
| `openfda_label_searcher`   | FDA Labels         | Official indications, prescribing |
| `openfda_adverse_searcher` | FAERS              | Safety signals, side effects      |
| `trial_searcher`           | ClinicalTrials.gov | Active trials, eligibility        |

### Workflow Examples

#### Complete Drug Profile

```python
# 1. Get drug chemical info
drug_info = await drug_getter("imatinib")

# 2. Get FDA label
label = await openfda_label_searcher(name="imatinib")

# 3. Check adverse events
safety = await openfda_adverse_searcher(drug="imatinib", serious=True)

# 4. Find current trials
trials = await trial_searcher(interventions=["imatinib"])
```

#### Device Reliability Check

```python
# 1. Search for device issues
events = await openfda_device_searcher(
    device="FoundationOne CDx",
    problem="false"
)

# 2. Get specific event details
if events:
    details = await openfda_device_getter("MDR_KEY_HERE")
```

## Important Considerations

### Data Limitations

1. **Adverse Events**:

   - Reports don't prove causation
   - Reporting is voluntary, so not all events are captured
   - Duplicate reports may exist
   - Include appropriate disclaimers when presenting data

2. **Drug Labels**:

   - May not reflect the most recent changes
   - Off-label uses not included
   - Generic drugs may have different inactive ingredients

3. **Device Events**:
   - Not all device problems are reported
   - User error vs device malfunction can be unclear
   - Reports may lack complete information

### Rate Limits

- **Without API key**: 40 requests/minute per IP
- **With API key**: 240 requests/minute per key
- **Burst limit**: 4 requests/second

### Best Practices

1. **Always use disclaimers**: Include FDA's disclaimer about adverse events not proving causation
2. **Check multiple sources**: Combine OpenFDA data with other BioMCP tools
3. **Filter appropriately**: Use genomic device filtering for relevant results
4. **Handle no results gracefully**: Many specific queries may return no results
5. **Respect rate limits**: Use API key for production use

## Troubleshooting

### Common Issues

**No results found**

- Try broader search terms
- Check spelling of drug/device names
- Remove filters to expand search

**Rate limit errors**

- Add API key to environment
- Reduce request frequency
- Batch queries when possible

**Timeout errors**

- OpenFDA API may be slow/down
- Retry after a brief wait
- Consider caching frequent queries

### Getting Help

- OpenFDA documentation: https://open.fda.gov/apis/
- OpenFDA status: https://api.fda.gov/status
- BioMCP issues: https://github.com/genomoncology/biomcp/issues

## API Reference

### Environment Variables

- `OPENFDA_API_KEY`: Your openFDA API key (optional but recommended)

### CLI Commands

```bash
# Adverse Events
biomcp openfda adverse search [OPTIONS]
  --drug TEXT           Drug name to search
  --reaction TEXT       Reaction to search
  --serious/--all       Filter serious events
  --limit INT           Results per page (max 100)
  --page INT            Page number

biomcp openfda adverse get REPORT_ID

# Drug Labels
biomcp openfda label search [OPTIONS]
  --name TEXT           Drug name
  --indication TEXT     Indication to search
  --boxed-warning       Has boxed warning
  --section TEXT        Label section
  --limit INT           Results per page
  --page INT            Page number

biomcp openfda label get SET_ID [OPTIONS]
  --sections TEXT       Comma-separated sections

# Device Events
biomcp openfda device search [OPTIONS]
  --device TEXT         Device name
  --manufacturer TEXT   Manufacturer name
  --problem TEXT        Problem description
  --product-code TEXT   FDA product code
  --genomics-only/--all-devices
  --limit INT           Results per page
  --page INT            Page number

biomcp openfda device get MDR_KEY
```

## Example Outputs

### Adverse Event Search

```markdown
## FDA Adverse Event Reports

**Drug**: imatinib | **Serious Events**: Yes
**Total Reports Found**: 1,234 reports

### Top Reported Reactions:

- **NAUSEA**: 234 reports (19.0%)
- **FATIGUE**: 189 reports (15.3%)
- **RASH**: 156 reports (12.6%)

### Sample Reports (showing 3 of 1,234):

...
```

### Drug Label Search

```markdown
## FDA Drug Labels

**Drug**: pembrolizumab
**Total Labels Found**: 5 labels

### Results (showing 5 of 5):

#### 1. KEYTRUDA

**Also known as**: pembrolizumab
**FDA Application**: BLA125514
**Manufacturer**: Merck Sharp & Dohme
**Route**: INTRAVENOUS

⚠️ **BOXED WARNING**: Immune-mediated adverse reactions...

**Indications**: KEYTRUDA is indicated for the treatment of...
```

### Device Event Search

```markdown
## FDA Device Adverse Event Reports

**Device**: FoundationOne | **Type**: Genomic/Diagnostic Devices
**Total Reports Found**: 12 reports

### Top Reported Problems:

- **False negative result**: 5 reports (41.7%)
- **Software malfunction**: 3 reports (25.0%)

### Sample Reports (showing 3 of 12):

...
```

```

--------------------------------------------------------------------------------
/docs/how-to-guides/02-find-trials-with-nci-and-biothings.md:
--------------------------------------------------------------------------------

```markdown
# How to Find Trials with NCI and BioThings

This guide demonstrates how to search for clinical trials using BioMCP's dual data sources and automatic disease synonym expansion.

## Overview

BioMCP provides access to clinical trials through:

- **ClinicalTrials.gov**: Default source with comprehensive U.S. and international trials ([API Reference](../backend-services-reference/04-clinicaltrials-gov.md))
- **NCI CTS API**: Advanced cancer trial search with biomarker filtering (requires API key) ([API Reference](../backend-services-reference/05-nci-cts-api.md))
- **BioThings Integration**: Automatic disease synonym expansion for better coverage ([BioThings Reference](../backend-services-reference/02-biothings-suite.md))

## Basic Trial Search

### Simple Disease Search

Find trials for a specific condition:

```bash
# CLI
biomcp trial search --condition melanoma --status RECRUITING

# Python
trials = await client.trials.search(
    conditions=["melanoma"],
    recruiting_status="RECRUITING"
)

# MCP Tool
trial_searcher(
    conditions=["melanoma"],
    recruiting_status="OPEN"
)
```

### Search by Intervention

Find trials testing specific drugs:

```bash
# CLI
biomcp trial search --intervention pembrolizumab --phase PHASE3

# Python
trials = await client.trials.search(
    interventions=["pembrolizumab"],
    phase="PHASE3"
)
```

## Location-Based Search

### Finding Nearby Trials

**Important**: Location searches require latitude and longitude coordinates.

```python
# Find trials near Cleveland, Ohio
trials = await trial_searcher(
    conditions=["lung cancer"],
    lat=41.4993,
    long=-81.6944,
    distance=50  # 50-mile radius
)

# Find trials near Boston
trials = await trial_searcher(
    conditions=["breast cancer"],
    lat=42.3601,
    long=-71.0589,
    distance=25
)
```

### Getting Coordinates

For common locations:

- Cleveland: lat=41.4993, long=-81.6944
- Boston: lat=42.3601, long=-71.0589
- New York: lat=40.7128, long=-74.0060
- Los Angeles: lat=34.0522, long=-118.2437
- Houston: lat=29.7604, long=-95.3698

## Advanced Filtering

### Multiple Criteria

Combine multiple filters for precise results:

```python
# Complex search example
trials = await trial_searcher(
    conditions=["non-small cell lung cancer", "NSCLC"],
    interventions=["pembrolizumab", "immunotherapy"],
    phase="PHASE3",
    recruiting_status="OPEN",
    age_group="ADULT",
    study_type="INTERVENTIONAL",
    funder_type="INDUSTRY"
)
```

### Date-Based Filtering

Find recently started trials:

```bash
# CLI - Trials started in 2024
biomcp trial search \
  --condition cancer \
  --start-date 2024-01-01 \
  --status RECRUITING
```

## Using NCI API Advanced Features

### Setup NCI API Key

Get your key from [api.cancer.gov](https://api.cancer.gov) and export it as an environment variable, as shown below. For detailed setup instructions, see [Authentication and API Keys](../getting-started/03-authentication-and-api-keys.md#nci-clinical-trials-api).

```bash
export NCI_API_KEY="your-key-here"
```

### Biomarker-Based Search

Find trials for specific mutations:

```python
# Search using NCI source
trials = await search(
    domain="trial",
    source="nci",
    conditions=["melanoma"],
    required_mutations=["BRAF V600E"],
    allow_brain_mets=True,
    api_key="your-key"
)
```

### NCI-Specific Parameters

```python
# Advanced NCI search
trials = await trial_searcher(
    source="nci",
    conditions=["lung cancer"],
    required_mutations=["EGFR L858R", "EGFR exon 19 deletion"],
    prior_therapy_required=False,
    allow_brain_mets=True,
    allow_prior_immunotherapy=False,
    api_key="your-key"
)
```

## BioThings Integration for Enhanced Search

For technical details on the BioThings APIs, see:

- [BioThings Suite Reference](../backend-services-reference/02-biothings-suite.md)

### Automatic Disease Synonym Expansion

BioMCP automatically expands disease terms using MyDisease.info:

```python
# Searching for "GIST" automatically includes:
# - "gastrointestinal stromal tumor"
# - "gastrointestinal stromal tumour"
# - "GI stromal tumor"
trials = await trial_searcher(conditions=["GIST"])
```

### Manual Disease Lookup

Get all synonyms for a disease:

```python
# Get disease information
disease_info = await disease_getter("melanoma")

# Extract synonyms
synonyms = disease_info.synonyms
# Returns: ["malignant melanoma", "melanoma, malignant", ...]

# Use in trial search
trials = await trial_searcher(conditions=synonyms)
```

## Practical Workflows

### Workflow 1: Patient-Centric Trial Search

Find trials for a specific patient profile:

```python
async def find_trials_for_patient(
    disease: str,
    mutations: list[str],
    location: tuple[float, float],
    prior_treatments: list[str]
):
    """Search ClinicalTrials.gov, and NCI when a key is configured, for one patient.

    Args:
        disease: Disease name; its synonyms are added to the search conditions.
        mutations: Mutation strings passed as free-text terms (ClinicalTrials.gov)
            and as required mutations (NCI).
        location: (latitude, longitude) pair in decimal degrees.
        prior_treatments: Therapies the patient has already received.

    Returns:
        Dict with keys "clinicaltrials_gov" and "nci". The "nci" entry is
        None when NCI_API_KEY is not set.
    """
    import os  # imported here so the example runs as a standalone snippet

    # Step 1: Think about the search
    await think(
        thought=f"Searching trials for {disease} with {mutations}",
        thoughtNumber=1
    )

    # Step 2: Get disease synonyms
    disease_info = await disease_getter(disease)
    all_conditions = [disease] + disease_info.synonyms

    # Step 3: Search both sources
    # ClinicalTrials.gov
    ctgov_trials = await trial_searcher(
        conditions=all_conditions,
        other_terms=mutations,
        lat=location[0],
        long=location[1],
        distance=100,
        recruiting_status="OPEN"
    )

    # NCI (if API key available). Default to None so the return statement
    # below never references an unbound name when the key is missing.
    nci_trials = None
    nci_api_key = os.getenv("NCI_API_KEY")
    if nci_api_key:
        nci_trials = await trial_searcher(
            source="nci",
            conditions=all_conditions,
            required_mutations=mutations,
            exclude_prior_therapy=prior_treatments,
            api_key=nci_api_key
        )

    return {
        "clinicaltrials_gov": ctgov_trials,
        "nci": nci_trials
    }

# Example usage
trials = await find_trials_for_patient(
    disease="melanoma",
    mutations=["BRAF V600E"],
    location=(40.7128, -74.0060),  # New York
    prior_treatments=["vemurafenib"]
)
```

### Workflow 2: Research Landscape Analysis

Understand ongoing research in a field:

```python
async def analyze_research_landscape(gene: str, disease: str):
    """Summarize open trials for a gene/disease pair.

    Returns a dict with the trial count, a per-phase tally, the distinct
    interventions seen, and the gene record from gene_getter.
    """
    # Gene record is returned to the caller unchanged
    gene_record = await gene_getter(gene)

    # Open trials that mention the gene in any of three common phrasings
    active_trials = await trial_searcher(
        conditions=[disease],
        other_terms=[gene, f"{gene} mutation", f"{gene} positive"],
        recruiting_status="OPEN",
        page_size=50
    )

    # Single pass: tally phases and collect interventions together
    phase_counts = {}
    seen_interventions = set()
    for trial in active_trials:
        label = trial.phase or "Not specified"
        phase_counts[label] = phase_counts.get(label, 0) + 1
        if trial.interventions:
            seen_interventions.update(trial.interventions)

    return {
        "total_trials": len(active_trials),
        "phase_distribution": phase_counts,
        "unique_interventions": list(seen_interventions),
        "gene_info": gene_record
    }

# Example
landscape = await analyze_research_landscape("ALK", "lung cancer")
```

### Workflow 3: Biomarker-Driven Search

Find trials based on specific biomarkers:

```python
async def biomarker_trial_search(biomarkers: list[str], cancer_type: str):
    """Collect full trial records linked to the given biomarkers.

    Each biomarker is looked up with nci_biomarker_searcher, the
    "associated_trials" IDs of all matches are de-duplicated, and every ID
    is resolved with trial_getter.

    NOTE(review): `cancer_type` is accepted but not used in this example;
    confirm whether results were meant to be filtered by it.
    """
    # Search NCI biomarker database, one name at a time
    matches = []
    for name in biomarkers:
        found = await nci_biomarker_searcher(
            name=name,
            api_key=os.getenv("NCI_API_KEY")
        )
        matches.extend(found)

    # Gather the distinct trial IDs referenced by any match
    trial_ids = set()
    for match in matches:
        linked = match.get("associated_trials")
        if linked:
            trial_ids.update(linked)

    # Resolve each ID to its trial details, sequentially
    return [await trial_getter(nct_id) for nct_id in trial_ids]

# Example
trials = await biomarker_trial_search(
    biomarkers=["PD-L1", "TMB-high", "MSI-H"],
    cancer_type="colorectal cancer"
)
```

## Working with Trial Results

### Extracting Key Information

```python
# Process trial results
for trial in trials:
    print(f"NCT ID: {trial.nct_id}")
    print(f"Title: {trial.title}")
    print(f"Status: {trial.status}")
    print(f"Phase: {trial.phase}")

    # Locations
    if trial.locations:
        print("Locations:")
        for loc in trial.locations:
            print(f"  - {loc.facility}, {loc.city}, {loc.state}")

    # Eligibility
    if trial.eligibility:
        print(f"Age: {trial.eligibility.minimum_age} - {trial.eligibility.maximum_age}")
        print(f"Sex: {trial.eligibility.sex}")
```

### Getting Detailed Trial Information

```python
# Get complete trial details
full_trial = await trial_getter("NCT03006926")

# Get specific sections
protocol = await trial_protocol_getter("NCT03006926")
locations = await trial_locations_getter("NCT03006926")
outcomes = await trial_outcomes_getter("NCT03006926")
references = await trial_references_getter("NCT03006926")
```

## Tips for Effective Trial Searches

### 1. Use Multiple Search Terms

```python
# Cover variations
trials = await trial_searcher(
    conditions=["NSCLC", "non-small cell lung cancer", "lung adenocarcinoma"],
    interventions=["anti-PD-1", "pembrolizumab", "Keytruda"]
)
```

### 2. Check Both Data Sources

```python
# Some trials may only be in one database
ctgov_count = len(await trial_searcher(source="ctgov", conditions=["melanoma"]))
nci_count = len(await trial_searcher(source="nci", conditions=["melanoma"]))
```

### 3. Use Appropriate Filters

- **recruiting_status**: Focus on trials accepting patients
- **phase**: Later phases for established treatments
- **age_group**: Match patient demographics
- **study_type**: INTERVENTIONAL vs OBSERVATIONAL

### 4. Leverage Location Search

Always include location for patient-specific searches:

```python
# Bad - no location
trials = await trial_searcher(conditions=["cancer"])

# Good - includes location
trials = await trial_searcher(
    conditions=["cancer"],
    lat=40.7128,
    long=-74.0060,
    distance=50
)
```

## Troubleshooting

### No Results Found

1. **Broaden search terms**: Remove specific filters
2. **Check synonyms**: Use disease_getter to find alternatives
3. **Expand location**: Increase distance parameter
4. **Try both sources**: Some trials are listed in only one of NCI or ClinicalTrials.gov

### Location Search Issues

- Ensure both latitude AND longitude are provided
- Use decimal degrees (not degrees/minutes/seconds)
- Check coordinate signs (negative for West/South)

### NCI API Errors

- Verify API key is valid
- Check rate limits (1000 requests/day with key)
- Some features require specific API key permissions

## Next Steps

- Learn about [variant annotations](03-get-comprehensive-variant-annotations.md)
- Explore [AlphaGenome predictions](04-predict-variant-effects-with-alphagenome.md)
- Set up [monitoring and logging](05-logging-and-monitoring-with-bigquery.md)

```

--------------------------------------------------------------------------------
/src/biomcp/variants/search.py:
--------------------------------------------------------------------------------

```python
import json
import logging
from typing import Annotated, Any

from pydantic import BaseModel, Field, model_validator

from .. import StrEnum, ensure_list, http_client, render
from ..constants import MYVARIANT_QUERY_URL, SYSTEM_PAGE_SIZE
from .filters import filter_variants
from .links import inject_links

logger = logging.getLogger(__name__)


class ClinicalSignificance(StrEnum):
    # Allowed values for the `significance` search filter. build_query_string
    # places the chosen value, quoted, into the
    # `clinvar.rcv.clinical_significance` clause.
    PATHOGENIC = "pathogenic"
    LIKELY_PATHOGENIC = "likely pathogenic"
    UNCERTAIN_SIGNIFICANCE = "uncertain significance"
    LIKELY_BENIGN = "likely benign"
    BENIGN = "benign"


class PolyPhenPrediction(StrEnum):
    # Single-letter codes used for the `polyphen` filter; build_query_string
    # matches them against `dbnsfp.polyphen2.hdiv.pred`.
    PROBABLY_DAMAGING = "D"
    POSSIBLY_DAMAGING = "P"
    BENIGN = "B"


class SiftPrediction(StrEnum):
    # Single-letter codes used for the `sift` filter; build_query_string
    # matches them against `dbnsfp.sift.pred`.
    DELETERIOUS = "D"
    TOLERATED = "T"


class VariantSources(StrEnum):
    CADD = "cadd"
    CGI = "cgi"
    CIVIC = "civic"
    CLINVAR = "clinvar"
    COSMIC = "cosmic"
    DBNSFP = "dbnsfp"
    DBSNP = "dbsnp"
    DOCM = "docm"
    EMV = "evm"
    EXAC = "exac"
    GNOMAD_EXOME = "gnomad_exome"
    HG19 = "hg19"
    MUTDB = "mutdb"
    SNPEFF = "snpeff"
    VCF = "vcf"


# Fields requested from MyVariant.info on every search. convert_query copies
# this list, appends a "<source>.*" wildcard for each requested source, and
# joins the result with commas into the `fields` request parameter.
MYVARIANT_FIELDS = [
    "_id",
    "chrom",
    "vcf.position",
    "vcf.ref",
    "vcf.alt",
    "cadd.phred",
    "civic.id",
    "civic.openCravatUrl",
    "clinvar.rcv.clinical_significance",
    "clinvar.variant_id",
    "cosmic.cosmic_id",
    "dbnsfp.genename",
    "dbnsfp.hgvsc",
    "dbnsfp.hgvsp",
    "dbnsfp.polyphen2.hdiv.pred",
    "dbnsfp.polyphen2.hdiv.score",
    "dbnsfp.sift.pred",
    "dbnsfp.sift.score",
    "dbsnp.rsid",
    "exac.af",
    "gnomad_exome.af.af",
]


class VariantQuery(BaseModel):
    """Search parameters for querying variant data from MyVariant.info."""

    # --- Identifier / location filters -------------------------------------
    # gene, hgvsp, hgvsc and rsid become quoted field clauses in
    # build_query_string; region and rsid are also added as bare query terms.
    gene: str | None = Field(
        default=None,
        description="Gene symbol to search for (e.g. BRAF, TP53)",
    )
    hgvsp: str | None = Field(
        default=None,
        description="Protein change notation (e.g., p.V600E, p.Arg557His)",
    )
    hgvsc: str | None = Field(
        default=None,
        description="cDNA notation (e.g., c.1799T>A)",
    )
    rsid: str | None = Field(
        default=None,
        description="dbSNP rsID (e.g., rs113488022)",
    )
    region: str | None = Field(
        default=None,
        description="Genomic region as chr:start-end (e.g. chr1:12345-67890)",
    )
    # --- Annotation filters --------------------------------------------------
    significance: ClinicalSignificance | None = Field(
        default=None,
        description="ClinVar clinical significance",
    )
    # Both frequency bounds are applied to `gnomad_exome.af.af`
    # (max as "<=", min as ">=") by build_query_string.
    max_frequency: float | None = Field(
        default=None,
        description="Maximum population allele frequency threshold",
    )
    min_frequency: float | None = Field(
        default=None,
        description="Minimum population allele frequency threshold",
    )
    # Applied as `cadd.phred:>=<value>`.
    cadd: float | None = Field(
        default=None,
        description="Minimum CADD phred score",
    )
    polyphen: PolyPhenPrediction | None = Field(
        default=None,
        description="PolyPhen-2 prediction",
    )
    sift: SiftPrediction | None = Field(
        default=None,
        description="SIFT prediction",
    )
    # --- Output shaping ------------------------------------------------------
    # Each entry adds a "<source>.*" wildcard to the requested fields; it does
    # not contribute to the query string itself.
    sources: list[VariantSources] = Field(
        description="Include only specific data sources",
        default_factory=list,
    )
    # Sent to the API as `size` and `from` respectively (see convert_query).
    size: int = Field(
        default=SYSTEM_PAGE_SIZE,
        description="Number of results to return",
    )
    offset: int = Field(
        default=0,
        description="Result offset for pagination",
    )

    @model_validator(mode="after")
    def validate_query_params(self) -> "VariantQuery":
        # Reject a query in which every field is None or still at its default.
        # Any field that differs from its default satisfies this check,
        # including `sources`, `size` and `offset`, not only the filters.
        if not self.model_dump(exclude_none=True, exclude_defaults=True):
            raise ValueError("At least one search parameter is required")
        return self


def _construct_query_part(
    field: str,
    val: Any | None,
    operator: str | None = None,
    quoted: bool = False,
) -> str | None:
    if val is not None:
        val = str(val)
        val = f'"{val}"' if quoted else val
        operator = operator or ""
        val = f"{field}:{operator}{val}"
    return val


def build_query_string(query: VariantQuery) -> str:
    """Build the MyVariant.info ``q`` string for a VariantQuery.

    ``region`` and ``rsid`` are included first as bare terms when non-empty.
    Each populated filter then adds one ``field:value`` clause (``rsid``
    therefore appears a second time as a quoted ``dbsnp.rsid`` clause).
    Clauses are joined with `` AND ``; ``"*"`` is returned when there are none.
    """
    clauses: list[str] = [
        raw for raw in (query.region, query.rsid) if raw
    ]

    # (field, value, comparison operator, quote the value?)
    field_specs = (
        ("dbnsfp.genename", query.gene, None, True),
        ("dbnsfp.hgvsp", query.hgvsp, None, True),
        ("dbnsfp.hgvsc", query.hgvsc, None, True),
        ("dbsnp.rsid", query.rsid, None, True),
        ("clinvar.rcv.clinical_significance", query.significance, None, True),
        ("gnomad_exome.af.af", query.max_frequency, "<=", False),
        ("gnomad_exome.af.af", query.min_frequency, ">=", False),
        ("cadd.phred", query.cadd, ">=", False),
        ("dbnsfp.polyphen2.hdiv.pred", query.polyphen, None, True),
        ("dbnsfp.sift.pred", query.sift, None, True),
    )

    rendered = (_construct_query_part(*spec) for spec in field_specs)
    clauses.extend(clause for clause in rendered if clause is not None)

    if not clauses:
        return "*"
    return " AND ".join(clauses)


async def convert_query(query: VariantQuery) -> dict[str, Any]:
    """Convert a VariantQuery to parameters for the MyVariant.info API.

    Args:
        query: Validated search parameters.

    Returns:
        Dict with ``q`` (query string), ``size``, ``from`` (offset) and
        ``fields`` (comma-separated default fields plus a ``<source>.*``
        wildcard for each requested source).
    """
    fields = MYVARIANT_FIELDS[:] + [f"{s}.*" for s in query.sources]

    # Optimize common queries to prevent timeouts
    if query.gene == "BRAF" and query.hgvsp == "V600E":
        # Special handling for the common BRAF V600E query: match on the
        # amino-acid fields instead of dbnsfp.hgvsp, which performs better.
        braf_v600e_clause = (
            '(dbnsfp.aaref:"V" AND dbnsfp.aapos:600 AND dbnsfp.aaalt:"E")'
        )
        # Swap out only the hgvsp clause. Previously the whole query string
        # was replaced, which silently dropped every other filter (region,
        # rsid, significance, frequency, CADD, ...). With just gene + hgvsp
        # set, the result is identical to the old hard-coded string.
        # model_copy does not re-run validation, so clearing hgvsp is safe.
        other_filters = build_query_string(
            query.model_copy(update={"hgvsp": None})
        )
        query_string = f"{other_filters} AND {braf_v600e_clause}"
    else:
        query_string = build_query_string(query)

    return {
        "q": query_string,
        "size": query.size,
        "from": query.offset,
        "fields": ",".join(fields),
    }


async def search_variants(
    query: VariantQuery,
    output_json: bool = False,
    include_cbioportal: bool = True,
) -> str:
    """Run a variant search against MyVariant.info and format the outcome.

    Args:
        query: Search parameters.
        output_json: Return a JSON string when True, Markdown otherwise.
        include_cbioportal: When True, a gene is given and the search did not
            fail, try to add a cBioPortal summary for that gene.

    Returns:
        Markdown or JSON text. On an API error the payload is a single
        ``{"error": ...}`` entry rather than an exception.
    """
    api_params = await convert_query(query)

    response, error = await http_client.request_api(
        url=MYVARIANT_QUERY_URL,
        request=api_params,
        method="GET",
        domain="myvariant",
    )
    variants: list = response.get("hits", []) if response else []

    if not error:
        variants = inject_links(variants)
        variants = filter_variants(variants)
    elif "timed out" in error.message.lower():
        # Timeouts get a tailored hint instead of the raw error text
        variants = [
            {
                "error": (
                    "MyVariant.info API request timed out. This can happen with complex queries. "
                    "Try narrowing your search criteria or searching by specific identifiers (rsID, HGVS)."
                )
            }
        ]
    else:
        variants = [{"error": f"Error {error.code}: {error.message}"}]

    # Best effort: a failure here is logged and the search result still returned
    summary_text = None
    if include_cbioportal and query.gene and not error:
        try:
            from .cbioportal_search import (
                CBioPortalSearchClient,
                format_cbioportal_search_summary,
            )

            portal = CBioPortalSearchClient()
            gene_summary = await portal.get_gene_search_summary(query.gene)
            if gene_summary:
                summary_text = format_cbioportal_search_summary(gene_summary)
        except Exception as e:
            logger.warning(f"Failed to get cBioPortal summary: {e}")

    if output_json:
        if summary_text:
            return json.dumps(
                {"cbioportal_summary": summary_text, "variants": variants},
                indent=2,
            )
        return json.dumps(variants, indent=2)

    rendered = render.to_markdown(variants)
    if summary_text:
        rendered = summary_text + "\n\n" + rendered
    return rendered


async def _variant_searcher(
    call_benefit: Annotated[
        str,
        "Define and summarize why this function is being called and the intended benefit",
    ],
    gene: Annotated[
        str | None, "Gene symbol to search for (e.g. BRAF, TP53)"
    ] = None,
    hgvsp: Annotated[
        str | None, "Protein change notation (e.g., p.V600E, p.Arg557His)"
    ] = None,
    hgvsc: Annotated[str | None, "cDNA notation (e.g., c.1799T>A)"] = None,
    rsid: Annotated[str | None, "dbSNP rsID (e.g., rs113488022)"] = None,
    region: Annotated[
        str | None, "Genomic region as chr:start-end (e.g. chr1:12345-67890)"
    ] = None,
    significance: Annotated[
        ClinicalSignificance | str | None, "ClinVar clinical significance"
    ] = None,
    max_frequency: Annotated[
        float | None, "Maximum population allele frequency threshold"
    ] = None,
    min_frequency: Annotated[
        float | None, "Minimum population allele frequency threshold"
    ] = None,
    cadd: Annotated[float | None, "Minimum CADD phred score"] = None,
    polyphen: Annotated[
        PolyPhenPrediction | str | None, "PolyPhen-2 prediction"
    ] = None,
    sift: Annotated[SiftPrediction | str | None, "SIFT prediction"] = None,
    sources: Annotated[
        list[VariantSources] | list[str] | str | None,
        "Include only specific data sources (list or comma-separated string)",
    ] = None,
    size: Annotated[int, "Number of results to return"] = SYSTEM_PAGE_SIZE,
    offset: Annotated[int, "Result offset for pagination"] = 0,
) -> str:
    """
    Searches for genetic variants based on specified criteria.

    Parameters:
    - call_benefit: Define and summarize why this function is being called and the intended benefit
    - gene: Gene symbol to search for (e.g. BRAF, TP53)
    - hgvsp: Protein change notation (e.g., p.V600E, p.Arg557His)
    - hgvsc: cDNA notation (e.g., c.1799T>A)
    - rsid: dbSNP rsID (e.g., rs113488022)
    - region: Genomic region as chr:start-end (e.g. chr1:12345-67890)
    - significance: ClinVar clinical significance
    - max_frequency: Maximum population allele frequency threshold
    - min_frequency: Minimum population allele frequency threshold
    - cadd: Minimum CADD phred score
    - polyphen: PolyPhen-2 prediction
    - sift: SIFT prediction
    - sources: Include only specific data sources (list or comma-separated string)
    - size: Number of results to return (default: 10)
    - offset: Result offset for pagination (default: 0)

    Returns:
    Markdown formatted list of matching variants with key annotations
    """
    # `sources` may arrive as a list, a comma-separated string, or None;
    # normalize it before handing everything to the model.
    source_list = ensure_list(sources, split_strings=True)

    # `call_benefit` is part of the tool signature only; it is not forwarded.
    criteria = {
        "gene": gene,
        "hgvsp": hgvsp,
        "hgvsc": hgvsc,
        "rsid": rsid,
        "region": region,
        "significance": significance,
        "max_frequency": max_frequency,
        "min_frequency": min_frequency,
        "cadd": cadd,
        "polyphen": polyphen,
        "sift": sift,
        "sources": source_list,
        "size": size,
        "offset": offset,
    }
    variant_query = VariantQuery(**criteria)

    # This entry point always returns Markdown with the cBioPortal summary on
    return await search_variants(
        variant_query, output_json=False, include_cbioportal=True
    )

```

--------------------------------------------------------------------------------
/tests/tdd/test_mcp_integration.py:
--------------------------------------------------------------------------------

```python
"""Integration tests for MCP server functionality."""

import json
from unittest.mock import patch

import pytest

from biomcp.core import mcp_app


@pytest.mark.asyncio
class TestMCPIntegration:
    """Integration tests for the MCP server."""

    async def test_mcp_server_tools_registered(self):
        """The MCP app exposes the expected tool count and every named tool."""
        registered = await mcp_app.list_tools()

        # Should have 35 tools (2 unified + 1 think + 32 individual including OpenFDA)
        assert len(registered) == 35

        names = [tool.name for tool in registered]
        expected_names = (
            # Unified tools
            "search",
            "fetch",
            "think",
            # Individual tools
            "article_searcher",
            "article_getter",
            "trial_searcher",
            "trial_getter",
            "trial_protocol_getter",
            "trial_references_getter",
            "trial_outcomes_getter",
            "trial_locations_getter",
            "variant_searcher",
            "variant_getter",
            "alphagenome_predictor",
            "gene_getter",
            "drug_getter",
            "disease_getter",
            # OpenFDA tools
            "openfda_adverse_searcher",
            "openfda_adverse_getter",
            "openfda_label_searcher",
            "openfda_label_getter",
            "openfda_device_searcher",
            "openfda_device_getter",
            "openfda_approval_searcher",
            "openfda_approval_getter",
            "openfda_recall_searcher",
            "openfda_recall_getter",
            "openfda_shortage_searcher",
            "openfda_shortage_getter",
        )
        for expected in expected_names:
            assert expected in names

    async def test_mcp_search_tool_schema(self):
        """The `search` tool schema lists its parameters, required set and domains."""
        all_tools = await mcp_app.list_tools()
        search_tool = next(tool for tool in all_tools if tool.name == "search")
        schema = search_tool.inputSchema

        # Parameters that must be declared
        declared = schema["properties"]
        for param in ("query", "domain", "call_benefit"):
            assert param in declared

        # query has no default so it is required; call_benefit is optional
        assert "query" in schema.get("required", [])
        assert "call_benefit" not in schema.get("required", [])

        # The domain enum is nested under anyOf
        domain_values = declared["domain"]["anyOf"][0]["enum"]
        for domain in ("article", "trial", "variant"):
            assert domain in domain_values
        # thinking domain was removed from search tool
        # assert "thinking" in enum_values

    async def test_mcp_fetch_tool_schema(self):
        """The `fetch` tool requires only `id` and offers the three data domains."""
        all_tools = await mcp_app.list_tools()
        fetch_tool = next(tool for tool in all_tools if tool.name == "fetch")
        schema = fetch_tool.inputSchema

        # Only id should be required
        required = schema["required"]
        assert "id" in required
        assert len(required) == 1

        # Optional parameters are still declared
        declared = schema["properties"]
        for optional in ("domain", "call_benefit", "detail"):
            assert optional in declared

        # The enum sits either directly on the property or under anyOf
        domain_schema = declared["domain"]
        if "enum" not in domain_schema:
            enum_values = domain_schema.get("anyOf", [{}])[0].get("enum", [])
        else:
            enum_values = domain_schema["enum"]

        for domain in ("article", "trial", "variant"):
            assert domain in enum_values
        # fetch has no thinking domain
        assert "thinking" not in enum_values

    async def test_mcp_search_article_integration(self):
        """Article search via the router returns the single mocked article."""
        article = {
            "pmid": "12345",
            "title": "Test Article",
            "abstract": "Test abstract",
        }
        payload = json.dumps([article])

        with patch(
            "biomcp.articles.unified.search_articles_unified",
            return_value=payload,
        ):
            # Import search function directly since we can't test through MCP without Context
            from biomcp.router import search

            response = await search(
                query="",
                domain="article",
                genes="BRAF",
                page_size=10,
            )

            assert "results" in response
            # A thinking reminder may be included; ignore it
            hits = [
                item
                for item in response["results"]
                if item["id"] != "thinking-reminder"
            ]
            assert len(hits) == 1
            assert hits[0]["id"] == "12345"

    async def test_mcp_fetch_variant_integration(self):
        """Variant fetch via the router returns a fully keyed result record."""
        variant = {
            "_id": "rs121913529",
            "gene": {"symbol": "BRAF"},
            "clinvar": {"clinical_significance": "Pathogenic"},
        }
        payload = json.dumps([variant])

        with patch(
            "biomcp.variants.getter.get_variant", return_value=payload
        ):
            from biomcp.router import fetch

            record = await fetch(
                domain="variant",
                id="rs121913529",
            )

            # The ID is echoed back and every standard key is present
            assert record["id"] == "rs121913529"
            for key in ("title", "text", "url", "metadata"):
                assert key in record

    async def test_mcp_unified_query_integration(self):
        """A unified query yields results drawn from more than one domain."""
        routed = {
            "articles": json.dumps([
                {"pmid": "111", "title": "Article 1"}
            ]),
            "variants": json.dumps([
                {"_id": "rs222", "gene": {"symbol": "TP53"}}
            ]),
        }

        with patch(
            "biomcp.query_router.execute_routing_plan", return_value=routed
        ):
            from biomcp.router import search

            # Unified query: no domain given, fielded query string instead
            response = await search(
                query="gene:BRAF AND disease:cancer",
                max_results_per_domain=10,
            )

            assert "results" in response
            # A thinking reminder may be included; ignore it
            hits = [
                item
                for item in response["results"]
                if item["id"] != "thinking-reminder"
            ]
            assert len(hits) >= 2

    async def test_mcp_thinking_integration(self):
        """The think tool echoes the thought bookkeeping under the thinking domain."""
        canned = {
            "thought": "Processed thought",
            "analysis": "Test analysis",
        }

        with patch(
            "biomcp.thinking.sequential._sequential_thinking",
            return_value=canned,
        ):
            from biomcp.thinking_tool import think

            # Invoke the tool function directly
            outcome = await think(
                thought="Test thought",
                thoughtNumber=1,
                totalThoughts=3,
                nextThoughtNeeded=True,
            )

            assert outcome["domain"] == "thinking"
            assert outcome["thoughtNumber"] == 1
            assert outcome["nextThoughtNeeded"] is True

    async def test_mcp_error_handling(self):
        """Searching an unknown domain raises InvalidDomainError."""
        from biomcp.exceptions import InvalidDomainError
        from biomcp.router import search

        with pytest.raises(InvalidDomainError) as raised:
            await search(query="", domain="invalid_domain")

        message = str(raised.value)
        assert "Unknown domain" in message

    async def test_mcp_fetch_all_trial_sections(self):
        """Fetching a trial with detail="all" merges every section into metadata."""
        protocol_json = json.dumps({"title": "Test Trial", "nct_id": "NCT123"})
        locations_json = json.dumps({"locations": [{"city": "Boston"}]})
        outcomes_json = json.dumps({"outcomes": {}})
        references_json = json.dumps({"references": []})

        with (
            patch("biomcp.trials.getter._trial_protocol") as protocol_mock,
            patch("biomcp.trials.getter._trial_locations") as locations_mock,
            patch("biomcp.trials.getter._trial_outcomes") as outcomes_mock,
            patch("biomcp.trials.getter._trial_references") as references_mock,
        ):
            protocol_mock.return_value = protocol_json
            locations_mock.return_value = locations_json
            outcomes_mock.return_value = outcomes_json
            references_mock.return_value = references_json

            from biomcp.router import fetch

            record = await fetch(
                domain="trial",
                id="NCT123",
                detail="all",
            )

            # Verify all sections are included
            assert record["id"] == "NCT123"
            for section in ("locations", "outcomes", "references"):
                assert section in record["metadata"]

    async def test_mcp_parameter_parsing(self):
        """JSON-string, comma-separated and list inputs all reach the search as lists."""
        with patch(
            "biomcp.articles.unified.search_articles_unified",
            return_value=json.dumps([]),
        ) as search_mock:
            from biomcp.router import search

            # One argument per accepted input format
            await search(
                query="",
                domain="article",
                genes='["BRAF", "KRAS"]',  # JSON string
                diseases="cancer,melanoma",  # Comma-separated
                keywords=["test1", "test2"],  # Already a list
            )

            # First positional argument of the mocked call is the parsed request
            parsed = search_mock.call_args[0][0]
            assert parsed.genes == ["BRAF", "KRAS"]
            assert parsed.diseases == ["cancer", "melanoma"]
            assert parsed.keywords == ["test1", "test2"]

```

--------------------------------------------------------------------------------
/tests/tdd/test_biothings_integration_real.py:
--------------------------------------------------------------------------------

```python
"""Integration tests for BioThings API - calls real APIs."""

import pytest

from biomcp.integrations import BioThingsClient


@pytest.mark.integration
class TestRealBioThingsAPIs:
    """Integration tests that call real BioThings APIs.

    Every test goes through a real ``BioThingsClient`` and is grouped under
    the ``integration`` marker so the whole class can be selected or
    deselected together.  Optional fields are checked for ``None`` before
    they are dereferenced so that missing data shows up as a readable
    assertion failure instead of an ``AttributeError``/``TypeError``.
    """

    @pytest.fixture
    def client(self):
        """Create a real BioThings client."""
        return BioThingsClient()

    @pytest.mark.asyncio
    async def test_mygene_tp53(self, client):
        """Test real MyGene.info API with TP53."""
        result = await client.get_gene_info("TP53")

        assert result is not None
        assert result.symbol == "TP53"
        assert result.name == "tumor protein p53"
        # The identifier is accepted as either a string or an integer
        assert result.entrezgene in ["7157", 7157]
        assert result.summary is not None, "TP53 should have a summary"
        assert "tumor suppressor" in result.summary.lower()
        assert result.alias is not None, "TP53 should have aliases"
        # Check for either lowercase or uppercase P53 in aliases
        assert any("p53" in alias.lower() for alias in result.alias)

    @pytest.mark.asyncio
    async def test_mygene_braf(self, client):
        """Test real MyGene.info API with BRAF."""
        result = await client.get_gene_info("BRAF")

        assert result is not None
        assert result.symbol == "BRAF"
        assert result.name is not None, "BRAF should have a name"
        assert "proto-oncogene" in result.name.lower()
        assert result.type_of_gene == "protein-coding"

    @pytest.mark.asyncio
    async def test_mygene_by_entrez_id(self, client):
        """Test real MyGene.info API with Entrez ID."""
        result = await client.get_gene_info("673")  # BRAF

        assert result is not None
        assert result.symbol == "BRAF"
        assert result.gene_id == "673"

    @pytest.mark.asyncio
    async def test_mydisease_melanoma(self, client):
        """Test real MyDisease.info API with melanoma."""
        result = await client.get_disease_info("melanoma")

        if result is None:
            # The free-text lookup may not resolve directly; retry with the
            # MONDO identifier for melanoma before giving up.
            result = await client.get_disease_info("MONDO:0005105")

        assert result is not None, "Disease info should be returned"
        # The API may return subtypes of melanoma, so accept the term in
        # either the name or the definition.
        if result.name:
            assert "melanoma" in result.name.lower() or (
                result.definition and "melanoma" in result.definition.lower()
            )
        assert result.disease_id is not None
        # Synonyms might be empty for specific subtypes
        assert result.synonyms is not None

    @pytest.mark.asyncio
    async def test_mydisease_gist(self, client):
        """Test real MyDisease.info API with GIST."""
        result = await client.get_disease_info("GIST")

        if result is None:
            # The abbreviation may not resolve directly; retry with the
            # full disease name before giving up.
            result = await client.get_disease_info(
                "gastrointestinal stromal tumor"
            )

        assert result is not None, "Disease info should be returned"
        # GIST might return as a variant name
        if result.name:
            assert (
                "gist" in result.name.lower()
                or "stromal" in result.name.lower()
            )
        assert result.disease_id is not None
        # Synonyms may be empty, but the field itself must be populated
        assert result.synonyms is not None

    @pytest.mark.asyncio
    async def test_mydisease_by_mondo_id(self, client):
        """Test real MyDisease.info API with MONDO ID."""
        result = await client.get_disease_info("MONDO:0005105")  # melanoma

        assert result is not None
        assert result.disease_id == "MONDO:0005105"
        # The result should have mondo data
        assert result.mondo is not None
        assert result.mondo.get("mondo") == "MONDO:0005105"
        # Name field might come from different sources in the API
        if result.name:
            assert "melanoma" in result.name.lower()

    @pytest.mark.asyncio
    async def test_disease_synonyms_expansion(self, client):
        """Test disease synonym expansion."""
        synonyms = await client.get_disease_synonyms("lung cancer")

        # Check for None before calling len() so a missing result fails
        # with this message rather than a TypeError.
        assert (
            synonyms is not None and len(synonyms) >= 1
        ), "Expansion should at least include the original term"
        # May or may not include formal terms depending on API results;
        # only the original term is required.
        assert "lung cancer" in [s.lower() for s in synonyms]

    @pytest.mark.asyncio
    async def test_batch_genes(self, client):
        """Test batch gene retrieval."""
        # Test single gene retrieval as a workaround since batch requires special POST encoding
        # This validates the gene getter can handle multiple calls efficiently
        genes = ["TP53", "BRAF", "EGFR"]
        results = []

        for gene in genes:
            result = await client.get_gene_info(gene)
            if result:
                results.append(result)

        assert len(results) == 3
        gene_symbols = [r.symbol for r in results]
        assert "TP53" in gene_symbols
        assert "BRAF" in gene_symbols
        assert "EGFR" in gene_symbols

    @pytest.mark.asyncio
    async def test_invalid_gene(self, client):
        """Test handling of invalid gene."""
        result = await client.get_gene_info("INVALID_GENE_XYZ123")
        assert result is None

    @pytest.mark.asyncio
    async def test_invalid_disease(self, client):
        """Test handling of invalid disease."""
        result = await client.get_disease_info("INVALID_DISEASE_XYZ123")
        assert result is None

    @pytest.mark.asyncio
    async def test_mychem_aspirin(self, client):
        """Test real MyChem.info API with aspirin."""
        # Use DrugBank ID for reliable results
        result = await client.get_drug_info("DB00945")

        assert result is not None
        # API returns various forms - could be aspirin or acetylsalicylic acid
        assert result.name is not None
        assert result.drugbank_id == "DB00945"
        # Should have at least one identifier
        assert any([
            result.drugbank_id,
            result.chembl_id,
            result.chebi_id,
            result.pubchem_cid,
        ])

    @pytest.mark.asyncio
    async def test_mychem_imatinib(self, client):
        """Test real MyChem.info API with imatinib."""
        # Use DrugBank ID for reliable results
        result = await client.get_drug_info("DB00619")

        assert result is not None
        assert result.name is not None
        assert "imatinib" in result.name.lower()
        assert result.drugbank_id == "DB00619"
        # Should have at least one identifier
        assert any([
            result.drugbank_id,
            result.chembl_id,
            result.chebi_id,
            result.pubchem_cid,
        ])

    @pytest.mark.asyncio
    async def test_mychem_by_drugbank_id(self, client):
        """Test real MyChem.info API with DrugBank ID."""
        result = await client.get_drug_info("DB00945")  # Aspirin

        assert result is not None
        assert result.drugbank_id == "DB00945"
        # Could be Acetylsalicylic acid or similar
        assert result.name is not None

    @pytest.mark.asyncio
    async def test_invalid_drug(self, client):
        """Test handling of invalid drug."""
        result = await client.get_drug_info("INVALID_DRUG_XYZ123")
        assert result is None

    @pytest.mark.asyncio
    async def test_mychem_pembrolizumab(self, client):
        """Test real MyChem.info API with pembrolizumab."""
        result = await client.get_drug_info("pembrolizumab")

        assert result is not None
        assert result.name == "Pembrolizumab"
        assert result.drugbank_id == "DB09037"
        assert result.unii == "DPT0O3T46P"
        assert (
            result.description is not None
        ), "Pembrolizumab should have a description"
        assert "PD-1" in result.description
        assert "antibody" in result.description.lower()


@pytest.mark.integration
class TestGeneToolIntegration:
    """Checks for the ``get_gene`` tool function against real APIs."""

    @pytest.mark.asyncio
    async def test_gene_getter_tool(self):
        """Non-JSON TP53 output mentions the symbol, name and function."""
        from biomcp.genes.getter import get_gene

        rendered = await get_gene("TP53", output_json=False)

        # Case-sensitive fragments first
        for fragment in ("TP53", "tumor protein p53"):
            assert fragment in rendered

        lowered = rendered.lower()
        assert "tumor suppressor" in lowered
        # Link formatting is not fixed, so either keyword is accepted
        assert any(word in lowered for word in ("ncbi", "gene"))

    @pytest.mark.asyncio
    async def test_gene_getter_json(self):
        """JSON BRAF output parses and carries an NCBI Gene link entry."""
        import json

        from biomcp.genes.getter import get_gene

        payload = json.loads(await get_gene("BRAF", output_json=True))

        assert payload["symbol"] == "BRAF"
        assert "_links" in payload
        assert "NCBI Gene" in payload["_links"]


@pytest.mark.integration
class TestDiseaseToolIntegration:
    """Checks for the ``get_disease`` tool function against real APIs."""

    @pytest.mark.asyncio
    async def test_disease_getter_tool(self):
        """Non-JSON melanoma output names the disease and a MONDO id."""
        from biomcp.diseases.getter import get_disease

        rendered = await get_disease("melanoma", output_json=False)

        assert "melanoma" in rendered.lower()
        assert "MONDO:" in rendered
        # In markdown format, links are shown as "MONDO Browser:" not "_links"
        assert any(marker in rendered for marker in ("Browser:", "https://"))

    @pytest.mark.asyncio
    async def test_disease_getter_json(self):
        """JSON GIST output carries an id field, or the test is skipped."""
        import json

        from biomcp.diseases.getter import get_disease

        payload = json.loads(await get_disease("GIST", output_json=True))

        # API might return an error payload; pytest.skip raises, so the
        # assertions below only run for a successful lookup.
        if "error" in payload:
            pytest.skip("Disease not found in API")

        # Any one of the known identifier keys is sufficient
        assert any(key in payload for key in ("disease_id", "id", "_id"))
        assert "MONDO:" in str(payload)


@pytest.mark.integration
class TestDrugToolIntegration:
    """Checks for the ``get_drug`` tool function against real APIs."""

    @pytest.mark.asyncio
    async def test_drug_getter_tool(self):
        """Non-JSON output for DB00945 contains the expected sections."""
        from biomcp.drugs.getter import get_drug

        rendered = await get_drug("DB00945", output_json=False)  # Aspirin

        required_fragments = (
            "Drug:",
            "DrugBank ID",
            "DB00945",
            "External Links",
        )
        for fragment in required_fragments:
            assert fragment in rendered

    @pytest.mark.asyncio
    async def test_drug_getter_json(self):
        """JSON output for DB00619 has id fields and a database link."""
        import json

        from biomcp.drugs.getter import get_drug

        raw = await get_drug("DB00619", output_json=True)  # Imatinib
        payload = json.loads(raw)

        # Basic fields must be present before their values are inspected
        for field in ("drug_id", "drugbank_id"):
            assert field in payload
        assert payload["drugbank_id"] == "DB00619"
        assert "_links" in payload

        # Should have at least one database link
        known_databases = ["DrugBank", "ChEMBL", "PubChem", "ChEBI"]
        links = payload["_links"]
        assert any(database in links for database in known_databases)

```