genomoncology/biomcp # codebase.md

This is page 11 of 15. Use http://codebase.md/genomoncology/biomcp?lines=false&page={x} to view the full context.

# Directory Structure

```
├── .github
│   ├── actions
│   │   └── setup-python-env
│   │       └── action.yml
│   ├── dependabot.yml
│   └── workflows
│       ├── ci.yml
│       ├── deploy-docs.yml
│       ├── main.yml.disabled
│       ├── on-release-main.yml
│       └── validate-codecov-config.yml
├── .gitignore
├── .pre-commit-config.yaml
├── BIOMCP_DATA_FLOW.md
├── CHANGELOG.md
├── CNAME
├── codecov.yaml
├── docker-compose.yml
├── Dockerfile
├── docs
│   ├── apis
│   │   ├── error-codes.md
│   │   ├── overview.md
│   │   └── python-sdk.md
│   ├── assets
│   │   ├── biomcp-cursor-locations.png
│   │   ├── favicon.ico
│   │   ├── icon.png
│   │   ├── logo.png
│   │   ├── mcp_architecture.txt
│   │   └── remote-connection
│   │       ├── 00_connectors.png
│   │       ├── 01_add_custom_connector.png
│   │       ├── 02_connector_enabled.png
│   │       ├── 03_connect_to_biomcp.png
│   │       ├── 04_select_google_oauth.png
│   │       └── 05_success_connect.png
│   ├── backend-services-reference
│   │   ├── 01-overview.md
│   │   ├── 02-biothings-suite.md
│   │   ├── 03-cbioportal.md
│   │   ├── 04-clinicaltrials-gov.md
│   │   ├── 05-nci-cts-api.md
│   │   ├── 06-pubtator3.md
│   │   └── 07-alphagenome.md
│   ├── blog
│   │   ├── ai-assisted-clinical-trial-search-analysis.md
│   │   ├── images
│   │   │   ├── deep-researcher-video.png
│   │   │   ├── researcher-announce.png
│   │   │   ├── researcher-drop-down.png
│   │   │   ├── researcher-prompt.png
│   │   │   ├── trial-search-assistant.png
│   │   │   └── what_is_biomcp_thumbnail.png
│   │   └── researcher-persona-resource.md
│   ├── changelog.md
│   ├── CNAME
│   ├── concepts
│   │   ├── 01-what-is-biomcp.md
│   │   ├── 02-the-deep-researcher-persona.md
│   │   └── 03-sequential-thinking-with-the-think-tool.md
│   ├── developer-guides
│   │   ├── 01-server-deployment.md
│   │   ├── 02-contributing-and-testing.md
│   │   ├── 03-third-party-endpoints.md
│   │   ├── 04-transport-protocol.md
│   │   ├── 05-error-handling.md
│   │   ├── 06-http-client-and-caching.md
│   │   ├── 07-performance-optimizations.md
│   │   └── generate_endpoints.py
│   ├── faq-condensed.md
│   ├── FDA_SECURITY.md
│   ├── genomoncology.md
│   ├── getting-started
│   │   ├── 01-quickstart-cli.md
│   │   ├── 02-claude-desktop-integration.md
│   │   └── 03-authentication-and-api-keys.md
│   ├── how-to-guides
│   │   ├── 01-find-articles-and-cbioportal-data.md
│   │   ├── 02-find-trials-with-nci-and-biothings.md
│   │   ├── 03-get-comprehensive-variant-annotations.md
│   │   ├── 04-predict-variant-effects-with-alphagenome.md
│   │   ├── 05-logging-and-monitoring-with-bigquery.md
│   │   └── 06-search-nci-organizations-and-interventions.md
│   ├── index.md
│   ├── policies.md
│   ├── reference
│   │   ├── architecture-diagrams.md
│   │   ├── quick-architecture.md
│   │   ├── quick-reference.md
│   │   └── visual-architecture.md
│   ├── robots.txt
│   ├── stylesheets
│   │   ├── announcement.css
│   │   └── extra.css
│   ├── troubleshooting.md
│   ├── tutorials
│   │   ├── biothings-prompts.md
│   │   ├── claude-code-biomcp-alphagenome.md
│   │   ├── nci-prompts.md
│   │   ├── openfda-integration.md
│   │   ├── openfda-prompts.md
│   │   ├── pydantic-ai-integration.md
│   │   └── remote-connection.md
│   ├── user-guides
│   │   ├── 01-command-line-interface.md
│   │   ├── 02-mcp-tools-reference.md
│   │   └── 03-integrating-with-ides-and-clients.md
│   └── workflows
│       └── all-workflows.md
├── example_scripts
│   ├── mcp_integration.py
│   └── python_sdk.py
├── glama.json
├── LICENSE
├── lzyank.toml
├── Makefile
├── mkdocs.yml
├── package-lock.json
├── package.json
├── pyproject.toml
├── README.md
├── scripts
│   ├── check_docs_in_mkdocs.py
│   ├── check_http_imports.py
│   └── generate_endpoints_doc.py
├── smithery.yaml
├── src
│   └── biomcp
│       ├── __init__.py
│       ├── __main__.py
│       ├── articles
│       │   ├── __init__.py
│       │   ├── autocomplete.py
│       │   ├── fetch.py
│       │   ├── preprints.py
│       │   ├── search_optimized.py
│       │   ├── search.py
│       │   └── unified.py
│       ├── biomarkers
│       │   ├── __init__.py
│       │   └── search.py
│       ├── cbioportal_helper.py
│       ├── circuit_breaker.py
│       ├── cli
│       │   ├── __init__.py
│       │   ├── articles.py
│       │   ├── biomarkers.py
│       │   ├── diseases.py
│       │   ├── health.py
│       │   ├── interventions.py
│       │   ├── main.py
│       │   ├── openfda.py
│       │   ├── organizations.py
│       │   ├── server.py
│       │   ├── trials.py
│       │   └── variants.py
│       ├── connection_pool.py
│       ├── constants.py
│       ├── core.py
│       ├── diseases
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── domain_handlers.py
│       ├── drugs
│       │   ├── __init__.py
│       │   └── getter.py
│       ├── exceptions.py
│       ├── genes
│       │   ├── __init__.py
│       │   └── getter.py
│       ├── http_client_simple.py
│       ├── http_client.py
│       ├── individual_tools.py
│       ├── integrations
│       │   ├── __init__.py
│       │   ├── biothings_client.py
│       │   └── cts_api.py
│       ├── interventions
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── logging_filter.py
│       ├── metrics_handler.py
│       ├── metrics.py
│       ├── openfda
│       │   ├── __init__.py
│       │   ├── adverse_events_helpers.py
│       │   ├── adverse_events.py
│       │   ├── cache.py
│       │   ├── constants.py
│       │   ├── device_events_helpers.py
│       │   ├── device_events.py
│       │   ├── drug_approvals.py
│       │   ├── drug_labels_helpers.py
│       │   ├── drug_labels.py
│       │   ├── drug_recalls_helpers.py
│       │   ├── drug_recalls.py
│       │   ├── drug_shortages_detail_helpers.py
│       │   ├── drug_shortages_helpers.py
│       │   ├── drug_shortages.py
│       │   ├── exceptions.py
│       │   ├── input_validation.py
│       │   ├── rate_limiter.py
│       │   ├── utils.py
│       │   └── validation.py
│       ├── organizations
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   └── search.py
│       ├── parameter_parser.py
│       ├── prefetch.py
│       ├── query_parser.py
│       ├── query_router.py
│       ├── rate_limiter.py
│       ├── render.py
│       ├── request_batcher.py
│       ├── resources
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   ├── instructions.md
│       │   └── researcher.md
│       ├── retry.py
│       ├── router_handlers.py
│       ├── router.py
│       ├── shared_context.py
│       ├── thinking
│       │   ├── __init__.py
│       │   ├── sequential.py
│       │   └── session.py
│       ├── thinking_tool.py
│       ├── thinking_tracker.py
│       ├── trials
│       │   ├── __init__.py
│       │   ├── getter.py
│       │   ├── nci_getter.py
│       │   ├── nci_search.py
│       │   └── search.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── cancer_types_api.py
│       │   ├── cbio_http_adapter.py
│       │   ├── endpoint_registry.py
│       │   ├── gene_validator.py
│       │   ├── metrics.py
│       │   ├── mutation_filter.py
│       │   ├── query_utils.py
│       │   ├── rate_limiter.py
│       │   └── request_cache.py
│       ├── variants
│       │   ├── __init__.py
│       │   ├── alphagenome.py
│       │   ├── cancer_types.py
│       │   ├── cbio_external_client.py
│       │   ├── cbioportal_mutations.py
│       │   ├── cbioportal_search_helpers.py
│       │   ├── cbioportal_search.py
│       │   ├── constants.py
│       │   ├── external.py
│       │   ├── filters.py
│       │   ├── getter.py
│       │   ├── links.py
│       │   └── search.py
│       └── workers
│           ├── __init__.py
│           ├── worker_entry_stytch.js
│           ├── worker_entry.js
│           └── worker.py
├── tests
│   ├── bdd
│   │   ├── cli_help
│   │   │   ├── help.feature
│   │   │   └── test_help.py
│   │   ├── conftest.py
│   │   ├── features
│   │   │   └── alphagenome_integration.feature
│   │   ├── fetch_articles
│   │   │   ├── fetch.feature
│   │   │   └── test_fetch.py
│   │   ├── get_trials
│   │   │   ├── get.feature
│   │   │   └── test_get.py
│   │   ├── get_variants
│   │   │   ├── get.feature
│   │   │   └── test_get.py
│   │   ├── search_articles
│   │   │   ├── autocomplete.feature
│   │   │   ├── search.feature
│   │   │   ├── test_autocomplete.py
│   │   │   └── test_search.py
│   │   ├── search_trials
│   │   │   ├── search.feature
│   │   │   └── test_search.py
│   │   ├── search_variants
│   │   │   ├── search.feature
│   │   │   └── test_search.py
│   │   └── steps
│   │       └── test_alphagenome_steps.py
│   ├── config
│   │   └── test_smithery_config.py
│   ├── conftest.py
│   ├── data
│   │   ├── ct_gov
│   │   │   ├── clinical_trials_api_v2.yaml
│   │   │   ├── trials_NCT04280705.json
│   │   │   └── trials_NCT04280705.txt
│   │   ├── myvariant
│   │   │   ├── myvariant_api.yaml
│   │   │   ├── myvariant_field_descriptions.csv
│   │   │   ├── variants_full_braf_v600e.json
│   │   │   ├── variants_full_braf_v600e.txt
│   │   │   └── variants_part_braf_v600_multiple.json
│   │   ├── openfda
│   │   │   ├── drugsfda_detail.json
│   │   │   ├── drugsfda_search.json
│   │   │   ├── enforcement_detail.json
│   │   │   └── enforcement_search.json
│   │   └── pubtator
│   │       ├── pubtator_autocomplete.json
│   │       └── pubtator3_paper.txt
│   ├── integration
│   │   ├── test_openfda_integration.py
│   │   ├── test_preprints_integration.py
│   │   ├── test_simple.py
│   │   └── test_variants_integration.py
│   ├── tdd
│   │   ├── articles
│   │   │   ├── test_autocomplete.py
│   │   │   ├── test_cbioportal_integration.py
│   │   │   ├── test_fetch.py
│   │   │   ├── test_preprints.py
│   │   │   ├── test_search.py
│   │   │   └── test_unified.py
│   │   ├── conftest.py
│   │   ├── drugs
│   │   │   ├── __init__.py
│   │   │   └── test_drug_getter.py
│   │   ├── openfda
│   │   │   ├── __init__.py
│   │   │   ├── test_adverse_events.py
│   │   │   ├── test_device_events.py
│   │   │   ├── test_drug_approvals.py
│   │   │   ├── test_drug_labels.py
│   │   │   ├── test_drug_recalls.py
│   │   │   ├── test_drug_shortages.py
│   │   │   └── test_security.py
│   │   ├── test_biothings_integration_real.py
│   │   ├── test_biothings_integration.py
│   │   ├── test_circuit_breaker.py
│   │   ├── test_concurrent_requests.py
│   │   ├── test_connection_pool.py
│   │   ├── test_domain_handlers.py
│   │   ├── test_drug_approvals.py
│   │   ├── test_drug_recalls.py
│   │   ├── test_drug_shortages.py
│   │   ├── test_endpoint_documentation.py
│   │   ├── test_error_scenarios.py
│   │   ├── test_europe_pmc_fetch.py
│   │   ├── test_mcp_integration.py
│   │   ├── test_mcp_tools.py
│   │   ├── test_metrics.py
│   │   ├── test_nci_integration.py
│   │   ├── test_nci_mcp_tools.py
│   │   ├── test_network_policies.py
│   │   ├── test_offline_mode.py
│   │   ├── test_openfda_unified.py
│   │   ├── test_pten_r173_search.py
│   │   ├── test_render.py
│   │   ├── test_request_batcher.py.disabled
│   │   ├── test_retry.py
│   │   ├── test_router.py
│   │   ├── test_shared_context.py.disabled
│   │   ├── test_unified_biothings.py
│   │   ├── thinking
│   │   │   ├── __init__.py
│   │   │   └── test_sequential.py
│   │   ├── trials
│   │   │   ├── test_backward_compatibility.py
│   │   │   ├── test_getter.py
│   │   │   └── test_search.py
│   │   ├── utils
│   │   │   ├── test_gene_validator.py
│   │   │   ├── test_mutation_filter.py
│   │   │   ├── test_rate_limiter.py
│   │   │   └── test_request_cache.py
│   │   ├── variants
│   │   │   ├── constants.py
│   │   │   ├── test_alphagenome_api_key.py
│   │   │   ├── test_alphagenome_comprehensive.py
│   │   │   ├── test_alphagenome.py
│   │   │   ├── test_cbioportal_mutations.py
│   │   │   ├── test_cbioportal_search.py
│   │   │   ├── test_external_integration.py
│   │   │   ├── test_external.py
│   │   │   ├── test_extract_gene_aa_change.py
│   │   │   ├── test_filters.py
│   │   │   ├── test_getter.py
│   │   │   ├── test_links.py
│   │   │   └── test_search.py
│   │   └── workers
│   │       └── test_worker_sanitization.js
│   └── test_pydantic_ai_integration.py
├── THIRD_PARTY_ENDPOINTS.md
├── tox.ini
├── uv.lock
└── wrangler.toml
```

# Files

--------------------------------------------------------------------------------
/src/biomcp/utils/endpoint_registry.py:
--------------------------------------------------------------------------------

```python
"""Registry for tracking all external HTTP endpoints used by BioMCP."""

from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Any
from urllib.parse import urlparse


class EndpointCategory(str, Enum):
    """Categories of external endpoints.

    Because ``str`` is mixed in, every member is also a string instance and
    compares equal to its raw value (for example ``"clinical_trials"``).
    """

    # Used by the PubTator3, bioRxiv/medRxiv and Europe PMC registrations.
    BIOMEDICAL_LITERATURE = "biomedical_literature"
    # Used by the ClinicalTrials.gov and NCI Clinical Trials Search API registrations.
    CLINICAL_TRIALS = "clinical_trials"
    # Used by MyVariant.info, GDC and Ensembl, and also by the MyGene.info,
    # MyDisease.info and MyChem.info registrations.
    VARIANT_DATABASES = "variant_databases"
    # Used by the cBioPortal registrations.
    CANCER_GENOMICS = "cancer_genomics"
    # No registration visible in EndpointRegistry._initialize_known_endpoints
    # uses this category.
    HEALTH_MONITORING = "health_monitoring"
    # Used by the OpenFDA / FDA registrations.
    REGULATORY_DATA = "regulatory_data"


class DataType(str, Enum):
    """Types of data accessed from endpoints.

    Because ``str`` is mixed in, every member is also a string instance and
    compares equal to its raw value (for example ``"drug_labels"``).
    """

    # Literature search/fetch endpoints (PubTator3, preprint servers, Europe PMC).
    RESEARCH_ARTICLES = "research_articles"
    # Trial endpoints; also attached to the cBioPortal API root and studies entries.
    CLINICAL_TRIAL_DATA = "clinical_trial_data"
    # MyVariant.info and Ensembl variation entries.
    GENETIC_VARIANTS = "genetic_variants"
    # GDC and most cBioPortal entries.
    CANCER_MUTATIONS = "cancer_mutations"
    # Broader than the name suggests: besides MyGene.info and cBioPortal genes,
    # the MyDisease.info, MyChem.info and PubTator3 autocomplete entries use it too.
    GENE_ANNOTATIONS = "gene_annotations"
    # No registration visible in EndpointRegistry._initialize_known_endpoints
    # uses this data type.
    SERVICE_STATUS = "service_status"
    # FAERS drug events; the drug enforcement (recall) entry also uses it.
    ADVERSE_EVENTS = "adverse_events"
    # Drug labelling; the Drugs@FDA and drug shortages entries also use it.
    DRUG_LABELS = "drug_labels"
    # MAUDE medical device event entry.
    DEVICE_EVENTS = "device_events"


@dataclass
class EndpointInfo:
    """Metadata record describing a single external HTTP endpoint.

    Attributes:
        url: Full URL of the endpoint.
        category: Broad grouping the endpoint belongs to.
        data_types: Kinds of data retrieved from the endpoint. Each instance
            gets its own empty list when none is supplied.
        description: Human-readable summary; empty string by default.
        compliance_notes: Free-text compliance remarks; empty string by default.
        rate_limit: Textual description of the rate limit, or ``None``.
        authentication: Textual description of authentication, or ``None``.
    """

    # Required fields
    url: str
    category: EndpointCategory

    # Optional fields
    data_types: list[DataType] = field(default_factory=list)
    description: str = ""
    compliance_notes: str = ""
    rate_limit: str | None = None
    authentication: str | None = None

    @property
    def domain(self) -> str:
        """Return the network-location (``netloc``) component of ``url``.

        This is whatever ``urlparse`` reports as ``netloc``, so a port or
        user-info section present in the URL is returned as part of it.
        """
        return urlparse(self.url).netloc


class EndpointRegistry:
    """Registry for tracking all external endpoints."""

    def __init__(self):
        self._endpoints: dict[str, EndpointInfo] = {}
        self._initialize_known_endpoints()

    def _initialize_known_endpoints(self):
        """Register the built-in catalogue of external endpoints.

        Every row of the table below is turned into an ``EndpointInfo`` and
        passed to ``self.register`` in the order listed, so registration order
        follows table order.
        """
        # Short aliases for values that recur across many rows.
        literature = EndpointCategory.BIOMEDICAL_LITERATURE
        trials = EndpointCategory.CLINICAL_TRIALS
        variants = EndpointCategory.VARIANT_DATABASES
        genomics = EndpointCategory.CANCER_GENOMICS
        regulatory = EndpointCategory.REGULATORY_DATA

        unspecified = "Not specified"
        ncbi_note = "Public NIH/NCBI service, no PII transmitted"
        pubtator_rate = "20 requests/second"
        cbio_rate = "5 requests/second"
        biothings_rate = "10 requests/second"
        nci_auth = "Optional NCI_API_KEY for increased access"
        openfda_rate = "40 requests/minute (240 with API key)"
        openfda_auth = "Optional OPENFDA_API_KEY for increased rate limits"

        # Row layout:
        #   (key, url, category, data_types, description,
        #    compliance_notes, rate_limit, authentication)
        # authentication is None for endpoints that declare no authentication.
        # Each row carries its own list literal so no two entries share a list.
        catalogue = [
            # PubMed/PubTator3
            (
                "pubtator3_search",
                "https://www.ncbi.nlm.nih.gov/research/pubtator3-api/search/",
                literature,
                [DataType.RESEARCH_ARTICLES],
                "PubTator3 API for searching biomedical literature with entity annotations",
                ncbi_note,
                pubtator_rate,
                None,
            ),
            (
                "pubtator3_export",
                "https://www.ncbi.nlm.nih.gov/research/pubtator3-api/publications/export/biocjson",
                literature,
                [DataType.RESEARCH_ARTICLES],
                "PubTator3 API for fetching full article annotations in BioC-JSON format",
                ncbi_note,
                pubtator_rate,
                None,
            ),
            (
                "pubtator3_autocomplete",
                "https://www.ncbi.nlm.nih.gov/research/pubtator3-api/entity/autocomplete/",
                literature,
                [DataType.GENE_ANNOTATIONS],
                "PubTator3 API for entity name autocomplete suggestions",
                ncbi_note,
                pubtator_rate,
                None,
            ),
            # ClinicalTrials.gov
            (
                "clinicaltrials_search",
                "https://clinicaltrials.gov/api/v2/studies",
                trials,
                [DataType.CLINICAL_TRIAL_DATA],
                "ClinicalTrials.gov API v2 for searching clinical trials",
                "Public NIH service, may contain trial participant criteria",
                "10 requests/second",
                None,
            ),
            # MyVariant.info
            (
                "myvariant_query",
                "https://myvariant.info/v1/query",
                variants,
                [DataType.GENETIC_VARIANTS],
                "MyVariant.info API for querying genetic variants",
                "Public service aggregating variant databases, no patient data",
                "1000 requests/hour (anonymous)",
                None,
            ),
            (
                "myvariant_variant",
                "https://myvariant.info/v1/variant",
                variants,
                [DataType.GENETIC_VARIANTS],
                "MyVariant.info API for fetching specific variant details",
                "Public service aggregating variant databases, no patient data",
                "1000 requests/hour (anonymous)",
                None,
            ),
            # Preprint servers
            (
                "biorxiv_api",
                "https://api.biorxiv.org/details/biorxiv",
                literature,
                [DataType.RESEARCH_ARTICLES],
                "bioRxiv API for searching biology preprints",
                "Public preprint server, no PII transmitted",
                unspecified,
                None,
            ),
            (
                "medrxiv_api",
                "https://api.biorxiv.org/details/medrxiv",
                literature,
                [DataType.RESEARCH_ARTICLES],
                "medRxiv API for searching medical preprints",
                "Public preprint server, no PII transmitted",
                unspecified,
                None,
            ),
            (
                "europe_pmc",
                "https://www.ebi.ac.uk/europepmc/webservices/rest/search",
                literature,
                [DataType.RESEARCH_ARTICLES],
                "Europe PMC REST API for searching biomedical literature",
                "Public EMBL-EBI service, no PII transmitted",
                unspecified,
                None,
            ),
            # External variant sources
            (
                "gdc_ssms",
                "https://api.gdc.cancer.gov/ssms",
                variants,
                [DataType.CANCER_MUTATIONS],
                "NCI GDC API for somatic mutations",
                "Public NCI service, aggregate cancer genomics data",
                unspecified,
                None,
            ),
            (
                "gdc_ssm_occurrences",
                "https://api.gdc.cancer.gov/ssm_occurrences",
                variants,
                [DataType.CANCER_MUTATIONS],
                "NCI GDC API for mutation occurrences in cancer samples",
                "Public NCI service, aggregate cancer genomics data",
                unspecified,
                None,
            ),
            (
                "ensembl_variation",
                "https://rest.ensembl.org/variation/human",
                variants,
                [DataType.GENETIC_VARIANTS],
                "Ensembl REST API for human genetic variation data",
                "Public EMBL-EBI service, population genetics data",
                "15 requests/second",
                None,
            ),
            (
                "cbioportal_api",
                "https://www.cbioportal.org/api",
                genomics,
                [DataType.CANCER_MUTATIONS, DataType.CLINICAL_TRIAL_DATA],
                "cBioPortal API for cancer genomics data",
                "Public MSKCC/Dana-Farber service, aggregate cancer genomics",
                cbio_rate,
                "Optional API token for increased rate limits",
            ),
            # Specific cBioPortal endpoints
            (
                "cbioportal_genes",
                "https://www.cbioportal.org/api/genes",
                genomics,
                [DataType.GENE_ANNOTATIONS],
                "cBioPortal API for gene information",
                "Public MSKCC/Dana-Farber service, gene metadata",
                cbio_rate,
                None,
            ),
            (
                "cbioportal_cancer_types",
                "https://www.cbioportal.org/api/cancer-types",
                genomics,
                [DataType.CANCER_MUTATIONS],
                "cBioPortal API for cancer type hierarchy",
                "Public MSKCC/Dana-Farber service, cancer type metadata",
                cbio_rate,
                None,
            ),
            (
                "cbioportal_molecular_profiles",
                "https://www.cbioportal.org/api/molecular-profiles",
                genomics,
                [DataType.CANCER_MUTATIONS],
                "cBioPortal API for molecular profiles",
                "Public MSKCC/Dana-Farber service, study metadata",
                cbio_rate,
                None,
            ),
            (
                "cbioportal_mutations",
                "https://www.cbioportal.org/api/mutations",
                genomics,
                [DataType.CANCER_MUTATIONS],
                "cBioPortal API for mutation data",
                "Public MSKCC/Dana-Farber service, aggregate mutation data",
                cbio_rate,
                None,
            ),
            (
                "cbioportal_studies",
                "https://www.cbioportal.org/api/studies",
                genomics,
                [DataType.CLINICAL_TRIAL_DATA, DataType.CANCER_MUTATIONS],
                "cBioPortal API for cancer studies",
                "Public MSKCC/Dana-Farber service, study metadata",
                cbio_rate,
                None,
            ),
            # BioThings Suite APIs
            (
                "mygene_query",
                "https://mygene.info/v3/query",
                variants,
                [DataType.GENE_ANNOTATIONS],
                "MyGene.info API for querying gene information",
                "Public BioThings service, gene annotation data",
                biothings_rate,
                None,
            ),
            (
                "mygene_gene",
                "https://mygene.info/v3/gene",
                variants,
                [DataType.GENE_ANNOTATIONS],
                "MyGene.info API for fetching specific gene details",
                "Public BioThings service, gene annotation data",
                biothings_rate,
                None,
            ),
            (
                "mydisease_query",
                "https://mydisease.info/v1/query",
                variants,
                [DataType.GENE_ANNOTATIONS],
                "MyDisease.info API for querying disease information",
                "Public BioThings service, disease ontology data",
                biothings_rate,
                None,
            ),
            (
                "mydisease_disease",
                "https://mydisease.info/v1/disease",
                variants,
                [DataType.GENE_ANNOTATIONS],
                "MyDisease.info API for fetching specific disease details",
                "Public BioThings service, disease ontology data",
                biothings_rate,
                None,
            ),
            (
                "mychem_query",
                "https://mychem.info/v1/query",
                variants,
                [DataType.GENE_ANNOTATIONS],
                "MyChem.info API for querying drug/chemical information",
                "Public BioThings service, drug/chemical annotation data",
                biothings_rate,
                None,
            ),
            (
                "mychem_chem",
                "https://mychem.info/v1/chem",
                variants,
                [DataType.GENE_ANNOTATIONS],
                "MyChem.info API for fetching specific drug/chemical details",
                "Public BioThings service, drug/chemical annotation data",
                biothings_rate,
                None,
            ),
            # NCI Clinical Trials Search API
            (
                "nci_trials",
                "https://clinicaltrialsapi.cancer.gov/api/v2/trials",
                trials,
                [DataType.CLINICAL_TRIAL_DATA],
                "NCI Clinical Trials Search API for cancer trials",
                "Public NCI service, cancer trial data",
                unspecified,
                nci_auth,
            ),
            (
                "nci_organizations",
                "https://clinicaltrialsapi.cancer.gov/api/v2/organizations",
                trials,
                [DataType.CLINICAL_TRIAL_DATA],
                "NCI API for cancer research organizations",
                "Public NCI service, organization metadata",
                unspecified,
                nci_auth,
            ),
            (
                "nci_diseases",
                "https://clinicaltrialsapi.cancer.gov/api/v2/diseases",
                trials,
                [DataType.CLINICAL_TRIAL_DATA],
                "NCI API for cancer disease vocabulary",
                "Public NCI service, disease ontology",
                unspecified,
                nci_auth,
            ),
            (
                "nci_interventions",
                "https://clinicaltrialsapi.cancer.gov/api/v2/interventions",
                trials,
                [DataType.CLINICAL_TRIAL_DATA],
                "NCI API for cancer treatment interventions",
                "Public NCI service, intervention metadata",
                unspecified,
                nci_auth,
            ),
            (
                "nci_biomarkers",
                "https://clinicaltrialsapi.cancer.gov/api/v2/biomarkers",
                trials,
                [DataType.CLINICAL_TRIAL_DATA],
                "NCI API for biomarkers used in clinical trials",
                "Public NCI service, biomarker metadata",
                unspecified,
                nci_auth,
            ),
            # OpenFDA APIs
            (
                "openfda_drug_events",
                "https://api.fda.gov/drug/event.json",
                regulatory,
                [DataType.ADVERSE_EVENTS],
                "FDA Adverse Event Reporting System (FAERS) for drug safety data",
                "Public FDA service, voluntary adverse event reports, no PII",
                openfda_rate,
                openfda_auth,
            ),
            (
                "openfda_drug_labels",
                "https://api.fda.gov/drug/label.json",
                regulatory,
                [DataType.DRUG_LABELS],
                "FDA Structured Product Labeling (SPL) for drug prescribing information",
                "Public FDA service, official drug labeling data",
                openfda_rate,
                openfda_auth,
            ),
            (
                "openfda_device_events",
                "https://api.fda.gov/device/event.json",
                regulatory,
                [DataType.DEVICE_EVENTS],
                "FDA MAUDE database for medical device adverse events",
                "Public FDA service, device malfunction and adverse event reports",
                openfda_rate,
                openfda_auth,
            ),
            (
                "openfda_drugsfda",
                "https://api.fda.gov/drug/drugsfda.json",
                regulatory,
                [DataType.DRUG_LABELS],
                "FDA Drugs@FDA database for drug approval information",
                "Public FDA service, official drug approval records",
                openfda_rate,
                openfda_auth,
            ),
            (
                "openfda_drug_enforcement",
                "https://api.fda.gov/drug/enforcement.json",
                regulatory,
                [DataType.ADVERSE_EVENTS],
                "FDA Enforcement database for drug recall information",
                "Public FDA service, drug recall and enforcement actions",
                openfda_rate,
                openfda_auth,
            ),
            # Note: Drug shortage endpoint is not yet available via OpenFDA
            # Using placeholder for future migration when FDA provides official endpoint
            (
                "fda_drug_shortages",
                "https://www.fda.gov/media/169066/download",
                regulatory,
                [DataType.DRUG_LABELS],
                "FDA Drug Shortages database (cached locally)",
                "Public FDA service, drug shortage status information",
                "Cached with 24-hour TTL",
                "None required",
            ),
        ]

        # Build and register one EndpointInfo per row, preserving table order.
        for (
            key,
            url,
            category,
            data_types,
            description,
            compliance_notes,
            rate_limit,
            authentication,
        ) in catalogue:
            self.register(
                key,
                EndpointInfo(
                    url=url,
                    category=category,
                    data_types=data_types,
                    description=description,
                    compliance_notes=compliance_notes,
                    rate_limit=rate_limit,
                    authentication=authentication,
                ),
            )

    def register(self, key: str, endpoint: EndpointInfo):
        """Store ``endpoint`` in the registry under ``key``.

        Args:
            key: Identifier the endpoint is filed under; an entry already
                stored under the same key is replaced
            endpoint: Metadata object describing the endpoint
        """
        known_endpoints = self._endpoints
        known_endpoints[key] = endpoint

    def get_all_endpoints(self) -> dict[str, EndpointInfo]:
        """Return a copy of the full endpoint mapping.

        Returns:
            A new mapping of endpoint key to metadata, produced by calling
            ``copy()`` on the registry's own mapping
        """
        snapshot = self._endpoints.copy()
        return snapshot

    def get_endpoints_by_category(
        self, category: EndpointCategory
    ) -> dict[str, EndpointInfo]:
        """Select the registered endpoints whose category equals ``category``.

        Args:
            category: Category value each endpoint's ``category`` is compared to

        Returns:
            A new dictionary holding only the matching endpoints, in the
            same order they appear in the registry
        """
        matching: dict[str, EndpointInfo] = {}
        for endpoint_key, endpoint in self._endpoints.items():
            if endpoint.category == category:
                matching[endpoint_key] = endpoint
        return matching

    def get_unique_domains(self) -> set[str]:
        """Collect the distinct ``domain`` values of all registered endpoints.

        Returns:
            Set with one entry per distinct domain (e.g., 'api.ncbi.nlm.nih.gov')
        """
        domains: set[str] = set()
        for endpoint in self._endpoints.values():
            domains.add(endpoint.domain)
        return domains

    def generate_markdown_report(self) -> str:
        """Render the registry contents as a Markdown document.

        The report has a title and overview line (domain and endpoint
        counts), one section per non-empty category with its endpoints
        ordered by key, a per-domain summary table ordered by domain, and
        fixed "Compliance and Privacy" / "Network Control" sections.

        Returns:
            The whole report as one newline-joined string
        """
        domain_total = len(self.get_unique_domains())
        endpoint_total = len(self._endpoints)
        report: list[str] = [
            "# Third-Party Endpoints Used by BioMCP",
            "",
            "_This file is auto-generated from the endpoint registry._",
            "",
            "## Overview",
            "",
            f"BioMCP connects to {domain_total} external domains across {endpoint_total} endpoints.",
            "",
            "## Endpoints by Category",
            "",
        ]

        # Walk categories in EndpointCategory iteration order; categories
        # with no registered endpoint produce no section at all.
        for category in EndpointCategory:
            in_category = self.get_endpoints_by_category(category)
            if in_category:
                heading = category.value.replace("_", " ").title()
                report.extend([f"### {heading}", ""])

                for key in sorted(in_category):
                    info = in_category[key]
                    data_types = ", ".join(dt.value for dt in info.data_types)
                    rate_limit = info.rate_limit or "Not specified"
                    report.extend([
                        f"#### {key}",
                        "",
                        f"- **URL**: `{info.url}`",
                        f"- **Description**: {info.description}",
                        f"- **Data Types**: {data_types}",
                        f"- **Rate Limit**: {rate_limit}",
                    ])

                    # Optional bullets appear only when the field is truthy
                    if info.authentication:
                        report.append(
                            f"- **Authentication**: {info.authentication}"
                        )
                    if info.compliance_notes:
                        report.append(
                            f"- **Compliance Notes**: {info.compliance_notes}"
                        )

                    report.append("")

        # Domain summary table
        report.extend([
            "## Domain Summary",
            "",
            "| Domain               | Category              | Endpoints |",
            "| -------------------- | --------------------- | --------- |",
        ])

        # A domain is labelled with the category of the first endpoint seen
        # for it, even if later endpoints on that domain use another one.
        first_category: dict[str, str] = {}
        endpoint_counts: dict[str, int] = {}
        for info in self._endpoints.values():
            host = info.domain
            if host not in first_category:
                first_category[host] = info.category.value
            endpoint_counts[host] = endpoint_counts.get(host, 0) + 1

        for host in sorted(first_category):
            report.append(
                f"| {host} | {first_category[host]} | {endpoint_counts[host]} |"
            )

        # Static closing sections
        report.extend([
            "",
            "## Compliance and Privacy",
            "",
            "All endpoints accessed by BioMCP:",
            "",
            "- Use publicly available APIs",
            "- Do not transmit personally identifiable information (PII)",
            "- Access only aggregate or de-identified data",
            "- Comply with respective terms of service",
            "",
            "## Network Control",
            "",
            "For air-gapped or restricted environments, BioMCP supports:",
            "",
            "- Offline mode via `BIOMCP_OFFLINE=true` environment variable",
            "- Custom proxy configuration via standard HTTP(S)\\_PROXY variables",
            "- SSL certificate pinning for enhanced security",
            "",
        ])

        return "\n".join(report)

    def save_markdown_report(self, output_path: Path | None = None) -> Path:
        """Write the Markdown endpoint report to a file.

        Args:
            output_path: Destination file. When omitted, the report is
                written to ``THIRD_PARTY_ENDPOINTS.md`` in the directory two
                levels above the one containing this module.

        Returns:
            The path the report was written to
        """
        if output_path is None:
            output_path = (
                Path(__file__).parent.parent.parent
                / "THIRD_PARTY_ENDPOINTS.md"
            )

        # Pin the encoding: without it write_text() falls back to the
        # locale's preferred encoding, so the generated file could differ
        # between machines or fail on non-ASCII endpoint metadata.
        output_path.write_text(
            self.generate_markdown_report(), encoding="utf-8"
        )
        return output_path


# Shared registry instance, created when this module is imported;
# get_registry() returns this object.
_registry = EndpointRegistry()


def get_registry() -> EndpointRegistry:
    """Return the module-level endpoint registry.

    Returns:
        The ``EndpointRegistry`` object bound to the module-level
        ``_registry`` name
    """
    return _registry

```

--------------------------------------------------------------------------------
/tests/data/ct_gov/clinical_trials_api_v2.yaml:
--------------------------------------------------------------------------------

```yaml
# OpenAPI 3.0.3 description of the ClinicalTrials.gov REST API (API version 2.0.3).
# NOTE(review): this file lives under tests/data, so it is presumably a reference
# fixture copied from the upstream service — confirm before hand-editing.
openapi: "3.0.3"
info:
  title: "ClinicalTrials.gov REST API"
  description:
    "This API is made available to provide users meta data, statistics,\
    \ and the most recent version of the clinical trials available on ClinicalTrials.gov."
  version: "2.0.3"
tags:
  - name: "Studies"
    description: "Related to clinical trial studies"
  - name: "Stats"
    description: "Data statistics"
  - name: "Version"
    description: "Version info"
servers:
  - url: "https://clinicaltrials.gov/api/v2"
    description: "This server"
paths:
  /studies:
    get:
      summary: "Studies"
      description:
        "Returns data of studies matching query and filter parameters.\
        \ The studies are returned page by page.\nIf response contains `nextPageToken`,\
        \ use its value in `pageToken` to get next page.\nThe last page will not contain\
        \ `nextPageToken`. A page may have empty `studies` array.\nRequest for each\
        \ subsequent page **must** have the same parameters as for the first page,\
        \ except\n`countTotal`, `pageSize`, and `pageToken` parameters.\n\nIf neither\
        \ queries nor filters are set, all studies will be returned.\nIf any query\
        \ parameter contains only NCT IDs (comma- and/or space-separated), filters\
        \ are ignored.\n\n`query.*` parameters are in [Essie expression syntax](/find-studies/constructing-complex-search-queries).\n\
        Those parameters affect ranking of studies, if sorted by relevance. See `sort`\
        \ parameter for details.\n\n`filter.*` and `postFilter.*` parameters have\
        \ same effect as there is no aggregation calculation. \nBoth are available\
        \ just to simplify applying parameters from search request.\nBoth do not affect\
        \ ranking of studies.\n\nNote: When trying JSON format in your browser, do\
        \ not set too large `pageSize` parameter, if `fields` is\nunlimited. That\
        \ may return too much data for the browser to parse and render."
      tags:
        - "Studies"
      operationId: "listStudies"
      parameters:
        - name: "format"
          in: "query"
          description:
            "Must be one of the following:\n* `csv`- return CSV table with\
            \ one page of study data; first page will contain header with column names;\
            \ available fields are listed on [CSV Download](/data-api/about-api/csv-download)\
            \ page\n* `json`- return JSON with one page of study data; every study object\
            \ is placed in a separate line; `markup` type fields format depends on `markupFormat`\
            \ parameter"
          required: false
          schema:
            type: "string"
            enum:
              - "csv"
              - "json"
            default: "json"
        - name: "markupFormat"
          in: "query"
          description:
            "Format of `markup` type fields:\n* `markdown`- [markdown](https://spec.commonmark.org/0.28/)\
            \ format\n* `legacy`- compatible with classic PRS\n\nApplicable only to\
            \ `json` format."
          required: false
          schema:
            type: "string"
            enum:
              - "markdown"
              - "legacy"
            default: "markdown"
        - name: "query.cond"
          in: "query"
          description:
            "\"Conditions or disease\" query in [Essie expression syntax](/find-studies/constructing-complex-search-queries).\
            \ See \"ConditionSearch Area\" on [Search Areas](/data-api/about-api/search-areas#ConditionSearch)\
            \ for more details."
          required: false
          schema:
            type: "string"
          examples:
            example1:
              value: "lung cancer"
            example2:
              value: "(head OR neck) AND pain"
        - name: "query.term"
          in: "query"
          description:
            "\"Other terms\" query in [Essie expression syntax](/find-studies/constructing-complex-search-queries).\
            \ See \"BasicSearch Area\" on [Search Areas](/data-api/about-api/search-areas#BasicSearch)\
            \ for more details."
          required: false
          schema:
            type: "string"
          examples:
            example1:
              value: "AREA[LastUpdatePostDate]RANGE[2023-01-15,MAX]"
        - name: "query.locn"
          in: "query"
          description:
            "\"Location terms\" query in [Essie expression syntax](/find-studies/constructing-complex-search-queries).\
            \ See \"LocationSearch Area\" on [Search Areas](/data-api/about-api/search-areas#LocationSearch)\
            \ for more details."
          required: false
          schema:
            type: "string"
        - name: "query.titles"
          in: "query"
          description:
            "\"Title / acronym\" query in [Essie expression syntax](/find-studies/constructing-complex-search-queries).\
            \ See \"TitleSearch Area\" on [Search Areas](/data-api/about-api/search-areas#TitleSearch)\
            \ for more details."
          required: false
          schema:
            type: "string"
        - name: "query.intr"
          in: "query"
          description:
            "\"Intervention / treatment\" query in [Essie expression syntax](/find-studies/constructing-complex-search-queries).\
            \ See \"InterventionSearch Area\" on [Search Areas](/data-api/about-api/search-areas#InterventionSearch)\
            \ for more details."
          required: false
          schema:
            type: "string"
        - name: "query.outc"
          in: "query"
          description:
            "\"Outcome measure\" query in [Essie expression syntax](/find-studies/constructing-complex-search-queries).\
            \ See \"OutcomeSearch Area\" on [Search Areas](/data-api/about-api/search-areas#OutcomeSearch)\
            \ for more details."
          required: false
          schema:
            type: "string"
        - name: "query.spons"
          in: "query"
          description:
            "\"Sponsor / collaborator\" query in [Essie expression syntax](/find-studies/constructing-complex-search-queries).\
            \ See \"SponsorSearch Area\" on [Search Areas](/data-api/about-api/search-areas#SponsorSearch)\
            \ for more details."
          required: false
          schema:
            type: "string"
        - name: "query.lead"
          in: "query"
          description:
            "Searches in \"LeadSponsorName\" field. See [Study Data Structure](/data-api/about-api/study-data-structure#LeadSponsorName)\
            \ for more details. The query is in [Essie expression syntax](/find-studies/constructing-complex-search-queries)."
          required: false
          schema:
            type: "string"
        - name: "query.id"
          in: "query"
          description:
            "\"Study IDs\" query in [Essie expression syntax](/find-studies/constructing-complex-search-queries).\
            \ See \"IdSearch Area\" on [Search Areas](/data-api/about-api/search-areas#IdSearch)\
            \ for more details."
          required: false
          schema:
            type: "string"
        - name: "query.patient"
          in: "query"
          description:
            "See \"PatientSearch Area\" on [Search Areas](/data-api/about-api/search-areas#PatientSearch)\
            \ for more details."
          required: false
          schema:
            type: "string"
        - name: "filter.overallStatus"
          in: "query"
          style: "pipeDelimited"
          explode: false
          description: "Filter by comma- or pipe-separated list of statuses"
          required: false
          schema:
            type: "array"
            items:
              $ref: "#/components/schemas/Status"
          examples:
            example1:
              value:
                - "NOT_YET_RECRUITING"
                - "RECRUITING"
            example2:
              value:
                - "COMPLETED"
        - name: "filter.geo"
          in: "query"
          description:
            "Filter by geo-function. Currently only distance function is\
            \ supported.\nFormat: `distance(latitude,longitude,distance)`"
          required: false
          schema:
            type: "string"
            pattern:
              "^distance\\(-?\\d+(\\.\\d+)?,-?\\d+(\\.\\d+)?,\\d+(\\.\\d+)?(km|mi)?\\\
              )$"
          examples:
            example1:
              value: "distance(39.0035707,-77.1013313,50mi)"
        - name: "filter.ids"
          in: "query"
          style: "pipeDelimited"
          explode: false
          description:
            "Filter by comma- or pipe-separated list of NCT IDs (a.k.a. ClinicalTrials.gov\
            \ identifiers).\nThe provided IDs will be searched in [NCTId](data-api/about-api/study-data-structure#NCTId)\
            \ and\n[NCTIdAlias](data-api/about-api/study-data-structure#NCTIdAlias)\
            \ fields."
          required: false
          schema:
            type: "array"
            items:
              type: "string"
              pattern: "^[Nn][Cc][Tt]0*[1-9]\\d{0,7}$"
          examples:
            example1:
              value:
                - "NCT04852770"
                - "NCT01728545"
                - "NCT02109302"
        - name: "filter.advanced"
          in: "query"
          description: "Filter by query in [Essie expression syntax](/find-studies/constructing-complex-search-queries)"
          required: false
          schema:
            type: "string"
          examples:
            example1:
              value: "AREA[StartDate]2022"
            example2:
              value:
                "AREA[MinimumAge]RANGE[MIN, 16 years] AND AREA[MaximumAge]RANGE[16\
                \ years, MAX]"
        - name: "filter.synonyms"
          in: "query"
          style: "pipeDelimited"
          explode: false
          description:
            "Filter by comma- or pipe-separated list of `area`:`synonym_id`\
            \ pairs"
          required: false
          schema:
            type: "array"
            items:
              type: "string"
          examples:
            example1:
              value:
                - "ConditionSearch:1651367"
                - "BasicSearch:2013558"
        - name: "postFilter.overallStatus"
          in: "query"
          style: "pipeDelimited"
          explode: false
          description: "Filter by comma- or pipe-separated list of statuses"
          required: false
          schema:
            type: "array"
            items:
              $ref: "#/components/schemas/Status"
          examples:
            example1:
              value:
                - "NOT_YET_RECRUITING"
                - "RECRUITING"
            example2:
              value:
                - "COMPLETED"
        - name: "postFilter.geo"
          in: "query"
          description:
            "Filter by geo-function. Currently only distance function is\
            \ supported.\nFormat: `distance(latitude,longitude,distance)`"
          required: false
          schema:
            type: "string"
            pattern:
              "^distance\\(-?\\d+(\\.\\d+)?,-?\\d+(\\.\\d+)?,\\d+(\\.\\d+)?(km|mi)?\\\
              )$"
          examples:
            example1:
              value: "distance(39.0035707,-77.1013313,50mi)"
        - name: "postFilter.ids"
          in: "query"
          style: "pipeDelimited"
          explode: false
          description:
            "Filter by comma- or pipe-separated list of NCT IDs (a.k.a. ClinicalTrials.gov\
            \ identifiers).\nThe provided IDs will be searched in [NCTId](data-api/about-api/study-data-structure#NCTId)\
            \ and\n[NCTIdAlias](data-api/about-api/study-data-structure#NCTIdAlias)\
            \ fields."
          required: false
          schema:
            type: "array"
            items:
              type: "string"
              pattern: "^[Nn][Cc][Tt]0*[1-9]\\d{0,7}$"
          examples:
            example1:
              value:
                - "NCT04852770"
                - "NCT01728545"
                - "NCT02109302"
        - name: "postFilter.advanced"
          in: "query"
          description: "Filter by query in [Essie expression syntax](/find-studies/constructing-complex-search-queries)"
          required: false
          schema:
            type: "string"
          examples:
            example1:
              value: "AREA[StartDate]2022"
            example2:
              value:
                "AREA[MinimumAge]RANGE[MIN, 16 years] AND AREA[MaximumAge]RANGE[16\
                \ years, MAX]"
        - name: "postFilter.synonyms"
          in: "query"
          style: "pipeDelimited"
          explode: false
          description:
            "Filter by comma- or pipe-separated list of `area`:`synonym_id`\
            \ pairs"
          required: false
          schema:
            type: "array"
            items:
              type: "string"
          examples:
            example1:
              value:
                - "ConditionSearch:1651367"
                - "BasicSearch:2013558"
        - name: "aggFilters"
          in: "query"
          description:
            "Apply aggregation filters, aggregation counts will not be provided.\n\
            The value is comma- or pipe-separated list of pairs `filter_id`:`space-separated\
            \ list of option keys` for the checked options."
          required: false
          schema:
            type: "string"
          examples:
            example1:
              value: "results:with,status:com"
            example2:
              value: "status:not rec,sex:f,healthy:y"
        - name: "geoDecay"
          in: "query"
          description:
            "Set proximity factor by distance from `filter.geo` location\
            \ to the closest [LocationGeoPoint](/data-api/about-api/study-data-structure#LocationGeoPoint)\
            \ of a study.\nIgnored, if `filter.geo` parameter is not set or response\
            \ contains more than 10,000 studies."
          required: false
          schema:
            type: "string"
            pattern:
              "^func:(gauss|exp|linear),scale:(\\d+(\\.\\d+)?(km|mi)),offset:(\\\
              d+(\\.\\d+)?(km|mi)),decay:(\\d+(\\.\\d+)?)$"
            default: "func:exp,scale:300mi,offset:0mi,decay:0.5"
          examples:
            example1:
              value: "func:linear,scale:100km,offset:10km,decay:0.1"
            example2:
              value: "func:gauss,scale:500mi,offset:0mi,decay:0.3"
        - name: "fields"
          in: "query"
          style: "pipeDelimited"
          explode: false
          description:
            "If specified, must be non-empty comma- or pipe-separated list\
            \ of fields to return. If unspecified, all fields will be returned.\nOrder\
            \ of the fields does not matter.\n\nFor `csv` format, specify list of columns.\
            \ The column names are available on [CSV Download](/data-api/about-api/csv-download).\n\
            \nFor `json` format, every list item is either area name, piece name, field\
            \ name, or special name.\nIf a piece or a field is a branch node, all descendant\
            \ fields will be included.\nAll area names are available on [Search Areas](/data-api/about-api/search-areas),\n\
            the piece and field names — on [Data Structure](/data-api/about-api/study-data-structure)\
            \ and also can be retrieved at `/studies/metadata` endpoint.\nThere is a\
            \ special name, `@query`, which expands to all fields queried by search."
          required: false
          schema:
            type: "array"
            minItems: 1
            items:
              type: "string"
              # NOTE(review): `|` has the lowest regex precedence, so `^` anchors
              # only the field-name alternative and `$` only `(@query)`; the
              # field-name branch is therefore not end-anchored. The `sort`
              # parameter's pattern wraps its alternatives in an outer group.
              # Left unchanged in case this mirrors the upstream spec verbatim —
              # confirm before tightening.
              pattern: "^([a-zA-Z][a-zA-Z0-9\\-. ]*)|(@query)$"
          examples:
            example1:
              value:
                - "NCTId"
                - "BriefTitle"
                - "OverallStatus"
                - "HasResults"
            example2:
              value: "ProtocolSection"
        - name: "sort"
          in: "query"
          style: "pipeDelimited"
          explode: false
          description:
            "Comma- or pipe-separated list of sorting options of the studies.\
            \ The returning studies are not sorted by default for a performance reason.\n\
            Every list item contains a field/piece name and an optional sort direction\
            \ (`asc` for ascending or `desc` for descending)\nafter colon character.\n\
            \nAll piece and field names can be found on [Data Structure](/data-api/about-api/study-data-structure)\
            \ and also can be retrieved\nat `/studies/metadata` endpoint. Currently,\
            \ only date and numeric fields are allowed for sorting.\nThere is a special\
            \ \"field\" `@relevance` to sort by relevance to a search query.\n\nStudies\
            \ missing sort field are always last. Default sort direction:\n* Date field\
            \ - `desc`\n* Numeric field - `asc`\n* `@relevance` - `desc`"
          required: false
          schema:
            type: "array"
            maxItems: 2
            default: []
            items:
              type: "string"
              pattern: "^(([a-zA-Z][a-zA-Z0-9\\-. ]*)|(@relevance))(:(asc|desc))?$"
          examples:
            example1:
              value:
                - "@relevance"
            example2:
              value:
                - "LastUpdatePostDate"
            example3:
              value:
                - "EnrollmentCount:desc"
                - "NumArmGroups"
        - name: "countTotal"
          in: "query"
          description:
            "Count total number of studies in all pages and return `totalCount`\
            \ field with first page, if `true`.\nFor CSV, the result can be found in\
            \ `x-total-count` response header.\nThe parameter is ignored for the subsequent\
            \ pages."
          required: false
          schema:
            type: "boolean"
            default: false
        - name: "pageSize"
          in: "query"
          description:
            "Page size is maximum number of studies to return in response.\
            \ It does not have to be the same for every page.\nIf not specified or set\
            \ to 0, the default value will be used. It will be coerced down to  1,000,\
            \ if greater than that."
          required: false
          schema:
            type: "integer"
            format: "int32"
            minimum: 0
            default: 10
          examples:
            example1:
              value: 2
            example2:
              value: 100
        - name: "pageToken"
          in: "query"
          description:
            "Token to get next page. Set it to a `nextPageToken` value returned\
            \ with the previous page in JSON format.\nFor CSV, it can be found in `x-next-page-token`\
            \ response header.\nDo not specify it for first page."
          required: false
          schema:
            type: "string"
      responses:
        "200":
          description: "OK"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/PagedStudies"
              example:
                totalCount: 438897
                studies:
                  - protocolSection:
                      identificationModule:
                        nctId: "NCT03540771"
                        briefTitle:
                          "Introducing Palliative Care (PC) Within the Treatment\
                          \ of End Stage Liver Disease (ESLD)"
                      statusModule:
                        overallStatus: "RECRUITING"
                    hasResults: false
                  - protocolSection:
                      identificationModule:
                        nctId: "NCT03630471"
                        briefTitle:
                          "Effectiveness of a Problem-solving Intervention\
                          \ for Common Adolescent Mental Health Problems in India"
                      statusModule:
                        overallStatus: "COMPLETED"
                    hasResults: false
                  - protocolSection:
                      identificationModule:
                        nctId: "NCT00587795"
                        briefTitle:
                          "Orthopedic Study of the Aircast StabilAir Wrist\
                          \ Fracture Brace"
                      statusModule:
                        overallStatus: "TERMINATED"
                    hasResults: true
                nextPageToken: "abracadabra"
        "400":
          description: "Bad Request"
          content:
            text/plain:
              schema:
                $ref: "#/components/schemas/errorMessage"
  /studies/{nctId}:
    get:
      summary: "Single Study"
      description: "Returns data of a single study."
      tags:
        - "Studies"
      operationId: "fetchStudy"
      parameters:
        - name: "nctId"
          in: "path"
          description:
            "NCT Number of a study. If found in [NCTIdAlias](data-api/about-api/study-data-structure#NCTIdAlias)\
            \ field,\n301 HTTP redirect to the actual study will be returned."
          required: true
          schema:
            type: "string"
            pattern: "^[Nn][Cc][Tt]0*[1-9]\\d{0,7}$"
          examples:
            example1:
              value: "NCT00841061"
            example2:
              value: "NCT04000165"
        - name: "format"
          in: "query"
          description:
            "Must be one of the following:\n* `csv`- return CSV table; available\
            \ fields are listed on [CSV Download](/data-api/about-api/csv-download)\n\
            * `json`- return JSON object; format of `markup` fields depends on `markupFormat`\
            \ parameter\n* `json.zip`- put JSON object into a .json file and download\
            \ it as zip archive; field values of type `markup` are in [markdown](https://spec.commonmark.org/0.28/)\
            \ format\n* `fhir.json` - return FHIR JSON; fields are not customizable;\
            \ see [Access Data in FHIR](/data-api/fhir)\n* `ris`- return RIS record;\
            \ available tags are listed on [RIS Download](/data-api/about-api/ris-download)"
          required: false
          schema:
            type: "string"
            enum:
              - "csv"
              - "json"
              - "json.zip"
              - "fhir.json"
              - "ris"
            default: "json"
        - name: "markupFormat"
          in: "query"
          description:
            "Format of `markup` type fields:\n* `markdown`- [markdown](https://spec.commonmark.org/0.28/)\
            \ format\n* `legacy`- compatible with classic PRS\n\nApplicable only to\
            \ `json` format."
          required: false
          schema:
            type: "string"
            enum:
              - "markdown"
              - "legacy"
            default: "markdown"
        - name: "fields"
          in: "query"
          style: "pipeDelimited"
          explode: false
          description:
            "If specified, must be non-empty comma- or pipe-separated list\
            \ of fields to return. If unspecified, all fields will be returned.\nOrder\
            \ of the fields does not matter.\n\nFor `csv` format, specify list of columns.\
            \ The column names are available on [CSV Download](/data-api/about-api/csv-download).\n\
            \nFor `json` and `json.zip` formats, every list item is either area name,\
            \ piece name, or field name.\nIf a piece or a field is a branch node, all\
            \ descendant fields will be included.\nAll area names are available on [Search\
            \ Areas](/data-api/about-api/search-areas),\nthe piece and field names -\
            \ on [Data Structure](/data-api/about-api/study-data-structure) and also\
            \ can be retrieved at `/studies/metadata` endpoint.\n\nFor `fhir.json` format,\
            \ all available fields are returned and this parameter must be unspecified.\n\
            \nFor `ris` format, specify list of tags. The tag names are available on\
            \ [RIS Download](/data-api/about-api/ris-download)."
          required: false
          schema:
            type: "array"
            minItems: 1
            items:
              type: "string"
              pattern: "^[a-zA-Z][a-zA-Z0-9\\-. ]*$"
          examples:
            example1:
              value:
                - "NCTId"
                - "BriefTitle"
                - "Reference"
            example2:
              value:
                - "ConditionsModule"
                - "EligibilityModule"
      responses:
        "200":
          description: "OK"
          content:
            text/csv:
              schema:
                $ref: "#/components/schemas/StudiesCsv"
            application/json:
              schema:
                $ref: "#/components/schemas/Study"
            application/zip:
              schema:
                $ref: "#/components/schemas/StudiesZip"
            application/fhir+json:
              schema:
                $ref: "#/components/schemas/StudyFhir"
        "301":
          description: "Moved Permanently"
          content: {}
        "400":
          description: "Bad Request"
          content:
            text/plain:
              schema:
                $ref: "#/components/schemas/errorMessage"
        "404":
          description: "Not Found"
          content:
            text/plain:
              schema:
                $ref: "#/components/schemas/errorMessage"
  /studies/metadata:
    get:
      summary: "Data Model Fields"
      description: "Returns study data model fields."
      tags:
        - "Studies"
      operationId: "studiesMetadata"
      parameters:
        - name: "includeIndexedOnly"
          in: "query"
          description: "Include indexed-only fields, if `true`"
          required: false
          schema:
            type: "boolean"
            default: false
        - name: "includeHistoricOnly"
          in: "query"
          description: "Include fields available only in historic data, if `true`"
          required: false
          schema:
            type: "boolean"
            default: false
      responses:
        "200":
          description: "OK"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/FieldNodeList"
        "400":
          description: "Bad Request"
          content:
            text/plain:
              schema:
                $ref: "#/components/schemas/errorMessage"

```

--------------------------------------------------------------------------------
/src/biomcp/trials/search.py:
--------------------------------------------------------------------------------

```python
import json
import logging
from ssl import TLSVersion
from typing import Annotated

from pydantic import BaseModel, Field, field_validator, model_validator

from .. import StrEnum, ensure_list, http_client, render
from ..constants import CLINICAL_TRIALS_BASE_URL
from ..integrations import BioThingsClient

logger = logging.getLogger(__name__)


class SortOrder(StrEnum):
    # Result orderings a caller may request. Each member is a key of
    # CTGOV_SORT_MAPPING, which holds the sort expression sent to the API.
    RELEVANCE = "RELEVANCE"
    LAST_UPDATE = "LAST_UPDATE"
    ENROLLMENT = "ENROLLMENT"
    START_DATE = "START_DATE"
    COMPLETION_DATE = "COMPLETION_DATE"
    SUBMITTED_DATE = "SUBMITTED_DATE"


class TrialPhase(StrEnum):
    # Trial phase filter values; keys of CTGOV_PHASE_MAPPING, where each member
    # maps to a one-element tuple containing the same spelling.
    EARLY_PHASE1 = "EARLY_PHASE1"
    PHASE1 = "PHASE1"
    PHASE2 = "PHASE2"
    PHASE3 = "PHASE3"
    PHASE4 = "PHASE4"
    NOT_APPLICABLE = "NOT_APPLICABLE"


class RecruitingStatus(StrEnum):
    # Coarse recruitment buckets. CTGOV_RECRUITING_STATUS_MAPPING expands OPEN and
    # CLOSED into tuples of concrete status names; ANY maps to None (no status filter).
    OPEN = "OPEN"
    CLOSED = "CLOSED"
    ANY = "ANY"


class StudyType(StrEnum):
    # Study type filter values; keys of CTGOV_STUDY_TYPE_MAPPING, which supplies
    # the display-cased value used in the AREA[StudyType] filter.
    INTERVENTIONAL = "INTERVENTIONAL"
    OBSERVATIONAL = "OBSERVATIONAL"
    EXPANDED_ACCESS = "EXPANDED_ACCESS"
    OTHER = "OTHER"


class InterventionType(StrEnum):
    # Intervention type filter values; keys of CTGOV_INTERVENTION_TYPE_MAPPING,
    # which supplies the value used in the AREA[InterventionType] filter.
    DRUG = "DRUG"
    DEVICE = "DEVICE"
    BIOLOGICAL = "BIOLOGICAL"
    PROCEDURE = "PROCEDURE"
    RADIATION = "RADIATION"
    BEHAVIORAL = "BEHAVIORAL"
    GENETIC = "GENETIC"
    DIETARY = "DIETARY"
    DIAGNOSTIC_TEST = "DIAGNOSTIC_TEST"
    OTHER = "OTHER"


class SponsorType(StrEnum):
    # Sponsor type filter values; keys of CTGOV_SPONSOR_TYPE_MAPPING, which
    # supplies the value used in the AREA[SponsorType] filter.
    INDUSTRY = "INDUSTRY"
    GOVERNMENT = "GOVERNMENT"
    ACADEMIC = "ACADEMIC"
    OTHER = "OTHER"


class StudyDesign(StrEnum):
    # Study design filter values; keys of CTGOV_STUDY_DESIGN_MAPPING, which
    # supplies the value used in the AREA[StudyDesign] filter.
    RANDOMIZED = "RANDOMIZED"
    NON_RANDOMIZED = "NON_RANDOMIZED"
    OBSERVATIONAL = "OBSERVATIONAL"


class DateField(StrEnum):
    # Which date a min/max date range applies to. Each member is a key of
    # CTGOV_DATE_FIELD_MAPPING, which gives the field name placed inside AREA[...].
    LAST_UPDATE = "LAST_UPDATE"
    STUDY_START = "STUDY_START"
    PRIMARY_COMPLETION = "PRIMARY_COMPLETION"
    OUTCOME_POSTING = "OUTCOME_POSTING"
    COMPLETION = "COMPLETION"
    FIRST_POSTING = "FIRST_POSTING"
    SUBMITTED_DATE = "SUBMITTED_DATE"


class PrimaryPurpose(StrEnum):
    # Primary purpose filter values; keys of CTGOV_PRIMARY_PURPOSE_MAPPING, which
    # supplies the value used in the AREA[DesignPrimaryPurpose] filter.
    TREATMENT = "TREATMENT"
    PREVENTION = "PREVENTION"
    DIAGNOSTIC = "DIAGNOSTIC"
    SUPPORTIVE_CARE = "SUPPORTIVE_CARE"
    SCREENING = "SCREENING"
    HEALTH_SERVICES = "HEALTH_SERVICES"
    BASIC_SCIENCE = "BASIC_SCIENCE"
    DEVICE_FEASIBILITY = "DEVICE_FEASIBILITY"
    OTHER = "OTHER"


class AgeGroup(StrEnum):
    # Age group filter values; keys of CTGOV_AGE_GROUP_MAPPING. ALL maps to None
    # there, and convert_query adds no AREA[StdAge] filter for it.
    CHILD = "CHILD"
    ADULT = "ADULT"
    SENIOR = "SENIOR"
    ALL = "ALL"


class LineOfTherapy(StrEnum):
    # Line-of-therapy filter values. Unlike the other enums here, the values are
    # short codes ("1L", "2L", "3L+"); each member keys a list of quoted search
    # phrases in LINE_OF_THERAPY_PATTERNS.
    FIRST_LINE = "1L"
    SECOND_LINE = "2L"
    THIRD_LINE_PLUS = "3L+"


# Sort expression sent as the `sort` request parameter for each SortOrder.
# Every ordering other than relevance is descending (":desc").
# NOTE(review): START_DATE uses "StudyStartDate" here while CTGOV_DATE_FIELD_MAPPING
# below uses "StartDate" for the study start date - confirm both names are
# accepted by the API.
CTGOV_SORT_MAPPING = {
    SortOrder.RELEVANCE: "@relevance",
    SortOrder.LAST_UPDATE: "LastUpdatePostDate:desc",
    SortOrder.ENROLLMENT: "EnrollmentCount:desc",
    SortOrder.START_DATE: "StudyStartDate:desc",
    SortOrder.COMPLETION_DATE: "PrimaryCompletionDate:desc",
    SortOrder.SUBMITTED_DATE: "StudyFirstSubmitDate:desc",
}

# The enum -> tuple mappings below are consumed by convert_query, which takes
# the first element of the tuple as the value placed after AREA[<field>].
CTGOV_PHASE_MAPPING = {
    TrialPhase.EARLY_PHASE1: ("EARLY_PHASE1",),
    TrialPhase.PHASE1: ("PHASE1",),
    TrialPhase.PHASE2: ("PHASE2",),
    TrialPhase.PHASE3: ("PHASE3",),
    TrialPhase.PHASE4: ("PHASE4",),
    TrialPhase.NOT_APPLICABLE: ("NOT_APPLICABLE",),
}

# Concrete status names behind RecruitingStatus.OPEN. convert_query joins them
# with commas into `filter.overallStatus`; this is also the default filter.
OPEN_STATUSES = (
    "AVAILABLE",
    "ENROLLING_BY_INVITATION",
    "NOT_YET_RECRUITING",
    "RECRUITING",
)
# Concrete status names behind RecruitingStatus.CLOSED.
CLOSED_STATUSES = (
    "ACTIVE_NOT_RECRUITING",
    "COMPLETED",
    "SUSPENDED",
    "TERMINATED",
    "WITHDRAWN",
)
# ANY maps to None: convert_query then sets no `filter.overallStatus` at all.
CTGOV_RECRUITING_STATUS_MAPPING = {
    RecruitingStatus.OPEN: OPEN_STATUSES,
    RecruitingStatus.CLOSED: CLOSED_STATUSES,
    RecruitingStatus.ANY: None,
}

# Value used for AREA[StudyType].
CTGOV_STUDY_TYPE_MAPPING = {
    StudyType.INTERVENTIONAL: ("Interventional",),
    StudyType.OBSERVATIONAL: ("Observational",),
    StudyType.EXPANDED_ACCESS: ("Expanded Access",),
    StudyType.OTHER: ("Other",),
}

# Value used for AREA[InterventionType].
CTGOV_INTERVENTION_TYPE_MAPPING = {
    InterventionType.DRUG: ("Drug",),
    InterventionType.DEVICE: ("Device",),
    InterventionType.BIOLOGICAL: ("Biological",),
    InterventionType.PROCEDURE: ("Procedure",),
    InterventionType.RADIATION: ("Radiation",),
    InterventionType.BEHAVIORAL: ("Behavioral",),
    InterventionType.GENETIC: ("Genetic",),
    InterventionType.DIETARY: ("Dietary",),
    InterventionType.DIAGNOSTIC_TEST: ("Diagnostic Test",),
    InterventionType.OTHER: ("Other",),
}

# Value used for AREA[SponsorType].
CTGOV_SPONSOR_TYPE_MAPPING = {
    SponsorType.INDUSTRY: ("Industry",),
    SponsorType.GOVERNMENT: ("Government",),
    SponsorType.ACADEMIC: ("Academic",),
    SponsorType.OTHER: ("Other",),
}

# Value used for AREA[StudyDesign].
CTGOV_STUDY_DESIGN_MAPPING = {
    StudyDesign.RANDOMIZED: ("Randomized",),
    StudyDesign.NON_RANDOMIZED: ("Non-Randomized",),
    StudyDesign.OBSERVATIONAL: ("Observational",),
}

# Field name placed inside AREA[...] for a date RANGE filter.
CTGOV_DATE_FIELD_MAPPING = {
    DateField.LAST_UPDATE: "LastUpdatePostDate",
    DateField.STUDY_START: "StartDate",
    DateField.PRIMARY_COMPLETION: "PrimaryCompletionDate",
    DateField.OUTCOME_POSTING: "ResultsFirstPostDate",
    DateField.COMPLETION: "CompletionDate",
    DateField.FIRST_POSTING: "StudyFirstPostDate",
    DateField.SUBMITTED_DATE: "StudyFirstSubmitDate",
}

# Value used for AREA[DesignPrimaryPurpose].
CTGOV_PRIMARY_PURPOSE_MAPPING = {
    PrimaryPurpose.TREATMENT: ("Treatment",),
    PrimaryPurpose.PREVENTION: ("Prevention",),
    PrimaryPurpose.DIAGNOSTIC: ("Diagnostic",),
    PrimaryPurpose.SUPPORTIVE_CARE: ("Supportive Care",),
    PrimaryPurpose.SCREENING: ("Screening",),
    PrimaryPurpose.HEALTH_SERVICES: ("Health Services",),
    PrimaryPurpose.BASIC_SCIENCE: ("Basic Science",),
    PrimaryPurpose.DEVICE_FEASIBILITY: ("Device Feasibility",),
    PrimaryPurpose.OTHER: ("Other",),
}

# Value used for AREA[StdAge]. ALL has no mapped value; convert_query skips the
# age filter entirely for AgeGroup.ALL.
CTGOV_AGE_GROUP_MAPPING = {
    AgeGroup.CHILD: ("Child",),
    AgeGroup.ADULT: ("Adult",),
    AgeGroup.SENIOR: ("Older Adult",),
    AgeGroup.ALL: None,
}

# Line of therapy patterns for EligibilityCriteria search.
# Each phrase carries its own double quotes; the phrases for one line of therapy
# are joined with " OR " inside a single AREA[EligibilityCriteria](...) block.
LINE_OF_THERAPY_PATTERNS = {
    LineOfTherapy.FIRST_LINE: [
        '"first line"',
        '"first-line"',
        '"1st line"',
        '"frontline"',
        '"treatment naive"',
        '"previously untreated"',
    ],
    LineOfTherapy.SECOND_LINE: [
        '"second line"',
        '"second-line"',
        '"2nd line"',
        '"one prior line"',
        '"1 prior line"',
    ],
    LineOfTherapy.THIRD_LINE_PLUS: [
        '"third line"',
        '"third-line"',
        '"3rd line"',
        '"≥2 prior"',
        '"at least 2 prior"',
        '"heavily pretreated"',
    ],
}

# Always sent as the `format` and `markupFormat` request parameters.
DEFAULT_FORMAT = "csv"
DEFAULT_MARKUP = "markdown"

# Columns requested by default (the `fields` parameter) when the caller does
# not supply return_fields.
SEARCH_FIELDS = [
    "NCT Number",
    "Study Title",
    "Study URL",
    "Study Status",
    "Brief Summary",
    "Study Results",
    "Conditions",
    "Interventions",
    "Phases",
    "Enrollment",
    "Study Type",
    "Study Design",
    "Start Date",
    "Completion Date",
]

# SEARCH_FIELDS pre-joined into the single-element list shape used for params.
SEARCH_FIELDS_PARAM = [",".join(SEARCH_FIELDS)]


class TrialQuery(BaseModel):
    """Parameters for querying clinical trial data from ClinicalTrials.gov.

    Every field is optional. List-valued fields also accept a single string,
    which is converted to a list by ``convert_list_fields`` before validation.
    ``recruiting_status`` additionally accepts a handful of lowercase aliases
    (see ``normalize_recruiting_status``).
    """

    conditions: list[str] | None = Field(
        default=None,
        description="List of condition terms.",
    )
    terms: list[str] | None = Field(
        default=None,
        description="General search terms that don't fit specific categories.",
    )
    interventions: list[str] | None = Field(
        default=None,
        description="Intervention names.",
    )
    recruiting_status: RecruitingStatus | None = Field(
        default=None,
        description="Study recruitment status. Use 'OPEN' for actively recruiting trials, 'CLOSED' for completed/terminated trials, or 'ANY' for all trials. Common aliases like 'recruiting', 'active', 'enrolling' map to 'OPEN'.",
    )
    study_type: StudyType | None = Field(
        default=None,
        description="Type of study.",
    )
    nct_ids: list[str] | None = Field(
        default=None,
        description="Clinical trial NCT IDs",
    )
    lat: float | None = Field(
        default=None,
        description="Latitude for location search. AI agents should geocode city/location names (e.g., 'Cleveland' → 41.4993, -81.6944) before using this parameter.",
    )
    long: float | None = Field(
        default=None,
        description="Longitude for location search. AI agents should geocode city/location names (e.g., 'Cleveland' → 41.4993, -81.6944) before using this parameter.",
    )
    distance: int | None = Field(
        default=None,
        description="Distance from lat/long in miles (default: 50 miles if lat/long provided but distance not specified)",
    )
    min_date: str | None = Field(
        default=None,
        description="Minimum date for filtering",
    )
    max_date: str | None = Field(
        default=None,
        description="Maximum date for filtering",
    )
    date_field: DateField | None = Field(
        default=None,
        description="Date field to filter on",
    )
    phase: TrialPhase | None = Field(
        default=None,
        description="Trial phase filter",
    )
    age_group: AgeGroup | None = Field(
        default=None,
        description="Age group filter",
    )
    primary_purpose: PrimaryPurpose | None = Field(
        default=None,
        description="Primary purpose of the trial",
    )
    intervention_type: InterventionType | None = Field(
        default=None,
        description="Type of intervention",
    )
    sponsor_type: SponsorType | None = Field(
        default=None,
        description="Type of sponsor",
    )
    study_design: StudyDesign | None = Field(
        default=None,
        description="Study design",
    )
    sort: SortOrder | None = Field(
        default=None,
        description="Sort order for results",
    )
    next_page_hash: str | None = Field(
        default=None,
        description="Token to retrieve the next page of results",
    )
    # New eligibility-focused fields
    prior_therapies: list[str] | None = Field(
        default=None,
        description="Prior therapies to search for in eligibility criteria",
    )
    progression_on: list[str] | None = Field(
        default=None,
        description="Therapies the patient has progressed on",
    )
    required_mutations: list[str] | None = Field(
        default=None,
        description="Required mutations in eligibility criteria",
    )
    excluded_mutations: list[str] | None = Field(
        default=None,
        description="Excluded mutations in eligibility criteria",
    )
    biomarker_expression: dict[str, str] | None = Field(
        default=None,
        description="Biomarker expression requirements (e.g., {'PD-L1': '≥50%'})",
    )
    line_of_therapy: LineOfTherapy | None = Field(
        default=None,
        description="Line of therapy filter",
    )
    allow_brain_mets: bool | None = Field(
        default=None,
        description="Whether to allow trials that accept brain metastases",
    )
    return_fields: list[str] | None = Field(
        default=None,
        description="Specific fields to return in the response",
    )
    page_size: int | None = Field(
        default=None,
        description="Number of results per page",
        ge=1,
        le=1000,
    )
    expand_synonyms: bool = Field(
        default=True,
        description="Expand condition searches with disease synonyms from MyDisease.info",
    )

    @field_validator("recruiting_status", mode="before")
    @classmethod
    def normalize_recruiting_status(cls, v):
        """Normalize common recruiting status aliases to enum values.

        String input is lowercased and looked up in the alias table; a string
        with no alias entry, and any non-string input, is returned unchanged
        for the enum validation that follows.
        """
        if isinstance(v, str):
            v_lower = v.lower()
            # Map common aliases
            alias_map = {
                "recruiting": "OPEN",
                "active": "OPEN",
                "enrolling": "OPEN",
                "closed": "CLOSED",
                "completed": "CLOSED",
                "terminated": "CLOSED",
            }
            return alias_map.get(v_lower, v)
        return v

    # Field validators for list fields
    @model_validator(mode="before")
    @classmethod
    def convert_list_fields(cls, data):
        """Convert string values to lists for list fields.

        Runs before field validation. Only dict input is touched; anything
        else is returned as received. The conversion is done on a shallow copy
        so that a dict handed in by the caller (for example through
        ``model_validate``) is not modified as a side effect of validation.
        """
        if isinstance(data, dict):
            # Work on a copy: the caller still owns the original mapping.
            data = dict(data)
            for field_name in [
                "conditions",
                "terms",
                "interventions",
                "nct_ids",
                "prior_therapies",
                "progression_on",
                "required_mutations",
                "excluded_mutations",
                "return_fields",
            ]:
                if field_name in data and data[field_name] is not None:
                    data[field_name] = ensure_list(
                        data[field_name], split_strings=True
                    )
        return data


def _inject_ids(
    params: dict[str, list[str]], ids: list[str], has_other_filters: bool
) -> None:
    """Inject NCT IDs into params using intersection or id-only semantics.

    Args:
        params: The parameter dictionary to modify
        ids: List of NCT IDs to inject
        has_other_filters: Whether other filters are present
    """
    ids_csv = ",".join(ids)
    if has_other_filters:  # intersection path
        params["filter.ids"] = [ids_csv]
    elif len(ids_csv) < 1800:  # pure-ID & small
        params["query.id"] = [ids_csv]
    else:  # pure-ID & large
        params["filter.ids"] = [ids_csv]


def _build_prior_therapy_essie(therapies: list[str]) -> list[str]:
    """Build Essie fragments for prior therapy search."""
    fragments = []
    for therapy in therapies:
        if therapy.strip():  # Skip empty strings
            fragment = f'AREA[EligibilityCriteria]("{therapy}" AND (prior OR previous OR received))'
            fragments.append(fragment)
    return fragments


def _build_progression_essie(therapies: list[str]) -> list[str]:
    """Build Essie fragments for progression on therapy search."""
    fragments = []
    for therapy in therapies:
        if therapy.strip():  # Skip empty strings
            fragment = f'AREA[EligibilityCriteria]("{therapy}" AND (progression OR resistant OR refractory))'
            fragments.append(fragment)
    return fragments


def _build_required_mutations_essie(mutations: list[str]) -> list[str]:
    """Build Essie fragments for required mutations."""
    fragments = []
    for mutation in mutations:
        if mutation.strip():  # Skip empty strings
            fragment = f'AREA[EligibilityCriteria]("{mutation}")'
            fragments.append(fragment)
    return fragments


def _build_excluded_mutations_essie(mutations: list[str]) -> list[str]:
    """Build Essie fragments for excluded mutations."""
    fragments = []
    for mutation in mutations:
        if mutation.strip():  # Skip empty strings
            fragment = f'AREA[EligibilityCriteria](NOT "{mutation}")'
            fragments.append(fragment)
    return fragments


def _build_biomarker_expression_essie(biomarkers: dict[str, str]) -> list[str]:
    """Build Essie fragments for biomarker expression requirements."""
    fragments = []
    for marker, expression in biomarkers.items():
        if marker.strip() and expression.strip():  # Skip empty values
            fragment = (
                f'AREA[EligibilityCriteria]("{marker}" AND "{expression}")'
            )
            fragments.append(fragment)
    return fragments


def _build_line_of_therapy_essie(line: LineOfTherapy) -> str:
    """Return the eligibility-criteria fragment for a line of therapy.

    The phrases registered for ``line`` in LINE_OF_THERAPY_PATTERNS are joined
    with `` OR `` inside a single AREA block. An empty string is returned when
    no phrases are registered for ``line``.
    """
    phrases = LINE_OF_THERAPY_PATTERNS.get(line, [])
    if not phrases:
        return ""
    alternatives = " OR ".join(phrases)
    return f"AREA[EligibilityCriteria]({alternatives})"


def _build_brain_mets_essie(allow: bool) -> str:
    """Build Essie fragment for brain metastases filter."""
    if allow is False:
        return 'AREA[EligibilityCriteria](NOT "brain metastases")'
    return ""


async def convert_query(query: TrialQuery) -> dict[str, list[str]]:  # noqa: C901
    """Convert a TrialQuery object into a dict of query params
    for the ClinicalTrials.gov API (v2). Each key maps to one or
    more strings in a list, consistent with parse_qs outputs.

    The parameters are assembled in this order:

    1. ``format`` / ``markupFormat`` (always present).
    2. ``query.cond`` from the conditions, optionally expanded with synonyms
       fetched through ``BioThingsClient.get_disease_synonyms``.
    3. ``query.term`` / ``query.intr`` from the free terms and interventions,
       with the eligibility-criteria fragments AND-ed onto ``query.term``.
    4. ``filter.geo`` when both ``lat`` and ``long`` are set. A missing
       ``distance`` falls back to 50 miles, as the field description states.
    5. ``filter.advanced`` from the date range, enum filters and age group.
    6. NCT IDs (``query.id`` or ``filter.ids``) and ``filter.overallStatus``.
    7. ``sort``, ``pageToken``, ``fields`` and ``pageSize``.

    Args:
        query: The validated search request.

    Returns:
        A new parameter dict. It shares no list objects with module-level
        constants, so the caller may modify it freely.
    """
    # Start with required fields
    params: dict[str, list[str]] = {
        "format": [DEFAULT_FORMAT],
        "markupFormat": [DEFAULT_MARKUP],
    }

    # Track whether we have other filters (for NCT ID intersection logic)
    has_other_filters = False

    # Handle conditions with optional synonym expansion
    if query.conditions:
        has_other_filters = True
        expanded_conditions: list[str] = []

        if query.expand_synonyms:
            # Expand each condition with synonyms
            client = BioThingsClient()
            for condition in query.conditions:
                try:
                    synonyms = await client.get_disease_synonyms(condition)
                    # An empty result must not drop the user's own term:
                    # fall back to the condition exactly as it was given.
                    expanded_conditions.extend(synonyms or [condition])
                except Exception as e:
                    # Best effort: a failed lookup degrades to the plain term.
                    logger.warning(
                        f"Failed to get synonyms for {condition}: {e}"
                    )
                    expanded_conditions.append(condition)
        else:
            expanded_conditions = query.conditions

        # Remove duplicates (case-insensitively) while preserving order
        seen = set()
        unique_conditions = []
        for cond in expanded_conditions:
            if cond.lower() not in seen:
                seen.add(cond.lower())
                unique_conditions.append(cond)

        if len(unique_conditions) == 1:
            params["query.cond"] = [unique_conditions[0]]
        else:
            # Join multiple terms with OR, wrapped in parentheses
            params["query.cond"] = [f"({' OR '.join(unique_conditions)})"]

    # Handle terms and interventions (no synonym expansion)
    for key, val in [
        ("query.term", query.terms),
        ("query.intr", query.interventions),
    ]:
        if val:
            has_other_filters = True
            if len(val) == 1:
                params[key] = [val[0]]
            else:
                # Join multiple terms with OR, wrapped in parentheses
                params[key] = [f"({' OR '.join(val)})"]

    # Collect Essie fragments for eligibility criteria
    essie_fragments: list[str] = []

    # Prior therapies
    if query.prior_therapies:
        has_other_filters = True
        essie_fragments.extend(
            _build_prior_therapy_essie(query.prior_therapies)
        )

    # Progression on therapies
    if query.progression_on:
        has_other_filters = True
        essie_fragments.extend(_build_progression_essie(query.progression_on))

    # Required mutations
    if query.required_mutations:
        has_other_filters = True
        essie_fragments.extend(
            _build_required_mutations_essie(query.required_mutations)
        )

    # Excluded mutations
    if query.excluded_mutations:
        has_other_filters = True
        essie_fragments.extend(
            _build_excluded_mutations_essie(query.excluded_mutations)
        )

    # Biomarker expression
    if query.biomarker_expression:
        has_other_filters = True
        essie_fragments.extend(
            _build_biomarker_expression_essie(query.biomarker_expression)
        )

    # Line of therapy
    if query.line_of_therapy:
        has_other_filters = True
        line_fragment = _build_line_of_therapy_essie(query.line_of_therapy)
        if line_fragment:
            essie_fragments.append(line_fragment)

    # Brain metastases filter (counts as a user filter even when it adds no
    # fragment, i.e. when allow_brain_mets is True)
    if query.allow_brain_mets is not None:
        has_other_filters = True
        brain_fragment = _build_brain_mets_essie(query.allow_brain_mets)
        if brain_fragment:
            essie_fragments.append(brain_fragment)

    # Combine all Essie fragments with AND and append to query.term
    if essie_fragments:
        combined_essie = " AND ".join(essie_fragments)
        if "query.term" in params:
            # Append to existing terms with AND
            params["query.term"][0] = (
                f"{params['query.term'][0]} AND {combined_essie}"
            )
        else:
            params["query.term"] = [combined_essie]

    # Geospatial
    if query.lat is not None and query.long is not None:
        has_other_filters = True
        # Without this fallback a missing distance would be rendered as the
        # literal text "Nonemi" inside the filter expression.
        distance = query.distance if query.distance is not None else 50
        geo_val = f"distance({query.lat},{query.long},{distance}mi)"
        params["filter.geo"] = [geo_val]

    # Collect advanced filters in a list
    advanced_filters: list[str] = []

    # Date filter: needs a date field plus at least one bound; the missing
    # bound is replaced by the open-ended MIN / MAX keyword.
    if query.date_field and (query.min_date or query.max_date):
        has_other_filters = True
        date_field = CTGOV_DATE_FIELD_MAPPING[query.date_field]
        min_val = query.min_date or "MIN"
        max_val = query.max_date or "MAX"
        advanced_filters.append(
            f"AREA[{date_field}]RANGE[{min_val},{max_val}]",
        )

    # Prepare a map of "AREA[...] -> (query_value, mapping_dict)"
    advanced_map = {
        "DesignPrimaryPurpose": (
            query.primary_purpose,
            CTGOV_PRIMARY_PURPOSE_MAPPING,
        ),
        "StudyType": (query.study_type, CTGOV_STUDY_TYPE_MAPPING),
        "InterventionType": (
            query.intervention_type,
            CTGOV_INTERVENTION_TYPE_MAPPING,
        ),
        "SponsorType": (query.sponsor_type, CTGOV_SPONSOR_TYPE_MAPPING),
        "StudyDesign": (query.study_design, CTGOV_STUDY_DESIGN_MAPPING),
        "Phase": (query.phase, CTGOV_PHASE_MAPPING),
    }

    # Append advanced filters
    for area, (qval, mapping) in advanced_map.items():
        if qval:
            has_other_filters = True
            # Check if mapping is a dict before using get method
            mapped = (
                mapping.get(qval)
                if mapping and isinstance(mapping, dict)
                else None
            )
            # Use the first mapped value if available, otherwise the literal
            value = mapped[0] if mapped else qval
            advanced_filters.append(f"AREA[{area}]{value}")

    # Age group
    if query.age_group and query.age_group != "ALL":
        has_other_filters = True
        mapped = CTGOV_AGE_GROUP_MAPPING[query.age_group]
        if mapped:
            advanced_filters.append(f"AREA[StdAge]{mapped[0]}")
        else:
            advanced_filters.append(f"AREA[StdAge]{query.age_group}")

    # If we collected any advanced filters, join them with AND
    if advanced_filters:
        params["filter.advanced"] = [" AND ".join(advanced_filters)]

    # NCT IDs - now using intersection semantics
    # Must be done BEFORE recruiting status to properly detect user-set filters
    if query.nct_ids:
        _inject_ids(params, query.nct_ids, has_other_filters)

    # Recruiting status - apply AFTER NCT ID injection
    # Only count as a user filter if explicitly set to something other than default
    if query.recruiting_status not in (None, RecruitingStatus.OPEN):
        # User explicitly set a non-default status
        if query.recruiting_status is not None:  # Type guard for mypy
            statuses = CTGOV_RECRUITING_STATUS_MAPPING.get(
                query.recruiting_status
            )
            # ANY maps to None, which leaves the status filter unset.
            if statuses:
                params["filter.overallStatus"] = [",".join(statuses)]
    elif not query.nct_ids or has_other_filters:
        # Apply default OPEN status only if:
        # 1. No NCT IDs provided, OR
        # 2. NCT IDs provided with other filters (intersection mode)
        params["filter.overallStatus"] = [",".join(OPEN_STATUSES)]

    # Sort & paging
    if query.sort is None:
        sort_val = CTGOV_SORT_MAPPING[SortOrder.RELEVANCE]
    else:
        sort_val = CTGOV_SORT_MAPPING.get(query.sort, query.sort)

    params["sort"] = [sort_val]
    if query.next_page_hash:
        params["pageToken"] = [query.next_page_hash]

    # Finally, add fields to limit payload size
    if query.return_fields:
        # Use custom fields if specified
        params["fields"] = [",".join(query.return_fields)]
    else:
        # Use default fields. Copy the list so that a caller editing the
        # returned params cannot alter the module-level constant.
        params["fields"] = list(SEARCH_FIELDS_PARAM)

    # Set page size
    if query.page_size:
        params["pageSize"] = [str(query.page_size)]
    else:
        params["pageSize"] = ["40"]

    return params


async def search_trials(
    query: TrialQuery,
    output_json: bool = False,
) -> str:
    """Search ClinicalTrials.gov for clinical trials.

    The query is converted to request parameters with ``convert_query`` and
    sent through ``http_client.request_api``. When the request reports an
    error, the payload becomes a one-key ``{"error": ...}`` dict.

    Args:
        query: The search request.
        output_json: When True, return the payload as indented JSON.

    Returns:
        Markdown from ``render.to_markdown`` when the payload is non-empty and
        ``output_json`` is False; otherwise ``json.dumps`` of the payload.
    """
    params = await convert_query(query)

    # The block below only decides which debug message to log when NCT IDs
    # are present; it does not influence the request.
    if query.nct_ids:
        # Explicit, user-set filters only; defaults do not count.
        user_filters = (
            query.conditions,
            query.terms,
            query.interventions,
            query.lat is not None and query.long is not None,
            query.date_field and (query.min_date or query.max_date),
            query.primary_purpose,
            query.study_type,
            query.intervention_type,
            query.sponsor_type,
            query.study_design,
            query.phase,
            query.age_group and query.age_group != AgeGroup.ALL,
            query.recruiting_status not in (None, RecruitingStatus.OPEN),
            query.prior_therapies,
            query.progression_on,
            query.required_mutations,
            query.excluded_mutations,
            query.biomarker_expression,
            query.line_of_therapy,
            query.allow_brain_mets is not None,
        )
        mode_message = (
            "Filter mode: intersection (NCT IDs AND other filters)"
            if any(user_filters)
            else "Filter mode: id-only (NCT IDs only)"
        )
        logger.debug(mode_message)

    response, error = await http_client.request_api(
        url=CLINICAL_TRIALS_BASE_URL,
        request=params,
        method="GET",
        tls_version=TLSVersion.TLSv1_2,
        domain="trial",
    )

    # An error replaces whatever response body came back.
    payload = (
        {"error": f"Error {error.code}: {error.message}"}
        if error
        else response
    )

    # JSON is used both on request and for an empty payload.
    if output_json or not payload:
        return json.dumps(payload, indent=2)
    return render.to_markdown(payload)


async def _trial_searcher(
    call_benefit: Annotated[
        str,
        "Define and summarize why this function is being called and the intended benefit",
    ],
    conditions: Annotated[
        list[str] | str | None,
        "Condition terms (e.g., 'breast cancer') - list or comma-separated string",
    ] = None,
    terms: Annotated[
        list[str] | str | None,
        "General search terms - list or comma-separated string",
    ] = None,
    interventions: Annotated[
        list[str] | str | None,
        "Intervention names (e.g., 'pembrolizumab') - list or comma-separated string",
    ] = None,
    recruiting_status: Annotated[
        RecruitingStatus | str | None,
        "Study recruitment status (OPEN, CLOSED, ANY)",
    ] = None,
    study_type: Annotated[StudyType | str | None, "Type of study"] = None,
    nct_ids: Annotated[
        list[str] | str | None,
        "Clinical trial NCT IDs - list or comma-separated string",
    ] = None,
    lat: Annotated[
        float | None,
        "Latitude for location search. AI agents should geocode city/location names (e.g., 'Cleveland' → 41.4993, -81.6944) before using this parameter.",
    ] = None,
    long: Annotated[
        float | None,
        "Longitude for location search. AI agents should geocode city/location names (e.g., 'Cleveland' → 41.4993, -81.6944) before using this parameter.",
    ] = None,
    distance: Annotated[
        float | None,
        "Distance from lat/long in miles (default: 50 miles if lat/long provided but distance not specified)",
    ] = None,
    min_date: Annotated[
        str | None, "Minimum date for filtering (YYYY-MM-DD)"
    ] = None,
    max_date: Annotated[
        str | None, "Maximum date for filtering (YYYY-MM-DD)"
    ] = None,
    date_field: Annotated[
        DateField | str | None, "Date field to filter on"
    ] = None,
    phase: Annotated[TrialPhase | str | None, "Trial phase filter"] = None,
    age_group: Annotated[AgeGroup | str | None, "Age group filter"] = None,
    primary_purpose: Annotated[
        PrimaryPurpose | str | None, "Primary purpose of the trial"
    ] = None,
    intervention_type: Annotated[
        InterventionType | str | None, "Type of intervention"
    ] = None,
    sponsor_type: Annotated[
        SponsorType | str | None, "Type of sponsor"
    ] = None,
    study_design: Annotated[StudyDesign | str | None, "Study design"] = None,
    sort: Annotated[SortOrder | str | None, "Sort order for results"] = None,
    next_page_hash: Annotated[
        str | None, "Token to retrieve the next page of results"
    ] = None,
    prior_therapies: Annotated[
        list[str] | str | None,
        "Prior therapies to search for in eligibility criteria - list or comma-separated string",
    ] = None,
    progression_on: Annotated[
        list[str] | str | None,
        "Therapies the patient has progressed on - list or comma-separated string",
    ] = None,
    required_mutations: Annotated[
        list[str] | str | None,
        "Required mutations in eligibility criteria - list or comma-separated string",
    ] = None,
    excluded_mutations: Annotated[
        list[str] | str | None,
        "Excluded mutations in eligibility criteria - list or comma-separated string",
    ] = None,
    biomarker_expression: Annotated[
        dict[str, str] | None,
        "Biomarker expression requirements (e.g., {'PD-L1': '≥50%'})",
    ] = None,
    line_of_therapy: Annotated[
        LineOfTherapy | str | None,
        "Line of therapy filter (1L, 2L, 3L+)",
    ] = None,
    allow_brain_mets: Annotated[
        bool | None,
        "Whether to allow trials that accept brain metastases",
    ] = None,
    return_fields: Annotated[
        list[str] | str | None,
        "Specific fields to return in the response - list or comma-separated string",
    ] = None,
    page_size: Annotated[
        int | None,
        "Number of results per page (1-1000)",
    ] = None,
    expand_synonyms: Annotated[
        bool,
        "Expand condition searches with disease synonyms from MyDisease.info",
    ] = True,
) -> str:
    """
    Searches for clinical trials based on specified criteria.

    Parameters:
    - call_benefit: Define and summarize why this function is being called and the intended benefit
    - conditions: Condition terms (e.g., "breast cancer") - list or comma-separated string
    - terms: General search terms - list or comma-separated string
    - interventions: Intervention names (e.g., "pembrolizumab") - list or comma-separated string
    - recruiting_status: Study recruitment status (OPEN, CLOSED, ANY)
    - study_type: Type of study
    - nct_ids: Clinical trial NCT IDs - list or comma-separated string
    - lat: Latitude for location search
    - long: Longitude for location search
    - distance: Distance from lat/long in miles
    - min_date: Minimum date for filtering (YYYY-MM-DD)
    - max_date: Maximum date for filtering (YYYY-MM-DD)
    - date_field: Date field to filter on
    - phase: Trial phase filter
    - age_group: Age group filter
    - primary_purpose: Primary purpose of the trial
    - intervention_type: Type of intervention
    - sponsor_type: Type of sponsor
    - study_design: Study design
    - sort: Sort order for results
    - next_page_hash: Token to retrieve the next page of results
    - prior_therapies: Prior therapies to search for in eligibility criteria - list or comma-separated string
    - progression_on: Therapies the patient has progressed on - list or comma-separated string
    - required_mutations: Required mutations in eligibility criteria - list or comma-separated string
    - excluded_mutations: Excluded mutations in eligibility criteria - list or comma-separated string
    - biomarker_expression: Biomarker expression requirements (e.g., {'PD-L1': '≥50%'})
    - line_of_therapy: Line of therapy filter (1L, 2L, 3L+)
    - allow_brain_mets: Whether to allow trials that accept brain metastases
    - return_fields: Specific fields to return in the response - list or comma-separated string
    - page_size: Number of results per page (1-1000)
    - expand_synonyms: Expand condition searches with disease synonyms from MyDisease.info

    Returns:
    Markdown formatted list of clinical trials
    """
    # `call_benefit` is part of the signature but is not read anywhere in this
    # function body.
    #
    # Convert individual parameters to a TrialQuery object. List-like arguments
    # go through ensure_list(..., split_strings=True) first; every other
    # argument is passed through unchanged and left to the model to validate.
    # NOTE(review): `distance` is annotated float here while TrialQuery declares
    # it as int - confirm how fractional distances should be handled.
    query = TrialQuery(
        conditions=ensure_list(conditions, split_strings=True),
        terms=ensure_list(terms, split_strings=True),
        interventions=ensure_list(interventions, split_strings=True),
        recruiting_status=recruiting_status,
        study_type=study_type,
        nct_ids=ensure_list(nct_ids, split_strings=True),
        lat=lat,
        long=long,
        distance=distance,
        min_date=min_date,
        max_date=max_date,
        date_field=date_field,
        phase=phase,
        age_group=age_group,
        primary_purpose=primary_purpose,
        intervention_type=intervention_type,
        sponsor_type=sponsor_type,
        study_design=study_design,
        sort=sort,
        next_page_hash=next_page_hash,
        prior_therapies=ensure_list(prior_therapies, split_strings=True),
        progression_on=ensure_list(progression_on, split_strings=True),
        required_mutations=ensure_list(required_mutations, split_strings=True),
        excluded_mutations=ensure_list(excluded_mutations, split_strings=True),
        biomarker_expression=biomarker_expression,
        line_of_therapy=line_of_therapy,
        allow_brain_mets=allow_brain_mets,
        return_fields=ensure_list(return_fields, split_strings=True),
        page_size=page_size,
        expand_synonyms=expand_synonyms,
    )
    # This entry point always asks for the non-JSON rendering.
    return await search_trials(query, output_json=False)


async def search_trials_unified(
    query: TrialQuery,
    source: str = "clinicaltrials",
    api_key: str | None = None,
    output_json: bool = False,
) -> str:
    """
    Run a clinical trial search against ClinicalTrials.gov or the NCI CTS API.

    Args:
        query: TrialQuery object carrying the search parameters
        source: "nci" selects the NCI backend; any other value
            (default "clinicaltrials") is served by ClinicalTrials.gov
        api_key: Forwarded to the NCI search; not used for ClinicalTrials.gov
        output_json: Return JSON text instead of formatted markdown

    Returns:
        Formatted markdown or JSON string with results
    """
    if source != "nci":
        # Every non-"nci" source falls back to ClinicalTrials.gov
        return await search_trials(query, output_json)

    # Deferred import: avoids a circular import with nci_search
    from .nci_search import format_nci_trial_results, search_trials_nci

    nci_results = await search_trials_nci(query, api_key)
    return (
        json.dumps(nci_results, indent=2)
        if output_json
        else format_nci_trial_results(nci_results)
    )

```

--------------------------------------------------------------------------------
/src/biomcp/workers/worker_entry_stytch.js:
--------------------------------------------------------------------------------

```javascript
/**
 * BioMCP Worker – With Stytch OAuth (refactored)
 */

import { Hono } from "hono";
import { createRemoteJWKSet, importPKCS8, jwtVerify, SignJWT } from "jose";

// Debug-logging switch read by log(). Starts disabled; presumably assigned from
// the worker environment elsewhere in this file — TODO confirm where.
let DEBUG = false; // Default value, will be updated from env

// Constants used by validateSessionId():
// DEFAULT_SESSION_ID is returned when the supplied ID is missing, too long,
// or empty after sanitizing; MAX_SESSION_ID_LENGTH is the longest accepted ID.
const DEFAULT_SESSION_ID = "default";
const MAX_SESSION_ID_LENGTH = 128;

// Helper functions
/**
 * Write a debug line to the console when DEBUG is enabled.
 *
 * Accepts any number of arguments so that calls of the form
 * log("label:", someObject) print the object instead of silently dropping it.
 * @param {...*} args - Values forwarded to console.log after the "[DEBUG]" tag
 */
const log = (...args) => {
  if (DEBUG) console.log("[DEBUG]", ...args);
};

// Key-name fragments whose values must be redacted before logging.
// sanitizeObject() lower-cases both the object key and each entry here and
// matches by substring, so any key containing e.g. "token" is redacted.
const SENSITIVE_FIELDS = [
  "api_key",
  "apiKey",
  "api-key",
  "token",
  "secret",
  "password",
];

/**
 * Produce a copy of a value with sensitive fields masked for logging.
 *
 * Non-objects (and null) are returned unchanged, arrays are sanitized element
 * by element, and for plain objects every key that contains one of the
 * SENSITIVE_FIELDS fragments (case-insensitive) has its value replaced with
 * "[REDACTED]". Nested objects are processed recursively.
 * @param {object} obj - Value to sanitize
 * @returns {object} - Sanitized copy (the input itself for non-objects)
 */
const sanitizeObject = (obj) => {
  if (!obj || typeof obj !== "object") return obj;

  if (Array.isArray(obj)) {
    return obj.map((element) => sanitizeObject(element));
  }

  // True when the key name contains any sensitive fragment
  const isSensitiveKey = (name) => {
    const lowered = name.toLowerCase();
    return SENSITIVE_FIELDS.some((fragment) =>
      lowered.includes(fragment.toLowerCase()),
    );
  };

  return Object.entries(obj).reduce((clean, [name, val]) => {
    if (isSensitiveKey(name)) {
      clean[name] = "[REDACTED]";
    } else {
      const isNested = typeof val === "object" && val !== null;
      clean[name] = isNested ? sanitizeObject(val) : val;
    }
    return clean;
  }, {});
};

/**
 * Normalize a caller-supplied session ID.
 *
 * Falls back to DEFAULT_SESSION_ID when the ID is missing, longer than
 * MAX_SESSION_ID_LENGTH, or empty once every character outside
 * [a-zA-Z0-9_-] has been stripped.
 * @param {string} sessionId - Session ID from query parameter
 * @returns {string} - Sanitized session ID or the default
 */
const validateSessionId = (sessionId) => {
  if (!sessionId) return DEFAULT_SESSION_ID;

  // Oversized IDs are rejected outright rather than truncated
  const length = sessionId.length;
  if (length > MAX_SESSION_ID_LENGTH) {
    log(`Session ID too long (${length} chars), using default`);
    return DEFAULT_SESSION_ID;
  }

  // Keep only letters, digits, hyphen and underscore
  const cleaned = sessionId.replace(/[^a-zA-Z0-9\-_]/g, "");
  const wasModified = cleaned !== sessionId;
  if (wasModified) {
    log(`Session ID contained invalid characters, sanitized: ${cleaned}`);
  }

  return cleaned === "" ? DEFAULT_SESSION_ID : cleaned;
};

/**
 * Validate an incoming MCP JSON-RPC request and forward it to the remote server.
 *
 * The body is read once, parsed as JSON, logged in sanitized form, checked for
 * the `jsonrpc` and `method` fields, and then re-wrapped in a new Request that
 * is handed to proxyPost() for the "/mcp" path.
 *
 * Error mapping (all JSON-RPC 2.0 error objects):
 *   - body is not JSON                          -> 400, code -32700
 *   - body is null / lacks jsonrpc or method    -> 400, code -32600
 *   - any other failure, including proxy errors -> 500, code -32603
 *
 * @param {HonoRequest} request - The incoming Hono request
 * @param {string} remoteUrl - Remote MCP server URL
 * @param {string} sessionId - Validated session ID
 * @returns {Response} - Proxy response or error
 */
const processMcpRequest = async (request, remoteUrl, sessionId) => {
  // Build a JSON-RPC 2.0 error response with the given HTTP status.
  const rpcErrorResponse = (status, code, message, data) =>
    new Response(
      JSON.stringify({
        jsonrpc: "2.0",
        error: { code, message, data },
      }),
      { status, headers: { "Content-Type": "application/json" } },
    );

  try {
    // Get body text directly (Hono request doesn't have clone)
    const bodyText = await request.text();

    // Validate it's JSON
    let bodyJson;
    try {
      bodyJson = JSON.parse(bodyText);
    } catch (e) {
      return rpcErrorResponse(400, -32700, "Parse error", "Invalid JSON");
    }

    // Log sanitized request
    const sanitizedBody = sanitizeObject(bodyJson);
    log(`MCP POST request body: ${JSON.stringify(sanitizedBody)}`);

    // Validate required JSONRPC fields. The leading null check matters:
    // "null" is valid JSON, and reading a property of null would throw and be
    // reported as an internal error instead of an invalid request.
    if (!bodyJson || !bodyJson.jsonrpc || !bodyJson.method) {
      return rpcErrorResponse(
        400,
        -32600,
        "Invalid Request",
        "Missing required fields: jsonrpc, method",
      );
    }

    // Create a new Request object with the body text since we've already consumed it
    const newRequest = new Request(request.url, {
      method: "POST",
      headers: request.headers,
      body: bodyText,
    });

    // Forward to remote server. `await` keeps a rejected proxy call inside
    // this try block so it is reported as a JSON-RPC internal error.
    return await proxyPost(newRequest, remoteUrl, "/mcp", sessionId);
  } catch (error) {
    log(`Error processing MCP request: ${error}`);
    return rpcErrorResponse(500, -32603, "Internal error", error.message);
  }
};

// CORS configuration: headers attached to worker responses.
// Allows any origin and any request header, the GET/POST/OPTIONS methods,
// and lets clients cache the preflight result for 86400 seconds (24 hours).
const CORS = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
  "Access-Control-Allow-Headers": "*",
  "Access-Control-Max-Age": "86400",
};

/**
 * Build a Stytch API URL.
 * @param {object} env - Worker env; STYTCH_API_URL overrides the default
 *   "https://test.stytch.com/v1" base, STYTCH_PROJECT_ID is used for public URLs
 * @param {string} path - Endpoint path appended after a "/"
 * @param {boolean} [isPublic=false] - Insert "/public/<project id>" before the path
 * @returns {string} - The assembled URL
 */
const getStytchUrl = (env, path, isPublic = false) => {
  let prefix = `${env.STYTCH_API_URL || "https://test.stytch.com/v1"}`;
  if (isPublic) {
    prefix = `${prefix}/public/${env.STYTCH_PROJECT_ID}`;
  }
  return `${prefix}/${path}`;
};

// JWT validation logic
// Module-level holder for the Stytch remote JWKS. It stays null until
// validateToken() first falls back to Stytch validation, which creates the
// key set and reuses it on later calls.
let jwks = null;

/**
 * Decode the payload of a JWT (no signature check).
 *
 * The payload segment is base64url; it is converted to standard base64,
 * re-padded, and decoded as UTF-8 so that non-ASCII claim values (names,
 * e-mail display names, ...) survive intact. atob() alone yields one
 * character per byte, which corrupts multi-byte UTF-8 sequences.
 *
 * @param {string} token - Compact JWT ("header.payload.signature")
 * @returns {object} - Parsed payload, or {} when the token cannot be decoded
 */
function decodeJwt(token) {
  try {
    const base64Url = token.split(".")[1];
    const base64 = base64Url.replace(/-/g, "+").replace(/_/g, "/");
    // base64url drops "=" padding; restore it for decoders that require it
    const padded = base64.padEnd(
      base64.length + ((4 - (base64.length % 4)) % 4),
      "=",
    );
    const binary = atob(padded);
    const bytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
    return JSON.parse(new TextDecoder().decode(bytes));
  } catch {
    // Malformed or missing token: callers get an empty payload
    return {};
  }
}

let bqTokenPromise = null;

// Most recently issued BigQuery access token and its expiry (epoch seconds).
// The resolved value is cached rather than the in-flight promise.
let bqTokenCache = null;

// Seconds before expiry at which a cached token is no longer reused.
const BQ_TOKEN_EXPIRY_MARGIN = 60;

/**
 * Fetch (and cache) a BigQuery OAuth token.
 *
 * Signs a service-account JWT assertion with the key in env.BQ_SA_KEY_JSON and
 * exchanges it at Google's token endpoint for an access token scoped to
 * BigQuery streaming inserts. The token is reused until shortly before it
 * expires.
 *
 * @param {object} env  the Hono env (c.env)
 * @returns {Promise<string>} the access token
 * @throws {Error} when the token endpoint responds with a non-2xx status or
 *   the response carries no access_token
 */
async function getBQToken(env) {
  const now = Math.floor(Date.now() / 1000);

  // Serve from cache while the token is comfortably within its lifetime
  if (bqTokenCache && bqTokenCache.expiresAt - BQ_TOKEN_EXPIRY_MARGIN > now) {
    return bqTokenCache.token;
  }

  // Parse the service‐account JSON key
  const key = JSON.parse(env.BQ_SA_KEY_JSON);

  // Convert PEM private key string into a CryptoKey
  const privateKey = await importPKCS8(key.private_key, "RS256");

  // Build the JWT assertion
  const assertion = await new SignJWT({
    iss: key.client_email,
    scope: "https://www.googleapis.com/auth/bigquery.insertdata",
    aud: "https://oauth2.googleapis.com/token",
    iat: now,
    exp: now + 3600,
  })
    .setProtectedHeader({ alg: "RS256", kid: key.private_key_id })
    .sign(privateKey);

  // Exchange the assertion for an access token
  const resp = await fetch("https://oauth2.googleapis.com/token", {
    method: "POST",
    headers: { "Content-Type": "application/x-www-form-urlencoded" },
    body: new URLSearchParams({
      grant_type: "urn:ietf:params:oauth:grant-type:jwt-bearer",
      assertion,
    }),
  });

  // Fail loudly instead of returning undefined and sending "Bearer undefined"
  if (!resp.ok) {
    const errorText = await resp.text();
    throw new Error(
      `BigQuery token request failed: ${resp.status} - ${errorText}`,
    );
  }

  const { access_token, expires_in } = await resp.json();
  if (!access_token) {
    throw new Error("BigQuery token response did not include an access_token");
  }

  // Fall back to the assertion lifetime when expires_in is absent or invalid
  const lifetime = Number(expires_in) > 0 ? Number(expires_in) : 3600;
  bqTokenCache = { token: access_token, expiresAt: now + lifetime };
  return access_token;
}

/**
 * Stream one row into the configured BigQuery table (tabledata insertAll).
 *
 * The table is addressed by env.BQ_PROJECT_ID / env.BQ_DATASET / env.BQ_TABLE
 * and the call is authorized with the token from getBQToken(). Any failure
 * (HTTP error or per-row insertErrors) is logged and re-thrown.
 * @param {object} env  the Hono env (c.env)
 * @param {object} row  { timestamp, userEmail, query }
 */
async function insertEvent(env, row) {
  try {
    const accessToken = await getBQToken(env);

    const endpoint = `https://bigquery.googleapis.com/bigquery/v2/projects/${env.BQ_PROJECT_ID}/datasets/${env.BQ_DATASET}/tables/${env.BQ_TABLE}/insertAll`;

    const requestInit = {
      method: "POST",
      headers: {
        Authorization: `Bearer ${accessToken}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ rows: [{ json: row }] }),
    };
    const bqResponse = await fetch(endpoint, requestInit);

    // Transport-level failure: surface status and body text
    if (!bqResponse.ok) {
      const errorText = await bqResponse.text();
      throw new Error(
        `BigQuery API error: ${bqResponse.status} - ${errorText}`,
      );
    }

    // A 2xx response can still report rejected rows
    const outcome = await bqResponse.json();
    if (outcome.insertErrors) {
      const details = JSON.stringify(outcome.insertErrors);
      throw new Error(`BigQuery insert errors: ${details}`);
    }
  } catch (failure) {
    console.error(`[BigQuery] Insert failed:`, failure.message);
    throw failure;
  }
}

/**
 * Validate a bearer JWT, first as a token issued by this worker and then,
 * if that fails, as a Stytch-issued token.
 *
 * Self-issued path: verifies the signature with env.JWT_SECRET and the issuer
 * env.STYTCH_PROJECT_ID, then requires a `token_hash:<key>` entry in OAUTH_KV
 * (key = first 32 chars of the base64url SHA-256 of the token).
 * Stytch path: verifies against the project's remote JWKS (RS256).
 *
 * @param {string} token - Raw JWT from the Authorization header
 * @param {object} env - Worker env (JWT_SECRET, STYTCH_PROJECT_ID, OAUTH_KV, ...)
 * @returns {Promise<object>} - The jwtVerify result of whichever path succeeded
 * @throws {Error} - "No token provided", or the error from the Stytch fallback
 */
async function validateToken(token, env) {
  if (!token) {
    throw new Error("No token provided");
  }

  // Path 1: token signed by this worker and still present in KV
  const verifySelfIssued = async () => {
    const encoder = new TextEncoder();
    // NOTE(review): a hard-coded fallback secret is used when JWT_SECRET is
    // unset; confirm JWT_SECRET is always configured in deployed environments.
    const secret = encoder.encode(env.JWT_SECRET || "default-jwt-secret-key");

    const verified = await jwtVerify(token, secret, {
      issuer: env.STYTCH_PROJECT_ID,
    });

    // KV lookup by token hash (for revocation checking)
    const digest = await crypto.subtle.digest(
      "SHA-256",
      encoder.encode(token),
    );
    const kvKey = btoa(String.fromCharCode(...new Uint8Array(digest)))
      .replace(/\+/g, "-")
      .replace(/\//g, "_")
      .replace(/=/g, "")
      .substring(0, 32);

    const stored = await env.OAUTH_KV.get(`token_hash:${kvKey}`);
    if (!stored) {
      log("Token not found in storage - may have been revoked");
      throw new Error("Token not found or revoked");
    }

    log("Self-issued JWT validation successful");
    return verified;
  };

  // Path 2: token signed by Stytch, checked against the cached remote JWKS
  const verifyWithStytch = async () => {
    if (!jwks) {
      log("Creating JWKS for Stytch validation");
      jwks = createRemoteJWKSet(
        new URL(getStytchUrl(env, ".well-known/jwks.json", true)),
      );
    }

    return await jwtVerify(token, jwks, {
      audience: env.STYTCH_PROJECT_ID,
      issuer: [`stytch.com/${env.STYTCH_PROJECT_ID}`],
      typ: "JWT",
      algorithms: ["RS256"],
    });
  };

  try {
    log(`Validating token: ${token.substring(0, 15)}...`);

    try {
      return await verifySelfIssued();
    } catch (selfIssuedError) {
      log(
        `Self-issued JWT validation failed, trying Stytch validation: ${selfIssuedError.message}`,
      );
      return await verifyWithStytch();
    }
  } catch (finalError) {
    log(`All token validation methods failed: ${finalError}`);
    throw finalError;
  }
}

/**
 * Complete the OAuth callback: authenticate the Stytch OAuth token, mint a
 * one-time authorization code, and redirect the client back to its
 * redirect_uri with `code` and `state`.
 *
 * @param {object} c - Hono context (uses c.env for Stytch credentials and OAUTH_KV)
 * @param {string} token - OAuth token received from Stytch on the callback URL
 * @param {string} state - State value to echo back to the client
 * @param {object} oauthRequest - Stored authorize-request data
 *   (client_id, redirect_uri, code_challenge)
 * @returns {Promise<Response>} - 302 redirect on success; 401 when Stytch
 *   rejects the token; 400 when redirect_uri is missing; 500 on any other error
 */
async function processAuthCallback(c, token, state, oauthRequest) {
  log("Authenticating with Stytch API...");

  try {
    // The callback only ever handles OAuth tokens, so the OAuth endpoint is used
    const endpoint = "oauth/authenticate";
    const payload = { token: token };

    // Redact the token before logging; it is a live credential
    log(
      `Using Stytch endpoint: ${endpoint} with payload: ${JSON.stringify(
        sanitizeObject(payload),
      )}`,
    );

    const authenticateResp = await fetch(getStytchUrl(c.env, endpoint), {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Basic ${btoa(
          `${c.env.STYTCH_PROJECT_ID}:${c.env.STYTCH_SECRET}`,
        )}`,
      },
      body: JSON.stringify(payload),
    });

    log(`Stytch auth response status: ${authenticateResp.status}`);

    if (!authenticateResp.ok) {
      const errorText = await authenticateResp.text();
      log(`Stytch authentication error: ${errorText}`);
      return new Response(`Authentication failed: ${errorText}`, {
        status: 401,
        headers: CORS,
      });
    }

    const authData = await authenticateResp.json();
    log(
      `Auth data received: ${JSON.stringify({
        user_id: authData.user_id || "unknown",
        has_user: !!authData.user,
      })}`,
    );

    // Validate the redirect target before minting a code, so a request that
    // cannot be completed never leaves an unused authorization code in KV
    if (!oauthRequest.redirect_uri) {
      log("Missing redirect_uri in OAuth request");
      return new Response("Missing redirect URI in OAuth request", {
        status: 400,
        headers: CORS,
      });
    }

    log(`Using redirect URI from request: ${oauthRequest.redirect_uri}`);
    // Throws on a malformed URI; handled by the catch below (500)
    const redirectURL = new URL(oauthRequest.redirect_uri);

    // Generate an authorization code
    const authCode = crypto.randomUUID();
    log(`Generated authorization code: ${authCode}`);

    // Store the user info with the authorization code
    const authCodeData = {
      sub: authData.user_id,
      email: authData.user?.emails?.[0]?.email,
      code_challenge: oauthRequest.code_challenge,
      client_id: oauthRequest.client_id,
      redirect_uri: oauthRequest.redirect_uri,
    };

    log(`Storing auth code data: ${JSON.stringify(authCodeData)}`);
    // The code is valid for 5 minutes
    await c.env.OAUTH_KV.put(
      `auth_code:${authCode}`,
      JSON.stringify(authCodeData),
      { expirationTtl: 300 },
    );
    log("Successfully stored auth code data");

    log(`Using state for redirect: ${state}`);
    redirectURL.searchParams.set("code", authCode);
    redirectURL.searchParams.set("state", state);

    log(`Redirecting to: ${redirectURL.toString()}`);
    return Response.redirect(redirectURL.toString(), 302);
  } catch (error) {
    console.error(`Error in processAuthCallback: ${error}`);
    return new Response(`Authentication processing error: ${error.message}`, {
      status: 500,
      headers: CORS,
    });
  }
}

/**
 * Proxy a POST request body to the remote MCP server and adapt the response.
 *
 * The upstream response is read in full. If it is a server-sent-events
 * payload containing exactly one event, that event's data is returned as a
 * plain JSON response; if it contains several events the original text is
 * returned as text/event-stream; anything else is returned unchanged as JSON.
 *
 * SSE handling accepts CRLF, LF or CR line endings, recognizes SSE by the
 * upstream Content-Type as well as by an "event:" field, and joins multiple
 * "data:" lines of one event with newlines.
 *
 * @param {Request} req - Request whose body is forwarded
 * @param {string} remoteServerUrl - Base URL of the remote MCP server
 * @param {string} path - Path appended to the base URL
 * @param {string} sid - Session ID sent as the session_id query parameter
 * @returns {Promise<Response>} - Adapted upstream response, or 502 when the
 *   upstream fetch fails
 */
async function proxyPost(req, remoteServerUrl, path, sid) {
  const body = await req.text();
  const targetUrl = `${remoteServerUrl}${path}?session_id=${encodeURIComponent(
    sid,
  )}`;

  // Streamable HTTP requires both application/json and text/event-stream
  // The server will decide which format to use based on the response type
  const acceptHeader = "application/json, text/event-stream";

  const headers = {
    "Content-Type": "application/json",
    Accept: acceptHeader,
    "User-Agent": "Claude/1.0",
  };

  try {
    const response = await fetch(targetUrl, {
      method: "POST",
      headers: headers,
      body: body,
    });

    const responseText = await response.text();
    log(`Proxy response from ${targetUrl}: ${responseText.substring(0, 500)}`);

    // Normalize line endings for parsing only; the original text is what gets
    // forwarded when the payload is passed through as a stream
    const normalized = responseText.replace(/\r\n?/g, "\n");
    const upstreamType = (
      response.headers.get("Content-Type") || ""
    ).toLowerCase();

    // Check if response is SSE format
    const isSse =
      upstreamType.includes("text/event-stream") ||
      normalized.startsWith("event:") ||
      normalized.includes("\nevent:");

    if (isSse) {
      // Events are separated by a blank line
      const events = normalized.split("\n\n").filter((e) => e.trim());

      if (events.length === 1) {
        // Single SSE event - convert to plain JSON
        const dataLines = events[0]
          .split("\n")
          .filter((l) => l.startsWith("data:"));

        if (dataLines.length > 0) {
          // Strip the "data:" prefix (and one optional space) from each line
          const jsonData = dataLines
            .map((l) => l.substring(5).replace(/^ /, ""))
            .join("\n")
            .trim();
          log("Converting single SSE message to plain JSON");
          return new Response(jsonData, {
            status: response.status,
            headers: { "Content-Type": "application/json", ...CORS },
          });
        }
      } else if (events.length > 1) {
        // Multiple SSE events - return as SSE stream
        log("Returning multiple SSE messages as stream");
        return new Response(responseText, {
          status: response.status,
          headers: {
            "Content-Type": "text/event-stream",
            "Cache-Control": "no-cache",
            ...CORS,
          },
        });
      }
    }

    // Not SSE format (or an SSE event without data) - return as-is
    return new Response(responseText, {
      status: response.status,
      headers: { "Content-Type": "application/json", ...CORS },
    });
  } catch (error) {
    log(`Proxy fetch error: ${error.message}`);
    return new Response(JSON.stringify({ error: error.message }), {
      status: 502,
      headers: { "Content-Type": "application/json", ...CORS },
    });
  }
}

/**
 * Hono middleware that requires a valid bearer token (MCP server routes).
 *
 * Reads the Authorization header, validates the token with validateToken(),
 * and on success stores the token subject and the raw token on c.env
 * (userID, accessToken) before calling next(). Responds 401 when the header
 * is missing or malformed or the token fails validation.
 *
 * The "Bearer" scheme name is matched case-insensitively and one or more
 * spaces are accepted before the token.
 *
 * @param {object} c - Hono context
 * @param {Function} next - Next middleware/handler
 * @returns {Promise<Response|*>} - 401 Response, or the result of next()
 */
const stytchBearerTokenAuthMiddleware = async (c, next) => {
  const authHeader = c.req.header("Authorization");
  log(`Auth header present: ${!!authHeader}`);

  // Auth scheme names are case-insensitive; the token itself contains no spaces
  const bearerMatch = authHeader
    ? /^Bearer +(\S+)\s*$/i.exec(authHeader)
    : null;

  if (!bearerMatch) {
    return new Response("Missing or invalid access token", {
      status: 401,
      headers: CORS,
    });
  }

  const accessToken = bearerMatch[1];
  log(`Attempting to validate token: ${accessToken.substring(0, 10)}...`);

  try {
    // Add more detailed validation logging
    log("Starting token validation...");
    const verifyResult = await validateToken(accessToken, c.env);
    log(`Token validation successful! ${verifyResult.payload.sub}`);

    // Store user info in a variable that the handler can access
    // NOTE(review): this writes per-request data onto c.env; confirm env is
    // not shared between concurrent requests in the target runtime.
    c.env.userID = verifyResult.payload.sub;
    c.env.accessToken = accessToken;
  } catch (error) {
    log(`Token validation detailed error: ${error.code} ${error.message}`);
    return new Response(`Unauthorized: Invalid token - ${error.message}`, {
      status: 401,
      headers: CORS,
    });
  }

  return next();
};

// Create our main app with Hono
const app = new Hono();

// Configure the routes
app
  // Error handler
  .onError((err, c) => {
    console.error(`Application error: ${err}`);
    return new Response("Server error", {
      status: 500,
      headers: CORS,
    });
  })

  // Handle CORS preflight requests
  .options("*", (c) => new Response(null, { status: 204, headers: CORS }))

  // Status endpoints
  .get("/status", (c) => {
    const REMOTE_MCP_SERVER_URL =
      c.env.REMOTE_MCP_SERVER_URL || "http://localhost:8000";
    return new Response(
      JSON.stringify({
        worker: "BioMCP-OAuth",
        remote: REMOTE_MCP_SERVER_URL,
        forwardPath: "/messages",
        resourceEndpoint: null,
        debug: DEBUG,
      }),
      {
        status: 200,
        headers: { "Content-Type": "application/json", ...CORS },
      },
    );
  })

  .get("/debug", (c) => {
    const REMOTE_MCP_SERVER_URL =
      c.env.REMOTE_MCP_SERVER_URL || "http://localhost:8000";
    return new Response(
      JSON.stringify({
        worker: "BioMCP-OAuth",
        remote: REMOTE_MCP_SERVER_URL,
        forwardPath: "/messages",
        resourceEndpoint: null,
        debug: DEBUG,
      }),
      {
        status: 200,
        headers: { "Content-Type": "application/json", ...CORS },
      },
    );
  })

  // OAuth server metadata endpoint
  .get("/.well-known/oauth-authorization-server", (c) => {
    const url = new URL(c.req.url);
    return new Response(
      JSON.stringify({
        issuer: c.env.STYTCH_PROJECT_ID,
        authorization_endpoint: `${url.origin}/authorize`,
        token_endpoint: `${url.origin}/token`,
        registration_endpoint: getStytchUrl(c.env, "oauth2/register", true),
        scopes_supported: ["openid", "profile", "email", "offline_access"],
        response_types_supported: ["code"],
        response_modes_supported: ["query"],
        grant_types_supported: ["authorization_code", "refresh_token"],
        token_endpoint_auth_methods_supported: ["none"],
        code_challenge_methods_supported: ["S256"],
      }),
      {
        status: 200,
        headers: { "Content-Type": "application/json", ...CORS },
      },
    );
  })

  // OAuth redirect endpoint (redirects to Stytch's hosted UI)
  .get("/authorize", async (c) => {
    try {
      log("Authorize endpoint hit");
      const url = new URL(c.req.url);
      log(`Full authorize URL: ${url.toString()}`);
      log(
        `Search params: ${JSON.stringify(
          Object.fromEntries(url.searchParams),
        )}`,
      );

      const redirectUrl = new URL("/callback", url.origin).toString();
      log(`Redirect URL: ${redirectUrl}`);

      // Extract and forward OAuth parameters
      const clientId = url.searchParams.get("client_id") || "unknown_client";
      const redirectUri = url.searchParams.get("redirect_uri");
      let state = url.searchParams.get("state");
      const codeChallenge = url.searchParams.get("code_challenge");
      const codeChallengeMethod = url.searchParams.get("code_challenge_method");

      // Generate a state if one isn't provided
      if (!state) {
        state = crypto.randomUUID();
        log(`Generated state parameter: ${state}`);
      }

      log("OAuth params:", {
        clientId,
        redirectUri,
        state,
        codeChallenge: !!codeChallenge,
        codeChallengeMethod,
      });

      // Store OAuth request parameters in KV for use during callback
      const oauthRequestData = {
        client_id: clientId,
        redirect_uri: redirectUri,
        code_challenge: codeChallenge,
        code_challenge_method: codeChallengeMethod,
        original_state: state, // Store the original state explicitly
      };

      // Also store a mapping from any state value to the original state
      // This is crucial for handling cases where Stytch modifies the state
      try {
        // Use a consistent key based on timestamp for lookups
        const timestamp = Date.now().toString();
        await c.env.OAUTH_KV.put(`state_timestamp:${timestamp}`, state, {
          expirationTtl: 600,
        });

        log(`Saving OAuth request data: ${JSON.stringify(oauthRequestData)}`);
        await c.env.OAUTH_KV.put(
          `oauth_request:${state}`,
          JSON.stringify(oauthRequestData),
          { expirationTtl: 600 },
        );

        // Also store timestamp for this state to allow fallback lookup
        await c.env.OAUTH_KV.put(`timestamp_for_state:${state}`, timestamp, {
          expirationTtl: 600,
        });

        log("Successfully stored OAuth request data in KV");
      } catch (kvError) {
        log(`Error storing OAuth data in KV: ${kvError}`);
        return new Response("Internal server error storing OAuth data", {
          status: 500,
          headers: CORS,
        });
      }

      // Redirect to Stytch's hosted login UI
      const stytchLoginUrl = `${
        c.env.STYTCH_OAUTH_URL ||
        "https://test.stytch.com/v1/public/oauth/google/start"
      }?public_token=${
        c.env.STYTCH_PUBLIC_TOKEN
      }&login_redirect_url=${encodeURIComponent(
        redirectUrl,
      )}&state=${encodeURIComponent(state)}`;

      log(`Redirecting to Stytch: ${stytchLoginUrl}`);
      return Response.redirect(stytchLoginUrl, 302);
    } catch (error) {
      console.error(`Error in authorize endpoint: ${error}`);
      return new Response(`Authorization error: ${error.message}`, {
        status: 500,
        headers: CORS,
      });
    }
  })

  // OAuth callback endpoint
  .get("/callback", async (c) => {
    try {
      log("Callback hit, logging all details");
      const url = new URL(c.req.url);
      log(`Full URL: ${url.toString()}`);
      log(
        `Search params: ${JSON.stringify(
          Object.fromEntries(url.searchParams),
        )}`,
      );

      // Stytch's callback format - get the token
      const token =
        url.searchParams.get("stytch_token_type") === "oauth"
          ? url.searchParams.get("token")
          : url.searchParams.get("token") ||
            url.searchParams.get("stytch_token");

      log(`Token type: ${url.searchParams.get("stytch_token_type")}`);
      log(`Token found: ${!!token}`);

      // We need a token to proceed
      if (!token) {
        log("Invalid callback - missing token");
        return new Response("Invalid callback request: missing token", {
          status: 400,
          headers: CORS,
        });
      }

      // Look for the most recent OAuth request
      let mostRecentState = null;
      let mostRecentTimestamp = null;
      try {
        // Find the most recent timestamp
        const timestamps = await c.env.OAUTH_KV.list({
          prefix: "state_timestamp:",
        });
        if (timestamps.keys.length > 0) {
          // Sort timestamps in descending order (most recent first)
          const sortedTimestamps = timestamps.keys.sort((a, b) => {
            const timeA = parseInt(a.name.replace("state_timestamp:", ""));
            const timeB = parseInt(b.name.replace("state_timestamp:", ""));
            return timeB - timeA; // descending order
          });

          mostRecentTimestamp = sortedTimestamps[0].name;
          // Get the state associated with this timestamp
          mostRecentState = await c.env.OAUTH_KV.get(mostRecentTimestamp);
          log(`Found most recent state: ${mostRecentState}`);
        }
      } catch (error) {
        log(`Error finding recent state: ${error}`);
      }

      // If we have a state from the most recent OAuth request, use it
      let oauthRequest = null;
      let state = mostRecentState;

      if (state) {
        try {
          const oauthRequestJson = await c.env.OAUTH_KV.get(
            `oauth_request:${state}`,
          );
          if (oauthRequestJson) {
            oauthRequest = JSON.parse(oauthRequestJson);
            log(`Found OAuth request for state: ${state}`);
          }
        } catch (error) {
          log(`Error getting OAuth request: ${error}`);
        }
      }

      // If we couldn't find the OAuth request, try other alternatives
      if (!oauthRequest) {
        log(
          "No OAuth request found for most recent state, checking other requests",
        );

        try {
          // List all OAuth requests and use the most recent one
          const requests = await c.env.OAUTH_KV.list({
            prefix: "oauth_request:",
          });
          if (requests.keys.length > 0) {
            const oauthRequestJson = await c.env.OAUTH_KV.get(
              requests.keys[0].name,
            );
            if (oauthRequestJson) {
              oauthRequest = JSON.parse(oauthRequestJson);
              // Extract the state from the key
              state = requests.keys[0].name.replace("oauth_request:", "");
              log(`Using most recent OAuth request with state: ${state}`);
            }
          }
        } catch (error) {
          log(`Error finding alternative OAuth request: ${error}`);
        }
      }

      // Final fallback - use hardcoded values for Claude
      if (!oauthRequest) {
        log("No OAuth request found, using fallback values");
        oauthRequest = {
          client_id: "biomcp-client",
          redirect_uri: "https://claude.ai/api/mcp/auth_callback",
          code_challenge: null,
          original_state: state || "unknown_state",
        };
      }

      // If we have an original_state in the OAuth request, use that
      if (oauthRequest.original_state) {
        state = oauthRequest.original_state;
        log(`Using original state from OAuth request: ${state}`);
      }

      // Proceed with authentication
      return processAuthCallback(c, token, state, oauthRequest);
    } catch (error) {
      console.error(`Callback error: ${error}`);
      return new Response(
        `Server error during authentication: ${error.message}`,
        {
          status: 500,
          headers: CORS,
        },
      );
    }
  })

  // Token exchange endpoint
  .post("/token", async (c) => {
    try {
      log("Token endpoint hit");
      const formData = await c.req.formData();
      const grantType = formData.get("grant_type");
      const code = formData.get("code");
      const redirectUri = formData.get("redirect_uri");
      const clientId = formData.get("client_id");
      const codeVerifier = formData.get("code_verifier");

      log("Token request params:", {
        grantType,
        code: !!code,
        redirectUri,
        clientId,
        codeVerifier: !!codeVerifier,
      });

      if (
        grantType !== "authorization_code" ||
        !code ||
        !redirectUri ||
        !clientId ||
        !codeVerifier
      ) {
        log("Invalid token request parameters");
        return new Response(JSON.stringify({ error: "invalid_request" }), {
          status: 400,
          headers: { "Content-Type": "application/json", ...CORS },
        });
      }

      // Retrieve the stored authorization code data
      let authCodeJson;
      try {
        authCodeJson = await c.env.OAUTH_KV.get(`auth_code:${code}`);
        log(`Auth code data retrieved: ${!!authCodeJson}`);
      } catch (kvError) {
        log(`Error retrieving auth code data: ${kvError}`);
        return new Response(JSON.stringify({ error: "server_error" }), {
          status: 500,
          headers: { "Content-Type": "application/json", ...CORS },
        });
      }

      if (!authCodeJson) {
        log("Invalid or expired authorization code");
        return new Response(JSON.stringify({ error: "invalid_grant" }), {
          status: 400,
          headers: { "Content-Type": "application/json", ...CORS },
        });
      }

      let authCodeData;
      try {
        authCodeData = JSON.parse(authCodeJson);
        log(`Auth code data parsed: ${JSON.stringify(authCodeData)}`);
      } catch (parseError) {
        log(`Error parsing auth code data: ${parseError}`);
        return new Response(JSON.stringify({ error: "server_error" }), {
          status: 500,
          headers: { "Content-Type": "application/json", ...CORS },
        });
      }

      // Verify the code_verifier against the stored code_challenge
      if (authCodeData.code_challenge) {
        log("Verifying PKCE code challenge");
        const encoder = new TextEncoder();
        const data = encoder.encode(codeVerifier);
        const digest = await crypto.subtle.digest("SHA-256", data);

        // Convert to base64url encoding
        const base64Digest = btoa(
          String.fromCharCode(...new Uint8Array(digest)),
        )
          .replace(/\+/g, "-")
          .replace(/\//g, "_")
          .replace(/=/g, "");

        log("Code challenge comparison:", {
          stored: authCodeData.code_challenge,
          computed: base64Digest,
          match: base64Digest === authCodeData.code_challenge,
        });

        if (base64Digest !== authCodeData.code_challenge) {
          log("PKCE verification failed");
          return new Response(JSON.stringify({ error: "invalid_grant" }), {
            status: 400,
            headers: { "Content-Type": "application/json", ...CORS },
          });
        }
      }

      // Delete the used authorization code
      try {
        await c.env.OAUTH_KV.delete(`auth_code:${code}`);
        log("Used authorization code deleted");
      } catch (deleteError) {
        log(`Error deleting used auth code: ${deleteError}`);
        // Continue anyway since this isn't critical
      }

      // Generate JWT access token instead of UUID
      const encoder = new TextEncoder();
      const secret = encoder.encode(
        c.env.JWT_SECRET || "default-jwt-secret-key",
      );

      // Create JWT payload
      const accessTokenPayload = {
        sub: authCodeData.sub,
        email: authCodeData.email,
        client_id: clientId,
        scope: "openid profile email",
        iss: c.env.STYTCH_PROJECT_ID,
        aud: clientId,
        exp: Math.floor(Date.now() / 1000) + 3600, // 1 hour expiry
        iat: Math.floor(Date.now() / 1000),
      };

      // Sign JWT
      const accessToken = await new SignJWT(accessTokenPayload)
        .setProtectedHeader({ alg: "HS256" })
        .setIssuedAt()
        .setExpirationTime("1h")
        .sign(secret);

      log(`Generated JWT access token: ${accessToken.substring(0, 20)}...`);

      // Generate refresh token (still using UUID for simplicity)
      const refreshToken = crypto.randomUUID();

      // Store token information - use a hash of the token as the key to avoid length limits
      const tokenHash = await crypto.subtle.digest(
        "SHA-256",
        encoder.encode(accessToken),
      );
      const tokenKey = btoa(String.fromCharCode(...new Uint8Array(tokenHash)))
        .replace(/\+/g, "-")
        .replace(/\//g, "_")
        .replace(/=/g, "")
        .substring(0, 32); // Use first 32 chars of hash

      try {
        log(`Storing access token with key: access_token:${tokenKey}`);
        await c.env.OAUTH_KV.put(
          `access_token:${tokenKey}`,
          JSON.stringify({
            token: accessToken,
            hash: tokenKey,
            ...accessTokenPayload,
          }),
          { expirationTtl: 3600 },
        );

        // Also store a mapping from the full token to the hash for validation
        await c.env.OAUTH_KV.put(`token_hash:${tokenKey}`, accessToken, {
          expirationTtl: 3600,
        });

        log("Storing refresh token");
        await c.env.OAUTH_KV.put(
          `refresh_token:${refreshToken}`,
          JSON.stringify({
            sub: authCodeData.sub,
            client_id: clientId,
          }),
          { expirationTtl: 30 * 24 * 60 * 60 },
        );

        log("Token data successfully stored");
      } catch (storeError) {
        log(`Error storing token data: ${storeError}`);
        return new Response(JSON.stringify({ error: "server_error" }), {
          status: 500,
          headers: { "Content-Type": "application/json", ...CORS },
        });
      }

      // Return the tokens
      const tokenResponse = {
        access_token: accessToken,
        token_type: "Bearer",
        expires_in: 3600,
        refresh_token: refreshToken,
        scope: "openid profile email",
      };

      log("Returning token response");
      return new Response(JSON.stringify(tokenResponse), {
        status: 200,
        headers: { "Content-Type": "application/json", ...CORS },
      });
    } catch (error) {
      console.error(`Token endpoint error: ${error}`);
      return new Response(JSON.stringify({ error: "server_error" }), {
        status: 500,
        headers: { "Content-Type": "application/json", ...CORS },
      });
    }
  })

  // Messages endpoint for all paths that start with /messages
  .post("/messages*", async (c) => {
    log("All messages endpoints hit");
    const REMOTE_MCP_SERVER_URL =
      c.env.REMOTE_MCP_SERVER_URL || "http://localhost:8000";
    const sid = new URL(c.req.url).searchParams.get("session_id");

    if (!sid) {
      return new Response("Missing session_id", {
        status: 400,
        headers: CORS,
      });
    }

    // Read the body
    const body = await c.req.text();
    const authHeader = c.req.header("Authorization") || "";
    let userEmail = "unknown";

    if (authHeader.startsWith("Bearer ")) {
      const token = authHeader.slice(7);
      const claims = decodeJwt(token);
      userEmail =
        claims.email || claims.preferred_username || claims.sub || "unknown";
    }

    log(`[Proxy] user=${userEmail}  query=${body}`);

    let sendToBQ = false;
    let parsed;
    let domain = null;
    let toolName = null;
    let sanitizedBody = body; // Default to original body

    try {
      parsed = JSON.parse(body);
      const args = parsed.params?.arguments;

      // Check if this is a think tool call
      toolName = parsed.params?.name;
      if (toolName === "think") {
        sendToBQ = false;
        log("[BigQuery] Skipping think tool call");
      } else if (args && Object.keys(args).length > 0) {
        // Extract domain from the arguments (for search/fetch tools)
        domain = args.domain || null;

        // Skip logging if domain is "thinking" or "think"
        if (domain === "thinking" || domain === "think") {
          sendToBQ = false;
        } else {
          sendToBQ = true;
        }

        // Sanitize sensitive data before logging to BigQuery
        if (sendToBQ) {
          // Use the comprehensive sanitization function
          const sanitized = sanitizeObject(parsed);
          sanitizedBody = JSON.stringify(sanitized);

          // Log if we actually sanitized something
          if (JSON.stringify(parsed) !== sanitizedBody) {
            log(
              "[BigQuery] Sanitized sensitive fields from query before logging",
            );
          }
        }
      }
    } catch (e) {
      console.log("[BigQuery] skipping insert—cannot parse JSON body", e);
    }

    const { BQ_SA_KEY_JSON, BQ_PROJECT_ID, BQ_DATASET, BQ_TABLE } = c.env;

    if (sendToBQ && BQ_SA_KEY_JSON && BQ_PROJECT_ID && BQ_DATASET && BQ_TABLE) {
      const eventRow = {
        timestamp: new Date().toISOString(),
        userEmail,
        query: sanitizedBody, // Use sanitized body instead of original
      };
      // fire & forget
      c.executionCtx.waitUntil(
        insertEvent(c.env, eventRow).catch((error) => {
          console.error("[BigQuery] Insert failed:", error);
        }),
      );
    } else {
      const missing = [
        !sendToBQ
          ? toolName === "think"
            ? "think tool"
            : domain === "thinking" || domain === "think"
            ? `domain is ${domain}`
            : "no query args"
          : null,
        !BQ_SA_KEY_JSON && "BQ_SA_KEY_JSON",
        !BQ_PROJECT_ID && "BQ_PROJECT_ID",
        !BQ_DATASET && "BQ_DATASET",
        !BQ_TABLE && "BQ_TABLE",
      ].filter(Boolean);
      console.log("[BigQuery] skipping insert—", missing.join(", "));
    }

    // Make a new Request object with the body we've already read
    const newRequest = new Request(c.req.url, {
      method: c.req.method,
      headers: c.req.headers,
      body: body,
    });

    // Forward everything to proxyPost like the auth-less version does
    return proxyPost(newRequest, REMOTE_MCP_SERVER_URL, "/messages", sid);
  });

// MCP endpoint (Streamable HTTP transport) - separate chain to avoid wildcard route issues
app
  // HEAD /mcp: availability probe. A 204 tells a Streamable HTTP client that
  // the endpoint exists.
  .on("HEAD", "/mcp", stytchBearerTokenAuthMiddleware, (c) => {
    log("MCP HEAD endpoint hit - checking endpoint availability");
    return new Response(null, { status: 204, headers: CORS });
  })

  // GET /mcp: with a session_id, proxy the backend's /mcp response (streamed
  // when it is an event stream); without one, answer 204.
  .get("/mcp", stytchBearerTokenAuthMiddleware, async (c) => {
    log("MCP GET endpoint hit - Streamable HTTP transport");
    const backendBase = c.env.REMOTE_MCP_SERVER_URL || "http://localhost:8000";

    const sessionId = new URL(c.req.url).searchParams.get("session_id");
    if (!sessionId) {
      // No session to attach to: only signal that the endpoint exists.
      return new Response(null, { status: 204, headers: CORS });
    }

    const encodedSession = encodeURIComponent(sessionId);
    const targetUrl = `${backendBase}/mcp?session_id=${encodedSession}`;
    log(`Proxying GET /mcp to: ${targetUrl}`);

    try {
      const upstream = await fetch(targetUrl, {
        method: "GET",
        headers: {
          Accept: "text/event-stream",
          "User-Agent": "Claude/1.0",
        },
      });
      const upstreamType = upstream.headers.get("content-type");

      if (upstreamType?.includes("text/event-stream")) {
        // Hand the backend's body stream straight to the client instead of
        // buffering it.
        log("Streaming SSE response from backend");
        return new Response(upstream.body, {
          status: upstream.status,
          headers: {
            "Content-Type": "text/event-stream",
            "Cache-Control": "no-cache",
            Connection: "keep-alive",
            ...CORS,
          },
        });
      }

      // Anything else is read fully and relayed with the backend's status.
      const relayedText = await upstream.text();
      return new Response(relayedText, {
        status: upstream.status,
        headers: {
          "Content-Type": upstreamType || "text/plain",
          ...CORS,
        },
      });
    } catch (error) {
      log(`Error proxying GET /mcp: ${error}`);
      return new Response(`Proxy error: ${error.message}`, {
        status: 502,
        headers: CORS,
      });
    }
  })

  // POST /mcp: re-wrap the already-read body in a fresh Request and delegate
  // to proxyPost.
  .post("/mcp", stytchBearerTokenAuthMiddleware, async (c) => {
    log("MCP POST endpoint hit - Streamable HTTP transport");
    const backendBase = c.env.REMOTE_MCP_SERVER_URL || "http://localhost:8000";

    // The session id goes through validateSessionId before use.
    const sessionId = validateSessionId(
      new URL(c.req.url).searchParams.get("session_id"),
    );

    const payload = await c.req.text();
    log(`MCP POST request body: ${payload.substring(0, 200)}`);

    const forwarded = new Request(c.req.url, {
      method: "POST",
      headers: c.req.headers,
      body: payload,
    });

    return proxyPost(forwarded, backendBase, "/mcp", sessionId);
  })

  // Catch-all: any other method/path on this chain gets a plain 404.
  .all("*", () => new Response("Not Found", { status: 404, headers: CORS }));

// Export the app as the main worker fetch handler
export default {
  /**
   * Worker fetch entry point.
   *
   * Refreshes the DEBUG flag (declared outside this block) from the
   * environment on every request, then hands the request to the app.
   * DEBUG is enabled only for the boolean `true` or the string "true".
   */
  fetch(request, env, ctx) {
    DEBUG = [true, "true"].includes(env.DEBUG);
    return app.fetch(request, env, ctx);
  },
};

```

--------------------------------------------------------------------------------
/src/biomcp/individual_tools.py:
--------------------------------------------------------------------------------

```python
"""Individual MCP tools for specific biomedical search and fetch operations.

This module provides the original 9 individual tools that offer direct access
to specific search and fetch functionality, complementing the unified tools.
"""

import logging
from typing import Annotated, Literal

from pydantic import Field

from biomcp.articles.fetch import _article_details
from biomcp.articles.search import _article_searcher
from biomcp.cbioportal_helper import (
    get_cbioportal_summary_for_genes,
    get_variant_cbioportal_summary,
)
from biomcp.core import ensure_list, mcp_app
from biomcp.diseases.getter import _disease_details
from biomcp.drugs.getter import _drug_details
from biomcp.genes.getter import _gene_details
from biomcp.metrics import track_performance
from biomcp.trials.getter import (
    _trial_locations,
    _trial_outcomes,
    _trial_protocol,
    _trial_references,
)
from biomcp.trials.search import _trial_searcher
from biomcp.variants.getter import _variant_details
from biomcp.variants.search import _variant_searcher

logger = logging.getLogger(__name__)


# Article Tools
@mcp_app.tool()
@track_performance("biomcp.article_searcher")
async def article_searcher(
    chemicals: Annotated[
        list[str] | str | None,
        Field(description="Chemical/drug names to search for"),
    ] = None,
    diseases: Annotated[
        list[str] | str | None,
        Field(description="Disease names to search for"),
    ] = None,
    genes: Annotated[
        list[str] | str | None,
        Field(description="Gene symbols to search for"),
    ] = None,
    keywords: Annotated[
        list[str] | str | None,
        Field(description="Free-text keywords to search for"),
    ] = None,
    variants: Annotated[
        list[str] | str | None,
        Field(
            description="Variant strings to search for (e.g., 'V600E', 'p.D277Y')"
        ),
    ] = None,
    include_preprints: Annotated[
        bool,
        Field(description="Include preprints from bioRxiv/medRxiv"),
    ] = True,
    include_cbioportal: Annotated[
        bool,
        Field(
            description="Include cBioPortal cancer genomics summary when searching by gene"
        ),
    ] = True,
    page: Annotated[
        int,
        Field(description="Page number (1-based)", ge=1),
    ] = 1,
    page_size: Annotated[
        int,
        Field(description="Results per page", ge=1, le=100),
    ] = 10,
) -> str:
    """Search PubMed/PubTator3 for research articles and preprints.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy!

    Use this tool to find scientific literature ABOUT genes, variants, diseases, or chemicals.
    Results include articles from PubMed and optionally preprints from bioRxiv/medRxiv.

    Important: This searches for ARTICLES ABOUT these topics, not database records.
    For genetic variant database records, use variant_searcher instead.

    Example usage:
    - Find articles about BRAF mutations in melanoma
    - Search for papers on a specific drug's effects
    - Locate research on gene-disease associations
    """
    # The docstring above is intentionally left as-is: it is presumably what
    # the tool decorator publishes as the tool description (confirm).

    # `page` and `page_size` are part of the tool signature, but nothing below
    # forwards them to the search. Surface that in the logs instead of
    # dropping caller input without a trace.
    if page != 1 or page_size != 10:
        logger.warning(
            "article_searcher: page=%s and page_size=%s are not forwarded "
            "to the article search and have no effect",
            page,
            page_size,
        )

    # Normalize every filter to a list; falsy input (None, "", []) becomes None.
    chemicals = ensure_list(chemicals) if chemicals else None
    diseases = ensure_list(diseases) if diseases else None
    genes = ensure_list(genes) if genes else None
    keywords = ensure_list(keywords) if keywords else None
    variants = ensure_list(variants) if variants else None

    result = await _article_searcher(
        call_benefit="Direct article search for specific biomedical topics",
        chemicals=chemicals,
        diseases=diseases,
        genes=genes,
        keywords=keywords,
        variants=variants,
        include_preprints=include_preprints,
        include_cbioportal=include_cbioportal,
    )

    # Prepend the cBioPortal summary when the search is gene-based.
    # NOTE(review): include_cbioportal is also passed to _article_searcher
    # above; confirm the summary cannot end up in the output twice.
    if include_cbioportal and genes:
        request_params = {
            "keywords": keywords,
            "diseases": diseases,
            "chemicals": chemicals,
            "variants": variants,
        }
        cbioportal_summary = await get_cbioportal_summary_for_genes(
            genes, request_params
        )
        if cbioportal_summary:
            result = cbioportal_summary + "\n\n---\n\n" + result

    return result


@mcp_app.tool()
@track_performance("biomcp.article_getter")
async def article_getter(
    pmid: Annotated[
        str,
        Field(
            description="Article identifier - either a PubMed ID (e.g., '38768446' or 'PMC11193658') or DOI (e.g., '10.1101/2024.01.20.23288905')"
        ),
    ],
) -> str:
    """Fetch detailed information for a specific article.

    Retrieves the full abstract and available text for an article by its identifier.
    Supports:
    - PubMed IDs (PMID) for published articles
    - PMC IDs for articles in PubMed Central
    - DOIs for preprints from Europe PMC

    Returns formatted text including:
    - Title
    - Abstract
    - Full text (when available from PMC for published articles)
    - Source information (PubMed or Europe PMC)
    """
    # Pure pass-through: the identifier is handed to the shared article
    # fetcher unchanged, alongside a fixed call_benefit label.
    article_text = await _article_details(
        pmid=pmid,
        call_benefit="Fetch detailed article information for analysis",
    )
    return article_text


# Trial Tools
@mcp_app.tool()
@track_performance("biomcp.trial_searcher")
async def trial_searcher(
    conditions: Annotated[
        list[str] | str | None,
        Field(description="Medical conditions to search for"),
    ] = None,
    interventions: Annotated[
        list[str] | str | None,
        Field(description="Treatment interventions to search for"),
    ] = None,
    other_terms: Annotated[
        list[str] | str | None,
        Field(description="Additional search terms"),
    ] = None,
    recruiting_status: Annotated[
        Literal["OPEN", "CLOSED", "ANY"] | None,
        Field(description="Filter by recruiting status"),
    ] = None,
    phase: Annotated[
        Literal[
            "EARLY_PHASE1",
            "PHASE1",
            "PHASE2",
            "PHASE3",
            "PHASE4",
            "NOT_APPLICABLE",
        ]
        | None,
        Field(description="Filter by clinical trial phase"),
    ] = None,
    location: Annotated[
        str | None,
        Field(description="Location term for geographic filtering"),
    ] = None,
    lat: Annotated[
        float | None,
        Field(
            description="Latitude for location-based search. AI agents should geocode city names before using.",
            ge=-90,
            le=90,
        ),
    ] = None,
    long: Annotated[
        float | None,
        Field(
            description="Longitude for location-based search. AI agents should geocode city names before using.",
            ge=-180,
            le=180,
        ),
    ] = None,
    distance: Annotated[
        int | None,
        Field(
            description="Distance in miles from lat/long coordinates",
            ge=1,
        ),
    ] = None,
    age_group: Annotated[
        Literal["CHILD", "ADULT", "OLDER_ADULT"] | None,
        Field(description="Filter by age group"),
    ] = None,
    sex: Annotated[
        Literal["FEMALE", "MALE", "ALL"] | None,
        Field(description="Filter by biological sex"),
    ] = None,
    healthy_volunteers: Annotated[
        Literal["YES", "NO"] | None,
        Field(description="Filter by healthy volunteer eligibility"),
    ] = None,
    study_type: Annotated[
        Literal["INTERVENTIONAL", "OBSERVATIONAL", "EXPANDED_ACCESS"] | None,
        Field(description="Filter by study type"),
    ] = None,
    funder_type: Annotated[
        Literal["NIH", "OTHER_GOV", "INDUSTRY", "OTHER"] | None,
        Field(description="Filter by funding source"),
    ] = None,
    page: Annotated[
        int,
        Field(description="Page number (1-based)", ge=1),
    ] = 1,
    page_size: Annotated[
        int,
        Field(description="Results per page", ge=1, le=100),
    ] = 10,
) -> str:
    """Search ClinicalTrials.gov for clinical studies.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy!

    Comprehensive search tool for finding clinical trials based on multiple criteria.
    Supports filtering by conditions, interventions, location, phase, and eligibility.

    Location search notes:
    - Use either location term OR lat/long coordinates, not both
    - For city-based searches, AI agents should geocode to lat/long first
    - Distance parameter only works with lat/long coordinates

    Returns a formatted list of matching trials with key details.
    """
    # Raises ValueError when the location arguments are combined inconsistently
    # (checked in the order below, so the first violated rule wins).

    # A free-text location and coordinates are mutually exclusive.
    if location and (lat is not None or long is not None):
        raise ValueError(
            "Use either location term OR lat/long coordinates, not both"
        )

    # Coordinates only make sense as a pair.
    if (lat is None) != (long is None):
        raise ValueError(
            "Both latitude and longitude must be provided together"
        )

    # A radius needs a center point.
    if distance is not None and (lat is None or long is None):
        raise ValueError(
            "Distance parameter requires both latitude and longitude"
        )

    # Several parameters are accepted by this tool but have no counterpart in
    # the _trial_searcher call below. Report them in the logs so the dropped
    # filters are visible instead of silently ignored.
    # NOTE(review): wire these through once the search layer supports them.
    ignored = [
        name
        for name, value in (
            ("location", location),
            ("sex", sex),
            ("healthy_volunteers", healthy_volunteers),
            ("funder_type", funder_type),
        )
        if value is not None
    ]
    if page != 1:
        ignored.append("page")
    if ignored:
        logger.warning(
            "trial_searcher: parameter(s) not forwarded to the trial search "
            "and therefore ignored: %s",
            ", ".join(ignored),
        )

    # Normalize the term filters to lists; falsy input becomes None.
    conditions = ensure_list(conditions) if conditions else None
    interventions = ensure_list(interventions) if interventions else None
    other_terms = ensure_list(other_terms) if other_terms else None

    return await _trial_searcher(
        call_benefit="Direct clinical trial search for specific criteria",
        conditions=conditions,
        interventions=interventions,
        terms=other_terms,
        recruiting_status=recruiting_status,
        phase=phase,
        lat=lat,
        long=long,
        distance=distance,
        age_group=age_group,
        study_type=study_type,
        page_size=page_size,
    )


@mcp_app.tool()
@track_performance("biomcp.trial_getter")
async def trial_getter(
    nct_id: Annotated[
        str,
        Field(description="NCT ID (e.g., 'NCT06524388')"),
    ],
) -> str:
    """Fetch comprehensive details for a specific clinical trial.

    Retrieves all available information for a clinical trial by its NCT ID.
    This includes protocol details, locations, outcomes, and references.

    For specific sections only, use the specialized getter tools:
    - trial_protocol_getter: Core protocol information
    - trial_locations_getter: Site locations and contacts
    - trial_outcomes_getter: Primary/secondary outcomes and results
    - trial_references_getter: Publications and references
    """
    # Section fetchers, in the order their output appears in the result.
    section_fetchers = (
        _trial_protocol,
        _trial_locations,
        _trial_outcomes,
        _trial_references,
    )

    # Fetch the sections one after another; empty sections are left out.
    sections = []
    for fetch_section in section_fetchers:
        section_text = await fetch_section(
            call_benefit="Fetch comprehensive trial details for analysis",
            nct_id=nct_id,
        )
        if section_text:
            sections.append(section_text)

    if not sections:
        return f"No data found for trial {nct_id}"
    return "\n\n".join(sections)


@mcp_app.tool()
@track_performance("biomcp.trial_protocol_getter")
async def trial_protocol_getter(
    nct_id: Annotated[
        str,
        Field(description="NCT ID (e.g., 'NCT06524388')"),
    ],
) -> str:
    """Fetch core protocol information for a clinical trial.

    Retrieves essential protocol details including:
    - Official title and brief summary
    - Study status and sponsor information
    - Study design (type, phase, allocation, masking)
    - Eligibility criteria
    - Primary completion date
    """
    # Pass-through to the protocol fetcher with a fixed call_benefit label.
    protocol_text = await _trial_protocol(
        nct_id=nct_id,
        call_benefit="Fetch trial protocol information for eligibility assessment",
    )
    return protocol_text


@mcp_app.tool()
@track_performance("biomcp.trial_references_getter")
async def trial_references_getter(
    nct_id: Annotated[
        str,
        Field(description="NCT ID (e.g., 'NCT06524388')"),
    ],
) -> str:
    """Fetch publications and references for a clinical trial.

    Retrieves all linked publications including:
    - Published results papers
    - Background literature
    - Protocol publications
    - Related analyses

    Includes PubMed IDs when available for easy cross-referencing.
    """
    # Pass-through to the references fetcher with a fixed call_benefit label.
    references_text = await _trial_references(
        nct_id=nct_id,
        call_benefit="Fetch trial publications and references for evidence review",
    )
    return references_text


@mcp_app.tool()
@track_performance("biomcp.trial_outcomes_getter")
async def trial_outcomes_getter(
    nct_id: Annotated[
        str,
        Field(description="NCT ID (e.g., 'NCT06524388')"),
    ],
) -> str:
    """Fetch outcome measures and results for a clinical trial.

    Retrieves detailed outcome information including:
    - Primary outcome measures
    - Secondary outcome measures
    - Results data (if available)
    - Adverse events (if reported)

    Note: Results are only available for completed trials that have posted data.
    """
    # Pass-through to the outcomes fetcher with a fixed call_benefit label.
    outcomes_text = await _trial_outcomes(
        nct_id=nct_id,
        call_benefit="Fetch trial outcome measures and results for efficacy assessment",
    )
    return outcomes_text


@mcp_app.tool()
@track_performance("biomcp.trial_locations_getter")
async def trial_locations_getter(
    nct_id: Annotated[
        str,
        Field(description="NCT ID (e.g., 'NCT06524388')"),
    ],
) -> str:
    """Fetch contact and location details for a clinical trial.

    Retrieves all study locations including:
    - Facility names and addresses
    - Principal investigator information
    - Contact details (when recruiting)
    - Recruitment status by site

    Useful for finding trials near specific locations or contacting study teams.
    """
    # Pass-through to the locations fetcher with a fixed call_benefit label.
    locations_text = await _trial_locations(
        nct_id=nct_id,
        call_benefit="Fetch trial locations and contacts for enrollment information",
    )
    return locations_text


# Variant Tools
@mcp_app.tool()
@track_performance("biomcp.variant_searcher")
async def variant_searcher(
    gene: Annotated[
        str | None,
        Field(description="Gene symbol (e.g., 'BRAF', 'TP53')"),
    ] = None,
    hgvs: Annotated[
        str | None,
        Field(description="HGVS notation (genomic, coding, or protein)"),
    ] = None,
    hgvsp: Annotated[
        str | None,
        Field(description="Protein change in HGVS format (e.g., 'p.V600E')"),
    ] = None,
    hgvsc: Annotated[
        str | None,
        Field(description="Coding sequence change (e.g., 'c.1799T>A')"),
    ] = None,
    rsid: Annotated[
        str | None,
        Field(description="dbSNP rsID (e.g., 'rs113488022')"),
    ] = None,
    region: Annotated[
        str | None,
        Field(description="Genomic region (e.g., 'chr7:140753336-140753337')"),
    ] = None,
    significance: Annotated[
        Literal[
            "pathogenic",
            "likely_pathogenic",
            "uncertain_significance",
            "likely_benign",
            "benign",
            "conflicting",
        ]
        | None,
        Field(description="Clinical significance filter"),
    ] = None,
    frequency_min: Annotated[
        float | None,
        Field(description="Minimum allele frequency", ge=0, le=1),
    ] = None,
    frequency_max: Annotated[
        float | None,
        Field(description="Maximum allele frequency", ge=0, le=1),
    ] = None,
    consequence: Annotated[
        str | None,
        Field(description="Variant consequence (e.g., 'missense_variant')"),
    ] = None,
    cadd_score_min: Annotated[
        float | None,
        Field(description="Minimum CADD score for pathogenicity"),
    ] = None,
    sift_prediction: Annotated[
        Literal["deleterious", "tolerated"] | None,
        Field(description="SIFT functional prediction"),
    ] = None,
    polyphen_prediction: Annotated[
        Literal["probably_damaging", "possibly_damaging", "benign"] | None,
        Field(description="PolyPhen-2 functional prediction"),
    ] = None,
    include_cbioportal: Annotated[
        bool,
        Field(
            description="Include cBioPortal cancer genomics summary when searching by gene"
        ),
    ] = True,
    page: Annotated[
        int,
        Field(description="Page number (1-based)", ge=1),
    ] = 1,
    page_size: Annotated[
        int,
        Field(description="Results per page", ge=1, le=100),
    ] = 10,
) -> str:
    """Search MyVariant.info for genetic variant DATABASE RECORDS.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy!

    Important: This searches for variant DATABASE RECORDS (frequency, significance, etc.),
    NOT articles about variants. For articles about variants, use article_searcher.

    Searches the comprehensive variant database including:
    - Population frequencies (gnomAD, 1000 Genomes, etc.)
    - Clinical significance (ClinVar)
    - Functional predictions (SIFT, PolyPhen, CADD)
    - Gene and protein consequences

    Search by various identifiers or filter by clinical/functional criteria.
    """
    # `hgvs` and `consequence` are accepted by this tool but have no
    # counterpart in the _variant_searcher call below. Report them in the logs
    # so the dropped filters are visible instead of silently ignored.
    # NOTE(review): wire these through once the search layer supports them.
    ignored = [
        name
        for name, value in (("hgvs", hgvs), ("consequence", consequence))
        if value is not None
    ]
    if ignored:
        logger.warning(
            "variant_searcher: parameter(s) not forwarded to the variant "
            "search and therefore ignored: %s",
            ", ".join(ignored),
        )

    # Tool-level names are mapped onto the searcher's keyword names here
    # (frequency_min -> min_frequency, cadd_score_min -> cadd, ...).
    result = await _variant_searcher(
        call_benefit="Direct variant database search for genetic analysis",
        gene=gene,
        hgvsp=hgvsp,
        hgvsc=hgvsc,
        rsid=rsid,
        region=region,
        significance=significance,
        min_frequency=frequency_min,
        max_frequency=frequency_max,
        cadd=cadd_score_min,
        sift=sift_prediction,
        polyphen=polyphen_prediction,
        size=page_size,
        # 1-based page -> 0-based record offset; anything <= 1 maps to 0.
        offset=(page - 1) * page_size if page > 1 else 0,
    )

    # Prepend the cBioPortal summary when the search is gene-based.
    if include_cbioportal and gene:
        cbioportal_summary = await get_variant_cbioportal_summary(gene)
        if cbioportal_summary:
            result = cbioportal_summary + "\n\n" + result

    return result


@mcp_app.tool()
@track_performance("biomcp.variant_getter")
async def variant_getter(
    variant_id: Annotated[
        str,
        Field(
            description="Variant ID (HGVS, rsID, or MyVariant ID like 'chr7:g.140753336A>T')"
        ),
    ],
    include_external: Annotated[
        bool,
        Field(
            description="Include external annotations (TCGA, 1000 Genomes, functional predictions)"
        ),
    ] = True,
) -> str:
    """Fetch comprehensive details for a specific genetic variant.

    Retrieves all available information for a variant including:
    - Gene location and consequences
    - Population frequencies across databases
    - Clinical significance from ClinVar
    - Functional predictions
    - External annotations (TCGA cancer data, conservation scores)

    Accepts various ID formats:
    - HGVS: NM_004333.4:c.1799T>A
    - rsID: rs113488022
    - MyVariant ID: chr7:g.140753336A>T
    """
    # Pass-through: both arguments go to the variant fetcher unchanged,
    # together with a fixed call_benefit label.
    variant_text = await _variant_details(
        variant_id=variant_id,
        include_external=include_external,
        call_benefit="Fetch comprehensive variant annotations for interpretation",
    )
    return variant_text


@mcp_app.tool()
@track_performance("biomcp.alphagenome_predictor")
async def alphagenome_predictor(
    chromosome: Annotated[
        str,
        Field(description="Chromosome (e.g., 'chr7', 'chrX')"),
    ],
    position: Annotated[
        int,
        Field(description="1-based genomic position of the variant"),
    ],
    reference: Annotated[
        str,
        Field(description="Reference allele(s) (e.g., 'A', 'ATG')"),
    ],
    alternate: Annotated[
        str,
        Field(description="Alternate allele(s) (e.g., 'T', 'A')"),
    ],
    interval_size: Annotated[
        int,
        Field(
            description="Size of genomic interval to analyze in bp (max 1,000,000)",
            ge=2000,
            le=1000000,
        ),
    ] = 131072,
    tissue_types: Annotated[
        list[str] | str | None,
        Field(
            description="UBERON ontology terms for tissue-specific predictions (e.g., 'UBERON:0002367' for external ear)"
        ),
    ] = None,
    significance_threshold: Annotated[
        float,
        Field(
            description="Threshold for significant log2 fold changes (default: 0.5)",
            ge=0.0,
            le=5.0,
        ),
    ] = 0.5,
    api_key: Annotated[
        str | None,
        Field(
            description="AlphaGenome API key. Check if user mentioned 'my AlphaGenome API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one."
        ),
    ] = None,
) -> str:
    """Predict variant effects on gene regulation using Google DeepMind's AlphaGenome.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your analysis strategy!

    AlphaGenome provides state-of-the-art predictions for how genetic variants
    affect gene regulation, including:
    - Gene expression changes (RNA-seq)
    - Chromatin accessibility impacts (ATAC-seq, DNase-seq)
    - Splicing alterations
    - Promoter activity changes (CAGE)

    This tool requires:
    1. AlphaGenome to be installed (see error message for instructions)
    2. An API key from https://deepmind.google.com/science/alphagenome

    API Key Options:
    - Provide directly via the api_key parameter
    - Or set ALPHAGENOME_API_KEY environment variable

    Example usage:
    - Predict regulatory effects of BRAF V600E mutation: chr7:140753336 A>T
    - Assess non-coding variant impact on gene expression
    - Evaluate promoter variants in specific tissues

    Note: This is an optional tool that enhances variant interpretation
    with AI predictions. Standard annotations remain available via variant_getter.
    """
    # Imported at call time rather than at module import time.
    from biomcp.variants.alphagenome import predict_variant_effects

    # Accept a single tissue term or a list; falsy input means "no filter".
    if tissue_types:
        tissue_types_list = ensure_list(tissue_types)
    else:
        tissue_types_list = None

    prediction = await predict_variant_effects(
        chromosome=chromosome,
        position=position,
        reference=reference,
        alternate=alternate,
        interval_size=interval_size,
        tissue_types=tissue_types_list,
        significance_threshold=significance_threshold,
        api_key=api_key,
    )
    return prediction


# Gene Tools
@mcp_app.tool()
@track_performance("biomcp.gene_getter")
async def gene_getter(
    gene_id_or_symbol: Annotated[
        str,
        Field(
            description="Gene symbol (e.g., 'TP53', 'BRAF') or Entrez ID (e.g., '7157')"
        ),
    ],
) -> str:
    """Get detailed gene information from MyGene.info.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to understand your research goal!

    Provides real-time gene annotations including:
    - Official gene name and symbol
    - Gene summary/description
    - Aliases and alternative names
    - Gene type (protein-coding, etc.)
    - Links to external databases

    This tool fetches CURRENT gene information from MyGene.info, ensuring
    you always have the latest annotations and nomenclature.

    Example usage:
    - Get information about TP53 tumor suppressor
    - Look up BRAF kinase gene details
    - Find the official name for a gene by its alias

    Note: For genetic variants, use variant_searcher. For articles about genes, use article_searcher.
    """
    # Pass-through: the symbol or ID goes to the gene fetcher unchanged,
    # together with a fixed call_benefit label.
    gene_text = await _gene_details(
        gene_id_or_symbol=gene_id_or_symbol,
        call_benefit="Get up-to-date gene annotations and information",
    )
    return gene_text


# Disease Tools
@mcp_app.tool()
@track_performance("biomcp.disease_getter")
async def disease_getter(
    disease_id_or_name: Annotated[
        str,
        Field(
            description="Disease name (e.g., 'melanoma', 'lung cancer') or ontology ID (e.g., 'MONDO:0016575', 'DOID:1909')"
        ),
    ],
) -> str:
    """Get detailed disease information from MyDisease.info.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to understand your research goal!

    Provides real-time disease annotations including:
    - Official disease name and definition
    - Disease synonyms and alternative names
    - Ontology mappings (MONDO, DOID, OMIM, etc.)
    - Associated phenotypes
    - Links to disease databases

    This tool fetches CURRENT disease information from MyDisease.info, ensuring
    you always have the latest ontology mappings and definitions.

    Example usage:
    - Get the definition of GIST (Gastrointestinal Stromal Tumor)
    - Look up synonyms for melanoma
    - Find the MONDO ID for a disease by name

    Note: For clinical trials about diseases, use trial_searcher. For articles about diseases, use article_searcher.
    """
    # Thin wrapper: hand the identifier to the shared disease-details helper
    # together with a fixed call_benefit string, and return its result as-is.
    disease_report = await _disease_details(
        disease_id_or_name=disease_id_or_name,
        call_benefit="Get up-to-date disease definitions and ontology information",
    )
    return disease_report


@mcp_app.tool()
@track_performance("biomcp.drug_getter")
async def drug_getter(
    drug_id_or_name: Annotated[
        str,
        Field(
            description="Drug name (e.g., 'aspirin', 'imatinib') or ID (e.g., 'DB00945', 'CHEMBL941')"
        ),
    ],
) -> str:
    """Get detailed drug/chemical information from MyChem.info.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to understand your research goal!

    This tool provides comprehensive drug information including:
    - Chemical properties (formula, InChIKey)
    - Drug identifiers (DrugBank, ChEMBL, PubChem)
    - Trade names and brand names
    - Clinical indications
    - Mechanism of action
    - Pharmacology details
    - Links to drug databases

    This tool fetches CURRENT drug information from MyChem.info, part of the
    BioThings suite, ensuring you always have the latest drug data.

    Example usage:
    - Get information about imatinib (Gleevec)
    - Look up details for DrugBank ID DB00619
    - Find the mechanism of action for pembrolizumab

    Note: For clinical trials about drugs, use trial_searcher. For articles about drugs, use article_searcher.
    """
    # Thin wrapper: the identifier is passed positionally to the shared
    # drug-details helper and its result is returned unchanged.
    drug_report = await _drug_details(drug_id_or_name)
    return drug_report


# NCI-Specific Tools
@mcp_app.tool()
@track_performance("biomcp.nci_organization_searcher")
async def nci_organization_searcher(
    name: Annotated[
        str | None,
        Field(
            description="Organization name to search for (partial match supported)"
        ),
    ] = None,
    organization_type: Annotated[
        str | None,
        Field(
            description="Type of organization (e.g., 'Academic', 'Industry', 'Government')"
        ),
    ] = None,
    city: Annotated[
        str | None,
        Field(
            description="City where organization is located. IMPORTANT: Always use with state to avoid API errors"
        ),
    ] = None,
    state: Annotated[
        str | None,
        Field(
            description="State/province code (e.g., 'CA', 'NY'). IMPORTANT: Always use with city to avoid API errors"
        ),
    ] = None,
    api_key: Annotated[
        str | None,
        Field(
            description="NCI API key. Check if user mentioned 'my NCI API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one."
        ),
    ] = None,
    page: Annotated[
        int,
        Field(description="Page number (1-based)", ge=1),
    ] = 1,
    page_size: Annotated[
        int,
        Field(description="Results per page", ge=1, le=100),
    ] = 20,
) -> str:
    """Search for organizations in the NCI Clinical Trials database.

    Searches the National Cancer Institute's curated database of organizations
    involved in cancer clinical trials. This includes:
    - Academic medical centers
    - Community hospitals
    - Industry sponsors
    - Government facilities
    - Research networks

    Requires NCI API key from: https://clinicaltrialsapi.cancer.gov/

    IMPORTANT: To avoid API errors, always use city AND state together when searching by location.
    The NCI API has limitations on broad searches.

    Example usage:
    - Find cancer centers in Boston, MA (city AND state)
    - Search for "MD Anderson" in Houston, TX
    - List academic organizations in Cleveland, OH
    - Search by organization name alone (without location)
    """
    from biomcp.integrations.cts_api import CTSAPIError
    from biomcp.organizations import search_organizations
    from biomcp.organizations.search import format_organization_results

    try:
        # The tool-level `organization_type` is forwarded under the helper's
        # `org_type` keyword; everything else maps one-to-one.
        matches = await search_organizations(
            name=name,
            org_type=organization_type,
            city=city,
            state=state,
            page=page,
            page_size=page_size,
            api_key=api_key,
        )
        return format_organization_results(matches)
    except CTSAPIError as exc:
        detail = str(exc)
        # Only the Elasticsearch bucket-limit failure is turned into a
        # friendly message; any other CTS API error propagates unchanged.
        too_broad = (
            "too_many_buckets_exception" in detail or "75000" in detail
        )
        if not too_broad:
            raise
        return (
            "⚠️ **Search Too Broad**\n\n"
            "The NCI API cannot process this search because it returns too many results.\n\n"
            "**To fix this, try:**\n"
            "1. **Always use city AND state together** for location searches\n"
            "2. Add an organization name (even partial) to narrow results\n"
            "3. Use multiple filters together (name + location, or name + type)\n\n"
            "**Examples that work:**\n"
            "- `nci_organization_searcher(city='Cleveland', state='OH')`\n"
            "- `nci_organization_searcher(name='Cleveland Clinic')`\n"
            "- `nci_organization_searcher(name='cancer', city='Boston', state='MA')`\n"
            "- `nci_organization_searcher(organization_type='Academic', city='Houston', state='TX')`"
        )


@mcp_app.tool()
@track_performance("biomcp.nci_organization_getter")
async def nci_organization_getter(
    organization_id: Annotated[
        str,
        Field(description="NCI organization ID (e.g., 'NCI-2011-03337')"),
    ],
    api_key: Annotated[
        str | None,
        Field(
            description="NCI API key. Check if user mentioned 'my NCI API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one."
        ),
    ] = None,
) -> str:
    """Get detailed information about a specific organization from NCI.

    Retrieves comprehensive details about an organization including:
    - Full name and aliases
    - Address and contact information
    - Organization type and role
    - Associated clinical trials
    - Research focus areas

    Requires NCI API key from: https://clinicaltrialsapi.cancer.gov/

    Example usage:
    - Get details about a specific cancer center
    - Find contact information for trial sponsors
    - View organization's trial portfolio
    """
    from biomcp.organizations import get_organization
    from biomcp.organizations.getter import format_organization_details

    # Fetch the record (the helper takes the ID as `org_id`), then render it.
    record = await get_organization(
        api_key=api_key,
        org_id=organization_id,
    )
    rendered = format_organization_details(record)
    return rendered


@mcp_app.tool()
@track_performance("biomcp.nci_intervention_searcher")
async def nci_intervention_searcher(
    name: Annotated[
        str | None,
        Field(
            description="Intervention name to search for (e.g., 'pembrolizumab')"
        ),
    ] = None,
    intervention_type: Annotated[
        str | None,
        Field(
            description="Type of intervention: 'Drug', 'Device', 'Biological', 'Procedure', 'Radiation', 'Behavioral', 'Genetic', 'Dietary', 'Other'"
        ),
    ] = None,
    synonyms: Annotated[
        bool,
        Field(description="Include synonym matches in search"),
    ] = True,
    api_key: Annotated[
        str | None,
        Field(
            description="NCI API key. Check if user mentioned 'my NCI API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one."
        ),
    ] = None,
    page: Annotated[
        int,
        Field(description="Page number (1-based)", ge=1),
    ] = 1,
    page_size: Annotated[
        int | None,
        Field(
            description="Results per page. If not specified, returns all matching results.",
            ge=1,
            le=100,
        ),
    ] = None,
) -> str:
    """Search for interventions in the NCI Clinical Trials database.

    Searches the National Cancer Institute's curated database of interventions
    used in cancer clinical trials. This includes:
    - FDA-approved drugs
    - Investigational agents
    - Medical devices
    - Surgical procedures
    - Radiation therapies
    - Behavioral interventions

    Requires NCI API key from: https://clinicaltrialsapi.cancer.gov/

    Example usage:
    - Find all trials using pembrolizumab
    - Search for CAR-T cell therapies
    - List radiation therapy protocols
    - Find dietary interventions
    """
    from biomcp.integrations.cts_api import CTSAPIError
    from biomcp.interventions import search_interventions
    from biomcp.interventions.search import format_intervention_results

    try:
        # `page_size` may be None here; it is passed through untouched.
        matches = await search_interventions(
            name=name,
            intervention_type=intervention_type,
            synonyms=synonyms,
            page=page,
            page_size=page_size,
            api_key=api_key,
        )
        return format_intervention_results(matches)
    except CTSAPIError as exc:
        detail = str(exc)
        # Only the Elasticsearch bucket-limit failure is turned into a
        # friendly message; any other CTS API error propagates unchanged.
        too_broad = (
            "too_many_buckets_exception" in detail or "75000" in detail
        )
        if not too_broad:
            raise
        return (
            "⚠️ **Search Too Broad**\n\n"
            "The NCI API cannot process this search because it returns too many results.\n\n"
            "**Try adding more specific filters:**\n"
            "- Add an intervention name (even partial)\n"
            "- Specify an intervention type (e.g., 'Drug', 'Device')\n"
            "- Search for a specific drug or therapy name\n\n"
            "**Example searches that work better:**\n"
            "- Search for 'pembrolizumab' instead of all drugs\n"
            "- Search for 'CAR-T' to find CAR-T cell therapies\n"
            "- Filter by type: Drug, Device, Procedure, etc."
        )


@mcp_app.tool()
@track_performance("biomcp.nci_intervention_getter")
async def nci_intervention_getter(
    intervention_id: Annotated[
        str,
        Field(description="NCI intervention ID (e.g., 'INT123456')"),
    ],
    api_key: Annotated[
        str | None,
        Field(
            description="NCI API key. Check if user mentioned 'my NCI API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one."
        ),
    ] = None,
) -> str:
    """Get detailed information about a specific intervention from NCI.

    Retrieves comprehensive details about an intervention including:
    - Full name and synonyms
    - Intervention type and category
    - Mechanism of action (for drugs)
    - FDA approval status
    - Associated clinical trials
    - Combination therapies

    Requires NCI API key from: https://clinicaltrialsapi.cancer.gov/

    Example usage:
    - Get details about a specific drug
    - Find all trials using a device
    - View combination therapy protocols
    """
    from biomcp.interventions import get_intervention
    from biomcp.interventions.getter import format_intervention_details

    # Fetch the record first, then render it for the caller.
    record = await get_intervention(
        api_key=api_key,
        intervention_id=intervention_id,
    )
    rendered = format_intervention_details(record)
    return rendered


# Biomarker Tools
@mcp_app.tool()
@track_performance("biomcp.nci_biomarker_searcher")
async def nci_biomarker_searcher(
    name: Annotated[
        str | None,
        Field(
            description="Biomarker name to search for (e.g., 'PD-L1', 'EGFR mutation')"
        ),
    ] = None,
    biomarker_type: Annotated[
        str | None,
        Field(description="Type of biomarker ('reference_gene' or 'branch')"),
    ] = None,
    api_key: Annotated[
        str | None,
        Field(
            description="NCI API key. Check if user mentioned 'my NCI API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one."
        ),
    ] = None,
    page: Annotated[
        int,
        Field(description="Page number (1-based)", ge=1),
    ] = 1,
    page_size: Annotated[
        int,
        Field(description="Results per page", ge=1, le=100),
    ] = 20,
) -> str:
    """Search for biomarkers in the NCI Clinical Trials database.

    Searches for biomarkers used in clinical trial eligibility criteria.
    This is essential for precision medicine trials that select patients
    based on specific biomarker characteristics.

    Biomarker examples:
    - Gene mutations (e.g., BRAF V600E, EGFR T790M)
    - Protein expression (e.g., PD-L1 ≥ 50%, HER2 positive)
    - Gene fusions (e.g., ALK fusion, ROS1 fusion)
    - Other molecular markers (e.g., MSI-H, TMB-high)

    Requires NCI API key from: https://clinicaltrialsapi.cancer.gov/

    Note: Biomarker data availability may be limited in CTRP.
    Results focus on biomarkers used in trial eligibility criteria.

    Example usage:
    - Search for PD-L1 expression biomarkers
    - Find trials requiring EGFR mutations
    - Look up biomarkers tested by NGS
    - Search for HER2 expression markers
    """
    from biomcp.biomarkers import search_biomarkers
    from biomcp.biomarkers.search import format_biomarker_results
    from biomcp.integrations.cts_api import CTSAPIError

    try:
        results = await search_biomarkers(
            name=name,
            biomarker_type=biomarker_type,
            page_size=page_size,
            page=page,
            api_key=api_key,
        )
        return format_biomarker_results(results)
    except CTSAPIError as e:
        # Only the Elasticsearch bucket-limit failure is turned into a
        # friendly message; any other CTS API error propagates unchanged.
        error_msg = str(e)
        if "too_many_buckets_exception" in error_msg or "75000" in error_msg:
            # The guidance below must only mention parameters this tool
            # actually accepts (name, biomarker_type) and the biomarker_type
            # values documented in the signature; suggesting anything else
            # would send the caller into a validation error.
            return (
                "⚠️ **Search Too Broad**\n\n"
                "The NCI API cannot process this search because it returns too many results.\n\n"
                "**Try adding more specific filters:**\n"
                "- Add a biomarker name (even partial)\n"
                "- Include a gene symbol in the name (e.g., 'EGFR')\n"
                "- Specify a biomarker type ('reference_gene' or 'branch')\n\n"
                "**Example searches that work:**\n"
                "- `nci_biomarker_searcher(name='PD-L1')`\n"
                "- `nci_biomarker_searcher(name='EGFR', biomarker_type='reference_gene')`\n"
                "- `nci_biomarker_searcher(name='HER2')`"
            )
        raise


# NCI Disease Tools
@mcp_app.tool()
@track_performance("biomcp.nci_disease_searcher")
async def nci_disease_searcher(
    name: Annotated[
        str | None,
        Field(description="Disease name to search for (partial match)"),
    ] = None,
    include_synonyms: Annotated[
        bool,
        Field(description="Include synonym matches in search"),
    ] = True,
    category: Annotated[
        str | None,
        Field(description="Disease category/type filter"),
    ] = None,
    api_key: Annotated[
        str | None,
        Field(
            description="NCI API key. Check if user mentioned 'my NCI API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one."
        ),
    ] = None,
    page: Annotated[
        int,
        Field(description="Page number (1-based)", ge=1),
    ] = 1,
    page_size: Annotated[
        int,
        Field(description="Results per page", ge=1, le=100),
    ] = 20,
) -> str:
    """Search NCI's controlled vocabulary of cancer conditions.

    Searches the National Cancer Institute's curated database of cancer
    conditions and diseases used in clinical trials. This is different from
    the general disease_getter tool which uses MyDisease.info.

    NCI's disease vocabulary provides:
    - Official cancer terminology used in trials
    - Disease synonyms and alternative names
    - Hierarchical disease classifications
    - Standardized disease codes for trial matching

    Requires NCI API key from: https://clinicaltrialsapi.cancer.gov/

    Example usage:
    - Search for specific cancer types (e.g., "melanoma")
    - Find all lung cancer subtypes
    - Look up official names for disease synonyms
    - Get standardized disease terms for trial searches

    Note: This is specifically for NCI's cancer disease vocabulary.
    For general disease information, use the disease_getter tool.
    """
    from biomcp.diseases import search_diseases
    from biomcp.diseases.search import format_disease_results
    from biomcp.integrations.cts_api import CTSAPIError

    try:
        matches = await search_diseases(
            name=name,
            include_synonyms=include_synonyms,
            category=category,
            page=page,
            page_size=page_size,
            api_key=api_key,
        )
        return format_disease_results(matches)
    except CTSAPIError as exc:
        detail = str(exc)
        # Only the Elasticsearch bucket-limit failure is turned into a
        # friendly message; any other CTS API error propagates unchanged.
        too_broad = (
            "too_many_buckets_exception" in detail or "75000" in detail
        )
        if not too_broad:
            raise
        return (
            "⚠️ **Search Too Broad**\n\n"
            "The NCI API cannot process this search because it returns too many results.\n\n"
            "**Try adding more specific filters:**\n"
            "- Add a disease name (even partial)\n"
            "- Specify a disease category\n"
            "- Use more specific search terms\n\n"
            "**Example searches that work:**\n"
            "- `nci_disease_searcher(name='melanoma')`\n"
            "- `nci_disease_searcher(name='lung', category='maintype')`\n"
            "- `nci_disease_searcher(name='NSCLC')`"
        )


# OpenFDA Tools
@mcp_app.tool()
@track_performance("biomcp.openfda_adverse_searcher")
async def openfda_adverse_searcher(
    drug: Annotated[
        str | None,
        Field(description="Drug name to search for adverse events"),
    ] = None,
    reaction: Annotated[
        str | None,
        Field(description="Adverse reaction term to search for"),
    ] = None,
    serious: Annotated[
        bool | None,
        Field(description="Filter for serious events only"),
    ] = None,
    limit: Annotated[
        int,
        Field(description="Maximum number of results", ge=1, le=100),
    ] = 25,
    page: Annotated[
        int,
        Field(description="Page number (1-based)", ge=1),
    ] = 1,
    api_key: Annotated[
        str | None,
        Field(
            description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)"
        ),
    ] = None,
) -> str:
    """Search FDA adverse event reports (FAERS) for drug safety information.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy!

    Searches FDA's Adverse Event Reporting System for:
    - Drug side effects and adverse reactions
    - Serious event reports (death, hospitalization, disability)
    - Safety signal patterns across patient populations

    Note: These reports do not establish causation - they are voluntary reports
    that may contain incomplete or unverified information.
    """
    from biomcp.openfda import search_adverse_events

    # Translate the 1-based page number into a zero-based record offset.
    offset = limit * (page - 1)
    report = await search_adverse_events(
        drug=drug,
        reaction=reaction,
        serious=serious,
        limit=limit,
        skip=offset,
        api_key=api_key,
    )
    return report


@mcp_app.tool()
@track_performance("biomcp.openfda_adverse_getter")
async def openfda_adverse_getter(
    report_id: Annotated[
        str,
        Field(description="Safety report ID"),
    ],
    api_key: Annotated[
        str | None,
        Field(
            description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)"
        ),
    ] = None,
) -> str:
    """Get detailed information for a specific FDA adverse event report.

    Retrieves complete details including:
    - Patient demographics and medical history
    - All drugs involved and dosages
    - Complete list of adverse reactions
    - Event narrative and outcomes
    - Reporter information
    """
    from biomcp.openfda import get_adverse_event

    # Pure delegation: the report ID goes positionally, the key by keyword.
    report = await get_adverse_event(report_id, api_key=api_key)
    return report


@mcp_app.tool()
@track_performance("biomcp.openfda_label_searcher")
async def openfda_label_searcher(
    name: Annotated[
        str | None,
        Field(description="Drug name to search for"),
    ] = None,
    indication: Annotated[
        str | None,
        Field(description="Search for drugs indicated for this condition"),
    ] = None,
    boxed_warning: Annotated[
        bool,
        Field(description="Filter for drugs with boxed warnings"),
    ] = False,
    section: Annotated[
        str | None,
        Field(
            description="Specific label section (e.g., 'contraindications', 'warnings')"
        ),
    ] = None,
    limit: Annotated[
        int,
        Field(description="Maximum number of results", ge=1, le=100),
    ] = 25,
    page: Annotated[
        int,
        Field(description="Page number (1-based)", ge=1),
    ] = 1,
    api_key: Annotated[
        str | None,
        Field(
            description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)"
        ),
    ] = None,
) -> str:
    """Search FDA drug product labels (SPL) for prescribing information.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy!

    Searches official FDA drug labels for:
    - Approved indications and usage
    - Dosage and administration guidelines
    - Contraindications and warnings
    - Drug interactions and adverse reactions
    - Special population considerations

    Label sections include: indications, dosage, contraindications, warnings,
    adverse, interactions, pregnancy, pediatric, geriatric, overdose
    """
    from biomcp.openfda import search_drug_labels

    # Translate the 1-based page number into a zero-based record offset.
    offset = limit * (page - 1)
    labels = await search_drug_labels(
        name=name,
        indication=indication,
        boxed_warning=boxed_warning,
        section=section,
        limit=limit,
        skip=offset,
        api_key=api_key,
    )
    return labels


@mcp_app.tool()
@track_performance("biomcp.openfda_label_getter")
async def openfda_label_getter(
    set_id: Annotated[
        str,
        Field(description="Label set ID"),
    ],
    sections: Annotated[
        list[str] | None,
        Field(
            description="Specific sections to retrieve (default: key sections)"
        ),
    ] = None,
    api_key: Annotated[
        str | None,
        Field(
            description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)"
        ),
    ] = None,
) -> str:
    """Get complete FDA drug label information by set ID.

    Retrieves the full prescribing information including:
    - Complete indications and usage text
    - Detailed dosing instructions
    - All warnings and precautions
    - Clinical pharmacology and studies
    - Manufacturing and storage information

    Specify sections to retrieve specific parts, or leave empty for default key sections.
    """
    from biomcp.openfda import get_drug_label

    # Pure delegation: set ID and section list go positionally (sections may
    # be None), the key by keyword.
    label = await get_drug_label(set_id, sections, api_key=api_key)
    return label


@mcp_app.tool()
@track_performance("biomcp.openfda_device_searcher")
async def openfda_device_searcher(
    device: Annotated[
        str | None,
        Field(description="Device name to search for"),
    ] = None,
    manufacturer: Annotated[
        str | None,
        Field(description="Manufacturer name"),
    ] = None,
    problem: Annotated[
        str | None,
        Field(description="Device problem description"),
    ] = None,
    product_code: Annotated[
        str | None,
        Field(description="FDA product code"),
    ] = None,
    genomics_only: Annotated[
        bool,
        Field(description="Filter to genomic/diagnostic devices only"),
    ] = True,
    limit: Annotated[
        int,
        Field(description="Maximum number of results", ge=1, le=100),
    ] = 25,
    page: Annotated[
        int,
        Field(description="Page number (1-based)", ge=1),
    ] = 1,
    api_key: Annotated[
        str | None,
        Field(
            description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)"
        ),
    ] = None,
) -> str:
    """Search FDA device adverse event reports (MAUDE) for medical device issues.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy!

    Searches FDA's device adverse event database for:
    - Device malfunctions and failures
    - Patient injuries related to devices
    - Genomic test and diagnostic device issues

    By default, filters to genomic/diagnostic devices relevant to precision medicine.
    Set genomics_only=False to search all medical devices.
    """
    from biomcp.openfda import search_device_events

    # Translate the 1-based page number into a zero-based record offset.
    offset = limit * (page - 1)
    events = await search_device_events(
        device=device,
        manufacturer=manufacturer,
        problem=problem,
        product_code=product_code,
        genomics_only=genomics_only,
        limit=limit,
        skip=offset,
        api_key=api_key,
    )
    return events


@mcp_app.tool()
@track_performance("biomcp.openfda_device_getter")
async def openfda_device_getter(
    mdr_report_key: Annotated[
        str,
        Field(description="MDR report key"),
    ],
    api_key: Annotated[
        str | None,
        Field(
            description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)"
        ),
    ] = None,
) -> str:
    """Get detailed information for a specific FDA device event report.

    Retrieves complete device event details including:
    - Device identification and specifications
    - Complete event narrative
    - Patient outcomes and impacts
    - Manufacturer analysis and actions
    - Remedial actions taken
    """
    from biomcp.openfda import get_device_event

    # Pure delegation: the report key goes positionally, the API key by keyword.
    event = await get_device_event(mdr_report_key, api_key=api_key)
    return event


@mcp_app.tool()
@track_performance("biomcp.openfda_approval_searcher")
async def openfda_approval_searcher(
    drug: Annotated[
        str | None,
        Field(description="Drug name (brand or generic) to search for"),
    ] = None,
    application_number: Annotated[
        str | None,
        Field(description="NDA or BLA application number"),
    ] = None,
    approval_year: Annotated[
        str | None,
        Field(description="Year of approval (YYYY format)"),
    ] = None,
    limit: Annotated[
        int,
        Field(description="Maximum number of results", ge=1, le=100),
    ] = 25,
    page: Annotated[
        int,
        Field(description="Page number (1-based)", ge=1),
    ] = 1,
    api_key: Annotated[
        str | None,
        Field(
            description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)"
        ),
    ] = None,
) -> str:
    """Search FDA drug approval records from Drugs@FDA database.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy!

    Returns information about:
    - Application numbers and sponsors
    - Brand and generic names
    - Product formulations and strengths
    - Marketing status and approval dates
    - Submission history

    Useful for verifying if a drug is FDA-approved and when.
    """
    from biomcp.openfda import search_drug_approvals

    # Translate the 1-based page number into a zero-based record offset.
    offset = limit * (page - 1)
    approvals = await search_drug_approvals(
        drug=drug,
        application_number=application_number,
        approval_year=approval_year,
        limit=limit,
        skip=offset,
        api_key=api_key,
    )
    return approvals


@mcp_app.tool()
@track_performance("biomcp.openfda_approval_getter")
async def openfda_approval_getter(
    application_number: Annotated[
        str,
        Field(description="NDA or BLA application number"),
    ],
    api_key: Annotated[
        str | None,
        Field(
            description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)"
        ),
    ] = None,
) -> str:
    """Get detailed FDA drug approval information for a specific application.

    Returns comprehensive approval details including:
    - Full product list with dosage forms and strengths
    - Complete submission history
    - Marketing status timeline
    - Therapeutic equivalence codes
    - Pharmacologic class information
    """
    from biomcp.openfda import get_drug_approval

    # Pure delegation: the application number goes positionally, the API key
    # by keyword.
    approval = await get_drug_approval(application_number, api_key=api_key)
    return approval


@mcp_app.tool()
@track_performance("biomcp.openfda_recall_searcher")
async def openfda_recall_searcher(
    drug: Annotated[
        str | None,
        Field(description="Drug name to search for recalls"),
    ] = None,
    recall_class: Annotated[
        str | None,
        Field(
            description="Recall classification (1=most serious, 2=moderate, 3=least serious)"
        ),
    ] = None,
    status: Annotated[
        str | None,
        Field(description="Recall status (ongoing, completed, terminated)"),
    ] = None,
    reason: Annotated[
        str | None,
        Field(description="Search text in recall reason"),
    ] = None,
    since_date: Annotated[
        str | None,
        Field(description="Show recalls after this date (YYYYMMDD format)"),
    ] = None,
    limit: Annotated[
        int,
        Field(description="Maximum number of results", ge=1, le=100),
    ] = 25,
    page: Annotated[
        int,
        Field(description="Page number (1-based)", ge=1),
    ] = 1,
    api_key: Annotated[
        str | None,
        Field(
            description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)"
        ),
    ] = None,
) -> str:
    """Search FDA drug recall records from the Enforcement database.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy!

    Returns recall information including:
    - Classification (Class I, II, or III)
    - Recall reason and description
    - Product identification
    - Distribution information
    - Recalling firm details
    - Current status

    Class I = most serious (death/serious harm)
    Class II = moderate (temporary/reversible harm)
    Class III = least serious (unlikely to cause harm)
    """
    from biomcp.openfda import search_drug_recalls

    # Translate the 1-based page number into a zero-based record offset.
    offset = limit * (page - 1)
    recalls = await search_drug_recalls(
        drug=drug,
        recall_class=recall_class,
        status=status,
        reason=reason,
        since_date=since_date,
        limit=limit,
        skip=offset,
        api_key=api_key,
    )
    return recalls


@mcp_app.tool()
@track_performance("biomcp.openfda_recall_getter")
async def openfda_recall_getter(
    recall_number: Annotated[
        str,
        Field(description="FDA recall number"),
    ],
    api_key: Annotated[
        str | None,
        Field(
            description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)"
        ),
    ] = None,
) -> str:
    """Get detailed FDA drug recall information for a specific recall.

    Returns complete recall details including:
    - Full product description and code information
    - Complete reason for recall
    - Distribution pattern and locations
    - Quantity of product recalled
    - Firm information and actions taken
    - Timeline of recall events
    """
    # NOTE(review): the docstring above is presumably exposed as the tool
    # description by the registration decorator -- keep its wording stable.

    # Imported at call time rather than at module import time.
    from biomcp.openfda import get_drug_recall

    # Pure delegation: the recall number goes through positionally and the
    # optional key by keyword; the helper's result is returned unchanged.
    recall_details = await get_drug_recall(recall_number, api_key=api_key)
    return recall_details


@mcp_app.tool()
@track_performance("biomcp.openfda_shortage_searcher")
async def openfda_shortage_searcher(
    drug: Annotated[
        str | None,
        Field(description="Drug name (generic or brand) to search"),
    ] = None,
    status: Annotated[
        str | None,
        Field(description="Shortage status (current or resolved)"),
    ] = None,
    therapeutic_category: Annotated[
        str | None,
        Field(
            description="Therapeutic category (e.g., Oncology, Anti-infective)"
        ),
    ] = None,
    limit: Annotated[
        int,
        Field(description="Maximum number of results", ge=1, le=100),
    ] = 25,
    page: Annotated[
        int,
        Field(description="Page number (1-based)", ge=1),
    ] = 1,
    api_key: Annotated[
        str | None,
        Field(
            description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)"
        ),
    ] = None,
) -> str:
    """Search FDA drug shortage records.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy!

    Returns shortage information including:
    - Current shortage status
    - Shortage start and resolution dates
    - Reason for shortage
    - Therapeutic category
    - Manufacturer information
    - Estimated resolution timeline

    Note: Shortage data is cached and updated periodically.
    Check FDA.gov for most current information.
    """
    # NOTE(review): the docstring above is presumably exposed as the tool
    # description by the registration decorator -- keep its wording stable.

    # Imported at call time rather than at module import time.
    from biomcp.openfda import search_drug_shortages

    # Convert the 1-based page number into the number of records to skip
    # (page 1 -> 0, page 2 -> limit, ...).
    offset = limit * (page - 1)

    # Every filter is handed to the search helper as-is.
    shortages = await search_drug_shortages(
        drug=drug,
        status=status,
        therapeutic_category=therapeutic_category,
        limit=limit,
        skip=offset,
        api_key=api_key,
    )
    return shortages


@mcp_app.tool()
@track_performance("biomcp.openfda_shortage_getter")
async def openfda_shortage_getter(
    drug: Annotated[
        str,
        Field(description="Drug name (generic or brand)"),
    ],
    api_key: Annotated[
        str | None,
        Field(
            description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)"
        ),
    ] = None,
) -> str:
    """Get detailed FDA drug shortage information for a specific drug.

    Returns comprehensive shortage details including:
    - Complete timeline of shortage
    - Detailed reason for shortage
    - All affected manufacturers
    - Alternative products if available
    - Resolution status and estimates
    - Additional notes and recommendations

    Data is updated periodically from FDA shortage database.
    """
    # NOTE(review): the docstring above is presumably exposed as the tool
    # description by the registration decorator -- keep its wording stable.

    # Imported at call time rather than at module import time.
    from biomcp.openfda import get_drug_shortage

    # Pure delegation: the drug name goes through positionally and the
    # optional key by keyword; the helper's result is returned unchanged.
    shortage_details = await get_drug_shortage(drug, api_key=api_key)
    return shortage_details

```