This is page 11 of 15. Use http://codebase.md/genomoncology/biomcp?lines=false&page={x} to view the full context. # Directory Structure ``` ├── .github │ ├── actions │ │ └── setup-python-env │ │ └── action.yml │ ├── dependabot.yml │ └── workflows │ ├── ci.yml │ ├── deploy-docs.yml │ ├── main.yml.disabled │ ├── on-release-main.yml │ └── validate-codecov-config.yml ├── .gitignore ├── .pre-commit-config.yaml ├── BIOMCP_DATA_FLOW.md ├── CHANGELOG.md ├── CNAME ├── codecov.yaml ├── docker-compose.yml ├── Dockerfile ├── docs │ ├── apis │ │ ├── error-codes.md │ │ ├── overview.md │ │ └── python-sdk.md │ ├── assets │ │ ├── biomcp-cursor-locations.png │ │ ├── favicon.ico │ │ ├── icon.png │ │ ├── logo.png │ │ ├── mcp_architecture.txt │ │ └── remote-connection │ │ ├── 00_connectors.png │ │ ├── 01_add_custom_connector.png │ │ ├── 02_connector_enabled.png │ │ ├── 03_connect_to_biomcp.png │ │ ├── 04_select_google_oauth.png │ │ └── 05_success_connect.png │ ├── backend-services-reference │ │ ├── 01-overview.md │ │ ├── 02-biothings-suite.md │ │ ├── 03-cbioportal.md │ │ ├── 04-clinicaltrials-gov.md │ │ ├── 05-nci-cts-api.md │ │ ├── 06-pubtator3.md │ │ └── 07-alphagenome.md │ ├── blog │ │ ├── ai-assisted-clinical-trial-search-analysis.md │ │ ├── images │ │ │ ├── deep-researcher-video.png │ │ │ ├── researcher-announce.png │ │ │ ├── researcher-drop-down.png │ │ │ ├── researcher-prompt.png │ │ │ ├── trial-search-assistant.png │ │ │ └── what_is_biomcp_thumbnail.png │ │ └── researcher-persona-resource.md │ ├── changelog.md │ ├── CNAME │ ├── concepts │ │ ├── 01-what-is-biomcp.md │ │ ├── 02-the-deep-researcher-persona.md │ │ └── 03-sequential-thinking-with-the-think-tool.md │ ├── developer-guides │ │ ├── 01-server-deployment.md │ │ ├── 02-contributing-and-testing.md │ │ ├── 03-third-party-endpoints.md │ │ ├── 04-transport-protocol.md │ │ ├── 05-error-handling.md │ │ ├── 06-http-client-and-caching.md │ │ ├── 07-performance-optimizations.md │ │ └── generate_endpoints.py │ ├── faq-condensed.md │ 
├── FDA_SECURITY.md │ ├── genomoncology.md │ ├── getting-started │ │ ├── 01-quickstart-cli.md │ │ ├── 02-claude-desktop-integration.md │ │ └── 03-authentication-and-api-keys.md │ ├── how-to-guides │ │ ├── 01-find-articles-and-cbioportal-data.md │ │ ├── 02-find-trials-with-nci-and-biothings.md │ │ ├── 03-get-comprehensive-variant-annotations.md │ │ ├── 04-predict-variant-effects-with-alphagenome.md │ │ ├── 05-logging-and-monitoring-with-bigquery.md │ │ └── 06-search-nci-organizations-and-interventions.md │ ├── index.md │ ├── policies.md │ ├── reference │ │ ├── architecture-diagrams.md │ │ ├── quick-architecture.md │ │ ├── quick-reference.md │ │ └── visual-architecture.md │ ├── robots.txt │ ├── stylesheets │ │ ├── announcement.css │ │ └── extra.css │ ├── troubleshooting.md │ ├── tutorials │ │ ├── biothings-prompts.md │ │ ├── claude-code-biomcp-alphagenome.md │ │ ├── nci-prompts.md │ │ ├── openfda-integration.md │ │ ├── openfda-prompts.md │ │ ├── pydantic-ai-integration.md │ │ └── remote-connection.md │ ├── user-guides │ │ ├── 01-command-line-interface.md │ │ ├── 02-mcp-tools-reference.md │ │ └── 03-integrating-with-ides-and-clients.md │ └── workflows │ └── all-workflows.md ├── example_scripts │ ├── mcp_integration.py │ └── python_sdk.py ├── glama.json ├── LICENSE ├── lzyank.toml ├── Makefile ├── mkdocs.yml ├── package-lock.json ├── package.json ├── pyproject.toml ├── README.md ├── scripts │ ├── check_docs_in_mkdocs.py │ ├── check_http_imports.py │ └── generate_endpoints_doc.py ├── smithery.yaml ├── src │ └── biomcp │ ├── __init__.py │ ├── __main__.py │ ├── articles │ │ ├── __init__.py │ │ ├── autocomplete.py │ │ ├── fetch.py │ │ ├── preprints.py │ │ ├── search_optimized.py │ │ ├── search.py │ │ └── unified.py │ ├── biomarkers │ │ ├── __init__.py │ │ └── search.py │ ├── cbioportal_helper.py │ ├── circuit_breaker.py │ ├── cli │ │ ├── __init__.py │ │ ├── articles.py │ │ ├── biomarkers.py │ │ ├── diseases.py │ │ ├── health.py │ │ ├── interventions.py │ │ ├── main.py │ │ 
├── openfda.py │ │ ├── organizations.py │ │ ├── server.py │ │ ├── trials.py │ │ └── variants.py │ ├── connection_pool.py │ ├── constants.py │ ├── core.py │ ├── diseases │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── domain_handlers.py │ ├── drugs │ │ ├── __init__.py │ │ └── getter.py │ ├── exceptions.py │ ├── genes │ │ ├── __init__.py │ │ └── getter.py │ ├── http_client_simple.py │ ├── http_client.py │ ├── individual_tools.py │ ├── integrations │ │ ├── __init__.py │ │ ├── biothings_client.py │ │ └── cts_api.py │ ├── interventions │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── logging_filter.py │ ├── metrics_handler.py │ ├── metrics.py │ ├── openfda │ │ ├── __init__.py │ │ ├── adverse_events_helpers.py │ │ ├── adverse_events.py │ │ ├── cache.py │ │ ├── constants.py │ │ ├── device_events_helpers.py │ │ ├── device_events.py │ │ ├── drug_approvals.py │ │ ├── drug_labels_helpers.py │ │ ├── drug_labels.py │ │ ├── drug_recalls_helpers.py │ │ ├── drug_recalls.py │ │ ├── drug_shortages_detail_helpers.py │ │ ├── drug_shortages_helpers.py │ │ ├── drug_shortages.py │ │ ├── exceptions.py │ │ ├── input_validation.py │ │ ├── rate_limiter.py │ │ ├── utils.py │ │ └── validation.py │ ├── organizations │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── parameter_parser.py │ ├── prefetch.py │ ├── query_parser.py │ ├── query_router.py │ ├── rate_limiter.py │ ├── render.py │ ├── request_batcher.py │ ├── resources │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── instructions.md │ │ └── researcher.md │ ├── retry.py │ ├── router_handlers.py │ ├── router.py │ ├── shared_context.py │ ├── thinking │ │ ├── __init__.py │ │ ├── sequential.py │ │ └── session.py │ ├── thinking_tool.py │ ├── thinking_tracker.py │ ├── trials │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── nci_getter.py │ │ ├── nci_search.py │ │ └── search.py │ ├── utils │ │ ├── __init__.py │ │ ├── cancer_types_api.py │ │ ├── cbio_http_adapter.py │ │ ├── endpoint_registry.py │ │ ├── 
gene_validator.py │ │ ├── metrics.py │ │ ├── mutation_filter.py │ │ ├── query_utils.py │ │ ├── rate_limiter.py │ │ └── request_cache.py │ ├── variants │ │ ├── __init__.py │ │ ├── alphagenome.py │ │ ├── cancer_types.py │ │ ├── cbio_external_client.py │ │ ├── cbioportal_mutations.py │ │ ├── cbioportal_search_helpers.py │ │ ├── cbioportal_search.py │ │ ├── constants.py │ │ ├── external.py │ │ ├── filters.py │ │ ├── getter.py │ │ ├── links.py │ │ └── search.py │ └── workers │ ├── __init__.py │ ├── worker_entry_stytch.js │ ├── worker_entry.js │ └── worker.py ├── tests │ ├── bdd │ │ ├── cli_help │ │ │ ├── help.feature │ │ │ └── test_help.py │ │ ├── conftest.py │ │ ├── features │ │ │ └── alphagenome_integration.feature │ │ ├── fetch_articles │ │ │ ├── fetch.feature │ │ │ └── test_fetch.py │ │ ├── get_trials │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── get_variants │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── search_articles │ │ │ ├── autocomplete.feature │ │ │ ├── search.feature │ │ │ ├── test_autocomplete.py │ │ │ └── test_search.py │ │ ├── search_trials │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ ├── search_variants │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ └── steps │ │ └── test_alphagenome_steps.py │ ├── config │ │ └── test_smithery_config.py │ ├── conftest.py │ ├── data │ │ ├── ct_gov │ │ │ ├── clinical_trials_api_v2.yaml │ │ │ ├── trials_NCT04280705.json │ │ │ └── trials_NCT04280705.txt │ │ ├── myvariant │ │ │ ├── myvariant_api.yaml │ │ │ ├── myvariant_field_descriptions.csv │ │ │ ├── variants_full_braf_v600e.json │ │ │ ├── variants_full_braf_v600e.txt │ │ │ └── variants_part_braf_v600_multiple.json │ │ ├── openfda │ │ │ ├── drugsfda_detail.json │ │ │ ├── drugsfda_search.json │ │ │ ├── enforcement_detail.json │ │ │ └── enforcement_search.json │ │ └── pubtator │ │ ├── pubtator_autocomplete.json │ │ └── pubtator3_paper.txt │ ├── integration │ │ ├── test_openfda_integration.py │ │ ├── test_preprints_integration.py │ │ ├── 
test_simple.py │ │ └── test_variants_integration.py │ ├── tdd │ │ ├── articles │ │ │ ├── test_autocomplete.py │ │ │ ├── test_cbioportal_integration.py │ │ │ ├── test_fetch.py │ │ │ ├── test_preprints.py │ │ │ ├── test_search.py │ │ │ └── test_unified.py │ │ ├── conftest.py │ │ ├── drugs │ │ │ ├── __init__.py │ │ │ └── test_drug_getter.py │ │ ├── openfda │ │ │ ├── __init__.py │ │ │ ├── test_adverse_events.py │ │ │ ├── test_device_events.py │ │ │ ├── test_drug_approvals.py │ │ │ ├── test_drug_labels.py │ │ │ ├── test_drug_recalls.py │ │ │ ├── test_drug_shortages.py │ │ │ └── test_security.py │ │ ├── test_biothings_integration_real.py │ │ ├── test_biothings_integration.py │ │ ├── test_circuit_breaker.py │ │ ├── test_concurrent_requests.py │ │ ├── test_connection_pool.py │ │ ├── test_domain_handlers.py │ │ ├── test_drug_approvals.py │ │ ├── test_drug_recalls.py │ │ ├── test_drug_shortages.py │ │ ├── test_endpoint_documentation.py │ │ ├── test_error_scenarios.py │ │ ├── test_europe_pmc_fetch.py │ │ ├── test_mcp_integration.py │ │ ├── test_mcp_tools.py │ │ ├── test_metrics.py │ │ ├── test_nci_integration.py │ │ ├── test_nci_mcp_tools.py │ │ ├── test_network_policies.py │ │ ├── test_offline_mode.py │ │ ├── test_openfda_unified.py │ │ ├── test_pten_r173_search.py │ │ ├── test_render.py │ │ ├── test_request_batcher.py.disabled │ │ ├── test_retry.py │ │ ├── test_router.py │ │ ├── test_shared_context.py.disabled │ │ ├── test_unified_biothings.py │ │ ├── thinking │ │ │ ├── __init__.py │ │ │ └── test_sequential.py │ │ ├── trials │ │ │ ├── test_backward_compatibility.py │ │ │ ├── test_getter.py │ │ │ └── test_search.py │ │ ├── utils │ │ │ ├── test_gene_validator.py │ │ │ ├── test_mutation_filter.py │ │ │ ├── test_rate_limiter.py │ │ │ └── test_request_cache.py │ │ ├── variants │ │ │ ├── constants.py │ │ │ ├── test_alphagenome_api_key.py │ │ │ ├── test_alphagenome_comprehensive.py │ │ │ ├── test_alphagenome.py │ │ │ ├── test_cbioportal_mutations.py │ │ │ ├── 
test_cbioportal_search.py │ │ │ ├── test_external_integration.py │ │ │ ├── test_external.py │ │ │ ├── test_extract_gene_aa_change.py │ │ │ ├── test_filters.py │ │ │ ├── test_getter.py │ │ │ ├── test_links.py │ │ │ └── test_search.py │ │ └── workers │ │ └── test_worker_sanitization.js │ └── test_pydantic_ai_integration.py ├── THIRD_PARTY_ENDPOINTS.md ├── tox.ini ├── uv.lock └── wrangler.toml ``` # Files -------------------------------------------------------------------------------- /src/biomcp/utils/endpoint_registry.py: -------------------------------------------------------------------------------- ```python """Registry for tracking all external HTTP endpoints used by BioMCP.""" from dataclasses import dataclass, field from enum import Enum from pathlib import Path from typing import Any from urllib.parse import urlparse class EndpointCategory(str, Enum): """Categories of external endpoints.""" BIOMEDICAL_LITERATURE = "biomedical_literature" CLINICAL_TRIALS = "clinical_trials" VARIANT_DATABASES = "variant_databases" CANCER_GENOMICS = "cancer_genomics" HEALTH_MONITORING = "health_monitoring" REGULATORY_DATA = "regulatory_data" class DataType(str, Enum): """Types of data accessed from endpoints.""" RESEARCH_ARTICLES = "research_articles" CLINICAL_TRIAL_DATA = "clinical_trial_data" GENETIC_VARIANTS = "genetic_variants" CANCER_MUTATIONS = "cancer_mutations" GENE_ANNOTATIONS = "gene_annotations" SERVICE_STATUS = "service_status" ADVERSE_EVENTS = "adverse_events" DRUG_LABELS = "drug_labels" DEVICE_EVENTS = "device_events" @dataclass class EndpointInfo: """Information about an external endpoint.""" url: str category: EndpointCategory data_types: list[DataType] = field(default_factory=list) description: str = "" compliance_notes: str = "" rate_limit: str | None = None authentication: str | None = None @property def domain(self) -> str: """Extract domain from URL.""" parsed = urlparse(self.url) return parsed.netloc class EndpointRegistry: """Registry for tracking all 
external endpoints.""" def __init__(self): self._endpoints: dict[str, EndpointInfo] = {} self._initialize_known_endpoints() def _initialize_known_endpoints(self): """Initialize registry with known endpoints.""" # PubMed/PubTator3 self.register( "pubtator3_search", EndpointInfo( url="https://www.ncbi.nlm.nih.gov/research/pubtator3-api/search/", category=EndpointCategory.BIOMEDICAL_LITERATURE, data_types=[DataType.RESEARCH_ARTICLES], description="PubTator3 API for searching biomedical literature with entity annotations", compliance_notes="Public NIH/NCBI service, no PII transmitted", rate_limit="20 requests/second", ), ) self.register( "pubtator3_export", EndpointInfo( url="https://www.ncbi.nlm.nih.gov/research/pubtator3-api/publications/export/biocjson", category=EndpointCategory.BIOMEDICAL_LITERATURE, data_types=[DataType.RESEARCH_ARTICLES], description="PubTator3 API for fetching full article annotations in BioC-JSON format", compliance_notes="Public NIH/NCBI service, no PII transmitted", rate_limit="20 requests/second", ), ) self.register( "pubtator3_autocomplete", EndpointInfo( url="https://www.ncbi.nlm.nih.gov/research/pubtator3-api/entity/autocomplete/", category=EndpointCategory.BIOMEDICAL_LITERATURE, data_types=[DataType.GENE_ANNOTATIONS], description="PubTator3 API for entity name autocomplete suggestions", compliance_notes="Public NIH/NCBI service, no PII transmitted", rate_limit="20 requests/second", ), ) # ClinicalTrials.gov self.register( "clinicaltrials_search", EndpointInfo( url="https://clinicaltrials.gov/api/v2/studies", category=EndpointCategory.CLINICAL_TRIALS, data_types=[DataType.CLINICAL_TRIAL_DATA], description="ClinicalTrials.gov API v2 for searching clinical trials", compliance_notes="Public NIH service, may contain trial participant criteria", rate_limit="10 requests/second", ), ) # MyVariant.info self.register( "myvariant_query", EndpointInfo( url="https://myvariant.info/v1/query", category=EndpointCategory.VARIANT_DATABASES, 
data_types=[DataType.GENETIC_VARIANTS], description="MyVariant.info API for querying genetic variants", compliance_notes="Public service aggregating variant databases, no patient data", rate_limit="1000 requests/hour (anonymous)", ), ) self.register( "myvariant_variant", EndpointInfo( url="https://myvariant.info/v1/variant", category=EndpointCategory.VARIANT_DATABASES, data_types=[DataType.GENETIC_VARIANTS], description="MyVariant.info API for fetching specific variant details", compliance_notes="Public service aggregating variant databases, no patient data", rate_limit="1000 requests/hour (anonymous)", ), ) # Preprint servers self.register( "biorxiv_api", EndpointInfo( url="https://api.biorxiv.org/details/biorxiv", category=EndpointCategory.BIOMEDICAL_LITERATURE, data_types=[DataType.RESEARCH_ARTICLES], description="bioRxiv API for searching biology preprints", compliance_notes="Public preprint server, no PII transmitted", rate_limit="Not specified", ), ) self.register( "medrxiv_api", EndpointInfo( url="https://api.biorxiv.org/details/medrxiv", category=EndpointCategory.BIOMEDICAL_LITERATURE, data_types=[DataType.RESEARCH_ARTICLES], description="medRxiv API for searching medical preprints", compliance_notes="Public preprint server, no PII transmitted", rate_limit="Not specified", ), ) self.register( "europe_pmc", EndpointInfo( url="https://www.ebi.ac.uk/europepmc/webservices/rest/search", category=EndpointCategory.BIOMEDICAL_LITERATURE, data_types=[DataType.RESEARCH_ARTICLES], description="Europe PMC REST API for searching biomedical literature", compliance_notes="Public EMBL-EBI service, no PII transmitted", rate_limit="Not specified", ), ) # External variant sources self.register( "gdc_ssms", EndpointInfo( url="https://api.gdc.cancer.gov/ssms", category=EndpointCategory.VARIANT_DATABASES, data_types=[DataType.CANCER_MUTATIONS], description="NCI GDC API for somatic mutations", compliance_notes="Public NCI service, aggregate cancer genomics data", rate_limit="Not 
specified", ), ) self.register( "gdc_ssm_occurrences", EndpointInfo( url="https://api.gdc.cancer.gov/ssm_occurrences", category=EndpointCategory.VARIANT_DATABASES, data_types=[DataType.CANCER_MUTATIONS], description="NCI GDC API for mutation occurrences in cancer samples", compliance_notes="Public NCI service, aggregate cancer genomics data", rate_limit="Not specified", ), ) self.register( "ensembl_variation", EndpointInfo( url="https://rest.ensembl.org/variation/human", category=EndpointCategory.VARIANT_DATABASES, data_types=[DataType.GENETIC_VARIANTS], description="Ensembl REST API for human genetic variation data", compliance_notes="Public EMBL-EBI service, population genetics data", rate_limit="15 requests/second", ), ) self.register( "cbioportal_api", EndpointInfo( url="https://www.cbioportal.org/api", category=EndpointCategory.CANCER_GENOMICS, data_types=[ DataType.CANCER_MUTATIONS, DataType.CLINICAL_TRIAL_DATA, ], description="cBioPortal API for cancer genomics data", compliance_notes="Public MSKCC/Dana-Farber service, aggregate cancer genomics", rate_limit="5 requests/second", authentication="Optional API token for increased rate limits", ), ) # Specific cBioPortal endpoints self.register( "cbioportal_genes", EndpointInfo( url="https://www.cbioportal.org/api/genes", category=EndpointCategory.CANCER_GENOMICS, data_types=[DataType.GENE_ANNOTATIONS], description="cBioPortal API for gene information", compliance_notes="Public MSKCC/Dana-Farber service, gene metadata", rate_limit="5 requests/second", ), ) self.register( "cbioportal_cancer_types", EndpointInfo( url="https://www.cbioportal.org/api/cancer-types", category=EndpointCategory.CANCER_GENOMICS, data_types=[DataType.CANCER_MUTATIONS], description="cBioPortal API for cancer type hierarchy", compliance_notes="Public MSKCC/Dana-Farber service, cancer type metadata", rate_limit="5 requests/second", ), ) self.register( "cbioportal_molecular_profiles", EndpointInfo( 
url="https://www.cbioportal.org/api/molecular-profiles", category=EndpointCategory.CANCER_GENOMICS, data_types=[DataType.CANCER_MUTATIONS], description="cBioPortal API for molecular profiles", compliance_notes="Public MSKCC/Dana-Farber service, study metadata", rate_limit="5 requests/second", ), ) self.register( "cbioportal_mutations", EndpointInfo( url="https://www.cbioportal.org/api/mutations", category=EndpointCategory.CANCER_GENOMICS, data_types=[DataType.CANCER_MUTATIONS], description="cBioPortal API for mutation data", compliance_notes="Public MSKCC/Dana-Farber service, aggregate mutation data", rate_limit="5 requests/second", ), ) self.register( "cbioportal_studies", EndpointInfo( url="https://www.cbioportal.org/api/studies", category=EndpointCategory.CANCER_GENOMICS, data_types=[ DataType.CLINICAL_TRIAL_DATA, DataType.CANCER_MUTATIONS, ], description="cBioPortal API for cancer studies", compliance_notes="Public MSKCC/Dana-Farber service, study metadata", rate_limit="5 requests/second", ), ) # BioThings Suite APIs self.register( "mygene_query", EndpointInfo( url="https://mygene.info/v3/query", category=EndpointCategory.VARIANT_DATABASES, data_types=[DataType.GENE_ANNOTATIONS], description="MyGene.info API for querying gene information", compliance_notes="Public BioThings service, gene annotation data", rate_limit="10 requests/second", ), ) self.register( "mygene_gene", EndpointInfo( url="https://mygene.info/v3/gene", category=EndpointCategory.VARIANT_DATABASES, data_types=[DataType.GENE_ANNOTATIONS], description="MyGene.info API for fetching specific gene details", compliance_notes="Public BioThings service, gene annotation data", rate_limit="10 requests/second", ), ) self.register( "mydisease_query", EndpointInfo( url="https://mydisease.info/v1/query", category=EndpointCategory.VARIANT_DATABASES, data_types=[DataType.GENE_ANNOTATIONS], description="MyDisease.info API for querying disease information", compliance_notes="Public BioThings service, disease 
ontology data", rate_limit="10 requests/second", ), ) self.register( "mydisease_disease", EndpointInfo( url="https://mydisease.info/v1/disease", category=EndpointCategory.VARIANT_DATABASES, data_types=[DataType.GENE_ANNOTATIONS], description="MyDisease.info API for fetching specific disease details", compliance_notes="Public BioThings service, disease ontology data", rate_limit="10 requests/second", ), ) self.register( "mychem_query", EndpointInfo( url="https://mychem.info/v1/query", category=EndpointCategory.VARIANT_DATABASES, data_types=[DataType.GENE_ANNOTATIONS], description="MyChem.info API for querying drug/chemical information", compliance_notes="Public BioThings service, drug/chemical annotation data", rate_limit="10 requests/second", ), ) self.register( "mychem_chem", EndpointInfo( url="https://mychem.info/v1/chem", category=EndpointCategory.VARIANT_DATABASES, data_types=[DataType.GENE_ANNOTATIONS], description="MyChem.info API for fetching specific drug/chemical details", compliance_notes="Public BioThings service, drug/chemical annotation data", rate_limit="10 requests/second", ), ) # NCI Clinical Trials Search API self.register( "nci_trials", EndpointInfo( url="https://clinicaltrialsapi.cancer.gov/api/v2/trials", category=EndpointCategory.CLINICAL_TRIALS, data_types=[DataType.CLINICAL_TRIAL_DATA], description="NCI Clinical Trials Search API for cancer trials", compliance_notes="Public NCI service, cancer trial data", rate_limit="Not specified", authentication="Optional NCI_API_KEY for increased access", ), ) self.register( "nci_organizations", EndpointInfo( url="https://clinicaltrialsapi.cancer.gov/api/v2/organizations", category=EndpointCategory.CLINICAL_TRIALS, data_types=[DataType.CLINICAL_TRIAL_DATA], description="NCI API for cancer research organizations", compliance_notes="Public NCI service, organization metadata", rate_limit="Not specified", authentication="Optional NCI_API_KEY for increased access", ), ) self.register( "nci_diseases", 
EndpointInfo( url="https://clinicaltrialsapi.cancer.gov/api/v2/diseases", category=EndpointCategory.CLINICAL_TRIALS, data_types=[DataType.CLINICAL_TRIAL_DATA], description="NCI API for cancer disease vocabulary", compliance_notes="Public NCI service, disease ontology", rate_limit="Not specified", authentication="Optional NCI_API_KEY for increased access", ), ) self.register( "nci_interventions", EndpointInfo( url="https://clinicaltrialsapi.cancer.gov/api/v2/interventions", category=EndpointCategory.CLINICAL_TRIALS, data_types=[DataType.CLINICAL_TRIAL_DATA], description="NCI API for cancer treatment interventions", compliance_notes="Public NCI service, intervention metadata", rate_limit="Not specified", authentication="Optional NCI_API_KEY for increased access", ), ) self.register( "nci_biomarkers", EndpointInfo( url="https://clinicaltrialsapi.cancer.gov/api/v2/biomarkers", category=EndpointCategory.CLINICAL_TRIALS, data_types=[DataType.CLINICAL_TRIAL_DATA], description="NCI API for biomarkers used in clinical trials", compliance_notes="Public NCI service, biomarker metadata", rate_limit="Not specified", authentication="Optional NCI_API_KEY for increased access", ), ) # OpenFDA APIs self.register( "openfda_drug_events", EndpointInfo( url="https://api.fda.gov/drug/event.json", category=EndpointCategory.REGULATORY_DATA, data_types=[DataType.ADVERSE_EVENTS], description="FDA Adverse Event Reporting System (FAERS) for drug safety data", compliance_notes="Public FDA service, voluntary adverse event reports, no PII", rate_limit="40 requests/minute (240 with API key)", authentication="Optional OPENFDA_API_KEY for increased rate limits", ), ) self.register( "openfda_drug_labels", EndpointInfo( url="https://api.fda.gov/drug/label.json", category=EndpointCategory.REGULATORY_DATA, data_types=[DataType.DRUG_LABELS], description="FDA Structured Product Labeling (SPL) for drug prescribing information", compliance_notes="Public FDA service, official drug labeling data", 
rate_limit="40 requests/minute (240 with API key)", authentication="Optional OPENFDA_API_KEY for increased rate limits", ), ) self.register( "openfda_device_events", EndpointInfo( url="https://api.fda.gov/device/event.json", category=EndpointCategory.REGULATORY_DATA, data_types=[DataType.DEVICE_EVENTS], description="FDA MAUDE database for medical device adverse events", compliance_notes="Public FDA service, device malfunction and adverse event reports", rate_limit="40 requests/minute (240 with API key)", authentication="Optional OPENFDA_API_KEY for increased rate limits", ), ) self.register( "openfda_drugsfda", EndpointInfo( url="https://api.fda.gov/drug/drugsfda.json", category=EndpointCategory.REGULATORY_DATA, data_types=[DataType.DRUG_LABELS], description="FDA Drugs@FDA database for drug approval information", compliance_notes="Public FDA service, official drug approval records", rate_limit="40 requests/minute (240 with API key)", authentication="Optional OPENFDA_API_KEY for increased rate limits", ), ) self.register( "openfda_drug_enforcement", EndpointInfo( url="https://api.fda.gov/drug/enforcement.json", category=EndpointCategory.REGULATORY_DATA, data_types=[DataType.ADVERSE_EVENTS], description="FDA Enforcement database for drug recall information", compliance_notes="Public FDA service, drug recall and enforcement actions", rate_limit="40 requests/minute (240 with API key)", authentication="Optional OPENFDA_API_KEY for increased rate limits", ), ) # Note: Drug shortage endpoint is not yet available via OpenFDA # Using placeholder for future migration when FDA provides official endpoint self.register( "fda_drug_shortages", EndpointInfo( url="https://www.fda.gov/media/169066/download", category=EndpointCategory.REGULATORY_DATA, data_types=[DataType.DRUG_LABELS], description="FDA Drug Shortages database (cached locally)", compliance_notes="Public FDA service, drug shortage status information", rate_limit="Cached with 24-hour TTL", authentication="None 
required", ), ) def register(self, key: str, endpoint: EndpointInfo): """Register an endpoint for tracking. Args: key: Unique identifier for the endpoint endpoint: Endpoint metadata including URL, description, and compliance notes """ self._endpoints[key] = endpoint def get_all_endpoints(self) -> dict[str, EndpointInfo]: """Get all registered endpoints. Returns: Dictionary mapping endpoint keys to their metadata """ return self._endpoints.copy() def get_endpoints_by_category( self, category: EndpointCategory ) -> dict[str, EndpointInfo]: """Get endpoints filtered by category. Args: category: The category to filter by Returns: Dictionary of endpoints belonging to the specified category """ return { key: info for key, info in self._endpoints.items() if info.category == category } def get_unique_domains(self) -> set[str]: """Get all unique domains accessed by BioMCP. Returns: Set of unique domain names (e.g., 'api.ncbi.nlm.nih.gov') """ return {info.domain for info in self._endpoints.values()} def generate_markdown_report(self) -> str: """Generate markdown documentation of all endpoints.""" lines = [ "# Third-Party Endpoints Used by BioMCP", "", "_This file is auto-generated from the endpoint registry._", "", "## Overview", "", f"BioMCP connects to {len(self.get_unique_domains())} external domains across {len(self._endpoints)} endpoints.", "", "## Endpoints by Category", "", ] # Group by category for category in EndpointCategory: endpoints = self.get_endpoints_by_category(category) if not endpoints: continue lines.append(f"### {category.value.replace('_', ' ').title()}") lines.append("") for key, info in sorted(endpoints.items()): lines.append(f"#### {key}") lines.append("") lines.append(f"- **URL**: `{info.url}`") lines.append(f"- **Description**: {info.description}") lines.append( f"- **Data Types**: {', '.join(dt.value for dt in info.data_types)}" ) lines.append( f"- **Rate Limit**: {info.rate_limit or 'Not specified'}" ) if info.authentication: lines.append( f"- 
**Authentication**: {info.authentication}" ) if info.compliance_notes: lines.append( f"- **Compliance Notes**: {info.compliance_notes}" ) lines.append("") # Add summary section lines.extend([ "## Domain Summary", "", "| Domain | Category | Endpoints |", "| -------------------- | --------------------- | --------- |", ]) domain_stats: dict[str, dict[str, Any]] = {} for info in self._endpoints.values(): domain = info.domain if domain not in domain_stats: domain_stats[domain] = { "category": info.category.value, "count": 0, } domain_stats[domain]["count"] = ( int(domain_stats[domain]["count"]) + 1 ) for domain, stats in sorted(domain_stats.items()): lines.append( f"| {domain} | {stats['category']} | {stats['count']} |" ) lines.extend([ "", "## Compliance and Privacy", "", "All endpoints accessed by BioMCP:", "", "- Use publicly available APIs", "- Do not transmit personally identifiable information (PII)", "- Access only aggregate or de-identified data", "- Comply with respective terms of service", "", "## Network Control", "", "For air-gapped or restricted environments, BioMCP supports:", "", "- Offline mode via `BIOMCP_OFFLINE=true` environment variable", "- Custom proxy configuration via standard HTTP(S)\\_PROXY variables", "- SSL certificate pinning for enhanced security", "", ]) return "\n".join(lines) def save_markdown_report(self, output_path: Path | None = None): """Save markdown report to file.""" if output_path is None: output_path = ( Path(__file__).parent.parent.parent / "THIRD_PARTY_ENDPOINTS.md" ) output_path.write_text(self.generate_markdown_report()) return output_path # Global registry instance _registry = EndpointRegistry() def get_registry() -> EndpointRegistry: """Get the global endpoint registry.""" return _registry ``` -------------------------------------------------------------------------------- /tests/data/ct_gov/clinical_trials_api_v2.yaml: -------------------------------------------------------------------------------- ```yaml openapi: 
"3.0.3" info: title: "ClinicalTrials.gov REST API" description: "This API is made available to provide users meta data, statistics,\ \ and the most recent version of the clinical trials available on ClinicalTrials.gov." version: "2.0.3" tags: - name: "Studies" description: "Related to clinical trial studies" - name: "Stats" description: "Data statistics" - name: "Version" description: "Version info" servers: - url: "https://clinicaltrials.gov/api/v2" description: "This server" paths: /studies: get: summary: "Studies" description: "Returns data of studies matching query and filter parameters.\ \ The studies are returned page by page.\nIf response contains `nextPageToken`,\ \ use its value in `pageToken` to get next page.\nThe last page will not contain\ \ `nextPageToken`. A page may have empty `studies` array.\nRequest for each\ \ subsequent page **must** have the same parameters as for the first page,\ \ except\n`countTotal`, `pageSize`, and `pageToken` parameters.\n\nIf neither\ \ queries nor filters are set, all studies will be returned.\nIf any query\ \ parameter contains only NCT IDs (comma- and/or space-separated), filters\ \ are ignored.\n\n`query.*` parameters are in [Essie expression syntax](/find-studies/constructing-complex-search-queries).\n\ Those parameters affect ranking of studies, if sorted by relevance. See `sort`\ \ parameter for details.\n\n`filter.*` and `postFilter.*` parameters have\ \ same effect as there is no aggregation calculation. \nBoth are available\ \ just to simplify applying parameters from search request.\nBoth do not affect\ \ ranking of studies.\n\nNote: When trying JSON format in your browser, do\ \ not set too large `pageSize` parameter, if `fields` is\nunlimited. That\ \ may return too much data for the browser to parse and render." 
tags: - "Studies" operationId: "listStudies" parameters: - name: "format" in: "query" description: "Must be one of the following:\n* `csv`- return CSV table with\ \ one page of study data; first page will contain header with column names;\ \ available fields are listed on [CSV Download](/data-api/about-api/csv-download)\ \ page\n* `json`- return JSON with one page of study data; every study object\ \ is placed in a separate line; `markup` type fields format depends on `markupFormat`\ \ parameter" required: false schema: type: "string" enum: - "csv" - "json" default: "json" - name: "markupFormat" in: "query" description: "Format of `markup` type fields:\n* `markdown`- [markdown](https://spec.commonmark.org/0.28/)\ \ format\n* `legacy`- compatible with classic PRS\n\nApplicable only to\ \ `json` format." required: false schema: type: "string" enum: - "markdown" - "legacy" default: "markdown" - name: "query.cond" in: "query" description: "\"Conditions or disease\" query in [Essie expression syntax](/find-studies/constructing-complex-search-queries).\ \ See \"ConditionSearch Area\" on [Search Areas](/data-api/about-api/search-areas#ConditionSearch)\ \ for more details." required: false schema: type: "string" examples: example1: value: "lung cancer" example2: value: "(head OR neck) AND pain" - name: "query.term" in: "query" description: "\"Other terms\" query in [Essie expression syntax](/find-studies/constructing-complex-search-queries).\ \ See \"BasicSearch Area\" on [Search Areas](/data-api/about-api/search-areas#BasicSearch)\ \ for more details." required: false schema: type: "string" examples: example1: value: "AREA[LastUpdatePostDate]RANGE[2023-01-15,MAX]" - name: "query.locn" in: "query" description: "\"Location terms\" query in [Essie expression syntax](/find-studies/constructing-complex-search-queries).\ \ See \"LocationSearch Area\" on [Search Areas](/data-api/about-api/search-areas#LocationSearch)\ \ for more details." 
required: false schema: type: "string" - name: "query.titles" in: "query" description: "\"Title / acronym\" query in [Essie expression syntax](/find-studies/constructing-complex-search-queries).\ \ See \"TitleSearch Area\" on [Search Areas](/data-api/about-api/search-areas#TitleSearch)\ \ for more details." required: false schema: type: "string" - name: "query.intr" in: "query" description: "\"Intervention / treatment\" query in [Essie expression syntax](/find-studies/constructing-complex-search-queries).\ \ See \"InterventionSearch Area\" on [Search Areas](/data-api/about-api/search-areas#InterventionSearch)\ \ for more details." required: false schema: type: "string" - name: "query.outc" in: "query" description: "\"Outcome measure\" query in [Essie expression syntax](/find-studies/constructing-complex-search-queries).\ \ See \"OutcomeSearch Area\" on [Search Areas](/data-api/about-api/search-areas#OutcomeSearch)\ \ for more details." required: false schema: type: "string" - name: "query.spons" in: "query" description: "\"Sponsor / collaborator\" query in [Essie expression syntax](/find-studies/constructing-complex-search-queries).\ \ See \"SponsorSearch Area\" on [Search Areas](/data-api/about-api/search-areas#SponsorSearch)\ \ for more details." required: false schema: type: "string" - name: "query.lead" in: "query" description: "Searches in \"LeadSponsorName\" field. See [Study Data Structure](/data-api/about-api/study-data-structure#LeadSponsorName)\ \ for more details. The query is in [Essie expression syntax](/find-studies/constructing-complex-search-queries)." required: false schema: type: "string" - name: "query.id" in: "query" description: "\"Study IDs\" query in [Essie expression syntax](/find-studies/constructing-complex-search-queries).\ \ See \"IdSearch Area\" on [Search Areas](/data-api/about-api/search-areas#IdSearch)\ \ for more details." 
required: false schema: type: "string" - name: "query.patient" in: "query" description: "See \"PatientSearch Area\" on [Search Areas](/data-api/about-api/search-areas#PatientSearch)\ \ for more details." required: false schema: type: "string" - name: "filter.overallStatus" in: "query" style: "pipeDelimited" explode: false description: "Filter by comma- or pipe-separated list of statuses" required: false schema: type: "array" items: $ref: "#/components/schemas/Status" examples: example1: value: - "NOT_YET_RECRUITING" - "RECRUITING" example2: value: - "COMPLETED" - name: "filter.geo" in: "query" description: "Filter by geo-function. Currently only distance function is\ \ supported.\nFormat: `distance(latitude,longitude,distance)`" required: false schema: type: "string" pattern: "^distance\\(-?\\d+(\\.\\d+)?,-?\\d+(\\.\\d+)?,\\d+(\\.\\d+)?(km|mi)?\\\ )$" examples: example1: value: "distance(39.0035707,-77.1013313,50mi)" - name: "filter.ids" in: "query" style: "pipeDelimited" explode: false description: "Filter by comma- or pipe-separated list of NCT IDs (a.k.a. ClinicalTrials.gov\ \ identifiers).\nThe provided IDs will be searched in [NCTId](data-api/about-api/study-data-structure#NCTId)\ \ and\n[NCTIdAlias](data-api/about-api/study-data-structure#NCTIdAlias)\ \ fields." 
required: false schema: type: "array" items: type: "string" pattern: "^[Nn][Cc][Tt]0*[1-9]\\d{0,7}$" examples: example1: value: - "NCT04852770" - "NCT01728545" - "NCT02109302" - name: "filter.advanced" in: "query" description: "Filter by query in [Essie expression syntax](/find-studies/constructing-complex-search-queries)" required: false schema: type: "string" examples: example1: value: "AREA[StartDate]2022" example2: value: "AREA[MinimumAge]RANGE[MIN, 16 years] AND AREA[MaximumAge]RANGE[16\ \ years, MAX]" - name: "filter.synonyms" in: "query" style: "pipeDelimited" explode: false description: "Filter by comma- or pipe-separated list of `area`:`synonym_id`\ \ pairs" required: false schema: type: "array" items: type: "string" examples: example1: value: - "ConditionSearch:1651367" - "BasicSearch:2013558" - name: "postFilter.overallStatus" in: "query" style: "pipeDelimited" explode: false description: "Filter by comma- or pipe-separated list of statuses" required: false schema: type: "array" items: $ref: "#/components/schemas/Status" examples: example1: value: - "NOT_YET_RECRUITING" - "RECRUITING" example2: value: - "COMPLETED" - name: "postFilter.geo" in: "query" description: "Filter by geo-function. Currently only distance function is\ \ supported.\nFormat: `distance(latitude,longitude,distance)`" required: false schema: type: "string" pattern: "^distance\\(-?\\d+(\\.\\d+)?,-?\\d+(\\.\\d+)?,\\d+(\\.\\d+)?(km|mi)?\\\ )$" examples: example1: value: "distance(39.0035707,-77.1013313,50mi)" - name: "postFilter.ids" in: "query" style: "pipeDelimited" explode: false description: "Filter by comma- or pipe-separated list of NCT IDs (a.k.a. ClinicalTrials.gov\ \ identifiers).\nThe provided IDs will be searched in [NCTId](data-api/about-api/study-data-structure#NCTId)\ \ and\n[NCTIdAlias](data-api/about-api/study-data-structure#NCTIdAlias)\ \ fields." 
required: false schema: type: "array" items: type: "string" pattern: "^[Nn][Cc][Tt]0*[1-9]\\d{0,7}$" examples: example1: value: - "NCT04852770" - "NCT01728545" - "NCT02109302" - name: "postFilter.advanced" in: "query" description: "Filter by query in [Essie expression syntax](/find-studies/constructing-complex-search-queries)" required: false schema: type: "string" examples: example1: value: "AREA[StartDate]2022" example2: value: "AREA[MinimumAge]RANGE[MIN, 16 years] AND AREA[MaximumAge]RANGE[16\ \ years, MAX]" - name: "postFilter.synonyms" in: "query" style: "pipeDelimited" explode: false description: "Filter by comma- or pipe-separated list of `area`:`synonym_id`\ \ pairs" required: false schema: type: "array" items: type: "string" examples: example1: value: - "ConditionSearch:1651367" - "BasicSearch:2013558" - name: "aggFilters" in: "query" description: "Apply aggregation filters, aggregation counts will not be provided.\n\ The value is comma- or pipe-separated list of pairs `filter_id`:`space-separated\ \ list of option keys` for the checked options." required: false schema: type: "string" examples: example1: value: "results:with,status:com" example2: value: "status:not rec,sex:f,healthy:y" - name: "geoDecay" in: "query" description: "Set proximity factor by distance from `filter.geo` location\ \ to the closest [LocationGeoPoint](/data-api/about-api/study-data-structure#LocationGeoPoint)\ \ of a study.\nIgnored, if `filter.geo` parameter is not set or response\ \ contains more than 10,000 studies." 
required: false schema: type: "string" pattern: "^func:(gauss|exp|linear),scale:(\\d+(\\.\\d+)?(km|mi)),offset:(\\\ d+(\\.\\d+)?(km|mi)),decay:(\\d+(\\.\\d+)?)$" default: "func:exp,scale:300mi,offset:0mi,decay:0.5" examples: example1: value: "func:linear,scale:100km,offset:10km,decay:0.1" example2: value: "func:gauss,scale:500mi,offset:0mi,decay:0.3" - name: "fields" in: "query" style: "pipeDelimited" explode: false description: "If specified, must be non-empty comma- or pipe-separated list\ \ of fields to return. If unspecified, all fields will be returned.\nOrder\ \ of the fields does not matter.\n\nFor `csv` format, specify list of columns.\ \ The column names are available on [CSV Download](/data-api/about-api/csv-download).\n\ \nFor `json` format, every list item is either area name, piece name, field\ \ name, or special name.\nIf a piece or a field is a branch node, all descendant\ \ fields will be included.\nAll area names are available on [Search Areas](/data-api/about-api/search-areas),\n\ the piece and field names — on [Data Structure](/data-api/about-api/study-data-structure)\ \ and also can be retrieved at `/studies/metadata` endpoint.\nThere is a\ \ special name, `@query`, which expands to all fields queried by search." required: false schema: type: "array" minItems: 1 items: type: "string" pattern: "^([a-zA-Z][a-zA-Z0-9\\-. 
]*)|(@query)$" examples: example1: value: - "NCTId" - "BriefTitle" - "OverallStatus" - "HasResults" example2: value: "ProtocolSection" - name: "sort" in: "query" style: "pipeDelimited" explode: false description: "Comma- or pipe-separated list of sorting options of the studies.\ \ The returning studies are not sorted by default for a performance reason.\n\ Every list item contains a field/piece name and an optional sort direction\ \ (`asc` for ascending or `desc` for descending)\nafter colon character.\n\ \nAll piece and field names can be found on [Data Structure](/data-api/about-api/study-data-structure)\ \ and also can be retrieved\nat `/studies/metadata` endpoint. Currently,\ \ only date and numeric fields are allowed for sorting.\nThere is a special\ \ \"field\" `@relevance` to sort by relevance to a search query.\n\nStudies\ \ missing sort field are always last. Default sort direction:\n* Date field\ \ - `desc`\n* Numeric field - `asc`\n* `@relevance` - `desc`" required: false schema: type: "array" maxItems: 2 default: [] items: type: "string" pattern: "^(([a-zA-Z][a-zA-Z0-9\\-. ]*)|(@relevance))(:(asc|desc))?$" examples: example1: value: - "@relevance" example2: value: - "LastUpdatePostDate" example3: value: - "EnrollmentCount:desc" - "NumArmGroups" - name: "countTotal" in: "query" description: "Count total number of studies in all pages and return `totalCount`\ \ field with first page, if `true`.\nFor CSV, the result can be found in\ \ `x-total-count` response header.\nThe parameter is ignored for the subsequent\ \ pages." required: false schema: type: "boolean" default: false - name: "pageSize" in: "query" description: "Page size is maximum number of studies to return in response.\ \ It does not have to be the same for every page.\nIf not specified or set\ \ to 0, the default value will be used. It will be coerced down to 1,000,\ \ if greater than that." 
required: false schema: type: "integer" format: "int32" minimum: 0 default: 10 examples: example1: value: 2 example2: value: 100 - name: "pageToken" in: "query" description: "Token to get next page. Set it to a `nextPageToken` value returned\ \ with the previous page in JSON format.\nFor CSV, it can be found in `x-next-page-token`\ \ response header.\nDo not specify it for first page." required: false schema: type: "string" responses: "200": description: "OK" content: application/json: schema: $ref: "#/components/schemas/PagedStudies" example: totalCount: 438897 studies: - protocolSection: identificationModule: nctId: "NCT03540771" briefTitle: "Introducing Palliative Care (PC) Within the Treatment\ \ of End Stage Liver Disease (ESLD)" statusModule: overallStatus: "RECRUITING" hasResults: false - protocolSection: identificationModule: nctId: "NCT03630471" briefTitle: "Effectiveness of a Problem-solving Intervention\ \ for Common Adolescent Mental Health Problems in India" statusModule: overallStatus: "COMPLETED" hasResults: false - protocolSection: identificationModule: nctId: "NCT00587795" briefTitle: "Orthopedic Study of the Aircast StabilAir Wrist\ \ Fracture Brace" statusModule: overallStatus: "TERMINATED" hasResults: true nextPageToken: "abracadabra" "400": description: "Bad Request" content: text/plain: schema: $ref: "#/components/schemas/errorMessage" /studies/{nctId}: get: summary: "Single Study" description: "Returns data of a single study." tags: - "Studies" operationId: "fetchStudy" parameters: - name: "nctId" in: "path" description: "NCT Number of a study. If found in [NCTIdAlias](data-api/about-api/study-data-structure#NCTIdAlias)\ \ field,\n301 HTTP redirect to the actual study will be returned." 
required: true schema: type: "string" pattern: "^[Nn][Cc][Tt]0*[1-9]\\d{0,7}$" examples: example1: value: "NCT00841061" example2: value: "NCT04000165" - name: "format" in: "query" description: "Must be one of the following:\n* `csv`- return CSV table; available\ \ fields are listed on [CSV Download](/data-api/about-api/csv-download)\n\ * `json`- return JSON object; format of `markup` fields depends on `markupFormat`\ \ parameter\n* `json.zip`- put JSON object into a .json file and download\ \ it as zip archive; field values of type `markup` are in [markdown](https://spec.commonmark.org/0.28/)\ \ format\n* `fhir.json` - return FHIR JSON; fields are not customizable;\ \ see [Access Data in FHIR](/data-api/fhir)\n* `ris`- return RIS record;\ \ available tags are listed on [RIS Download](/data-api/about-api/ris-download)" required: false schema: type: "string" enum: - "csv" - "json" - "json.zip" - "fhir.json" - "ris" default: "json" - name: "markupFormat" in: "query" description: "Format of `markup` type fields:\n* `markdown`- [markdown](https://spec.commonmark.org/0.28/)\ \ format\n* `legacy`- compatible with classic PRS\n\nApplicable only to\ \ `json` format." required: false schema: type: "string" enum: - "markdown" - "legacy" default: "markdown" - name: "fields" in: "query" style: "pipeDelimited" explode: false description: "If specified, must be non-empty comma- or pipe-separated list\ \ of fields to return. 
If unspecified, all fields will be returned.\nOrder\ \ of the fields does not matter.\n\nFor `csv` format, specify list of columns.\ \ The column names are available on [CSV Download](/data-api/about-api/csv-download).\n\ \nFor `json` and `json.zip` formats, every list item is either area name,\ \ piece name, or field name.\nIf a piece or a field is a branch node, all\ \ descendant fields will be included.\nAll area names are available on [Search\ \ Areas](/data-api/about-api/search-areas),\nthe piece and field names -\ \ on [Data Structure](/data-api/about-api/study-data-structure) and also\ \ can be retrieved at `/studies/metadata` endpoint.\n\nFor `fhir.json` format,\ \ all available fields are returned and this parameter must be unspecified.\n\ \nFor `ris` format, specify list of tags. The tag names are available on\ \ [RIS Download](/data-api/about-api/ris-download)." required: false schema: type: "array" minItems: 1 items: type: "string" pattern: "^[a-zA-Z][a-zA-Z0-9\\-. ]*$" examples: example1: value: - "NCTId" - "BriefTitle" - "Reference" example2: value: - "ConditionsModule" - "EligibilityModule" responses: "200": description: "OK" content: text/csv: schema: $ref: "#/components/schemas/StudiesCsv" application/json: schema: $ref: "#/components/schemas/Study" application/zip: schema: $ref: "#/components/schemas/StudiesZip" application/fhir+json: schema: $ref: "#/components/schemas/StudyFhir" "301": description: "Moved Permanently" content: {} "400": description: "Bad Request" content: text/plain: schema: $ref: "#/components/schemas/errorMessage" "404": description: "Not Found" content: text/plain: schema: $ref: "#/components/schemas/errorMessage" /studies/metadata: get: summary: "Data Model Fields" description: "Returns study data model fields." 
tags: - "Studies" operationId: "studiesMetadata" parameters: - name: "includeIndexedOnly" in: "query" description: "Include indexed-only fields, if `true`" required: false schema: type: "boolean" default: false - name: "includeHistoricOnly" in: "query" description: "Include fields available only in historic data, if `true`" required: false schema: type: "boolean" default: false responses: "200": description: "OK" content: application/json: schema: $ref: "#/components/schemas/FieldNodeList" "400": description: "Bad Request" content: text/plain: schema: $ref: "#/components/schemas/errorMessage" ``` -------------------------------------------------------------------------------- /src/biomcp/trials/search.py: -------------------------------------------------------------------------------- ```python import json import logging from ssl import TLSVersion from typing import Annotated from pydantic import BaseModel, Field, field_validator, model_validator from .. import StrEnum, ensure_list, http_client, render from ..constants import CLINICAL_TRIALS_BASE_URL from ..integrations import BioThingsClient logger = logging.getLogger(__name__) class SortOrder(StrEnum): RELEVANCE = "RELEVANCE" LAST_UPDATE = "LAST_UPDATE" ENROLLMENT = "ENROLLMENT" START_DATE = "START_DATE" COMPLETION_DATE = "COMPLETION_DATE" SUBMITTED_DATE = "SUBMITTED_DATE" class TrialPhase(StrEnum): EARLY_PHASE1 = "EARLY_PHASE1" PHASE1 = "PHASE1" PHASE2 = "PHASE2" PHASE3 = "PHASE3" PHASE4 = "PHASE4" NOT_APPLICABLE = "NOT_APPLICABLE" class RecruitingStatus(StrEnum): OPEN = "OPEN" CLOSED = "CLOSED" ANY = "ANY" class StudyType(StrEnum): INTERVENTIONAL = "INTERVENTIONAL" OBSERVATIONAL = "OBSERVATIONAL" EXPANDED_ACCESS = "EXPANDED_ACCESS" OTHER = "OTHER" class InterventionType(StrEnum): DRUG = "DRUG" DEVICE = "DEVICE" BIOLOGICAL = "BIOLOGICAL" PROCEDURE = "PROCEDURE" RADIATION = "RADIATION" BEHAVIORAL = "BEHAVIORAL" GENETIC = "GENETIC" DIETARY = "DIETARY" DIAGNOSTIC_TEST = "DIAGNOSTIC_TEST" OTHER = "OTHER" class 
class SponsorType(StrEnum):
    """Sponsor categories a trial query can be restricted to."""

    INDUSTRY = "INDUSTRY"
    GOVERNMENT = "GOVERNMENT"
    ACADEMIC = "ACADEMIC"
    OTHER = "OTHER"


class StudyDesign(StrEnum):
    """Study design options a trial query can be restricted to."""

    RANDOMIZED = "RANDOMIZED"
    NON_RANDOMIZED = "NON_RANDOMIZED"
    OBSERVATIONAL = "OBSERVATIONAL"


class DateField(StrEnum):
    """Date fields that a min/max date range can be applied to."""

    LAST_UPDATE = "LAST_UPDATE"
    STUDY_START = "STUDY_START"
    PRIMARY_COMPLETION = "PRIMARY_COMPLETION"
    OUTCOME_POSTING = "OUTCOME_POSTING"
    COMPLETION = "COMPLETION"
    FIRST_POSTING = "FIRST_POSTING"
    SUBMITTED_DATE = "SUBMITTED_DATE"


class PrimaryPurpose(StrEnum):
    """Primary-purpose options a trial query can be restricted to."""

    TREATMENT = "TREATMENT"
    PREVENTION = "PREVENTION"
    DIAGNOSTIC = "DIAGNOSTIC"
    SUPPORTIVE_CARE = "SUPPORTIVE_CARE"
    SCREENING = "SCREENING"
    HEALTH_SERVICES = "HEALTH_SERVICES"
    BASIC_SCIENCE = "BASIC_SCIENCE"
    DEVICE_FEASIBILITY = "DEVICE_FEASIBILITY"
    OTHER = "OTHER"


class AgeGroup(StrEnum):
    """Participant age groups; ``ALL`` means no age restriction is applied."""

    CHILD = "CHILD"
    ADULT = "ADULT"
    SENIOR = "SENIOR"
    ALL = "ALL"


class LineOfTherapy(StrEnum):
    """Line-of-therapy shorthand used to pick eligibility-criteria phrases."""

    FIRST_LINE = "1L"
    SECOND_LINE = "2L"
    THIRD_LINE_PLUS = "3L+"


# SortOrder -> value sent as the `sort` request parameter
# ("<field>:<direction>" or the special "@relevance").
# NOTE(review): the date-filter mapping below and the API examples use
# `StartDate`; confirm that `StudyStartDate` is an accepted sort field.
CTGOV_SORT_MAPPING = {
    SortOrder.RELEVANCE: "@relevance",
    SortOrder.LAST_UPDATE: "LastUpdatePostDate:desc",
    SortOrder.ENROLLMENT: "EnrollmentCount:desc",
    SortOrder.START_DATE: "StudyStartDate:desc",
    SortOrder.COMPLETION_DATE: "PrimaryCompletionDate:desc",
    SortOrder.SUBMITTED_DATE: "StudyFirstSubmitDate:desc",
}

# The enum -> tuple mappings below feed `AREA[<name>]<value>` advanced
# filters; only the first tuple element is used when building the filter.
CTGOV_PHASE_MAPPING = {
    TrialPhase.EARLY_PHASE1: ("EARLY_PHASE1",),
    TrialPhase.PHASE1: ("PHASE1",),
    TrialPhase.PHASE2: ("PHASE2",),
    TrialPhase.PHASE3: ("PHASE3",),
    TrialPhase.PHASE4: ("PHASE4",),
    TrialPhase.NOT_APPLICABLE: ("NOT_APPLICABLE",),
}

# Overall-status values sent for RecruitingStatus.OPEN (also the default).
OPEN_STATUSES = (
    "AVAILABLE",
    "ENROLLING_BY_INVITATION",
    "NOT_YET_RECRUITING",
    "RECRUITING",
)

# Overall-status values sent for RecruitingStatus.CLOSED.
CLOSED_STATUSES = (
    "ACTIVE_NOT_RECRUITING",
    "COMPLETED",
    "SUSPENDED",
    "TERMINATED",
    "WITHDRAWN",
)

# RecruitingStatus -> statuses for `filter.overallStatus`;
# None (ANY) means no status filter is sent.
CTGOV_RECRUITING_STATUS_MAPPING = {
    RecruitingStatus.OPEN: OPEN_STATUSES,
    RecruitingStatus.CLOSED: CLOSED_STATUSES,
    RecruitingStatus.ANY: None,
}

CTGOV_STUDY_TYPE_MAPPING = {
    StudyType.INTERVENTIONAL: ("Interventional",),
    StudyType.OBSERVATIONAL: ("Observational",),
    StudyType.EXPANDED_ACCESS: ("Expanded Access",),
    StudyType.OTHER: ("Other",),
}

CTGOV_INTERVENTION_TYPE_MAPPING = {
    InterventionType.DRUG: ("Drug",),
    InterventionType.DEVICE: ("Device",),
    InterventionType.BIOLOGICAL: ("Biological",),
    InterventionType.PROCEDURE: ("Procedure",),
    InterventionType.RADIATION: ("Radiation",),
    InterventionType.BEHAVIORAL: ("Behavioral",),
    InterventionType.GENETIC: ("Genetic",),
    InterventionType.DIETARY: ("Dietary",),
    InterventionType.DIAGNOSTIC_TEST: ("Diagnostic Test",),
    InterventionType.OTHER: ("Other",),
}

CTGOV_SPONSOR_TYPE_MAPPING = {
    SponsorType.INDUSTRY: ("Industry",),
    SponsorType.GOVERNMENT: ("Government",),
    SponsorType.ACADEMIC: ("Academic",),
    SponsorType.OTHER: ("Other",),
}

CTGOV_STUDY_DESIGN_MAPPING = {
    StudyDesign.RANDOMIZED: ("Randomized",),
    StudyDesign.NON_RANDOMIZED: ("Non-Randomized",),
    StudyDesign.OBSERVATIONAL: ("Observational",),
}

# DateField -> field name used in `AREA[<field>]RANGE[min,max]` date filters.
CTGOV_DATE_FIELD_MAPPING = {
    DateField.LAST_UPDATE: "LastUpdatePostDate",
    DateField.STUDY_START: "StartDate",
    DateField.PRIMARY_COMPLETION: "PrimaryCompletionDate",
    DateField.OUTCOME_POSTING: "ResultsFirstPostDate",
    DateField.COMPLETION: "CompletionDate",
    DateField.FIRST_POSTING: "StudyFirstPostDate",
    DateField.SUBMITTED_DATE: "StudyFirstSubmitDate",
}

CTGOV_PRIMARY_PURPOSE_MAPPING = {
    PrimaryPurpose.TREATMENT: ("Treatment",),
    PrimaryPurpose.PREVENTION: ("Prevention",),
    PrimaryPurpose.DIAGNOSTIC: ("Diagnostic",),
    PrimaryPurpose.SUPPORTIVE_CARE: ("Supportive Care",),
    PrimaryPurpose.SCREENING: ("Screening",),
    PrimaryPurpose.HEALTH_SERVICES: ("Health Services",),
    PrimaryPurpose.BASIC_SCIENCE: ("Basic Science",),
    PrimaryPurpose.DEVICE_FEASIBILITY: ("Device Feasibility",),
    PrimaryPurpose.OTHER: ("Other",),
}

# AgeGroup -> value for the `AREA[StdAge]` filter; ALL maps to None because
# no age filter is added for it.
CTGOV_AGE_GROUP_MAPPING = {
    AgeGroup.CHILD: ("Child",),
    AgeGroup.ADULT: ("Adult",),
    AgeGroup.SENIOR: ("Older Adult",),
    AgeGroup.ALL: None,
}

# Line of therapy patterns for EligibilityCriteria search.
# Each entry is already a double-quoted phrase; the phrases of one line of
# therapy are OR-ed together inside a single AREA[EligibilityCriteria] block.
LINE_OF_THERAPY_PATTERNS = {
    LineOfTherapy.FIRST_LINE: [
        '"first line"',
        '"first-line"',
        '"1st line"',
        '"frontline"',
        '"treatment naive"',
        '"previously untreated"',
    ],
    LineOfTherapy.SECOND_LINE: [
        '"second line"',
        '"second-line"',
        '"2nd line"',
        '"one prior line"',
        '"1 prior line"',
    ],
    LineOfTherapy.THIRD_LINE_PLUS: [
        '"third line"',
        '"third-line"',
        '"3rd line"',
        '"≥2 prior"',
        '"at least 2 prior"',
        '"heavily pretreated"',
    ],
}

# Values sent as the `format` and `markupFormat` request parameters.
DEFAULT_FORMAT = "csv"
DEFAULT_MARKUP = "markdown"

# Default columns requested via the `fields` parameter when the caller does
# not ask for specific fields (presumably CSV column names, matching
# DEFAULT_FORMAT -- confirm against the CSV download documentation).
SEARCH_FIELDS = [
    "NCT Number",
    "Study Title",
    "Study URL",
    "Study Status",
    "Brief Summary",
    "Study Results",
    "Conditions",
    "Interventions",
    "Phases",
    "Enrollment",
    "Study Type",
    "Study Design",
    "Start Date",
    "Completion Date",
]

# Same fields as a one-element list holding the comma-joined names, i.e. the
# list-of-strings shape used for every query parameter value.
SEARCH_FIELDS_PARAM = [",".join(SEARCH_FIELDS)]
# Lower-cased user-facing aliases -> RecruitingStatus values.
_RECRUITING_STATUS_ALIASES = {
    "recruiting": "OPEN",
    "active": "OPEN",
    "enrolling": "OPEN",
    "closed": "CLOSED",
    "completed": "CLOSED",
    "terminated": "CLOSED",
}

# TrialQuery fields that accept either a list or a delimited string.
_TRIAL_QUERY_LIST_FIELDS = (
    "conditions",
    "terms",
    "interventions",
    "nct_ids",
    "prior_therapies",
    "progression_on",
    "required_mutations",
    "excluded_mutations",
    "return_fields",
)


class TrialQuery(BaseModel):
    """Parameters for querying clinical trial data from ClinicalTrials.gov."""

    conditions: list[str] | None = Field(
        default=None,
        description="List of condition terms.",
    )
    terms: list[str] | None = Field(
        default=None,
        description="General search terms that don't fit specific categories.",
    )
    interventions: list[str] | None = Field(
        default=None,
        description="Intervention names.",
    )
    recruiting_status: RecruitingStatus | None = Field(
        default=None,
        description="Study recruitment status. Use 'OPEN' for actively recruiting trials, 'CLOSED' for completed/terminated trials, or 'ANY' for all trials. Common aliases like 'recruiting', 'active', 'enrolling' map to 'OPEN'.",
    )
    study_type: StudyType | None = Field(
        default=None,
        description="Type of study.",
    )
    nct_ids: list[str] | None = Field(
        default=None,
        description="Clinical trial NCT IDs",
    )
    lat: float | None = Field(
        default=None,
        description="Latitude for location search. AI agents should geocode city/location names (e.g., 'Cleveland' → 41.4993, -81.6944) before using this parameter.",
    )
    long: float | None = Field(
        default=None,
        description="Longitude for location search. AI agents should geocode city/location names (e.g., 'Cleveland' → 41.4993, -81.6944) before using this parameter.",
    )
    distance: int | None = Field(
        default=None,
        description="Distance from lat/long in miles (default: 50 miles if lat/long provided but distance not specified)",
    )
    min_date: str | None = Field(
        default=None,
        description="Minimum date for filtering",
    )
    max_date: str | None = Field(
        default=None,
        description="Maximum date for filtering",
    )
    date_field: DateField | None = Field(
        default=None,
        description="Date field to filter on",
    )
    phase: TrialPhase | None = Field(
        default=None,
        description="Trial phase filter",
    )
    age_group: AgeGroup | None = Field(
        default=None,
        description="Age group filter",
    )
    primary_purpose: PrimaryPurpose | None = Field(
        default=None,
        description="Primary purpose of the trial",
    )
    intervention_type: InterventionType | None = Field(
        default=None,
        description="Type of intervention",
    )
    sponsor_type: SponsorType | None = Field(
        default=None,
        description="Type of sponsor",
    )
    study_design: StudyDesign | None = Field(
        default=None,
        description="Study design",
    )
    sort: SortOrder | None = Field(
        default=None,
        description="Sort order for results",
    )
    next_page_hash: str | None = Field(
        default=None,
        description="Token to retrieve the next page of results",
    )
    # New eligibility-focused fields
    prior_therapies: list[str] | None = Field(
        default=None,
        description="Prior therapies to search for in eligibility criteria",
    )
    progression_on: list[str] | None = Field(
        default=None,
        description="Therapies the patient has progressed on",
    )
    required_mutations: list[str] | None = Field(
        default=None,
        description="Required mutations in eligibility criteria",
    )
    excluded_mutations: list[str] | None = Field(
        default=None,
        description="Excluded mutations in eligibility criteria",
    )
    biomarker_expression: dict[str, str] | None = Field(
        default=None,
        description="Biomarker expression requirements (e.g., {'PD-L1': '≥50%'})",
    )
    line_of_therapy: LineOfTherapy | None = Field(
        default=None,
        description="Line of therapy filter",
    )
    allow_brain_mets: bool | None = Field(
        default=None,
        description="Whether to allow trials that accept brain metastases",
    )
    return_fields: list[str] | None = Field(
        default=None,
        description="Specific fields to return in the response",
    )
    page_size: int | None = Field(
        default=None,
        description="Number of results per page",
        ge=1,
        le=1000,
    )
    expand_synonyms: bool = Field(
        default=True,
        description="Expand condition searches with disease synonyms from MyDisease.info",
    )

    @field_validator("recruiting_status", mode="before")
    @classmethod
    def normalize_recruiting_status(cls, v):
        """Normalize common recruiting status aliases to enum values.

        Strings are matched case-insensitively (ignoring surrounding
        whitespace) against the known aliases; anything that is not a known
        alias is returned unchanged so the normal enum validation decides
        whether it is acceptable.
        """
        if isinstance(v, str):
            return _RECRUITING_STATUS_ALIASES.get(v.strip().lower(), v)
        return v

    # Runs before field validation so that list-typed fields also accept a
    # single delimited string. Declared as a classmethod, matching the
    # field validator above and the documented form of before-validators.
    @model_validator(mode="before")
    @classmethod
    def convert_list_fields(cls, data):
        """Convert string values to lists for list fields.

        Only dict input is touched; keys that are missing or set to ``None``
        are left alone.
        """
        if isinstance(data, dict):
            for field_name in _TRIAL_QUERY_LIST_FIELDS:
                if data.get(field_name) is not None:
                    data[field_name] = ensure_list(
                        data[field_name], split_strings=True
                    )
        return data
Args: params: The parameter dictionary to modify ids: List of NCT IDs to inject has_other_filters: Whether other filters are present """ ids_csv = ",".join(ids) if has_other_filters: # intersection path params["filter.ids"] = [ids_csv] elif len(ids_csv) < 1800: # pure-ID & small params["query.id"] = [ids_csv] else: # pure-ID & large params["filter.ids"] = [ids_csv] def _build_prior_therapy_essie(therapies: list[str]) -> list[str]: """Build Essie fragments for prior therapy search.""" fragments = [] for therapy in therapies: if therapy.strip(): # Skip empty strings fragment = f'AREA[EligibilityCriteria]("{therapy}" AND (prior OR previous OR received))' fragments.append(fragment) return fragments def _build_progression_essie(therapies: list[str]) -> list[str]: """Build Essie fragments for progression on therapy search.""" fragments = [] for therapy in therapies: if therapy.strip(): # Skip empty strings fragment = f'AREA[EligibilityCriteria]("{therapy}" AND (progression OR resistant OR refractory))' fragments.append(fragment) return fragments def _build_required_mutations_essie(mutations: list[str]) -> list[str]: """Build Essie fragments for required mutations.""" fragments = [] for mutation in mutations: if mutation.strip(): # Skip empty strings fragment = f'AREA[EligibilityCriteria]("{mutation}")' fragments.append(fragment) return fragments def _build_excluded_mutations_essie(mutations: list[str]) -> list[str]: """Build Essie fragments for excluded mutations.""" fragments = [] for mutation in mutations: if mutation.strip(): # Skip empty strings fragment = f'AREA[EligibilityCriteria](NOT "{mutation}")' fragments.append(fragment) return fragments def _build_biomarker_expression_essie(biomarkers: dict[str, str]) -> list[str]: """Build Essie fragments for biomarker expression requirements.""" fragments = [] for marker, expression in biomarkers.items(): if marker.strip() and expression.strip(): # Skip empty values fragment = ( f'AREA[EligibilityCriteria]("{marker}" AND 
async def convert_query(query: TrialQuery) -> dict[str, list[str]]:  # noqa: C901
    """Convert a TrialQuery into query params for ClinicalTrials.gov (v2).

    Each key maps to a list of strings, consistent with ``parse_qs``
    outputs.

    The parameters are assembled in this order:

    1. ``format`` / ``markupFormat`` (always present).
    2. ``query.cond`` from ``query.conditions``; when
       ``query.expand_synonyms`` is set, each condition is expanded via
       ``BioThingsClient.get_disease_synonyms`` and de-duplicated
       case-insensitively.
    3. ``query.term`` / ``query.intr`` from terms and interventions.
    4. Eligibility-criteria Essie fragments (prior therapies, progression,
       mutations, biomarkers, line of therapy, brain metastases), AND-ed
       together and appended to ``query.term``.
    5. ``filter.geo`` and ``filter.advanced``.
    6. NCT IDs via ``_inject_ids`` and then ``filter.overallStatus``.
    7. ``sort``, ``pageToken``, ``fields`` and ``pageSize``.

    Args:
        query: Search criteria to translate.

    Returns:
        A new dict of request parameters; the lists in it are not shared
        with module-level constants.
    """
    # Start with required fields
    params: dict[str, list[str]] = {
        "format": [DEFAULT_FORMAT],
        "markupFormat": [DEFAULT_MARKUP],
    }

    # Track whether we have other filters (for NCT ID intersection logic)
    has_other_filters = False

    # Handle conditions with optional synonym expansion
    if query.conditions:
        has_other_filters = True
        expanded_conditions: list[str] = []
        if query.expand_synonyms:
            # Expand each condition with synonyms
            client = BioThingsClient()
            for condition in query.conditions:
                try:
                    synonyms = await client.get_disease_synonyms(condition)
                    # An empty/None result must not drop the condition the
                    # user asked for: fall back to the literal term.
                    expanded_conditions.extend(synonyms or [condition])
                except Exception as e:
                    # Best effort: synonym lookup failures degrade to an
                    # unexpanded search instead of failing the request.
                    logger.warning(
                        f"Failed to get synonyms for {condition}: {e}"
                    )
                    expanded_conditions.append(condition)
        else:
            expanded_conditions = query.conditions

        # Remove duplicates (case-insensitive) while preserving order
        seen: set[str] = set()
        unique_conditions: list[str] = []
        for cond in expanded_conditions:
            folded = cond.lower()
            if folded not in seen:
                seen.add(folded)
                unique_conditions.append(cond)

        if len(unique_conditions) == 1:
            params["query.cond"] = [unique_conditions[0]]
        else:
            # Join multiple terms with OR, wrapped in parentheses
            params["query.cond"] = [f"({' OR '.join(unique_conditions)})"]

    # Handle terms and interventions (no synonym expansion)
    for key, val in [
        ("query.term", query.terms),
        ("query.intr", query.interventions),
    ]:
        if val:
            has_other_filters = True
            if len(val) == 1:
                params[key] = [val[0]]
            else:
                # Join multiple terms with OR, wrapped in parentheses
                params[key] = [f"({' OR '.join(val)})"]

    # Collect Essie fragments for eligibility criteria
    essie_fragments: list[str] = []

    # Prior therapies
    if query.prior_therapies:
        has_other_filters = True
        essie_fragments.extend(
            _build_prior_therapy_essie(query.prior_therapies)
        )

    # Progression on therapies
    if query.progression_on:
        has_other_filters = True
        essie_fragments.extend(_build_progression_essie(query.progression_on))

    # Required mutations
    if query.required_mutations:
        has_other_filters = True
        essie_fragments.extend(
            _build_required_mutations_essie(query.required_mutations)
        )

    # Excluded mutations
    if query.excluded_mutations:
        has_other_filters = True
        essie_fragments.extend(
            _build_excluded_mutations_essie(query.excluded_mutations)
        )

    # Biomarker expression
    if query.biomarker_expression:
        has_other_filters = True
        essie_fragments.extend(
            _build_biomarker_expression_essie(query.biomarker_expression)
        )

    # Line of therapy
    if query.line_of_therapy:
        has_other_filters = True
        line_fragment = _build_line_of_therapy_essie(query.line_of_therapy)
        if line_fragment:
            essie_fragments.append(line_fragment)

    # Brain metastases filter (counts as a user filter even when it
    # contributes no fragment)
    if query.allow_brain_mets is not None:
        has_other_filters = True
        brain_fragment = _build_brain_mets_essie(query.allow_brain_mets)
        if brain_fragment:
            essie_fragments.append(brain_fragment)

    # Combine all Essie fragments with AND and append to query.term
    if essie_fragments:
        combined_essie = " AND ".join(essie_fragments)
        if "query.term" in params:
            # Append to existing terms with AND
            params["query.term"][0] = (
                f"{params['query.term'][0]} AND {combined_essie}"
            )
        else:
            params["query.term"] = [combined_essie]

    # Geospatial
    if query.lat is not None and query.long is not None:
        has_other_filters = True
        geo_val = f"distance({query.lat},{query.long},{query.distance}mi)"
        params["filter.geo"] = [geo_val]

    # Collect advanced filters in a list
    advanced_filters: list[str] = []

    # Date filter: an open end of the range is sent as MIN / MAX
    if query.date_field and (query.min_date or query.max_date):
        has_other_filters = True
        date_field = CTGOV_DATE_FIELD_MAPPING[query.date_field]
        min_val = query.min_date or "MIN"
        max_val = query.max_date or "MAX"
        advanced_filters.append(
            f"AREA[{date_field}]RANGE[{min_val},{max_val}]",
        )

    # Prepare a map of "AREA[...] -> (query_value, mapping_dict)"
    advanced_map = {
        "DesignPrimaryPurpose": (
            query.primary_purpose,
            CTGOV_PRIMARY_PURPOSE_MAPPING,
        ),
        "StudyType": (query.study_type, CTGOV_STUDY_TYPE_MAPPING),
        "InterventionType": (
            query.intervention_type,
            CTGOV_INTERVENTION_TYPE_MAPPING,
        ),
        "SponsorType": (query.sponsor_type, CTGOV_SPONSOR_TYPE_MAPPING),
        "StudyDesign": (query.study_design, CTGOV_STUDY_DESIGN_MAPPING),
        "Phase": (query.phase, CTGOV_PHASE_MAPPING),
    }

    # Append advanced filters
    for area, (qval, mapping) in advanced_map.items():
        if qval:
            has_other_filters = True
            # Check if mapping is a dict before using get method
            mapped = (
                mapping.get(qval)
                if mapping and isinstance(mapping, dict)
                else None
            )
            # Use the first mapped value if available, otherwise the literal
            value = mapped[0] if mapped else qval
            advanced_filters.append(f"AREA[{area}]{value}")

    # Age group
    if query.age_group and query.age_group != "ALL":
        has_other_filters = True
        mapped = CTGOV_AGE_GROUP_MAPPING[query.age_group]
        if mapped:
            advanced_filters.append(f"AREA[StdAge]{mapped[0]}")
        else:
            advanced_filters.append(f"AREA[StdAge]{query.age_group}")

    # If we collected any advanced filters, join them with AND
    if advanced_filters:
        params["filter.advanced"] = [" AND ".join(advanced_filters)]

    # NCT IDs - now using intersection semantics
    # Must be done BEFORE recruiting status to properly detect user-set filters
    if query.nct_ids:
        _inject_ids(params, query.nct_ids, has_other_filters)

    # Recruiting status - apply AFTER NCT ID injection
    if query.recruiting_status not in (None, RecruitingStatus.OPEN):
        # User explicitly set a non-default status
        if query.recruiting_status is not None:  # Type guard for mypy
            statuses = CTGOV_RECRUITING_STATUS_MAPPING.get(
                query.recruiting_status
            )
            if statuses:
                params["filter.overallStatus"] = [",".join(statuses)]
    elif not query.nct_ids or has_other_filters:
        # Apply default OPEN status only if:
        # 1. No NCT IDs provided, OR
        # 2. NCT IDs provided with other filters (intersection mode)
        params["filter.overallStatus"] = [",".join(OPEN_STATUSES)]

    # Sort & paging; an unmapped sort value is passed through verbatim
    if query.sort is None:
        sort_val = CTGOV_SORT_MAPPING[SortOrder.RELEVANCE]
    else:
        sort_val = CTGOV_SORT_MAPPING.get(query.sort, query.sort)
    params["sort"] = [sort_val]

    if query.next_page_hash:
        params["pageToken"] = [query.next_page_hash]

    # Finally, add fields to limit payload size
    if query.return_fields:
        # Use custom fields if specified
        params["fields"] = [",".join(query.return_fields)]
    else:
        # Use default fields. Copy so that callers mutating the returned
        # params cannot alter the shared module-level default.
        params["fields"] = list(SEARCH_FIELDS_PARAM)

    # Set page size (falls back to 40 when unset or 0)
    if query.page_size:
        params["pageSize"] = [str(query.page_size)]
    else:
        params["pageSize"] = ["40"]

    return params
query.progression_on, query.required_mutations, query.excluded_mutations, query.biomarker_expression, query.line_of_therapy, query.allow_brain_mets is not None, ]) if has_other_filters: logger.debug( "Filter mode: intersection (NCT IDs AND other filters)" ) else: logger.debug("Filter mode: id-only (NCT IDs only)") response, error = await http_client.request_api( url=CLINICAL_TRIALS_BASE_URL, request=params, method="GET", tls_version=TLSVersion.TLSv1_2, domain="trial", ) data = response if error: data = {"error": f"Error {error.code}: {error.message}"} if data and not output_json: return render.to_markdown(data) else: return json.dumps(data, indent=2) async def _trial_searcher( call_benefit: Annotated[ str, "Define and summarize why this function is being called and the intended benefit", ], conditions: Annotated[ list[str] | str | None, "Condition terms (e.g., 'breast cancer') - list or comma-separated string", ] = None, terms: Annotated[ list[str] | str | None, "General search terms - list or comma-separated string", ] = None, interventions: Annotated[ list[str] | str | None, "Intervention names (e.g., 'pembrolizumab') - list or comma-separated string", ] = None, recruiting_status: Annotated[ RecruitingStatus | str | None, "Study recruitment status (OPEN, CLOSED, ANY)", ] = None, study_type: Annotated[StudyType | str | None, "Type of study"] = None, nct_ids: Annotated[ list[str] | str | None, "Clinical trial NCT IDs - list or comma-separated string", ] = None, lat: Annotated[ float | None, "Latitude for location search. AI agents should geocode city/location names (e.g., 'Cleveland' → 41.4993, -81.6944) before using this parameter.", ] = None, long: Annotated[ float | None, "Longitude for location search. 
AI agents should geocode city/location names (e.g., 'Cleveland' → 41.4993, -81.6944) before using this parameter.", ] = None, distance: Annotated[ float | None, "Distance from lat/long in miles (default: 50 miles if lat/long provided but distance not specified)", ] = None, min_date: Annotated[ str | None, "Minimum date for filtering (YYYY-MM-DD)" ] = None, max_date: Annotated[ str | None, "Maximum date for filtering (YYYY-MM-DD)" ] = None, date_field: Annotated[ DateField | str | None, "Date field to filter on" ] = None, phase: Annotated[TrialPhase | str | None, "Trial phase filter"] = None, age_group: Annotated[AgeGroup | str | None, "Age group filter"] = None, primary_purpose: Annotated[ PrimaryPurpose | str | None, "Primary purpose of the trial" ] = None, intervention_type: Annotated[ InterventionType | str | None, "Type of intervention" ] = None, sponsor_type: Annotated[ SponsorType | str | None, "Type of sponsor" ] = None, study_design: Annotated[StudyDesign | str | None, "Study design"] = None, sort: Annotated[SortOrder | str | None, "Sort order for results"] = None, next_page_hash: Annotated[ str | None, "Token to retrieve the next page of results" ] = None, prior_therapies: Annotated[ list[str] | str | None, "Prior therapies to search for in eligibility criteria - list or comma-separated string", ] = None, progression_on: Annotated[ list[str] | str | None, "Therapies the patient has progressed on - list or comma-separated string", ] = None, required_mutations: Annotated[ list[str] | str | None, "Required mutations in eligibility criteria - list or comma-separated string", ] = None, excluded_mutations: Annotated[ list[str] | str | None, "Excluded mutations in eligibility criteria - list or comma-separated string", ] = None, biomarker_expression: Annotated[ dict[str, str] | None, "Biomarker expression requirements (e.g., {'PD-L1': '≥50%'})", ] = None, line_of_therapy: Annotated[ LineOfTherapy | str | None, "Line of therapy filter (1L, 2L, 3L+)", ] = None, 
allow_brain_mets: Annotated[ bool | None, "Whether to allow trials that accept brain metastases", ] = None, return_fields: Annotated[ list[str] | str | None, "Specific fields to return in the response - list or comma-separated string", ] = None, page_size: Annotated[ int | None, "Number of results per page (1-1000)", ] = None, expand_synonyms: Annotated[ bool, "Expand condition searches with disease synonyms from MyDisease.info", ] = True, ) -> str: """ Searches for clinical trials based on specified criteria. Parameters: - call_benefit: Define and summarize why this function is being called and the intended benefit - conditions: Condition terms (e.g., "breast cancer") - list or comma-separated string - terms: General search terms - list or comma-separated string - interventions: Intervention names (e.g., "pembrolizumab") - list or comma-separated string - recruiting_status: Study recruitment status (OPEN, CLOSED, ANY) - study_type: Type of study - nct_ids: Clinical trial NCT IDs - list or comma-separated string - lat: Latitude for location search - long: Longitude for location search - distance: Distance from lat/long in miles - min_date: Minimum date for filtering (YYYY-MM-DD) - max_date: Maximum date for filtering (YYYY-MM-DD) - date_field: Date field to filter on - phase: Trial phase filter - age_group: Age group filter - primary_purpose: Primary purpose of the trial - intervention_type: Type of intervention - sponsor_type: Type of sponsor - study_design: Study design - sort: Sort order for results - next_page_hash: Token to retrieve the next page of results - prior_therapies: Prior therapies to search for in eligibility criteria - list or comma-separated string - progression_on: Therapies the patient has progressed on - list or comma-separated string - required_mutations: Required mutations in eligibility criteria - list or comma-separated string - excluded_mutations: Excluded mutations in eligibility criteria - list or comma-separated string - 
async def search_trials_unified(
    query: TrialQuery,
    source: str = "clinicaltrials",
    api_key: str | None = None,
    output_json: bool = False,
) -> str:
    """
    Run a trial search against ClinicalTrials.gov or the NCI CTS API.

    Args:
        query: TrialQuery object with search parameters
        source: Backend selector; "nci" routes to the NCI CTS API, any
            other value (default "clinicaltrials") to ClinicalTrials.gov
        api_key: API key forwarded to the NCI search (only used when
            source="nci")
        output_json: Return raw JSON instead of formatted markdown

    Returns:
        Formatted markdown or JSON string with results
    """
    # Anything that is not explicitly "nci" goes to ClinicalTrials.gov.
    if source != "nci":
        return await search_trials(query, output_json)

    # Imported lazily to avoid a circular import with the NCI module.
    from .nci_search import format_nci_trial_results, search_trials_nci

    nci_payload = await search_trials_nci(query, api_key)
    return (
        json.dumps(nci_payload, indent=2)
        if output_json
        else format_nci_trial_results(nci_payload)
    )
/**
 * Normalise a caller-supplied session ID.
 *
 * Falls back to DEFAULT_SESSION_ID when the value is missing, longer than
 * MAX_SESSION_ID_LENGTH, or empty once every character outside
 * [a-zA-Z0-9-_] has been stripped.
 *
 * @param {string} sessionId - Session ID from query parameter
 * @returns {string} - Sanitized session ID or the default session ID
 */
const validateSessionId = (sessionId) => {
  if (!sessionId) {
    return DEFAULT_SESSION_ID;
  }

  // Oversized IDs are rejected outright rather than truncated.
  const tooLong = sessionId.length > MAX_SESSION_ID_LENGTH;
  if (tooLong) {
    log(`Session ID too long (${sessionId.length} chars), using default`);
    return DEFAULT_SESSION_ID;
  }

  // Keep only letters, digits, hyphen and underscore.
  const cleaned = sessionId.replace(/[^a-zA-Z0-9\-_]/g, "");
  const wasModified = cleaned !== sessionId;
  if (wasModified) {
    log(`Session ID contained invalid characters, sanitized: ${cleaned}`);
  }

  return cleaned === "" ? DEFAULT_SESSION_ID : cleaned;
};
/**
 * Fetch (and cache) a BigQuery OAuth access token.
 *
 * Signs a service-account JWT assertion with the key in
 * `env.BQ_SA_KEY_JSON` and exchanges it at Google's token endpoint for an
 * access token scoped to BigQuery streaming inserts.
 *
 * The resolved token is cached on the function object and reused until
 * shortly before it expires, so repeated inserts do not re-sign and
 * re-exchange on every call. Only the resolved value is cached (never a
 * pending promise), so nothing in flight is shared between requests.
 *
 * @param {object} env  the Hono env (c.env)
 * @returns {Promise<string>} the access token
 * @throws {Error} when the token endpoint responds with a non-2xx status
 *   or the response carries no `access_token`
 */
async function getBQToken(env) {
  const now = Math.floor(Date.now() / 1000);

  // Reuse the cached token while it has more than a minute of life left.
  const cached = getBQToken.cached;
  if (cached && cached.expiresAt - 60 > now) {
    return cached.accessToken;
  }

  // Parse the service‐account JSON key
  const key = JSON.parse(env.BQ_SA_KEY_JSON);

  // Convert PEM private key string into a CryptoKey
  const privateKey = await importPKCS8(key.private_key, "RS256");

  // Build the JWT assertion
  const assertion = await new SignJWT({
    iss: key.client_email,
    scope: "https://www.googleapis.com/auth/bigquery.insertdata",
    aud: "https://oauth2.googleapis.com/token",
    iat: now,
    exp: now + 3600,
  })
    .setProtectedHeader({ alg: "RS256", kid: key.private_key_id })
    .sign(privateKey);

  // Exchange the assertion for an access token
  const resp = await fetch("https://oauth2.googleapis.com/token", {
    method: "POST",
    headers: { "Content-Type": "application/x-www-form-urlencoded" },
    body: new URLSearchParams({
      grant_type: "urn:ietf:params:oauth:grant-type:jwt-bearer",
      assertion,
    }),
  });

  // Surface a failed exchange here instead of returning `undefined` and
  // letting the caller send "Bearer undefined" to BigQuery.
  if (!resp.ok) {
    const errorText = await resp.text();
    throw new Error(
      `BigQuery token request failed: ${resp.status} - ${errorText}`,
    );
  }

  const { access_token, expires_in } = await resp.json();
  if (!access_token) {
    throw new Error("BigQuery token response did not include an access_token");
  }

  // Fall back to the lifetime requested in the assertion when the
  // response does not report one.
  const lifetime = Number.isFinite(expires_in) ? expires_in : 3600;
  getBQToken.cached = {
    accessToken: access_token,
    expiresAt: now + lifetime,
  };

  return access_token;
}
* @param {object} env the Hono env (c.env) * @param {object} row { timestamp, userEmail, query } */ async function insertEvent(env, row) { try { const token = await getBQToken(env); const url = `https://bigquery.googleapis.com/bigquery/v2/projects/` + `${env.BQ_PROJECT_ID}/datasets/${env.BQ_DATASET}` + `/tables/${env.BQ_TABLE}/insertAll`; const response = await fetch(url, { method: "POST", headers: { Authorization: `Bearer ${token}`, "Content-Type": "application/json", }, body: JSON.stringify({ rows: [{ json: row }] }), }); if (!response.ok) { const errorText = await response.text(); throw new Error(`BigQuery API error: ${response.status} - ${errorText}`); } const result = await response.json(); if (result.insertErrors) { throw new Error( `BigQuery insert errors: ${JSON.stringify(result.insertErrors)}`, ); } } catch (error) { console.error(`[BigQuery] Insert failed:`, error.message); throw error; } } /** * Validate a JWT token */ async function validateToken(token, env) { if (!token) { throw new Error("No token provided"); } try { log(`Validating token: ${token.substring(0, 15)}...`); // First try to validate as a self-issued JWT try { const encoder = new TextEncoder(); const secret = encoder.encode(env.JWT_SECRET || "default-jwt-secret-key"); const result = await jwtVerify(token, secret, { issuer: env.STYTCH_PROJECT_ID, }); // Also check if token exists in KV (for revocation checking) const tokenHash = await crypto.subtle.digest( "SHA-256", encoder.encode(token), ); const tokenKey = btoa(String.fromCharCode(...new Uint8Array(tokenHash))) .replace(/\+/g, "-") .replace(/\//g, "_") .replace(/=/g, "") .substring(0, 32); const storedToken = await env.OAUTH_KV.get(`token_hash:${tokenKey}`); if (!storedToken) { log("Token not found in storage - may have been revoked"); throw new Error("Token not found or revoked"); } log("Self-issued JWT validation successful"); return result; } catch (error) { log( `Self-issued JWT validation failed, trying Stytch validation: 
/**
 * Complete the OAuth callback.
 *
 * Exchanges the Stytch OAuth token for user data, mints a one-time
 * authorization code, stores the code's context in `OAUTH_KV` under
 * `auth_code:<code>` (TTL 300 seconds) and redirects the browser to the
 * client's `redirect_uri` with `code` and `state` query parameters.
 *
 * The redirect URI is checked and parsed before any side effect, so a
 * request that cannot be completed neither consumes the Stytch token nor
 * leaves an unusable authorization code in KV.
 *
 * Credentials (the Stytch token and the generated authorization code) are
 * deliberately kept out of the debug log.
 *
 * @param {object} c - Hono context (uses `c.env`)
 * @param {string} token - OAuth token received from Stytch on the callback
 * @param {string} state - `state` value to hand back to the client
 * @param {object} oauthRequest - stored OAuth request (client_id,
 *   redirect_uri, code_challenge)
 * @returns {Promise<Response>} 302 redirect on success; 400 when the
 *   redirect URI is missing; 401 when Stytch rejects the token; 500 on any
 *   other failure
 */
async function processAuthCallback(c, token, state, oauthRequest) {
  log("Authenticating with Stytch API...");

  try {
    // Validate the redirect target first: without it the flow cannot finish.
    if (!oauthRequest.redirect_uri) {
      log("Missing redirect_uri in OAuth request");
      return new Response("Missing redirect URI in OAuth request", {
        status: 400,
        headers: CORS,
      });
    }

    // Parsing up front makes a malformed URI fail (via the catch below)
    // before the one-time token is spent.
    const redirectURL = new URL(oauthRequest.redirect_uri);

    // The callback only ever carries an OAuth token at this point.
    const endpoint = "oauth/authenticate";
    const payload = { token: token };

    // Do not log the payload: it contains the bearer token.
    log(`Using Stytch endpoint: ${endpoint}`);

    const authenticateResp = await fetch(getStytchUrl(c.env, endpoint), {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Basic ${btoa(
          `${c.env.STYTCH_PROJECT_ID}:${c.env.STYTCH_SECRET}`,
        )}`,
      },
      body: JSON.stringify(payload),
    });

    log(`Stytch auth response status: ${authenticateResp.status}`);

    if (!authenticateResp.ok) {
      const errorText = await authenticateResp.text();
      log(`Stytch authentication error: ${errorText}`);
      return new Response(`Authentication failed: ${errorText}`, {
        status: 401,
        headers: CORS,
      });
    }

    const authData = await authenticateResp.json();
    log(
      `Auth data received: ${JSON.stringify({
        user_id: authData.user_id || "unknown",
        has_user: !!authData.user,
      })}`,
    );

    // Generate an authorization code (not logged: it is a credential until
    // it has been exchanged or has expired)
    const authCode = crypto.randomUUID();
    log("Generated authorization code");

    // Store the user info with the authorization code
    const authCodeData = {
      sub: authData.user_id,
      email: authData.user?.emails?.[0]?.email,
      code_challenge: oauthRequest.code_challenge,
      client_id: oauthRequest.client_id,
      redirect_uri: oauthRequest.redirect_uri,
    };

    log(`Storing auth code data: ${JSON.stringify(authCodeData)}`);
    await c.env.OAUTH_KV.put(
      `auth_code:${authCode}`,
      JSON.stringify(authCodeData),
      { expirationTtl: 300 },
    );
    log("Successfully stored auth code data");

    log(`Using redirect URI from request: ${oauthRequest.redirect_uri}`);
    log(`Using state for redirect: ${state}`);

    redirectURL.searchParams.set("code", authCode);
    redirectURL.searchParams.set("state", state);

    log(`Redirecting to: ${redirectURL.origin}${redirectURL.pathname}`);
    return Response.redirect(redirectURL.toString(), 302);
  } catch (error) {
    console.error(`Error in processAuthCallback: ${error}`);
    return new Response(`Authentication processing error: ${error.message}`, {
      status: 500,
      headers: CORS,
    });
  }
}
// Hono middleware guarding the MCP routes with a bearer token.
//
// Reads the Authorization header, validates the token with validateToken()
// and, on success, records the subject and raw token on c.env before
// handing over to the next handler. Any failure ends the request with 401.
//
// NOTE(review): userID/accessToken are written onto c.env; if that object
// is shared between concurrent requests this could leak identity across
// them - confirm, and consider c.set()/c.get() instead.
const stytchBearerTokenAuthMiddleware = async (c, next) => {
  const unauthorized = (message) =>
    new Response(message, {
      status: 401,
      headers: CORS,
    });

  const header = c.req.header("Authorization");
  log(`Auth header present: ${!!header}`);

  const hasBearer = Boolean(header) && header.startsWith("Bearer ");
  if (!hasBearer) {
    return unauthorized("Missing or invalid access token");
  }

  // Strip the "Bearer " prefix (7 characters).
  const bearer = header.substring(7);
  log(`Attempting to validate token: ${bearer.substring(0, 10)}...`);

  try {
    log("Starting token validation...");
    const verified = await validateToken(bearer, c.env);
    const subject = verified.payload.sub;
    log(`Token validation successful! ${subject}`);

    // Make the caller's identity available to downstream handlers.
    c.env.userID = verified.payload.sub;
    c.env.accessToken = bearer;
  } catch (error) {
    log(`Token validation detailed error: ${error.code} ${error.message}`);
    return unauthorized(`Unauthorized: Invalid token - ${error.message}`);
  }

  return next();
};
"refresh_token"], token_endpoint_auth_methods_supported: ["none"], code_challenge_methods_supported: ["S256"], }), { status: 200, headers: { "Content-Type": "application/json", ...CORS }, }, ); }) // OAuth redirect endpoint (redirects to Stytch's hosted UI) .get("/authorize", async (c) => { try { log("Authorize endpoint hit"); const url = new URL(c.req.url); log(`Full authorize URL: ${url.toString()}`); log( `Search params: ${JSON.stringify( Object.fromEntries(url.searchParams), )}`, ); const redirectUrl = new URL("/callback", url.origin).toString(); log(`Redirect URL: ${redirectUrl}`); // Extract and forward OAuth parameters const clientId = url.searchParams.get("client_id") || "unknown_client"; const redirectUri = url.searchParams.get("redirect_uri"); let state = url.searchParams.get("state"); const codeChallenge = url.searchParams.get("code_challenge"); const codeChallengeMethod = url.searchParams.get("code_challenge_method"); // Generate a state if one isn't provided if (!state) { state = crypto.randomUUID(); log(`Generated state parameter: ${state}`); } log("OAuth params:", { clientId, redirectUri, state, codeChallenge: !!codeChallenge, codeChallengeMethod, }); // Store OAuth request parameters in KV for use during callback const oauthRequestData = { client_id: clientId, redirect_uri: redirectUri, code_challenge: codeChallenge, code_challenge_method: codeChallengeMethod, original_state: state, // Store the original state explicitly }; // Also store a mapping from any state value to the original state // This is crucial for handling cases where Stytch modifies the state try { // Use a consistent key based on timestamp for lookups const timestamp = Date.now().toString(); await c.env.OAUTH_KV.put(`state_timestamp:${timestamp}`, state, { expirationTtl: 600, }); log(`Saving OAuth request data: ${JSON.stringify(oauthRequestData)}`); await c.env.OAUTH_KV.put( `oauth_request:${state}`, JSON.stringify(oauthRequestData), { expirationTtl: 600 }, ); // Also store 
timestamp for this state to allow fallback lookup await c.env.OAUTH_KV.put(`timestamp_for_state:${state}`, timestamp, { expirationTtl: 600, }); log("Successfully stored OAuth request data in KV"); } catch (kvError) { log(`Error storing OAuth data in KV: ${kvError}`); return new Response("Internal server error storing OAuth data", { status: 500, headers: CORS, }); } // Redirect to Stytch's hosted login UI const stytchLoginUrl = `${ c.env.STYTCH_OAUTH_URL || "https://test.stytch.com/v1/public/oauth/google/start" }?public_token=${ c.env.STYTCH_PUBLIC_TOKEN }&login_redirect_url=${encodeURIComponent( redirectUrl, )}&state=${encodeURIComponent(state)}`; log(`Redirecting to Stytch: ${stytchLoginUrl}`); return Response.redirect(stytchLoginUrl, 302); } catch (error) { console.error(`Error in authorize endpoint: ${error}`); return new Response(`Authorization error: ${error.message}`, { status: 500, headers: CORS, }); } }) // OAuth callback endpoint .get("/callback", async (c) => { try { log("Callback hit, logging all details"); const url = new URL(c.req.url); log(`Full URL: ${url.toString()}`); log( `Search params: ${JSON.stringify( Object.fromEntries(url.searchParams), )}`, ); // Stytch's callback format - get the token const token = url.searchParams.get("stytch_token_type") === "oauth" ? 
url.searchParams.get("token") : url.searchParams.get("token") || url.searchParams.get("stytch_token"); log(`Token type: ${url.searchParams.get("stytch_token_type")}`); log(`Token found: ${!!token}`); // We need a token to proceed if (!token) { log("Invalid callback - missing token"); return new Response("Invalid callback request: missing token", { status: 400, headers: CORS, }); } // Look for the most recent OAuth request let mostRecentState = null; let mostRecentTimestamp = null; try { // Find the most recent timestamp const timestamps = await c.env.OAUTH_KV.list({ prefix: "state_timestamp:", }); if (timestamps.keys.length > 0) { // Sort timestamps in descending order (most recent first) const sortedTimestamps = timestamps.keys.sort((a, b) => { const timeA = parseInt(a.name.replace("state_timestamp:", "")); const timeB = parseInt(b.name.replace("state_timestamp:", "")); return timeB - timeA; // descending order }); mostRecentTimestamp = sortedTimestamps[0].name; // Get the state associated with this timestamp mostRecentState = await c.env.OAUTH_KV.get(mostRecentTimestamp); log(`Found most recent state: ${mostRecentState}`); } } catch (error) { log(`Error finding recent state: ${error}`); } // If we have a state from the most recent OAuth request, use it let oauthRequest = null; let state = mostRecentState; if (state) { try { const oauthRequestJson = await c.env.OAUTH_KV.get( `oauth_request:${state}`, ); if (oauthRequestJson) { oauthRequest = JSON.parse(oauthRequestJson); log(`Found OAuth request for state: ${state}`); } } catch (error) { log(`Error getting OAuth request: ${error}`); } } // If we couldn't find the OAuth request, try other alternatives if (!oauthRequest) { log( "No OAuth request found for most recent state, checking other requests", ); try { // List all OAuth requests and use the most recent one const requests = await c.env.OAUTH_KV.list({ prefix: "oauth_request:", }); if (requests.keys.length > 0) { const oauthRequestJson = await 
c.env.OAUTH_KV.get( requests.keys[0].name, ); if (oauthRequestJson) { oauthRequest = JSON.parse(oauthRequestJson); // Extract the state from the key state = requests.keys[0].name.replace("oauth_request:", ""); log(`Using most recent OAuth request with state: ${state}`); } } } catch (error) { log(`Error finding alternative OAuth request: ${error}`); } } // Final fallback - use hardcoded values for Claude if (!oauthRequest) { log("No OAuth request found, using fallback values"); oauthRequest = { client_id: "biomcp-client", redirect_uri: "https://claude.ai/api/mcp/auth_callback", code_challenge: null, original_state: state || "unknown_state", }; } // If we have an original_state in the OAuth request, use that if (oauthRequest.original_state) { state = oauthRequest.original_state; log(`Using original state from OAuth request: ${state}`); } // Proceed with authentication return processAuthCallback(c, token, state, oauthRequest); } catch (error) { console.error(`Callback error: ${error}`); return new Response( `Server error during authentication: ${error.message}`, { status: 500, headers: CORS, }, ); } }) // Token exchange endpoint .post("/token", async (c) => { try { log("Token endpoint hit"); const formData = await c.req.formData(); const grantType = formData.get("grant_type"); const code = formData.get("code"); const redirectUri = formData.get("redirect_uri"); const clientId = formData.get("client_id"); const codeVerifier = formData.get("code_verifier"); log("Token request params:", { grantType, code: !!code, redirectUri, clientId, codeVerifier: !!codeVerifier, }); if ( grantType !== "authorization_code" || !code || !redirectUri || !clientId || !codeVerifier ) { log("Invalid token request parameters"); return new Response(JSON.stringify({ error: "invalid_request" }), { status: 400, headers: { "Content-Type": "application/json", ...CORS }, }); } // Retrieve the stored authorization code data let authCodeJson; try { authCodeJson = await 
c.env.OAUTH_KV.get(`auth_code:${code}`); log(`Auth code data retrieved: ${!!authCodeJson}`); } catch (kvError) { log(`Error retrieving auth code data: ${kvError}`); return new Response(JSON.stringify({ error: "server_error" }), { status: 500, headers: { "Content-Type": "application/json", ...CORS }, }); } if (!authCodeJson) { log("Invalid or expired authorization code"); return new Response(JSON.stringify({ error: "invalid_grant" }), { status: 400, headers: { "Content-Type": "application/json", ...CORS }, }); } let authCodeData; try { authCodeData = JSON.parse(authCodeJson); log(`Auth code data parsed: ${JSON.stringify(authCodeData)}`); } catch (parseError) { log(`Error parsing auth code data: ${parseError}`); return new Response(JSON.stringify({ error: "server_error" }), { status: 500, headers: { "Content-Type": "application/json", ...CORS }, }); } // Verify the code_verifier against the stored code_challenge if (authCodeData.code_challenge) { log("Verifying PKCE code challenge"); const encoder = new TextEncoder(); const data = encoder.encode(codeVerifier); const digest = await crypto.subtle.digest("SHA-256", data); // Convert to base64url encoding const base64Digest = btoa( String.fromCharCode(...new Uint8Array(digest)), ) .replace(/\+/g, "-") .replace(/\//g, "_") .replace(/=/g, ""); log("Code challenge comparison:", { stored: authCodeData.code_challenge, computed: base64Digest, match: base64Digest === authCodeData.code_challenge, }); if (base64Digest !== authCodeData.code_challenge) { log("PKCE verification failed"); return new Response(JSON.stringify({ error: "invalid_grant" }), { status: 400, headers: { "Content-Type": "application/json", ...CORS }, }); } } // Delete the used authorization code try { await c.env.OAUTH_KV.delete(`auth_code:${code}`); log("Used authorization code deleted"); } catch (deleteError) { log(`Error deleting used auth code: ${deleteError}`); // Continue anyway since this isn't critical } // Generate JWT access token instead of UUID 
const encoder = new TextEncoder(); const secret = encoder.encode( c.env.JWT_SECRET || "default-jwt-secret-key", ); // Create JWT payload const accessTokenPayload = { sub: authCodeData.sub, email: authCodeData.email, client_id: clientId, scope: "openid profile email", iss: c.env.STYTCH_PROJECT_ID, aud: clientId, exp: Math.floor(Date.now() / 1000) + 3600, // 1 hour expiry iat: Math.floor(Date.now() / 1000), }; // Sign JWT const accessToken = await new SignJWT(accessTokenPayload) .setProtectedHeader({ alg: "HS256" }) .setIssuedAt() .setExpirationTime("1h") .sign(secret); log(`Generated JWT access token: ${accessToken.substring(0, 20)}...`); // Generate refresh token (still using UUID for simplicity) const refreshToken = crypto.randomUUID(); // Store token information - use a hash of the token as the key to avoid length limits const tokenHash = await crypto.subtle.digest( "SHA-256", encoder.encode(accessToken), ); const tokenKey = btoa(String.fromCharCode(...new Uint8Array(tokenHash))) .replace(/\+/g, "-") .replace(/\//g, "_") .replace(/=/g, "") .substring(0, 32); // Use first 32 chars of hash try { log(`Storing access token with key: access_token:${tokenKey}`); await c.env.OAUTH_KV.put( `access_token:${tokenKey}`, JSON.stringify({ token: accessToken, hash: tokenKey, ...accessTokenPayload, }), { expirationTtl: 3600 }, ); // Also store a mapping from the full token to the hash for validation await c.env.OAUTH_KV.put(`token_hash:${tokenKey}`, accessToken, { expirationTtl: 3600, }); log("Storing refresh token"); await c.env.OAUTH_KV.put( `refresh_token:${refreshToken}`, JSON.stringify({ sub: authCodeData.sub, client_id: clientId, }), { expirationTtl: 30 * 24 * 60 * 60 }, ); log("Token data successfully stored"); } catch (storeError) { log(`Error storing token data: ${storeError}`); return new Response(JSON.stringify({ error: "server_error" }), { status: 500, headers: { "Content-Type": "application/json", ...CORS }, }); } // Return the tokens const tokenResponse = { 
access_token: accessToken, token_type: "Bearer", expires_in: 3600, refresh_token: refreshToken, scope: "openid profile email", }; log("Returning token response"); return new Response(JSON.stringify(tokenResponse), { status: 200, headers: { "Content-Type": "application/json", ...CORS }, }); } catch (error) { console.error(`Token endpoint error: ${error}`); return new Response(JSON.stringify({ error: "server_error" }), { status: 500, headers: { "Content-Type": "application/json", ...CORS }, }); } }) // Messages endpoint for all paths that start with /messages .post("/messages*", async (c) => { log("All messages endpoints hit"); const REMOTE_MCP_SERVER_URL = c.env.REMOTE_MCP_SERVER_URL || "http://localhost:8000"; const sid = new URL(c.req.url).searchParams.get("session_id"); if (!sid) { return new Response("Missing session_id", { status: 400, headers: CORS, }); } // Read the body const body = await c.req.text(); const authHeader = c.req.header("Authorization") || ""; let userEmail = "unknown"; if (authHeader.startsWith("Bearer ")) { const token = authHeader.slice(7); const claims = decodeJwt(token); userEmail = claims.email || claims.preferred_username || claims.sub || "unknown"; } log(`[Proxy] user=${userEmail} query=${body}`); let sendToBQ = false; let parsed; let domain = null; let toolName = null; let sanitizedBody = body; // Default to original body try { parsed = JSON.parse(body); const args = parsed.params?.arguments; // Check if this is a think tool call toolName = parsed.params?.name; if (toolName === "think") { sendToBQ = false; log("[BigQuery] Skipping think tool call"); } else if (args && Object.keys(args).length > 0) { // Extract domain from the arguments (for search/fetch tools) domain = args.domain || null; // Skip logging if domain is "thinking" or "think" if (domain === "thinking" || domain === "think") { sendToBQ = false; } else { sendToBQ = true; } // Sanitize sensitive data before logging to BigQuery if (sendToBQ) { // Use the comprehensive 
// MCP endpoint (Streamable HTTP transport) - separate chain to avoid wildcard route issues
//
// Registers, in order: HEAD /mcp, GET /mcp, POST /mcp, then a catch-all 404.
// All three /mcp routes run stytchBearerTokenAuthMiddleware before the
// handler. The catch-all is registered last in this chain.
app
  // HEAD /mcp: existence probe, always answers 204 with the CORS headers.
  .on("HEAD", "/mcp", stytchBearerTokenAuthMiddleware, (c) => {
    log("MCP HEAD endpoint hit - checking endpoint availability");
    // For Streamable HTTP, HEAD /mcp should return 204 to indicate the endpoint exists
    return new Response(null, {
      status: 204,
      headers: CORS,
    });
  })
  // GET /mcp: with ?session_id=... proxies to `${REMOTE_MCP_SERVER_URL}/mcp`
  // and relays the backend response; without it answers 204.
  .get("/mcp", stytchBearerTokenAuthMiddleware, async (c) => {
    log("MCP GET endpoint hit - Streamable HTTP transport");
    // Backend base URL; falls back to a local server when unset.
    const REMOTE_MCP_SERVER_URL =
      c.env.REMOTE_MCP_SERVER_URL || "http://localhost:8000";

    // For Streamable HTTP, GET /mcp with session_id initiates event stream
    const sessionId = new URL(c.req.url).searchParams.get("session_id");
    if (!sessionId) {
      // Without session_id, just return 204 to indicate endpoint exists
      return new Response(null, {
        status: 204,
        headers: CORS,
      });
    }

    // Proxy the GET request to the backend's /mcp endpoint for streaming
    // NOTE(review): unlike POST /mcp below, the session id is only
    // URL-encoded here and is not passed through validateSessionId.
    const targetUrl = `${REMOTE_MCP_SERVER_URL}/mcp?session_id=${encodeURIComponent(
      sessionId,
    )}`;
    log(`Proxying GET /mcp to: ${targetUrl}`);

    try {
      // Only these two headers are sent upstream; the caller's own request
      // headers (including Authorization) are not forwarded.
      const response = await fetch(targetUrl, {
        method: "GET",
        headers: {
          Accept: "text/event-stream",
          "User-Agent": "Claude/1.0",
        },
      });

      // For SSE, we need to stream the response
      if (response.headers.get("content-type")?.includes("text/event-stream")) {
        log("Streaming SSE response from backend");
        // Return the streamed response directly
        // (the backend body stream is handed to the new Response unread).
        return new Response(response.body, {
          status: response.status,
          headers: {
            "Content-Type": "text/event-stream",
            "Cache-Control": "no-cache",
            Connection: "keep-alive",
            ...CORS,
          },
        });
      } else {
        // Non-streaming response
        // Read the whole body as text and relay it with the backend's
        // status and content type (text/plain when none was sent).
        const responseText = await response.text();
        return new Response(responseText, {
          status: response.status,
          headers: {
            "Content-Type":
              response.headers.get("content-type") || "text/plain",
            ...CORS,
          },
        });
      }
    } catch (error) {
      // fetch() or body read failed: report as a 502 from the proxy.
      log(`Error proxying GET /mcp: ${error}`);
      return new Response(`Proxy error: ${error.message}`, {
        status: 502,
        headers: CORS,
      });
    }
  })
  // POST /mcp: re-wraps the already-read body in a new Request and hands it
  // to proxyPost together with the validated session id.
  .post("/mcp", stytchBearerTokenAuthMiddleware, async (c) => {
    log("MCP POST endpoint hit - Streamable HTTP transport");
    const REMOTE_MCP_SERVER_URL =
      c.env.REMOTE_MCP_SERVER_URL || "http://localhost:8000";

    // Extract and validate session ID
    // NOTE(review): validateSessionId is defined outside this chunk; its
    // behavior for a missing (null) id should be confirmed there.
    const rawSessionId = new URL(c.req.url).searchParams.get("session_id");
    const sessionId = validateSessionId(rawSessionId);

    // Get the request body
    // Only the first 200 characters of the body are logged.
    const bodyText = await c.req.text();
    log(`MCP POST request body: ${bodyText.substring(0, 200)}`);

    // Create new request for proxying
    // NOTE(review): confirm `c.req.headers` is defined for the framework
    // version in use; if it is undefined the proxied request carries no
    // original headers.
    const newRequest = new Request(c.req.url, {
      method: "POST",
      headers: c.req.headers,
      body: bodyText,
    });

    // Use the updated proxyPost function that handles SSE properly
    return proxyPost(newRequest, REMOTE_MCP_SERVER_URL, "/mcp", sessionId);
  })
  // Default 404 response
  .all(
    "*",
    () =>
      new Response("Not Found", {
        status: 404,
        headers: CORS,
      }),
  );

// Export the app as the main worker fetch handler
export default {
  fetch: (request, env, ctx) => {
    // Initialize DEBUG from environment variables
    // (accepts the string "true" or the boolean true; re-evaluated on every
    // request because it is assigned inside the fetch handler).
    DEBUG = env.DEBUG === "true" || env.DEBUG === true;
    return app.fetch(request, env, ctx);
  },
};
""" import logging from typing import Annotated, Literal from pydantic import Field from biomcp.articles.fetch import _article_details from biomcp.articles.search import _article_searcher from biomcp.cbioportal_helper import ( get_cbioportal_summary_for_genes, get_variant_cbioportal_summary, ) from biomcp.core import ensure_list, mcp_app from biomcp.diseases.getter import _disease_details from biomcp.drugs.getter import _drug_details from biomcp.genes.getter import _gene_details from biomcp.metrics import track_performance from biomcp.trials.getter import ( _trial_locations, _trial_outcomes, _trial_protocol, _trial_references, ) from biomcp.trials.search import _trial_searcher from biomcp.variants.getter import _variant_details from biomcp.variants.search import _variant_searcher logger = logging.getLogger(__name__) # Article Tools @mcp_app.tool() @track_performance("biomcp.article_searcher") async def article_searcher( chemicals: Annotated[ list[str] | str | None, Field(description="Chemical/drug names to search for"), ] = None, diseases: Annotated[ list[str] | str | None, Field(description="Disease names to search for"), ] = None, genes: Annotated[ list[str] | str | None, Field(description="Gene symbols to search for"), ] = None, keywords: Annotated[ list[str] | str | None, Field(description="Free-text keywords to search for"), ] = None, variants: Annotated[ list[str] | str | None, Field( description="Variant strings to search for (e.g., 'V600E', 'p.D277Y')" ), ] = None, include_preprints: Annotated[ bool, Field(description="Include preprints from bioRxiv/medRxiv"), ] = True, include_cbioportal: Annotated[ bool, Field( description="Include cBioPortal cancer genomics summary when searching by gene" ), ] = True, page: Annotated[ int, Field(description="Page number (1-based)", ge=1), ] = 1, page_size: Annotated[ int, Field(description="Results per page", ge=1, le=100), ] = 10, ) -> str: """Search PubMed/PubTator3 for research articles and preprints. 
⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy! Use this tool to find scientific literature ABOUT genes, variants, diseases, or chemicals. Results include articles from PubMed and optionally preprints from bioRxiv/medRxiv. Important: This searches for ARTICLES ABOUT these topics, not database records. For genetic variant database records, use variant_searcher instead. Example usage: - Find articles about BRAF mutations in melanoma - Search for papers on a specific drug's effects - Locate research on gene-disease associations """ # Convert single values to lists chemicals = ensure_list(chemicals) if chemicals else None diseases = ensure_list(diseases) if diseases else None genes = ensure_list(genes) if genes else None keywords = ensure_list(keywords) if keywords else None variants = ensure_list(variants) if variants else None result = await _article_searcher( call_benefit="Direct article search for specific biomedical topics", chemicals=chemicals, diseases=diseases, genes=genes, keywords=keywords, variants=variants, include_preprints=include_preprints, include_cbioportal=include_cbioportal, ) # Add cBioPortal summary if searching by gene if include_cbioportal and genes: request_params = { "keywords": keywords, "diseases": diseases, "chemicals": chemicals, "variants": variants, } cbioportal_summary = await get_cbioportal_summary_for_genes( genes, request_params ) if cbioportal_summary: result = cbioportal_summary + "\n\n---\n\n" + result return result @mcp_app.tool() @track_performance("biomcp.article_getter") async def article_getter( pmid: Annotated[ str, Field( description="Article identifier - either a PubMed ID (e.g., '38768446' or 'PMC11193658') or DOI (e.g., '10.1101/2024.01.20.23288905')" ), ], ) -> str: """Fetch detailed information for a specific article. Retrieves the full abstract and available text for an article by its identifier. 
@mcp_app.tool()
@track_performance("biomcp.trial_searcher")
async def trial_searcher(
    conditions: Annotated[
        list[str] | str | None,
        Field(description="Medical conditions to search for"),
    ] = None,
    interventions: Annotated[
        list[str] | str | None,
        Field(description="Treatment interventions to search for"),
    ] = None,
    other_terms: Annotated[
        list[str] | str | None,
        Field(description="Additional search terms"),
    ] = None,
    recruiting_status: Annotated[
        Literal["OPEN", "CLOSED", "ANY"] | None,
        Field(description="Filter by recruiting status"),
    ] = None,
    phase: Annotated[
        Literal[
            "EARLY_PHASE1",
            "PHASE1",
            "PHASE2",
            "PHASE3",
            "PHASE4",
            "NOT_APPLICABLE",
        ]
        | None,
        Field(description="Filter by clinical trial phase"),
    ] = None,
    location: Annotated[
        str | None,
        Field(description="Location term for geographic filtering"),
    ] = None,
    lat: Annotated[
        float | None,
        Field(
            description="Latitude for location-based search. AI agents should geocode city names before using.",
            ge=-90,
            le=90,
        ),
    ] = None,
    long: Annotated[
        float | None,
        Field(
            description="Longitude for location-based search. AI agents should geocode city names before using.",
            ge=-180,
            le=180,
        ),
    ] = None,
    distance: Annotated[
        int | None,
        Field(
            description="Distance in miles from lat/long coordinates",
            ge=1,
        ),
    ] = None,
    age_group: Annotated[
        Literal["CHILD", "ADULT", "OLDER_ADULT"] | None,
        Field(description="Filter by age group"),
    ] = None,
    sex: Annotated[
        Literal["FEMALE", "MALE", "ALL"] | None,
        Field(description="Filter by biological sex"),
    ] = None,
    healthy_volunteers: Annotated[
        Literal["YES", "NO"] | None,
        Field(description="Filter by healthy volunteer eligibility"),
    ] = None,
    study_type: Annotated[
        Literal["INTERVENTIONAL", "OBSERVATIONAL", "EXPANDED_ACCESS"] | None,
        Field(description="Filter by study type"),
    ] = None,
    funder_type: Annotated[
        Literal["NIH", "OTHER_GOV", "INDUSTRY", "OTHER"] | None,
        Field(description="Filter by funding source"),
    ] = None,
    page: Annotated[
        int,
        Field(description="Page number (1-based)", ge=1),
    ] = 1,
    page_size: Annotated[
        int,
        Field(description="Results per page", ge=1, le=100),
    ] = 10,
) -> str:
    """Search ClinicalTrials.gov for clinical studies.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy!

    Comprehensive search tool for finding clinical trials based on multiple criteria.
    Supports filtering by conditions, interventions, location, phase, and eligibility.

    Location search notes:
    - Use either location term OR lat/long coordinates, not both
    - For city-based searches, AI agents should geocode to lat/long first
    - Distance parameter only works with lat/long coordinates

    Returns a formatted list of matching trials with key details.
    """
    # Validate location parameters
    if location and (lat is not None or long is not None):
        raise ValueError(
            "Use either location term OR lat/long coordinates, not both"
        )

    # Exactly one of lat/long supplied.
    if (lat is None) != (long is None):
        raise ValueError(
            "Both latitude and longitude must be provided together"
        )

    if distance is not None and (lat is None or long is None):
        raise ValueError(
            "Distance parameter requires both latitude and longitude"
        )

    # These filters are accepted by the signature but the searcher call below
    # does not receive them. Report any that the caller actually set so the
    # unfiltered result is not mistaken for a filtered one.
    not_forwarded = {
        name: value
        for name, value in (
            ("location", location),
            ("sex", sex),
            ("healthy_volunteers", healthy_volunteers),
            ("funder_type", funder_type),
        )
        if value is not None
    }
    if page != 1:
        not_forwarded["page"] = page
    if not_forwarded:
        logger.warning(
            "trial_searcher: these arguments are not forwarded to the trial "
            "search and are ignored: %s",
            not_forwarded,
        )

    # Convert single values to lists (empty/None inputs become None)
    conditions = ensure_list(conditions) if conditions else None
    interventions = ensure_list(interventions) if interventions else None
    other_terms = ensure_list(other_terms) if other_terms else None

    return await _trial_searcher(
        call_benefit="Direct clinical trial search for specific criteria",
        conditions=conditions,
        interventions=interventions,
        terms=other_terms,  # exposed to callers as `other_terms`
        recruiting_status=recruiting_status,
        phase=phase,
        lat=lat,
        long=long,
        distance=distance,
        age_group=age_group,
        study_type=study_type,
        page_size=page_size,
    )
@mcp_app.tool()
@track_performance("biomcp.trial_protocol_getter")
async def trial_protocol_getter(
    nct_id: Annotated[
        str,
        Field(description="NCT ID (e.g., 'NCT06524388')"),
    ],
) -> str:
    """Fetch core protocol information for a clinical trial.

    Retrieves essential protocol details including:
    - Official title and brief summary
    - Study status and sponsor information
    - Study design (type, phase, allocation, masking)
    - Eligibility criteria
    - Primary completion date
    """
    # Thin wrapper: delegate to the shared protocol fetcher and hand its
    # result back unchanged.
    protocol_text = await _trial_protocol(
        nct_id=nct_id,
        call_benefit="Fetch trial protocol information for eligibility assessment",
    )
    return protocol_text
@mcp_app.tool()
@track_performance("biomcp.trial_outcomes_getter")
async def trial_outcomes_getter(
    nct_id: Annotated[
        str,
        Field(description="NCT ID (e.g., 'NCT06524388')"),
    ],
) -> str:
    """Fetch outcome measures and results for a clinical trial.

    Retrieves detailed outcome information including:
    - Primary outcome measures
    - Secondary outcome measures
    - Results data (if available)
    - Adverse events (if reported)

    Note: Results are only available for completed trials that have posted data.
    """
    # Thin wrapper: delegate to the shared outcomes fetcher and hand its
    # result back unchanged.
    outcomes_text = await _trial_outcomes(
        nct_id=nct_id,
        call_benefit="Fetch trial outcome measures and results for efficacy assessment",
    )
    return outcomes_text
""" return await _trial_locations( call_benefit="Fetch trial locations and contacts for enrollment information", nct_id=nct_id, ) # Variant Tools @mcp_app.tool() @track_performance("biomcp.variant_searcher") async def variant_searcher( gene: Annotated[ str | None, Field(description="Gene symbol (e.g., 'BRAF', 'TP53')"), ] = None, hgvs: Annotated[ str | None, Field(description="HGVS notation (genomic, coding, or protein)"), ] = None, hgvsp: Annotated[ str | None, Field(description="Protein change in HGVS format (e.g., 'p.V600E')"), ] = None, hgvsc: Annotated[ str | None, Field(description="Coding sequence change (e.g., 'c.1799T>A')"), ] = None, rsid: Annotated[ str | None, Field(description="dbSNP rsID (e.g., 'rs113488022')"), ] = None, region: Annotated[ str | None, Field(description="Genomic region (e.g., 'chr7:140753336-140753337')"), ] = None, significance: Annotated[ Literal[ "pathogenic", "likely_pathogenic", "uncertain_significance", "likely_benign", "benign", "conflicting", ] | None, Field(description="Clinical significance filter"), ] = None, frequency_min: Annotated[ float | None, Field(description="Minimum allele frequency", ge=0, le=1), ] = None, frequency_max: Annotated[ float | None, Field(description="Maximum allele frequency", ge=0, le=1), ] = None, consequence: Annotated[ str | None, Field(description="Variant consequence (e.g., 'missense_variant')"), ] = None, cadd_score_min: Annotated[ float | None, Field(description="Minimum CADD score for pathogenicity"), ] = None, sift_prediction: Annotated[ Literal["deleterious", "tolerated"] | None, Field(description="SIFT functional prediction"), ] = None, polyphen_prediction: Annotated[ Literal["probably_damaging", "possibly_damaging", "benign"] | None, Field(description="PolyPhen-2 functional prediction"), ] = None, include_cbioportal: Annotated[ bool, Field( description="Include cBioPortal cancer genomics summary when searching by gene" ), ] = True, page: Annotated[ int, Field(description="Page number 
(1-based)", ge=1), ] = 1, page_size: Annotated[ int, Field(description="Results per page", ge=1, le=100), ] = 10, ) -> str: """Search MyVariant.info for genetic variant DATABASE RECORDS. ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy! Important: This searches for variant DATABASE RECORDS (frequency, significance, etc.), NOT articles about variants. For articles about variants, use article_searcher. Searches the comprehensive variant database including: - Population frequencies (gnomAD, 1000 Genomes, etc.) - Clinical significance (ClinVar) - Functional predictions (SIFT, PolyPhen, CADD) - Gene and protein consequences Search by various identifiers or filter by clinical/functional criteria. """ result = await _variant_searcher( call_benefit="Direct variant database search for genetic analysis", gene=gene, hgvsp=hgvsp, hgvsc=hgvsc, rsid=rsid, region=region, significance=significance, min_frequency=frequency_min, max_frequency=frequency_max, cadd=cadd_score_min, sift=sift_prediction, polyphen=polyphen_prediction, size=page_size, offset=(page - 1) * page_size if page > 1 else 0, ) # Add cBioPortal summary if searching by gene if include_cbioportal and gene: cbioportal_summary = await get_variant_cbioportal_summary(gene) if cbioportal_summary: result = cbioportal_summary + "\n\n" + result return result @mcp_app.tool() @track_performance("biomcp.variant_getter") async def variant_getter( variant_id: Annotated[ str, Field( description="Variant ID (HGVS, rsID, or MyVariant ID like 'chr7:g.140753336A>T')" ), ], include_external: Annotated[ bool, Field( description="Include external annotations (TCGA, 1000 Genomes, functional predictions)" ), ] = True, ) -> str: """Fetch comprehensive details for a specific genetic variant. 
@mcp_app.tool()
@track_performance("biomcp.alphagenome_predictor")
async def alphagenome_predictor(
    chromosome: Annotated[
        str,
        Field(description="Chromosome (e.g., 'chr7', 'chrX')"),
    ],
    position: Annotated[
        int,
        Field(description="1-based genomic position of the variant"),
    ],
    reference: Annotated[
        str,
        Field(description="Reference allele(s) (e.g., 'A', 'ATG')"),
    ],
    alternate: Annotated[
        str,
        Field(description="Alternate allele(s) (e.g., 'T', 'A')"),
    ],
    interval_size: Annotated[
        int,
        Field(
            description="Size of genomic interval to analyze in bp (max 1,000,000)",
            ge=2000,
            le=1000000,
        ),
    ] = 131072,
    tissue_types: Annotated[
        list[str] | str | None,
        Field(
            description="UBERON ontology terms for tissue-specific predictions (e.g., 'UBERON:0002367' for external ear)"
        ),
    ] = None,
    significance_threshold: Annotated[
        float,
        Field(
            description="Threshold for significant log2 fold changes (default: 0.5)",
            ge=0.0,
            le=5.0,
        ),
    ] = 0.5,
    api_key: Annotated[
        str | None,
        Field(
            description="AlphaGenome API key. Check if user mentioned 'my AlphaGenome API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one."
        ),
    ] = None,
) -> str:
    """Predict variant effects on gene regulation using Google DeepMind's AlphaGenome.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your analysis strategy!

    AlphaGenome provides state-of-the-art predictions for how genetic variants
    affect gene regulation, including:
    - Gene expression changes (RNA-seq)
    - Chromatin accessibility impacts (ATAC-seq, DNase-seq)
    - Splicing alterations
    - Promoter activity changes (CAGE)

    This tool requires:
    1. AlphaGenome to be installed (see error message for instructions)
    2. An API key from https://deepmind.google.com/science/alphagenome

    API Key Options:
    - Provide directly via the api_key parameter
    - Or set ALPHAGENOME_API_KEY environment variable

    Example usage:
    - Predict regulatory effects of BRAF V600E mutation: chr7:140753336 A>T
    - Assess non-coding variant impact on gene expression
    - Evaluate promoter variants in specific tissues

    Note: This is an optional tool that enhances variant interpretation
    with AI predictions. Standard annotations remain available via variant_getter.
    """
    # Imported at call time rather than at module import.
    from biomcp.variants.alphagenome import predict_variant_effects

    # A single term or a list is accepted; an empty/None value stays None.
    tissues = None
    if tissue_types:
        tissues = ensure_list(tissue_types)

    prediction_args = {
        "chromosome": chromosome,
        "position": position,
        "reference": reference,
        "alternate": alternate,
        "interval_size": interval_size,
        "tissue_types": tissues,
        "significance_threshold": significance_threshold,
        "api_key": api_key,
    }
    return await predict_variant_effects(**prediction_args)
- Links to external databases This tool fetches CURRENT gene information from MyGene.info, ensuring you always have the latest annotations and nomenclature. Example usage: - Get information about TP53 tumor suppressor - Look up BRAF kinase gene details - Find the official name for a gene by its alias Note: For genetic variants, use variant_searcher. For articles about genes, use article_searcher. """ return await _gene_details( call_benefit="Get up-to-date gene annotations and information", gene_id_or_symbol=gene_id_or_symbol, ) # Disease Tools @mcp_app.tool() @track_performance("biomcp.disease_getter") async def disease_getter( disease_id_or_name: Annotated[ str, Field( description="Disease name (e.g., 'melanoma', 'lung cancer') or ontology ID (e.g., 'MONDO:0016575', 'DOID:1909')" ), ], ) -> str: """Get detailed disease information from MyDisease.info. ⚠️ PREREQUISITE: Use the 'think' tool FIRST to understand your research goal! Provides real-time disease annotations including: - Official disease name and definition - Disease synonyms and alternative names - Ontology mappings (MONDO, DOID, OMIM, etc.) - Associated phenotypes - Links to disease databases This tool fetches CURRENT disease information from MyDisease.info, ensuring you always have the latest ontology mappings and definitions. Example usage: - Get the definition of GIST (Gastrointestinal Stromal Tumor) - Look up synonyms for melanoma - Find the MONDO ID for a disease by name Note: For clinical trials about diseases, use trial_searcher. For articles about diseases, use article_searcher. 
""" return await _disease_details( call_benefit="Get up-to-date disease definitions and ontology information", disease_id_or_name=disease_id_or_name, ) @mcp_app.tool() @track_performance("biomcp.drug_getter") async def drug_getter( drug_id_or_name: Annotated[ str, Field( description="Drug name (e.g., 'aspirin', 'imatinib') or ID (e.g., 'DB00945', 'CHEMBL941')" ), ], ) -> str: """Get detailed drug/chemical information from MyChem.info. ⚠️ PREREQUISITE: Use the 'think' tool FIRST to understand your research goal! This tool provides comprehensive drug information including: - Chemical properties (formula, InChIKey) - Drug identifiers (DrugBank, ChEMBL, PubChem) - Trade names and brand names - Clinical indications - Mechanism of action - Pharmacology details - Links to drug databases This tool fetches CURRENT drug information from MyChem.info, part of the BioThings suite, ensuring you always have the latest drug data. Example usage: - Get information about imatinib (Gleevec) - Look up details for DrugBank ID DB00619 - Find the mechanism of action for pembrolizumab Note: For clinical trials about drugs, use trial_searcher. For articles about drugs, use article_searcher. """ return await _drug_details(drug_id_or_name) # NCI-Specific Tools @mcp_app.tool() @track_performance("biomcp.nci_organization_searcher") async def nci_organization_searcher( name: Annotated[ str | None, Field( description="Organization name to search for (partial match supported)" ), ] = None, organization_type: Annotated[ str | None, Field( description="Type of organization (e.g., 'Academic', 'Industry', 'Government')" ), ] = None, city: Annotated[ str | None, Field( description="City where organization is located. IMPORTANT: Always use with state to avoid API errors" ), ] = None, state: Annotated[ str | None, Field( description="State/province code (e.g., 'CA', 'NY'). IMPORTANT: Always use with city to avoid API errors" ), ] = None, api_key: Annotated[ str | None, Field( description="NCI API key. 
Check if user mentioned 'my NCI API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one." ), ] = None, page: Annotated[ int, Field(description="Page number (1-based)", ge=1), ] = 1, page_size: Annotated[ int, Field(description="Results per page", ge=1, le=100), ] = 20, ) -> str: """Search for organizations in the NCI Clinical Trials database. Searches the National Cancer Institute's curated database of organizations involved in cancer clinical trials. This includes: - Academic medical centers - Community hospitals - Industry sponsors - Government facilities - Research networks Requires NCI API key from: https://clinicaltrialsapi.cancer.gov/ IMPORTANT: To avoid API errors, always use city AND state together when searching by location. The NCI API has limitations on broad searches. Example usage: - Find cancer centers in Boston, MA (city AND state) - Search for "MD Anderson" in Houston, TX - List academic organizations in Cleveland, OH - Search by organization name alone (without location) """ from biomcp.integrations.cts_api import CTSAPIError from biomcp.organizations import search_organizations from biomcp.organizations.search import format_organization_results try: results = await search_organizations( name=name, org_type=organization_type, city=city, state=state, page_size=page_size, page=page, api_key=api_key, ) return format_organization_results(results) except CTSAPIError as e: # Check for Elasticsearch bucket limit error error_msg = str(e) if "too_many_buckets_exception" in error_msg or "75000" in error_msg: return ( "⚠️ **Search Too Broad**\n\n" "The NCI API cannot process this search because it returns too many results.\n\n" "**To fix this, try:**\n" "1. **Always use city AND state together** for location searches\n" "2. Add an organization name (even partial) to narrow results\n" "3. 
Use multiple filters together (name + location, or name + type)\n\n" "**Examples that work:**\n" "- `nci_organization_searcher(city='Cleveland', state='OH')`\n" "- `nci_organization_searcher(name='Cleveland Clinic')`\n" "- `nci_organization_searcher(name='cancer', city='Boston', state='MA')`\n" "- `nci_organization_searcher(organization_type='Academic', city='Houston', state='TX')`" ) raise @mcp_app.tool() @track_performance("biomcp.nci_organization_getter") async def nci_organization_getter( organization_id: Annotated[ str, Field(description="NCI organization ID (e.g., 'NCI-2011-03337')"), ], api_key: Annotated[ str | None, Field( description="NCI API key. Check if user mentioned 'my NCI API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one." ), ] = None, ) -> str: """Get detailed information about a specific organization from NCI. Retrieves comprehensive details about an organization including: - Full name and aliases - Address and contact information - Organization type and role - Associated clinical trials - Research focus areas Requires NCI API key from: https://clinicaltrialsapi.cancer.gov/ Example usage: - Get details about a specific cancer center - Find contact information for trial sponsors - View organization's trial portfolio """ from biomcp.organizations import get_organization from biomcp.organizations.getter import format_organization_details org_data = await get_organization( org_id=organization_id, api_key=api_key, ) return format_organization_details(org_data) @mcp_app.tool() @track_performance("biomcp.nci_intervention_searcher") async def nci_intervention_searcher( name: Annotated[ str | None, Field( description="Intervention name to search for (e.g., 'pembrolizumab')" ), ] = None, intervention_type: Annotated[ str | None, Field( description="Type of intervention: 'Drug', 'Device', 'Biological', 'Procedure', 'Radiation', 'Behavioral', 'Genetic', 'Dietary', 'Other'" ), ] = None, synonyms: 
Annotated[ bool, Field(description="Include synonym matches in search"), ] = True, api_key: Annotated[ str | None, Field( description="NCI API key. Check if user mentioned 'my NCI API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one." ), ] = None, page: Annotated[ int, Field(description="Page number (1-based)", ge=1), ] = 1, page_size: Annotated[ int | None, Field( description="Results per page. If not specified, returns all matching results.", ge=1, le=100, ), ] = None, ) -> str: """Search for interventions in the NCI Clinical Trials database. Searches the National Cancer Institute's curated database of interventions used in cancer clinical trials. This includes: - FDA-approved drugs - Investigational agents - Medical devices - Surgical procedures - Radiation therapies - Behavioral interventions Requires NCI API key from: https://clinicaltrialsapi.cancer.gov/ Example usage: - Find all trials using pembrolizumab - Search for CAR-T cell therapies - List radiation therapy protocols - Find dietary interventions """ from biomcp.integrations.cts_api import CTSAPIError from biomcp.interventions import search_interventions from biomcp.interventions.search import format_intervention_results try: results = await search_interventions( name=name, intervention_type=intervention_type, synonyms=synonyms, page_size=page_size, page=page, api_key=api_key, ) return format_intervention_results(results) except CTSAPIError as e: # Check for Elasticsearch bucket limit error error_msg = str(e) if "too_many_buckets_exception" in error_msg or "75000" in error_msg: return ( "⚠️ **Search Too Broad**\n\n" "The NCI API cannot process this search because it returns too many results.\n\n" "**Try adding more specific filters:**\n" "- Add an intervention name (even partial)\n" "- Specify an intervention type (e.g., 'Drug', 'Device')\n" "- Search for a specific drug or therapy name\n\n" "**Example searches that work better:**\n" "- Search 
for 'pembrolizumab' instead of all drugs\n" "- Search for 'CAR-T' to find CAR-T cell therapies\n" "- Filter by type: Drug, Device, Procedure, etc." ) raise @mcp_app.tool() @track_performance("biomcp.nci_intervention_getter") async def nci_intervention_getter( intervention_id: Annotated[ str, Field(description="NCI intervention ID (e.g., 'INT123456')"), ], api_key: Annotated[ str | None, Field( description="NCI API key. Check if user mentioned 'my NCI API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one." ), ] = None, ) -> str: """Get detailed information about a specific intervention from NCI. Retrieves comprehensive details about an intervention including: - Full name and synonyms - Intervention type and category - Mechanism of action (for drugs) - FDA approval status - Associated clinical trials - Combination therapies Requires NCI API key from: https://clinicaltrialsapi.cancer.gov/ Example usage: - Get details about a specific drug - Find all trials using a device - View combination therapy protocols """ from biomcp.interventions import get_intervention from biomcp.interventions.getter import format_intervention_details intervention_data = await get_intervention( intervention_id=intervention_id, api_key=api_key, ) return format_intervention_details(intervention_data) # Biomarker Tools @mcp_app.tool() @track_performance("biomcp.nci_biomarker_searcher") async def nci_biomarker_searcher( name: Annotated[ str | None, Field( description="Biomarker name to search for (e.g., 'PD-L1', 'EGFR mutation')" ), ] = None, biomarker_type: Annotated[ str | None, Field(description="Type of biomarker ('reference_gene' or 'branch')"), ] = None, api_key: Annotated[ str | None, Field( description="NCI API key. Check if user mentioned 'my NCI API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one." 
), ] = None, page: Annotated[ int, Field(description="Page number (1-based)", ge=1), ] = 1, page_size: Annotated[ int, Field(description="Results per page", ge=1, le=100), ] = 20, ) -> str: """Search for biomarkers in the NCI Clinical Trials database. Searches for biomarkers used in clinical trial eligibility criteria. This is essential for precision medicine trials that select patients based on specific biomarker characteristics. Biomarker examples: - Gene mutations (e.g., BRAF V600E, EGFR T790M) - Protein expression (e.g., PD-L1 ≥ 50%, HER2 positive) - Gene fusions (e.g., ALK fusion, ROS1 fusion) - Other molecular markers (e.g., MSI-H, TMB-high) Requires NCI API key from: https://clinicaltrialsapi.cancer.gov/ Note: Biomarker data availability may be limited in CTRP. Results focus on biomarkers used in trial eligibility criteria. Example usage: - Search for PD-L1 expression biomarkers - Find trials requiring EGFR mutations - Look up biomarkers tested by NGS - Search for HER2 expression markers """ from biomcp.biomarkers import search_biomarkers from biomcp.biomarkers.search import format_biomarker_results from biomcp.integrations.cts_api import CTSAPIError try: results = await search_biomarkers( name=name, biomarker_type=biomarker_type, page_size=page_size, page=page, api_key=api_key, ) return format_biomarker_results(results) except CTSAPIError as e: # Check for Elasticsearch bucket limit error error_msg = str(e) if "too_many_buckets_exception" in error_msg or "75000" in error_msg: return ( "⚠️ **Search Too Broad**\n\n" "The NCI API cannot process this search because it returns too many results.\n\n" "**Try adding more specific filters:**\n" "- Add a biomarker name (even partial)\n" "- Specify a gene symbol\n" "- Add an assay type (e.g., 'IHC', 'NGS')\n\n" "**Example searches that work:**\n" "- `nci_biomarker_searcher(name='PD-L1')`\n" "- `nci_biomarker_searcher(gene='EGFR', biomarker_type='mutation')`\n" "- `nci_biomarker_searcher(assay_type='IHC')`" ) raise # 
NCI Disease Tools @mcp_app.tool() @track_performance("biomcp.nci_disease_searcher") async def nci_disease_searcher( name: Annotated[ str | None, Field(description="Disease name to search for (partial match)"), ] = None, include_synonyms: Annotated[ bool, Field(description="Include synonym matches in search"), ] = True, category: Annotated[ str | None, Field(description="Disease category/type filter"), ] = None, api_key: Annotated[ str | None, Field( description="NCI API key. Check if user mentioned 'my NCI API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one." ), ] = None, page: Annotated[ int, Field(description="Page number (1-based)", ge=1), ] = 1, page_size: Annotated[ int, Field(description="Results per page", ge=1, le=100), ] = 20, ) -> str: """Search NCI's controlled vocabulary of cancer conditions. Searches the National Cancer Institute's curated database of cancer conditions and diseases used in clinical trials. This is different from the general disease_getter tool which uses MyDisease.info. NCI's disease vocabulary provides: - Official cancer terminology used in trials - Disease synonyms and alternative names - Hierarchical disease classifications - Standardized disease codes for trial matching Requires NCI API key from: https://clinicaltrialsapi.cancer.gov/ Example usage: - Search for specific cancer types (e.g., "melanoma") - Find all lung cancer subtypes - Look up official names for disease synonyms - Get standardized disease terms for trial searches Note: This is specifically for NCI's cancer disease vocabulary. For general disease information, use the disease_getter tool. 
""" from biomcp.diseases import search_diseases from biomcp.diseases.search import format_disease_results from biomcp.integrations.cts_api import CTSAPIError try: results = await search_diseases( name=name, include_synonyms=include_synonyms, category=category, page_size=page_size, page=page, api_key=api_key, ) return format_disease_results(results) except CTSAPIError as e: # Check for Elasticsearch bucket limit error error_msg = str(e) if "too_many_buckets_exception" in error_msg or "75000" in error_msg: return ( "⚠️ **Search Too Broad**\n\n" "The NCI API cannot process this search because it returns too many results.\n\n" "**Try adding more specific filters:**\n" "- Add a disease name (even partial)\n" "- Specify a disease category\n" "- Use more specific search terms\n\n" "**Example searches that work:**\n" "- `nci_disease_searcher(name='melanoma')`\n" "- `nci_disease_searcher(name='lung', category='maintype')`\n" "- `nci_disease_searcher(name='NSCLC')`" ) raise # OpenFDA Tools @mcp_app.tool() @track_performance("biomcp.openfda_adverse_searcher") async def openfda_adverse_searcher( drug: Annotated[ str | None, Field(description="Drug name to search for adverse events"), ] = None, reaction: Annotated[ str | None, Field(description="Adverse reaction term to search for"), ] = None, serious: Annotated[ bool | None, Field(description="Filter for serious events only"), ] = None, limit: Annotated[ int, Field(description="Maximum number of results", ge=1, le=100), ] = 25, page: Annotated[ int, Field(description="Page number (1-based)", ge=1), ] = 1, api_key: Annotated[ str | None, Field( description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" ), ] = None, ) -> str: """Search FDA adverse event reports (FAERS) for drug safety information. ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy! 
Searches FDA's Adverse Event Reporting System for: - Drug side effects and adverse reactions - Serious event reports (death, hospitalization, disability) - Safety signal patterns across patient populations Note: These reports do not establish causation - they are voluntary reports that may contain incomplete or unverified information. """ from biomcp.openfda import search_adverse_events skip = (page - 1) * limit return await search_adverse_events( drug=drug, reaction=reaction, serious=serious, limit=limit, skip=skip, api_key=api_key, ) @mcp_app.tool() @track_performance("biomcp.openfda_adverse_getter") async def openfda_adverse_getter( report_id: Annotated[ str, Field(description="Safety report ID"), ], api_key: Annotated[ str | None, Field( description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" ), ] = None, ) -> str: """Get detailed information for a specific FDA adverse event report. Retrieves complete details including: - Patient demographics and medical history - All drugs involved and dosages - Complete list of adverse reactions - Event narrative and outcomes - Reporter information """ from biomcp.openfda import get_adverse_event return await get_adverse_event(report_id, api_key=api_key) @mcp_app.tool() @track_performance("biomcp.openfda_label_searcher") async def openfda_label_searcher( name: Annotated[ str | None, Field(description="Drug name to search for"), ] = None, indication: Annotated[ str | None, Field(description="Search for drugs indicated for this condition"), ] = None, boxed_warning: Annotated[ bool, Field(description="Filter for drugs with boxed warnings"), ] = False, section: Annotated[ str | None, Field( description="Specific label section (e.g., 'contraindications', 'warnings')" ), ] = None, limit: Annotated[ int, Field(description="Maximum number of results", ge=1, le=100), ] = 25, page: Annotated[ int, Field(description="Page number (1-based)", ge=1), ] = 1, api_key: Annotated[ str | None, Field( description="Optional 
OpenFDA API key (overrides OPENFDA_API_KEY env var)" ), ] = None, ) -> str: """Search FDA drug product labels (SPL) for prescribing information. ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy! Searches official FDA drug labels for: - Approved indications and usage - Dosage and administration guidelines - Contraindications and warnings - Drug interactions and adverse reactions - Special population considerations Label sections include: indications, dosage, contraindications, warnings, adverse, interactions, pregnancy, pediatric, geriatric, overdose """ from biomcp.openfda import search_drug_labels skip = (page - 1) * limit return await search_drug_labels( name=name, indication=indication, boxed_warning=boxed_warning, section=section, limit=limit, skip=skip, api_key=api_key, ) @mcp_app.tool() @track_performance("biomcp.openfda_label_getter") async def openfda_label_getter( set_id: Annotated[ str, Field(description="Label set ID"), ], sections: Annotated[ list[str] | None, Field( description="Specific sections to retrieve (default: key sections)" ), ] = None, api_key: Annotated[ str | None, Field( description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" ), ] = None, ) -> str: """Get complete FDA drug label information by set ID. Retrieves the full prescribing information including: - Complete indications and usage text - Detailed dosing instructions - All warnings and precautions - Clinical pharmacology and studies - Manufacturing and storage information Specify sections to retrieve specific parts, or leave empty for default key sections. 
""" from biomcp.openfda import get_drug_label return await get_drug_label(set_id, sections, api_key=api_key) @mcp_app.tool() @track_performance("biomcp.openfda_device_searcher") async def openfda_device_searcher( device: Annotated[ str | None, Field(description="Device name to search for"), ] = None, manufacturer: Annotated[ str | None, Field(description="Manufacturer name"), ] = None, problem: Annotated[ str | None, Field(description="Device problem description"), ] = None, product_code: Annotated[ str | None, Field(description="FDA product code"), ] = None, genomics_only: Annotated[ bool, Field(description="Filter to genomic/diagnostic devices only"), ] = True, limit: Annotated[ int, Field(description="Maximum number of results", ge=1, le=100), ] = 25, page: Annotated[ int, Field(description="Page number (1-based)", ge=1), ] = 1, api_key: Annotated[ str | None, Field( description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" ), ] = None, ) -> str: """Search FDA device adverse event reports (MAUDE) for medical device issues. ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy! Searches FDA's device adverse event database for: - Device malfunctions and failures - Patient injuries related to devices - Genomic test and diagnostic device issues By default, filters to genomic/diagnostic devices relevant to precision medicine. Set genomics_only=False to search all medical devices. 
""" from biomcp.openfda import search_device_events skip = (page - 1) * limit return await search_device_events( device=device, manufacturer=manufacturer, problem=problem, product_code=product_code, genomics_only=genomics_only, limit=limit, skip=skip, api_key=api_key, ) @mcp_app.tool() @track_performance("biomcp.openfda_device_getter") async def openfda_device_getter( mdr_report_key: Annotated[ str, Field(description="MDR report key"), ], api_key: Annotated[ str | None, Field( description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" ), ] = None, ) -> str: """Get detailed information for a specific FDA device event report. Retrieves complete device event details including: - Device identification and specifications - Complete event narrative - Patient outcomes and impacts - Manufacturer analysis and actions - Remedial actions taken """ from biomcp.openfda import get_device_event return await get_device_event(mdr_report_key, api_key=api_key) @mcp_app.tool() @track_performance("biomcp.openfda_approval_searcher") async def openfda_approval_searcher( drug: Annotated[ str | None, Field(description="Drug name (brand or generic) to search for"), ] = None, application_number: Annotated[ str | None, Field(description="NDA or BLA application number"), ] = None, approval_year: Annotated[ str | None, Field(description="Year of approval (YYYY format)"), ] = None, limit: Annotated[ int, Field(description="Maximum number of results", ge=1, le=100), ] = 25, page: Annotated[ int, Field(description="Page number (1-based)", ge=1), ] = 1, api_key: Annotated[ str | None, Field( description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" ), ] = None, ) -> str: """Search FDA drug approval records from Drugs@FDA database. ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy! 
Returns information about: - Application numbers and sponsors - Brand and generic names - Product formulations and strengths - Marketing status and approval dates - Submission history Useful for verifying if a drug is FDA-approved and when. """ from biomcp.openfda import search_drug_approvals skip = (page - 1) * limit return await search_drug_approvals( drug=drug, application_number=application_number, approval_year=approval_year, limit=limit, skip=skip, api_key=api_key, ) @mcp_app.tool() @track_performance("biomcp.openfda_approval_getter") async def openfda_approval_getter( application_number: Annotated[ str, Field(description="NDA or BLA application number"), ], api_key: Annotated[ str | None, Field( description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" ), ] = None, ) -> str: """Get detailed FDA drug approval information for a specific application. Returns comprehensive approval details including: - Full product list with dosage forms and strengths - Complete submission history - Marketing status timeline - Therapeutic equivalence codes - Pharmacologic class information """ from biomcp.openfda import get_drug_approval return await get_drug_approval(application_number, api_key=api_key) @mcp_app.tool() @track_performance("biomcp.openfda_recall_searcher") async def openfda_recall_searcher( drug: Annotated[ str | None, Field(description="Drug name to search for recalls"), ] = None, recall_class: Annotated[ str | None, Field( description="Recall classification (1=most serious, 2=moderate, 3=least serious)" ), ] = None, status: Annotated[ str | None, Field(description="Recall status (ongoing, completed, terminated)"), ] = None, reason: Annotated[ str | None, Field(description="Search text in recall reason"), ] = None, since_date: Annotated[ str | None, Field(description="Show recalls after this date (YYYYMMDD format)"), ] = None, limit: Annotated[ int, Field(description="Maximum number of results", ge=1, le=100), ] = 25, page: Annotated[ int, 
Field(description="Page number (1-based)", ge=1), ] = 1, api_key: Annotated[ str | None, Field( description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" ), ] = None, ) -> str: """Search FDA drug recall records from the Enforcement database. ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy! Returns recall information including: - Classification (Class I, II, or III) - Recall reason and description - Product identification - Distribution information - Recalling firm details - Current status Class I = most serious (death/serious harm) Class II = moderate (temporary/reversible harm) Class III = least serious (unlikely to cause harm) """ from biomcp.openfda import search_drug_recalls skip = (page - 1) * limit return await search_drug_recalls( drug=drug, recall_class=recall_class, status=status, reason=reason, since_date=since_date, limit=limit, skip=skip, api_key=api_key, ) @mcp_app.tool() @track_performance("biomcp.openfda_recall_getter") async def openfda_recall_getter( recall_number: Annotated[ str, Field(description="FDA recall number"), ], api_key: Annotated[ str | None, Field( description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" ), ] = None, ) -> str: """Get detailed FDA drug recall information for a specific recall. 
Returns complete recall details including: - Full product description and code information - Complete reason for recall - Distribution pattern and locations - Quantity of product recalled - Firm information and actions taken - Timeline of recall events """ from biomcp.openfda import get_drug_recall return await get_drug_recall(recall_number, api_key=api_key) @mcp_app.tool() @track_performance("biomcp.openfda_shortage_searcher") async def openfda_shortage_searcher( drug: Annotated[ str | None, Field(description="Drug name (generic or brand) to search"), ] = None, status: Annotated[ str | None, Field(description="Shortage status (current or resolved)"), ] = None, therapeutic_category: Annotated[ str | None, Field( description="Therapeutic category (e.g., Oncology, Anti-infective)" ), ] = None, limit: Annotated[ int, Field(description="Maximum number of results", ge=1, le=100), ] = 25, page: Annotated[ int, Field(description="Page number (1-based)", ge=1), ] = 1, api_key: Annotated[ str | None, Field( description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" ), ] = None, ) -> str: """Search FDA drug shortage records. ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy! Returns shortage information including: - Current shortage status - Shortage start and resolution dates - Reason for shortage - Therapeutic category - Manufacturer information - Estimated resolution timeline Note: Shortage data is cached and updated periodically. Check FDA.gov for most current information. 
""" from biomcp.openfda import search_drug_shortages skip = (page - 1) * limit return await search_drug_shortages( drug=drug, status=status, therapeutic_category=therapeutic_category, limit=limit, skip=skip, api_key=api_key, ) @mcp_app.tool() @track_performance("biomcp.openfda_shortage_getter") async def openfda_shortage_getter( drug: Annotated[ str, Field(description="Drug name (generic or brand)"), ], api_key: Annotated[ str | None, Field( description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" ), ] = None, ) -> str: """Get detailed FDA drug shortage information for a specific drug. Returns comprehensive shortage details including: - Complete timeline of shortage - Detailed reason for shortage - All affected manufacturers - Alternative products if available - Resolution status and estimates - Additional notes and recommendations Data is updated periodically from FDA shortage database. """ from biomcp.openfda import get_drug_shortage return await get_drug_shortage(drug, api_key=api_key) ```