This is page 15 of 19. Use http://codebase.md/genomoncology/biomcp?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .github │ ├── actions │ │ └── setup-python-env │ │ └── action.yml │ ├── dependabot.yml │ └── workflows │ ├── ci.yml │ ├── deploy-docs.yml │ ├── main.yml.disabled │ ├── on-release-main.yml │ └── validate-codecov-config.yml ├── .gitignore ├── .pre-commit-config.yaml ├── BIOMCP_DATA_FLOW.md ├── CHANGELOG.md ├── CNAME ├── codecov.yaml ├── docker-compose.yml ├── Dockerfile ├── docs │ ├── apis │ │ ├── error-codes.md │ │ ├── overview.md │ │ └── python-sdk.md │ ├── assets │ │ ├── biomcp-cursor-locations.png │ │ ├── favicon.ico │ │ ├── icon.png │ │ ├── logo.png │ │ ├── mcp_architecture.txt │ │ └── remote-connection │ │ ├── 00_connectors.png │ │ ├── 01_add_custom_connector.png │ │ ├── 02_connector_enabled.png │ │ ├── 03_connect_to_biomcp.png │ │ ├── 04_select_google_oauth.png │ │ └── 05_success_connect.png │ ├── backend-services-reference │ │ ├── 01-overview.md │ │ ├── 02-biothings-suite.md │ │ ├── 03-cbioportal.md │ │ ├── 04-clinicaltrials-gov.md │ │ ├── 05-nci-cts-api.md │ │ ├── 06-pubtator3.md │ │ └── 07-alphagenome.md │ ├── blog │ │ ├── ai-assisted-clinical-trial-search-analysis.md │ │ ├── images │ │ │ ├── deep-researcher-video.png │ │ │ ├── researcher-announce.png │ │ │ ├── researcher-drop-down.png │ │ │ ├── researcher-prompt.png │ │ │ ├── trial-search-assistant.png │ │ │ └── what_is_biomcp_thumbnail.png │ │ └── researcher-persona-resource.md │ ├── changelog.md │ ├── CNAME │ ├── concepts │ │ ├── 01-what-is-biomcp.md │ │ ├── 02-the-deep-researcher-persona.md │ │ └── 03-sequential-thinking-with-the-think-tool.md │ ├── developer-guides │ │ ├── 01-server-deployment.md │ │ ├── 02-contributing-and-testing.md │ │ ├── 03-third-party-endpoints.md │ │ ├── 04-transport-protocol.md │ │ ├── 05-error-handling.md │ │ ├── 06-http-client-and-caching.md │ │ ├── 07-performance-optimizations.md │ │ └── generate_endpoints.py │ ├── faq-condensed.md │ 
├── FDA_SECURITY.md │ ├── genomoncology.md │ ├── getting-started │ │ ├── 01-quickstart-cli.md │ │ ├── 02-claude-desktop-integration.md │ │ └── 03-authentication-and-api-keys.md │ ├── how-to-guides │ │ ├── 01-find-articles-and-cbioportal-data.md │ │ ├── 02-find-trials-with-nci-and-biothings.md │ │ ├── 03-get-comprehensive-variant-annotations.md │ │ ├── 04-predict-variant-effects-with-alphagenome.md │ │ ├── 05-logging-and-monitoring-with-bigquery.md │ │ └── 06-search-nci-organizations-and-interventions.md │ ├── index.md │ ├── policies.md │ ├── reference │ │ ├── architecture-diagrams.md │ │ ├── quick-architecture.md │ │ ├── quick-reference.md │ │ └── visual-architecture.md │ ├── robots.txt │ ├── stylesheets │ │ ├── announcement.css │ │ └── extra.css │ ├── troubleshooting.md │ ├── tutorials │ │ ├── biothings-prompts.md │ │ ├── claude-code-biomcp-alphagenome.md │ │ ├── nci-prompts.md │ │ ├── openfda-integration.md │ │ ├── openfda-prompts.md │ │ ├── pydantic-ai-integration.md │ │ └── remote-connection.md │ ├── user-guides │ │ ├── 01-command-line-interface.md │ │ ├── 02-mcp-tools-reference.md │ │ └── 03-integrating-with-ides-and-clients.md │ └── workflows │ └── all-workflows.md ├── example_scripts │ ├── mcp_integration.py │ └── python_sdk.py ├── glama.json ├── LICENSE ├── lzyank.toml ├── Makefile ├── mkdocs.yml ├── package-lock.json ├── package.json ├── pyproject.toml ├── README.md ├── scripts │ ├── check_docs_in_mkdocs.py │ ├── check_http_imports.py │ └── generate_endpoints_doc.py ├── smithery.yaml ├── src │ └── biomcp │ ├── __init__.py │ ├── __main__.py │ ├── articles │ │ ├── __init__.py │ │ ├── autocomplete.py │ │ ├── fetch.py │ │ ├── preprints.py │ │ ├── search_optimized.py │ │ ├── search.py │ │ └── unified.py │ ├── biomarkers │ │ ├── __init__.py │ │ └── search.py │ ├── cbioportal_helper.py │ ├── circuit_breaker.py │ ├── cli │ │ ├── __init__.py │ │ ├── articles.py │ │ ├── biomarkers.py │ │ ├── diseases.py │ │ ├── health.py │ │ ├── interventions.py │ │ ├── main.py │ │ 
├── openfda.py │ │ ├── organizations.py │ │ ├── server.py │ │ ├── trials.py │ │ └── variants.py │ ├── connection_pool.py │ ├── constants.py │ ├── core.py │ ├── diseases │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── domain_handlers.py │ ├── drugs │ │ ├── __init__.py │ │ └── getter.py │ ├── exceptions.py │ ├── genes │ │ ├── __init__.py │ │ └── getter.py │ ├── http_client_simple.py │ ├── http_client.py │ ├── individual_tools.py │ ├── integrations │ │ ├── __init__.py │ │ ├── biothings_client.py │ │ └── cts_api.py │ ├── interventions │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── logging_filter.py │ ├── metrics_handler.py │ ├── metrics.py │ ├── openfda │ │ ├── __init__.py │ │ ├── adverse_events_helpers.py │ │ ├── adverse_events.py │ │ ├── cache.py │ │ ├── constants.py │ │ ├── device_events_helpers.py │ │ ├── device_events.py │ │ ├── drug_approvals.py │ │ ├── drug_labels_helpers.py │ │ ├── drug_labels.py │ │ ├── drug_recalls_helpers.py │ │ ├── drug_recalls.py │ │ ├── drug_shortages_detail_helpers.py │ │ ├── drug_shortages_helpers.py │ │ ├── drug_shortages.py │ │ ├── exceptions.py │ │ ├── input_validation.py │ │ ├── rate_limiter.py │ │ ├── utils.py │ │ └── validation.py │ ├── organizations │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── parameter_parser.py │ ├── prefetch.py │ ├── query_parser.py │ ├── query_router.py │ ├── rate_limiter.py │ ├── render.py │ ├── request_batcher.py │ ├── resources │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── instructions.md │ │ └── researcher.md │ ├── retry.py │ ├── router_handlers.py │ ├── router.py │ ├── shared_context.py │ ├── thinking │ │ ├── __init__.py │ │ ├── sequential.py │ │ └── session.py │ ├── thinking_tool.py │ ├── thinking_tracker.py │ ├── trials │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── nci_getter.py │ │ ├── nci_search.py │ │ └── search.py │ ├── utils │ │ ├── __init__.py │ │ ├── cancer_types_api.py │ │ ├── cbio_http_adapter.py │ │ ├── endpoint_registry.py │ │ ├── 
gene_validator.py │ │ ├── metrics.py │ │ ├── mutation_filter.py │ │ ├── query_utils.py │ │ ├── rate_limiter.py │ │ └── request_cache.py │ ├── variants │ │ ├── __init__.py │ │ ├── alphagenome.py │ │ ├── cancer_types.py │ │ ├── cbio_external_client.py │ │ ├── cbioportal_mutations.py │ │ ├── cbioportal_search_helpers.py │ │ ├── cbioportal_search.py │ │ ├── constants.py │ │ ├── external.py │ │ ├── filters.py │ │ ├── getter.py │ │ ├── links.py │ │ └── search.py │ └── workers │ ├── __init__.py │ ├── worker_entry_stytch.js │ ├── worker_entry.js │ └── worker.py ├── tests │ ├── bdd │ │ ├── cli_help │ │ │ ├── help.feature │ │ │ └── test_help.py │ │ ├── conftest.py │ │ ├── features │ │ │ └── alphagenome_integration.feature │ │ ├── fetch_articles │ │ │ ├── fetch.feature │ │ │ └── test_fetch.py │ │ ├── get_trials │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── get_variants │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── search_articles │ │ │ ├── autocomplete.feature │ │ │ ├── search.feature │ │ │ ├── test_autocomplete.py │ │ │ └── test_search.py │ │ ├── search_trials │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ ├── search_variants │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ └── steps │ │ └── test_alphagenome_steps.py │ ├── config │ │ └── test_smithery_config.py │ ├── conftest.py │ ├── data │ │ ├── ct_gov │ │ │ ├── clinical_trials_api_v2.yaml │ │ │ ├── trials_NCT04280705.json │ │ │ └── trials_NCT04280705.txt │ │ ├── myvariant │ │ │ ├── myvariant_api.yaml │ │ │ ├── myvariant_field_descriptions.csv │ │ │ ├── variants_full_braf_v600e.json │ │ │ ├── variants_full_braf_v600e.txt │ │ │ └── variants_part_braf_v600_multiple.json │ │ ├── openfda │ │ │ ├── drugsfda_detail.json │ │ │ ├── drugsfda_search.json │ │ │ ├── enforcement_detail.json │ │ │ └── enforcement_search.json │ │ └── pubtator │ │ ├── pubtator_autocomplete.json │ │ └── pubtator3_paper.txt │ ├── integration │ │ ├── test_openfda_integration.py │ │ ├── test_preprints_integration.py │ │ ├── 
test_simple.py │ │ └── test_variants_integration.py │ ├── tdd │ │ ├── articles │ │ │ ├── test_autocomplete.py │ │ │ ├── test_cbioportal_integration.py │ │ │ ├── test_fetch.py │ │ │ ├── test_preprints.py │ │ │ ├── test_search.py │ │ │ └── test_unified.py │ │ ├── conftest.py │ │ ├── drugs │ │ │ ├── __init__.py │ │ │ └── test_drug_getter.py │ │ ├── openfda │ │ │ ├── __init__.py │ │ │ ├── test_adverse_events.py │ │ │ ├── test_device_events.py │ │ │ ├── test_drug_approvals.py │ │ │ ├── test_drug_labels.py │ │ │ ├── test_drug_recalls.py │ │ │ ├── test_drug_shortages.py │ │ │ └── test_security.py │ │ ├── test_biothings_integration_real.py │ │ ├── test_biothings_integration.py │ │ ├── test_circuit_breaker.py │ │ ├── test_concurrent_requests.py │ │ ├── test_connection_pool.py │ │ ├── test_domain_handlers.py │ │ ├── test_drug_approvals.py │ │ ├── test_drug_recalls.py │ │ ├── test_drug_shortages.py │ │ ├── test_endpoint_documentation.py │ │ ├── test_error_scenarios.py │ │ ├── test_europe_pmc_fetch.py │ │ ├── test_mcp_integration.py │ │ ├── test_mcp_tools.py │ │ ├── test_metrics.py │ │ ├── test_nci_integration.py │ │ ├── test_nci_mcp_tools.py │ │ ├── test_network_policies.py │ │ ├── test_offline_mode.py │ │ ├── test_openfda_unified.py │ │ ├── test_pten_r173_search.py │ │ ├── test_render.py │ │ ├── test_request_batcher.py.disabled │ │ ├── test_retry.py │ │ ├── test_router.py │ │ ├── test_shared_context.py.disabled │ │ ├── test_unified_biothings.py │ │ ├── thinking │ │ │ ├── __init__.py │ │ │ └── test_sequential.py │ │ ├── trials │ │ │ ├── test_backward_compatibility.py │ │ │ ├── test_getter.py │ │ │ └── test_search.py │ │ ├── utils │ │ │ ├── test_gene_validator.py │ │ │ ├── test_mutation_filter.py │ │ │ ├── test_rate_limiter.py │ │ │ └── test_request_cache.py │ │ ├── variants │ │ │ ├── constants.py │ │ │ ├── test_alphagenome_api_key.py │ │ │ ├── test_alphagenome_comprehensive.py │ │ │ ├── test_alphagenome.py │ │ │ ├── test_cbioportal_mutations.py │ │ │ ├── 
test_cbioportal_search.py │ │ │ ├── test_external_integration.py │ │ │ ├── test_external.py │ │ │ ├── test_extract_gene_aa_change.py │ │ │ ├── test_filters.py │ │ │ ├── test_getter.py │ │ │ ├── test_links.py │ │ │ └── test_search.py │ │ └── workers │ │ └── test_worker_sanitization.js │ └── test_pydantic_ai_integration.py ├── THIRD_PARTY_ENDPOINTS.md ├── tox.ini ├── uv.lock └── wrangler.toml ``` # Files -------------------------------------------------------------------------------- /src/biomcp/trials/search.py: -------------------------------------------------------------------------------- ```python 1 | import json 2 | import logging 3 | from ssl import TLSVersion 4 | from typing import Annotated 5 | 6 | from pydantic import BaseModel, Field, field_validator, model_validator 7 | 8 | from .. import StrEnum, ensure_list, http_client, render 9 | from ..constants import CLINICAL_TRIALS_BASE_URL 10 | from ..integrations import BioThingsClient 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | class SortOrder(StrEnum): 16 | RELEVANCE = "RELEVANCE" 17 | LAST_UPDATE = "LAST_UPDATE" 18 | ENROLLMENT = "ENROLLMENT" 19 | START_DATE = "START_DATE" 20 | COMPLETION_DATE = "COMPLETION_DATE" 21 | SUBMITTED_DATE = "SUBMITTED_DATE" 22 | 23 | 24 | class TrialPhase(StrEnum): 25 | EARLY_PHASE1 = "EARLY_PHASE1" 26 | PHASE1 = "PHASE1" 27 | PHASE2 = "PHASE2" 28 | PHASE3 = "PHASE3" 29 | PHASE4 = "PHASE4" 30 | NOT_APPLICABLE = "NOT_APPLICABLE" 31 | 32 | 33 | class RecruitingStatus(StrEnum): 34 | OPEN = "OPEN" 35 | CLOSED = "CLOSED" 36 | ANY = "ANY" 37 | 38 | 39 | class StudyType(StrEnum): 40 | INTERVENTIONAL = "INTERVENTIONAL" 41 | OBSERVATIONAL = "OBSERVATIONAL" 42 | EXPANDED_ACCESS = "EXPANDED_ACCESS" 43 | OTHER = "OTHER" 44 | 45 | 46 | class InterventionType(StrEnum): 47 | DRUG = "DRUG" 48 | DEVICE = "DEVICE" 49 | BIOLOGICAL = "BIOLOGICAL" 50 | PROCEDURE = "PROCEDURE" 51 | RADIATION = "RADIATION" 52 | BEHAVIORAL = "BEHAVIORAL" 53 | GENETIC = "GENETIC" 54 | DIETARY = "DIETARY" 
55 | DIAGNOSTIC_TEST = "DIAGNOSTIC_TEST" 56 | OTHER = "OTHER" 57 | 58 | 59 | class SponsorType(StrEnum): 60 | INDUSTRY = "INDUSTRY" 61 | GOVERNMENT = "GOVERNMENT" 62 | ACADEMIC = "ACADEMIC" 63 | OTHER = "OTHER" 64 | 65 | 66 | class StudyDesign(StrEnum): 67 | RANDOMIZED = "RANDOMIZED" 68 | NON_RANDOMIZED = "NON_RANDOMIZED" 69 | OBSERVATIONAL = "OBSERVATIONAL" 70 | 71 | 72 | class DateField(StrEnum): 73 | LAST_UPDATE = "LAST_UPDATE" 74 | STUDY_START = "STUDY_START" 75 | PRIMARY_COMPLETION = "PRIMARY_COMPLETION" 76 | OUTCOME_POSTING = "OUTCOME_POSTING" 77 | COMPLETION = "COMPLETION" 78 | FIRST_POSTING = "FIRST_POSTING" 79 | SUBMITTED_DATE = "SUBMITTED_DATE" 80 | 81 | 82 | class PrimaryPurpose(StrEnum): 83 | TREATMENT = "TREATMENT" 84 | PREVENTION = "PREVENTION" 85 | DIAGNOSTIC = "DIAGNOSTIC" 86 | SUPPORTIVE_CARE = "SUPPORTIVE_CARE" 87 | SCREENING = "SCREENING" 88 | HEALTH_SERVICES = "HEALTH_SERVICES" 89 | BASIC_SCIENCE = "BASIC_SCIENCE" 90 | DEVICE_FEASIBILITY = "DEVICE_FEASIBILITY" 91 | OTHER = "OTHER" 92 | 93 | 94 | class AgeGroup(StrEnum): 95 | CHILD = "CHILD" 96 | ADULT = "ADULT" 97 | SENIOR = "SENIOR" 98 | ALL = "ALL" 99 | 100 | 101 | class LineOfTherapy(StrEnum): 102 | FIRST_LINE = "1L" 103 | SECOND_LINE = "2L" 104 | THIRD_LINE_PLUS = "3L+" 105 | 106 | 107 | CTGOV_SORT_MAPPING = { 108 | SortOrder.RELEVANCE: "@relevance", 109 | SortOrder.LAST_UPDATE: "LastUpdatePostDate:desc", 110 | SortOrder.ENROLLMENT: "EnrollmentCount:desc", 111 | SortOrder.START_DATE: "StudyStartDate:desc", 112 | SortOrder.COMPLETION_DATE: "PrimaryCompletionDate:desc", 113 | SortOrder.SUBMITTED_DATE: "StudyFirstSubmitDate:desc", 114 | } 115 | 116 | CTGOV_PHASE_MAPPING = { 117 | TrialPhase.EARLY_PHASE1: ("EARLY_PHASE1",), 118 | TrialPhase.PHASE1: ("PHASE1",), 119 | TrialPhase.PHASE2: ("PHASE2",), 120 | TrialPhase.PHASE3: ("PHASE3",), 121 | TrialPhase.PHASE4: ("PHASE4",), 122 | TrialPhase.NOT_APPLICABLE: ("NOT_APPLICABLE",), 123 | } 124 | 125 | OPEN_STATUSES = ( 126 | "AVAILABLE", 127 | 
"ENROLLING_BY_INVITATION", 128 | "NOT_YET_RECRUITING", 129 | "RECRUITING", 130 | ) 131 | CLOSED_STATUSES = ( 132 | "ACTIVE_NOT_RECRUITING", 133 | "COMPLETED", 134 | "SUSPENDED", 135 | "TERMINATED", 136 | "WITHDRAWN", 137 | ) 138 | CTGOV_RECRUITING_STATUS_MAPPING = { 139 | RecruitingStatus.OPEN: OPEN_STATUSES, 140 | RecruitingStatus.CLOSED: CLOSED_STATUSES, 141 | RecruitingStatus.ANY: None, 142 | } 143 | 144 | CTGOV_STUDY_TYPE_MAPPING = { 145 | StudyType.INTERVENTIONAL: ("Interventional",), 146 | StudyType.OBSERVATIONAL: ("Observational",), 147 | StudyType.EXPANDED_ACCESS: ("Expanded Access",), 148 | StudyType.OTHER: ("Other",), 149 | } 150 | 151 | CTGOV_INTERVENTION_TYPE_MAPPING = { 152 | InterventionType.DRUG: ("Drug",), 153 | InterventionType.DEVICE: ("Device",), 154 | InterventionType.BIOLOGICAL: ("Biological",), 155 | InterventionType.PROCEDURE: ("Procedure",), 156 | InterventionType.RADIATION: ("Radiation",), 157 | InterventionType.BEHAVIORAL: ("Behavioral",), 158 | InterventionType.GENETIC: ("Genetic",), 159 | InterventionType.DIETARY: ("Dietary",), 160 | InterventionType.DIAGNOSTIC_TEST: ("Diagnostic Test",), 161 | InterventionType.OTHER: ("Other",), 162 | } 163 | 164 | CTGOV_SPONSOR_TYPE_MAPPING = { 165 | SponsorType.INDUSTRY: ("Industry",), 166 | SponsorType.GOVERNMENT: ("Government",), 167 | SponsorType.ACADEMIC: ("Academic",), 168 | SponsorType.OTHER: ("Other",), 169 | } 170 | 171 | CTGOV_STUDY_DESIGN_MAPPING = { 172 | StudyDesign.RANDOMIZED: ("Randomized",), 173 | StudyDesign.NON_RANDOMIZED: ("Non-Randomized",), 174 | StudyDesign.OBSERVATIONAL: ("Observational",), 175 | } 176 | 177 | CTGOV_DATE_FIELD_MAPPING = { 178 | DateField.LAST_UPDATE: "LastUpdatePostDate", 179 | DateField.STUDY_START: "StartDate", 180 | DateField.PRIMARY_COMPLETION: "PrimaryCompletionDate", 181 | DateField.OUTCOME_POSTING: "ResultsFirstPostDate", 182 | DateField.COMPLETION: "CompletionDate", 183 | DateField.FIRST_POSTING: "StudyFirstPostDate", 184 | DateField.SUBMITTED_DATE: 
"StudyFirstSubmitDate", 185 | } 186 | 187 | CTGOV_PRIMARY_PURPOSE_MAPPING = { 188 | PrimaryPurpose.TREATMENT: ("Treatment",), 189 | PrimaryPurpose.PREVENTION: ("Prevention",), 190 | PrimaryPurpose.DIAGNOSTIC: ("Diagnostic",), 191 | PrimaryPurpose.SUPPORTIVE_CARE: ("Supportive Care",), 192 | PrimaryPurpose.SCREENING: ("Screening",), 193 | PrimaryPurpose.HEALTH_SERVICES: ("Health Services",), 194 | PrimaryPurpose.BASIC_SCIENCE: ("Basic Science",), 195 | PrimaryPurpose.DEVICE_FEASIBILITY: ("Device Feasibility",), 196 | PrimaryPurpose.OTHER: ("Other",), 197 | } 198 | 199 | CTGOV_AGE_GROUP_MAPPING = { 200 | AgeGroup.CHILD: ("Child",), 201 | AgeGroup.ADULT: ("Adult",), 202 | AgeGroup.SENIOR: ("Older Adult",), 203 | AgeGroup.ALL: None, 204 | } 205 | 206 | # Line of therapy patterns for EligibilityCriteria search 207 | LINE_OF_THERAPY_PATTERNS = { 208 | LineOfTherapy.FIRST_LINE: [ 209 | '"first line"', 210 | '"first-line"', 211 | '"1st line"', 212 | '"frontline"', 213 | '"treatment naive"', 214 | '"previously untreated"', 215 | ], 216 | LineOfTherapy.SECOND_LINE: [ 217 | '"second line"', 218 | '"second-line"', 219 | '"2nd line"', 220 | '"one prior line"', 221 | '"1 prior line"', 222 | ], 223 | LineOfTherapy.THIRD_LINE_PLUS: [ 224 | '"third line"', 225 | '"third-line"', 226 | '"3rd line"', 227 | '"≥2 prior"', 228 | '"at least 2 prior"', 229 | '"heavily pretreated"', 230 | ], 231 | } 232 | 233 | DEFAULT_FORMAT = "csv" 234 | DEFAULT_MARKUP = "markdown" 235 | 236 | SEARCH_FIELDS = [ 237 | "NCT Number", 238 | "Study Title", 239 | "Study URL", 240 | "Study Status", 241 | "Brief Summary", 242 | "Study Results", 243 | "Conditions", 244 | "Interventions", 245 | "Phases", 246 | "Enrollment", 247 | "Study Type", 248 | "Study Design", 249 | "Start Date", 250 | "Completion Date", 251 | ] 252 | 253 | SEARCH_FIELDS_PARAM = [",".join(SEARCH_FIELDS)] 254 | 255 | 256 | class TrialQuery(BaseModel): 257 | """Parameters for querying clinical trial data from ClinicalTrials.gov.""" 258 | 259 | 
conditions: list[str] | None = Field( 260 | default=None, 261 | description="List of condition terms.", 262 | ) 263 | terms: list[str] | None = Field( 264 | default=None, 265 | description="General search terms that don't fit specific categories.", 266 | ) 267 | interventions: list[str] | None = Field( 268 | default=None, 269 | description="Intervention names.", 270 | ) 271 | recruiting_status: RecruitingStatus | None = Field( 272 | default=None, 273 | description="Study recruitment status. Use 'OPEN' for actively recruiting trials, 'CLOSED' for completed/terminated trials, or 'ANY' for all trials. Common aliases like 'recruiting', 'active', 'enrolling' map to 'OPEN'.", 274 | ) 275 | study_type: StudyType | None = Field( 276 | default=None, 277 | description="Type of study.", 278 | ) 279 | nct_ids: list[str] | None = Field( 280 | default=None, 281 | description="Clinical trial NCT IDs", 282 | ) 283 | lat: float | None = Field( 284 | default=None, 285 | description="Latitude for location search. AI agents should geocode city/location names (e.g., 'Cleveland' → 41.4993, -81.6944) before using this parameter.", 286 | ) 287 | long: float | None = Field( 288 | default=None, 289 | description="Longitude for location search. 
AI agents should geocode city/location names (e.g., 'Cleveland' → 41.4993, -81.6944) before using this parameter.", 290 | ) 291 | distance: int | None = Field( 292 | default=None, 293 | description="Distance from lat/long in miles (default: 50 miles if lat/long provided but distance not specified)", 294 | ) 295 | min_date: str | None = Field( 296 | default=None, 297 | description="Minimum date for filtering", 298 | ) 299 | max_date: str | None = Field( 300 | default=None, 301 | description="Maximum date for filtering", 302 | ) 303 | date_field: DateField | None = Field( 304 | default=None, 305 | description="Date field to filter on", 306 | ) 307 | phase: TrialPhase | None = Field( 308 | default=None, 309 | description="Trial phase filter", 310 | ) 311 | age_group: AgeGroup | None = Field( 312 | default=None, 313 | description="Age group filter", 314 | ) 315 | primary_purpose: PrimaryPurpose | None = Field( 316 | default=None, 317 | description="Primary purpose of the trial", 318 | ) 319 | intervention_type: InterventionType | None = Field( 320 | default=None, 321 | description="Type of intervention", 322 | ) 323 | sponsor_type: SponsorType | None = Field( 324 | default=None, 325 | description="Type of sponsor", 326 | ) 327 | study_design: StudyDesign | None = Field( 328 | default=None, 329 | description="Study design", 330 | ) 331 | sort: SortOrder | None = Field( 332 | default=None, 333 | description="Sort order for results", 334 | ) 335 | next_page_hash: str | None = Field( 336 | default=None, 337 | description="Token to retrieve the next page of results", 338 | ) 339 | # New eligibility-focused fields 340 | prior_therapies: list[str] | None = Field( 341 | default=None, 342 | description="Prior therapies to search for in eligibility criteria", 343 | ) 344 | progression_on: list[str] | None = Field( 345 | default=None, 346 | description="Therapies the patient has progressed on", 347 | ) 348 | required_mutations: list[str] | None = Field( 349 | default=None, 350 | 
description="Required mutations in eligibility criteria", 351 | ) 352 | excluded_mutations: list[str] | None = Field( 353 | default=None, 354 | description="Excluded mutations in eligibility criteria", 355 | ) 356 | biomarker_expression: dict[str, str] | None = Field( 357 | default=None, 358 | description="Biomarker expression requirements (e.g., {'PD-L1': '≥50%'})", 359 | ) 360 | line_of_therapy: LineOfTherapy | None = Field( 361 | default=None, 362 | description="Line of therapy filter", 363 | ) 364 | allow_brain_mets: bool | None = Field( 365 | default=None, 366 | description="Whether to allow trials that accept brain metastases", 367 | ) 368 | return_fields: list[str] | None = Field( 369 | default=None, 370 | description="Specific fields to return in the response", 371 | ) 372 | page_size: int | None = Field( 373 | default=None, 374 | description="Number of results per page", 375 | ge=1, 376 | le=1000, 377 | ) 378 | expand_synonyms: bool = Field( 379 | default=True, 380 | description="Expand condition searches with disease synonyms from MyDisease.info", 381 | ) 382 | 383 | @field_validator("recruiting_status", mode="before") 384 | @classmethod 385 | def normalize_recruiting_status(cls, v): 386 | """Normalize common recruiting status aliases to enum values.""" 387 | if isinstance(v, str): 388 | v_lower = v.lower() 389 | # Map common aliases 390 | alias_map = { 391 | "recruiting": "OPEN", 392 | "active": "OPEN", 393 | "enrolling": "OPEN", 394 | "closed": "CLOSED", 395 | "completed": "CLOSED", 396 | "terminated": "CLOSED", 397 | } 398 | return alias_map.get(v_lower, v) 399 | return v 400 | 401 | # Field validators for list fields 402 | @model_validator(mode="before") 403 | def convert_list_fields(cls, data): 404 | """Convert string values to lists for list fields.""" 405 | if isinstance(data, dict): 406 | for field_name in [ 407 | "conditions", 408 | "terms", 409 | "interventions", 410 | "nct_ids", 411 | "prior_therapies", 412 | "progression_on", 413 | 
def _inject_ids(
    params: dict[str, list[str]],
    ids: list[str],
    has_other_filters: bool,
    max_query_id_length: int = 1800,
) -> None:
    """Inject NCT IDs into params using intersection or id-only semantics.

    The IDs are joined into one comma-separated string and stored under
    exactly one key of ``params`` (mutated in place):

    * ``filter.ids`` when other filters are present (intersection path), or
      when the joined string is ``max_query_id_length`` characters or longer.
    * ``query.id`` when the IDs are the only criterion and the joined string
      is shorter than ``max_query_id_length``.

    Args:
        params: The parameter dictionary to modify.
        ids: List of NCT IDs to inject. An empty list leaves ``params``
            untouched instead of writing an empty ID string.
        has_other_filters: Whether other filters are present.
        max_query_id_length: Length of the joined ID string at which the
            pure-ID path switches from ``query.id`` to ``filter.ids``.
    """
    if not ids:
        # Nothing to inject; avoid emitting query.id=[""].
        return

    ids_csv = ",".join(ids)
    if has_other_filters or len(ids_csv) >= max_query_id_length:
        params["filter.ids"] = [ids_csv]
    else:
        params["query.id"] = [ids_csv]


def _build_prior_therapy_essie(therapies: list[str]) -> list[str]:
    """Build Essie fragments for prior therapy search.

    Args:
        therapies: Therapy names; blank or whitespace-only entries are skipped.

    Returns:
        One ``AREA[EligibilityCriteria]`` fragment per remaining therapy,
        pairing the quoted name with (prior OR previous OR received).
    """
    return [
        f'AREA[EligibilityCriteria]("{therapy}" AND (prior OR previous OR received))'
        for therapy in therapies
        if therapy.strip()  # Skip empty strings
    ]


def _build_progression_essie(therapies: list[str]) -> list[str]:
    """Build Essie fragments for progression on therapy search.

    Args:
        therapies: Therapy names; blank or whitespace-only entries are skipped.

    Returns:
        One ``AREA[EligibilityCriteria]`` fragment per remaining therapy,
        pairing the quoted name with (progression OR resistant OR refractory).
    """
    return [
        f'AREA[EligibilityCriteria]("{therapy}" AND (progression OR resistant OR refractory))'
        for therapy in therapies
        if therapy.strip()  # Skip empty strings
    ]
f'AREA[EligibilityCriteria]("{mutation}")' 469 | fragments.append(fragment) 470 | return fragments 471 | 472 | 473 | def _build_excluded_mutations_essie(mutations: list[str]) -> list[str]: 474 | """Build Essie fragments for excluded mutations.""" 475 | fragments = [] 476 | for mutation in mutations: 477 | if mutation.strip(): # Skip empty strings 478 | fragment = f'AREA[EligibilityCriteria](NOT "{mutation}")' 479 | fragments.append(fragment) 480 | return fragments 481 | 482 | 483 | def _build_biomarker_expression_essie(biomarkers: dict[str, str]) -> list[str]: 484 | """Build Essie fragments for biomarker expression requirements.""" 485 | fragments = [] 486 | for marker, expression in biomarkers.items(): 487 | if marker.strip() and expression.strip(): # Skip empty values 488 | fragment = ( 489 | f'AREA[EligibilityCriteria]("{marker}" AND "{expression}")' 490 | ) 491 | fragments.append(fragment) 492 | return fragments 493 | 494 | 495 | def _build_line_of_therapy_essie(line: LineOfTherapy) -> str: 496 | """Build Essie fragment for line of therapy.""" 497 | patterns = LINE_OF_THERAPY_PATTERNS.get(line, []) 498 | if patterns: 499 | # Join all patterns with OR within a single AREA block 500 | pattern_str = " OR ".join(patterns) 501 | return f"AREA[EligibilityCriteria]({pattern_str})" 502 | return "" 503 | 504 | 505 | def _build_brain_mets_essie(allow: bool) -> str: 506 | """Build Essie fragment for brain metastases filter.""" 507 | if allow is False: 508 | return 'AREA[EligibilityCriteria](NOT "brain metastases")' 509 | return "" 510 | 511 | 512 | async def convert_query(query: TrialQuery) -> dict[str, list[str]]: # noqa: C901 513 | """Convert a TrialQuery object into a dict of query params 514 | for the ClinicalTrials.gov API (v2). Each key maps to one or 515 | more strings in a list, consistent with parse_qs outputs. 
516 | """ 517 | # Start with required fields 518 | params: dict[str, list[str]] = { 519 | "format": [DEFAULT_FORMAT], 520 | "markupFormat": [DEFAULT_MARKUP], 521 | } 522 | 523 | # Track whether we have other filters (for NCT ID intersection logic) 524 | has_other_filters = False 525 | 526 | # Handle conditions with optional synonym expansion 527 | if query.conditions: 528 | has_other_filters = True 529 | expanded_conditions = [] 530 | 531 | if query.expand_synonyms: 532 | # Expand each condition with synonyms 533 | client = BioThingsClient() 534 | for condition in query.conditions: 535 | try: 536 | synonyms = await client.get_disease_synonyms(condition) 537 | expanded_conditions.extend(synonyms) 538 | except Exception as e: 539 | logger.warning( 540 | f"Failed to get synonyms for {condition}: {e}" 541 | ) 542 | expanded_conditions.append(condition) 543 | else: 544 | expanded_conditions = query.conditions 545 | 546 | # Remove duplicates while preserving order 547 | seen = set() 548 | unique_conditions = [] 549 | for cond in expanded_conditions: 550 | if cond.lower() not in seen: 551 | seen.add(cond.lower()) 552 | unique_conditions.append(cond) 553 | 554 | if len(unique_conditions) == 1: 555 | params["query.cond"] = [unique_conditions[0]] 556 | else: 557 | # Join multiple terms with OR, wrapped in parentheses 558 | params["query.cond"] = [f"({' OR '.join(unique_conditions)})"] 559 | 560 | # Handle terms and interventions (no synonym expansion) 561 | for key, val in [ 562 | ("query.term", query.terms), 563 | ("query.intr", query.interventions), 564 | ]: 565 | if val: 566 | has_other_filters = True 567 | if len(val) == 1: 568 | params[key] = [val[0]] 569 | else: 570 | # Join multiple terms with OR, wrapped in parentheses 571 | params[key] = [f"({' OR '.join(val)})"] 572 | 573 | # Collect Essie fragments for eligibility criteria 574 | essie_fragments: list[str] = [] 575 | 576 | # Prior therapies 577 | if query.prior_therapies: 578 | has_other_filters = True 579 | 
essie_fragments.extend( 580 | _build_prior_therapy_essie(query.prior_therapies) 581 | ) 582 | 583 | # Progression on therapies 584 | if query.progression_on: 585 | has_other_filters = True 586 | essie_fragments.extend(_build_progression_essie(query.progression_on)) 587 | 588 | # Required mutations 589 | if query.required_mutations: 590 | has_other_filters = True 591 | essie_fragments.extend( 592 | _build_required_mutations_essie(query.required_mutations) 593 | ) 594 | 595 | # Excluded mutations 596 | if query.excluded_mutations: 597 | has_other_filters = True 598 | essie_fragments.extend( 599 | _build_excluded_mutations_essie(query.excluded_mutations) 600 | ) 601 | 602 | # Biomarker expression 603 | if query.biomarker_expression: 604 | has_other_filters = True 605 | essie_fragments.extend( 606 | _build_biomarker_expression_essie(query.biomarker_expression) 607 | ) 608 | 609 | # Line of therapy 610 | if query.line_of_therapy: 611 | has_other_filters = True 612 | line_fragment = _build_line_of_therapy_essie(query.line_of_therapy) 613 | if line_fragment: 614 | essie_fragments.append(line_fragment) 615 | 616 | # Brain metastases filter 617 | if query.allow_brain_mets is not None: 618 | has_other_filters = True 619 | brain_fragment = _build_brain_mets_essie(query.allow_brain_mets) 620 | if brain_fragment: 621 | essie_fragments.append(brain_fragment) 622 | 623 | # Combine all Essie fragments with AND and append to query.term 624 | if essie_fragments: 625 | combined_essie = " AND ".join(essie_fragments) 626 | if "query.term" in params: 627 | # Append to existing terms with AND 628 | params["query.term"][0] = ( 629 | f"{params['query.term'][0]} AND {combined_essie}" 630 | ) 631 | else: 632 | params["query.term"] = [combined_essie] 633 | 634 | # Geospatial 635 | if query.lat is not None and query.long is not None: 636 | has_other_filters = True 637 | geo_val = f"distance({query.lat},{query.long},{query.distance}mi)" 638 | params["filter.geo"] = [geo_val] 639 | 640 | # 
Collect advanced filters in a list 641 | advanced_filters: list[str] = [] 642 | 643 | # Date filter 644 | if query.date_field and (query.min_date or query.max_date): 645 | has_other_filters = True 646 | date_field = CTGOV_DATE_FIELD_MAPPING[query.date_field] 647 | min_val = query.min_date or "MIN" 648 | max_val = query.max_date or "MAX" 649 | advanced_filters.append( 650 | f"AREA[{date_field}]RANGE[{min_val},{max_val}]", 651 | ) 652 | 653 | # Prepare a map of "AREA[...] -> (query_value, mapping_dict)" 654 | advanced_map = { 655 | "DesignPrimaryPurpose": ( 656 | query.primary_purpose, 657 | CTGOV_PRIMARY_PURPOSE_MAPPING, 658 | ), 659 | "StudyType": (query.study_type, CTGOV_STUDY_TYPE_MAPPING), 660 | "InterventionType": ( 661 | query.intervention_type, 662 | CTGOV_INTERVENTION_TYPE_MAPPING, 663 | ), 664 | "SponsorType": (query.sponsor_type, CTGOV_SPONSOR_TYPE_MAPPING), 665 | "StudyDesign": (query.study_design, CTGOV_STUDY_DESIGN_MAPPING), 666 | "Phase": (query.phase, CTGOV_PHASE_MAPPING), 667 | } 668 | 669 | # Append advanced filters 670 | for area, (qval, mapping) in advanced_map.items(): 671 | if qval: 672 | has_other_filters = True 673 | # Check if mapping is a dict before using get method 674 | mapped = ( 675 | mapping.get(qval) 676 | if mapping and isinstance(mapping, dict) 677 | else None 678 | ) 679 | # Use the first mapped value if available, otherwise the literal 680 | value = mapped[0] if mapped else qval 681 | advanced_filters.append(f"AREA[{area}]{value}") 682 | 683 | # Age group 684 | if query.age_group and query.age_group != "ALL": 685 | has_other_filters = True 686 | mapped = CTGOV_AGE_GROUP_MAPPING[query.age_group] 687 | if mapped: 688 | advanced_filters.append(f"AREA[StdAge]{mapped[0]}") 689 | else: 690 | advanced_filters.append(f"AREA[StdAge]{query.age_group}") 691 | 692 | # If we collected any advanced filters, join them with AND 693 | if advanced_filters: 694 | params["filter.advanced"] = [" AND ".join(advanced_filters)] 695 | 696 | # NCT IDs - 
async def search_trials(
    query: TrialQuery,
    output_json: bool = False,
) -> str:
    """Run a ClinicalTrials.gov search and return the outcome as text.

    Args:
        query: Search criteria; turned into request parameters by
            ``convert_query``.
        output_json: When true, the payload is always returned as indented
            JSON instead of markdown.

    Returns:
        The output of ``render.to_markdown`` when the payload is non-empty
        and ``output_json`` is false; otherwise the payload serialized with
        ``json.dumps``. A request error is reported as an
        ``{"error": ...}`` payload rather than raised.
    """
    params = await convert_query(query)

    if query.nct_ids:
        # Debug aid only: report whether the NCT IDs are combined with
        # explicit user filters (defaults such as OPEN status or the ALL age
        # group do not count).
        explicit_filters = (
            query.conditions,
            query.terms,
            query.interventions,
            query.lat is not None and query.long is not None,
            query.date_field and (query.min_date or query.max_date),
            query.primary_purpose,
            query.study_type,
            query.intervention_type,
            query.sponsor_type,
            query.study_design,
            query.phase,
            query.age_group and query.age_group != AgeGroup.ALL,
            query.recruiting_status not in (None, RecruitingStatus.OPEN),
            query.prior_therapies,
            query.progression_on,
            query.required_mutations,
            query.excluded_mutations,
            query.biomarker_expression,
            query.line_of_therapy,
            query.allow_brain_mets is not None,
        )
        if any(explicit_filters):
            mode_message = (
                "Filter mode: intersection (NCT IDs AND other filters)"
            )
        else:
            mode_message = "Filter mode: id-only (NCT IDs only)"
        logger.debug(mode_message)

    response, error = await http_client.request_api(
        url=CLINICAL_TRIALS_BASE_URL,
        request=params,
        method="GET",
        tls_version=TLSVersion.TLSv1_2,
        domain="trial",
    )

    # A failed request replaces the payload with an error record.
    payload = (
        {"error": f"Error {error.code}: {error.message}"}
        if error
        else response
    )

    # JSON is used both on request and as the fallback for an empty payload.
    if output_json or not payload:
        return json.dumps(payload, indent=2)
    return render.to_markdown(payload)
interventions: Annotated[ 817 | list[str] | str | None, 818 | "Intervention names (e.g., 'pembrolizumab') - list or comma-separated string", 819 | ] = None, 820 | recruiting_status: Annotated[ 821 | RecruitingStatus | str | None, 822 | "Study recruitment status (OPEN, CLOSED, ANY)", 823 | ] = None, 824 | study_type: Annotated[StudyType | str | None, "Type of study"] = None, 825 | nct_ids: Annotated[ 826 | list[str] | str | None, 827 | "Clinical trial NCT IDs - list or comma-separated string", 828 | ] = None, 829 | lat: Annotated[ 830 | float | None, 831 | "Latitude for location search. AI agents should geocode city/location names (e.g., 'Cleveland' → 41.4993, -81.6944) before using this parameter.", 832 | ] = None, 833 | long: Annotated[ 834 | float | None, 835 | "Longitude for location search. AI agents should geocode city/location names (e.g., 'Cleveland' → 41.4993, -81.6944) before using this parameter.", 836 | ] = None, 837 | distance: Annotated[ 838 | float | None, 839 | "Distance from lat/long in miles (default: 50 miles if lat/long provided but distance not specified)", 840 | ] = None, 841 | min_date: Annotated[ 842 | str | None, "Minimum date for filtering (YYYY-MM-DD)" 843 | ] = None, 844 | max_date: Annotated[ 845 | str | None, "Maximum date for filtering (YYYY-MM-DD)" 846 | ] = None, 847 | date_field: Annotated[ 848 | DateField | str | None, "Date field to filter on" 849 | ] = None, 850 | phase: Annotated[TrialPhase | str | None, "Trial phase filter"] = None, 851 | age_group: Annotated[AgeGroup | str | None, "Age group filter"] = None, 852 | primary_purpose: Annotated[ 853 | PrimaryPurpose | str | None, "Primary purpose of the trial" 854 | ] = None, 855 | intervention_type: Annotated[ 856 | InterventionType | str | None, "Type of intervention" 857 | ] = None, 858 | sponsor_type: Annotated[ 859 | SponsorType | str | None, "Type of sponsor" 860 | ] = None, 861 | study_design: Annotated[StudyDesign | str | None, "Study design"] = None, 862 | sort: 
Annotated[SortOrder | str | None, "Sort order for results"] = None, 863 | next_page_hash: Annotated[ 864 | str | None, "Token to retrieve the next page of results" 865 | ] = None, 866 | prior_therapies: Annotated[ 867 | list[str] | str | None, 868 | "Prior therapies to search for in eligibility criteria - list or comma-separated string", 869 | ] = None, 870 | progression_on: Annotated[ 871 | list[str] | str | None, 872 | "Therapies the patient has progressed on - list or comma-separated string", 873 | ] = None, 874 | required_mutations: Annotated[ 875 | list[str] | str | None, 876 | "Required mutations in eligibility criteria - list or comma-separated string", 877 | ] = None, 878 | excluded_mutations: Annotated[ 879 | list[str] | str | None, 880 | "Excluded mutations in eligibility criteria - list or comma-separated string", 881 | ] = None, 882 | biomarker_expression: Annotated[ 883 | dict[str, str] | None, 884 | "Biomarker expression requirements (e.g., {'PD-L1': '≥50%'})", 885 | ] = None, 886 | line_of_therapy: Annotated[ 887 | LineOfTherapy | str | None, 888 | "Line of therapy filter (1L, 2L, 3L+)", 889 | ] = None, 890 | allow_brain_mets: Annotated[ 891 | bool | None, 892 | "Whether to allow trials that accept brain metastases", 893 | ] = None, 894 | return_fields: Annotated[ 895 | list[str] | str | None, 896 | "Specific fields to return in the response - list or comma-separated string", 897 | ] = None, 898 | page_size: Annotated[ 899 | int | None, 900 | "Number of results per page (1-1000)", 901 | ] = None, 902 | expand_synonyms: Annotated[ 903 | bool, 904 | "Expand condition searches with disease synonyms from MyDisease.info", 905 | ] = True, 906 | ) -> str: 907 | """ 908 | Searches for clinical trials based on specified criteria. 
909 | 910 | Parameters: 911 | - call_benefit: Define and summarize why this function is being called and the intended benefit 912 | - conditions: Condition terms (e.g., "breast cancer") - list or comma-separated string 913 | - terms: General search terms - list or comma-separated string 914 | - interventions: Intervention names (e.g., "pembrolizumab") - list or comma-separated string 915 | - recruiting_status: Study recruitment status (OPEN, CLOSED, ANY) 916 | - study_type: Type of study 917 | - nct_ids: Clinical trial NCT IDs - list or comma-separated string 918 | - lat: Latitude for location search 919 | - long: Longitude for location search 920 | - distance: Distance from lat/long in miles 921 | - min_date: Minimum date for filtering (YYYY-MM-DD) 922 | - max_date: Maximum date for filtering (YYYY-MM-DD) 923 | - date_field: Date field to filter on 924 | - phase: Trial phase filter 925 | - age_group: Age group filter 926 | - primary_purpose: Primary purpose of the trial 927 | - intervention_type: Type of intervention 928 | - sponsor_type: Type of sponsor 929 | - study_design: Study design 930 | - sort: Sort order for results 931 | - next_page_hash: Token to retrieve the next page of results 932 | - prior_therapies: Prior therapies to search for in eligibility criteria - list or comma-separated string 933 | - progression_on: Therapies the patient has progressed on - list or comma-separated string 934 | - required_mutations: Required mutations in eligibility criteria - list or comma-separated string 935 | - excluded_mutations: Excluded mutations in eligibility criteria - list or comma-separated string 936 | - biomarker_expression: Biomarker expression requirements (e.g., {'PD-L1': '≥50%'}) 937 | - line_of_therapy: Line of therapy filter (1L, 2L, 3L+) 938 | - allow_brain_mets: Whether to allow trials that accept brain metastases 939 | - return_fields: Specific fields to return in the response - list or comma-separated string 940 | - page_size: Number of results per 
async def search_trials_unified(
    query: TrialQuery,
    source: str = "clinicaltrials",
    api_key: str | None = None,
    output_json: bool = False,
) -> str:
    """Dispatch a trial search to ClinicalTrials.gov or the NCI CTS API.

    Args:
        query: TrialQuery object with search parameters.
        source: ``"nci"`` selects the NCI search; any other value
            (default ``"clinicaltrials"``) uses ClinicalTrials.gov.
        api_key: API key forwarded to the NCI search; unused otherwise.
        output_json: Return JSON instead of formatted markdown.

    Returns:
        Formatted markdown or a JSON string with the results.
    """
    if source != "nci":
        # Everything that is not explicitly NCI goes to ClinicalTrials.gov.
        return await search_trials(query, output_json)

    # Imported lazily to avoid a circular import with the NCI module.
    from .nci_search import format_nci_trial_results, search_trials_nci

    nci_results = await search_trials_nci(query, api_key)
    return (
        json.dumps(nci_results, indent=2)
        if output_json
        else format_nci_trial_results(nci_results)
    )
/**
 * Build a JSON-RPC 2.0 error response.
 * @param {number} status - HTTP status code
 * @param {number} code - JSON-RPC error code
 * @param {string} message - JSON-RPC error message
 * @param {string} data - Additional error detail
 * @param {string|number|null} [id=null] - Request id; JSON-RPC 2.0 requires
 *   null when the id could not be determined
 * @returns {Response} - JSON response carrying the same CORS headers as
 *   every other response from this worker
 */
const jsonRpcErrorResponse = (status, code, message, data, id = null) =>
  new Response(
    JSON.stringify({
      jsonrpc: "2.0",
      id,
      error: { code, message, data },
    }),
    {
      status,
      headers: { "Content-Type": "application/json", ...CORS },
    },
  );

/**
 * Process MCP request with proper error handling
 * @param {HonoRequest} request - The incoming Hono request
 * @param {string} remoteUrl - Remote MCP server URL
 * @param {string} sessionId - Validated session ID
 * @returns {Response} - Proxy response, or a JSON-RPC error:
 *   400/-32700 for unparsable JSON, 400/-32600 when `jsonrpc` or `method`
 *   is missing, 500/-32603 for any unexpected failure
 */
const processMcpRequest = async (request, remoteUrl, sessionId) => {
  try {
    // Get body text directly (Hono request doesn't have clone)
    const bodyText = await request.text();

    // Validate it's JSON
    let bodyJson;
    try {
      bodyJson = JSON.parse(bodyText);
    } catch (e) {
      return jsonRpcErrorResponse(400, -32700, "Parse error", "Invalid JSON");
    }

    // Log sanitized request
    log(`MCP POST request body: ${JSON.stringify(sanitizeObject(bodyJson))}`);

    // `null` is valid JSON but has no properties; treat it as an invalid
    // request instead of letting the property access below throw (which
    // used to surface as a 500 Internal error).
    const isObject = bodyJson !== null && typeof bodyJson === "object";

    // Validate required JSONRPC fields
    if (!isObject || !bodyJson.jsonrpc || !bodyJson.method) {
      return jsonRpcErrorResponse(
        400,
        -32600,
        "Invalid Request",
        "Missing required fields: jsonrpc, method",
        isObject ? bodyJson.id ?? null : null,
      );
    }

    // Create a new Request object with the body text since we've already consumed it
    const newRequest = new Request(request.url, {
      method: "POST",
      headers: request.headers,
      body: bodyText,
    });

    // Forward to remote server. `await` keeps a rejected proxy call inside
    // this try/catch so it is reported as a JSON-RPC error.
    return await proxyPost(newRequest, remoteUrl, "/mcp", sessionId);
  } catch (error) {
    log(`Error processing MCP request: ${error}`);
    return jsonRpcErrorResponse(500, -32603, "Internal error", error.message);
  }
};
// JWT validation logic
let jwks = null;

/**
 * Decode the payload of a JWT (no signature check).
 * @param {string} token - Compact JWT (`header.payload.signature`)
 * @returns {object} - Parsed payload, or `{}` when the token cannot be decoded
 */
function decodeJwt(token) {
  try {
    const base64Url = token.split(".")[1];
    const base64 = base64Url.replace(/-/g, "+").replace(/_/g, "/");
    // base64url drops the trailing "=" padding; restore it so decoding does
    // not depend on how lenient the runtime's atob() is.
    const padded = base64.padEnd(
      base64.length + ((4 - (base64.length % 4)) % 4),
      "=",
    );
    // atob() yields one character per byte; decode those bytes as UTF-8 so
    // non-ASCII claim values are not corrupted.
    const bytes = Uint8Array.from(atob(padded), (ch) => ch.charCodeAt(0));
    return JSON.parse(new TextDecoder().decode(bytes));
  } catch {
    return {};
  }
}

let bqTokenPromise = null;

// Last access token obtained by getBQToken: { token, expiresAt } with
// expiresAt in epoch seconds. The resolved value is cached rather than a
// promise.
let bqTokenCache = null;

// Refresh this many seconds before the cached token's expiry.
const BQ_TOKEN_REFRESH_MARGIN_SECONDS = 60;

/**
 * Fetch (and cache) a BigQuery OAuth token.
 * @param {object} env  the Hono env (c.env); reads env.BQ_SA_KEY_JSON
 * @returns {Promise<string>} access token
 * @throws {Error} when the token endpoint responds with a non-2xx status or
 *   the response contains no access_token
 */
async function getBQToken(env) {
  const now = Math.floor(Date.now() / 1000);

  // Reuse the cached token until shortly before it expires.
  if (
    bqTokenCache &&
    now < bqTokenCache.expiresAt - BQ_TOKEN_REFRESH_MARGIN_SECONDS
  ) {
    return bqTokenCache.token;
  }

  // Parse the service-account JSON key
  const key = JSON.parse(env.BQ_SA_KEY_JSON);

  // Convert PEM private key string into a CryptoKey
  const privateKey = await importPKCS8(key.private_key, "RS256");

  // Build the JWT assertion
  const assertion = await new SignJWT({
    iss: key.client_email,
    scope: "https://www.googleapis.com/auth/bigquery.insertdata",
    aud: "https://oauth2.googleapis.com/token",
    iat: now,
    exp: now + 3600,
  })
    .setProtectedHeader({ alg: "RS256", kid: key.private_key_id })
    .sign(privateKey);

  // Exchange the assertion for an access token
  const resp = await fetch("https://oauth2.googleapis.com/token", {
    method: "POST",
    headers: { "Content-Type": "application/x-www-form-urlencoded" },
    body: new URLSearchParams({
      grant_type: "urn:ietf:params:oauth:grant-type:jwt-bearer",
      assertion,
    }),
  });

  // Fail here instead of handing "undefined" to the caller as a bearer token.
  if (!resp.ok) {
    const errorText = await resp.text();
    throw new Error(
      `BigQuery token request failed: ${resp.status} - ${errorText}`,
    );
  }

  const { access_token, expires_in } = await resp.json();
  if (!access_token) {
    throw new Error("BigQuery token response did not include an access_token");
  }

  // Fall back to the assertion lifetime when expires_in is absent.
  const lifetime = Number.isFinite(expires_in) ? expires_in : 3600;
  bqTokenCache = { token: access_token, expiresAt: now + lifetime };
  return access_token;
}
/**
 * Stream one row into the BigQuery table named by the environment.
 * @param {object} env  the Hono env (c.env); reads BQ_PROJECT_ID,
 *   BQ_DATASET and BQ_TABLE, and is passed to getBQToken
 * @param {object} row  { timestamp, userEmail, query }
 * @throws {Error} on a non-2xx response or when the response lists
 *   insertErrors; the failure is logged with console.error, then rethrown
 */
async function insertEvent(env, row) {
  try {
    const accessToken = await getBQToken(env);

    const endpoint = `https://bigquery.googleapis.com/bigquery/v2/projects/${env.BQ_PROJECT_ID}/datasets/${env.BQ_DATASET}/tables/${env.BQ_TABLE}/insertAll`;

    const reply = await fetch(endpoint, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${accessToken}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ rows: [{ json: row }] }),
    });

    if (!reply.ok) {
      const detail = await reply.text();
      throw new Error(`BigQuery API error: ${reply.status} - ${detail}`);
    }

    // A 2xx reply can still report per-row failures in insertErrors.
    const outcome = await reply.json();
    if (outcome.insertErrors) {
      throw new Error(
        `BigQuery insert errors: ${JSON.stringify(outcome.insertErrors)}`,
      );
    }
  } catch (failure) {
    console.error(`[BigQuery] Insert failed:`, failure.message);
    throw failure;
  }
}
/**
 * Function to process the authentication callback.
 *
 * Authenticates the Stytch OAuth token, stores a short-lived authorization
 * code in KV, and redirects to the client's redirect_uri with `code` and
 * `state` query parameters.
 *
 * @param {object} c - Hono context; reads c.env.STYTCH_PROJECT_ID,
 *   c.env.STYTCH_SECRET and c.env.OAUTH_KV
 * @param {string} token - OAuth token received on the callback
 * @param {string} state - State value to echo back to the client
 * @param {object} oauthRequest - Stored OAuth request
 *   ({ client_id, redirect_uri, code_challenge })
 * @returns {Promise<Response>} 302 redirect on success; 400 when the request
 *   has no redirect_uri; 401 when Stytch rejects the token; 500 on any
 *   unexpected error
 */
async function processAuthCallback(c, token, state, oauthRequest) {
  log("Authenticating with Stytch API...");

  try {
    // Check the redirect target first: without it the flow cannot complete,
    // so there is no point calling Stytch or storing an authorization code.
    if (!oauthRequest || !oauthRequest.redirect_uri) {
      log("Missing redirect_uri in OAuth request");
      return new Response("Missing redirect URI in OAuth request", {
        status: 400,
        headers: CORS,
      });
    }

    // The callback only reaches this function with an OAuth token.
    const endpoint = "oauth/authenticate";
    const payload = { token: token };

    // Never write the raw token to the log; sanitizeObject redacts it.
    log(
      `Using Stytch endpoint: ${endpoint} with payload: ${JSON.stringify(
        sanitizeObject(payload),
      )}`,
    );

    const authenticateResp = await fetch(getStytchUrl(c.env, endpoint), {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Basic ${btoa(
          `${c.env.STYTCH_PROJECT_ID}:${c.env.STYTCH_SECRET}`,
        )}`,
      },
      body: JSON.stringify(payload),
    });

    log(`Stytch auth response status: ${authenticateResp.status}`);

    if (!authenticateResp.ok) {
      const errorText = await authenticateResp.text();
      log(`Stytch authentication error: ${errorText}`);
      return new Response(`Authentication failed: ${errorText}`, {
        status: 401,
        headers: CORS,
      });
    }

    const authData = await authenticateResp.json();
    log(
      `Auth data received: ${JSON.stringify({
        user_id: authData.user_id || "unknown",
        has_user: !!authData.user,
      })}`,
    );

    // Generate an authorization code. It is a bearer credential until it is
    // exchanged, so its value is deliberately kept out of the logs.
    const authCode = crypto.randomUUID();
    log("Generated authorization code");

    // Store the user info with the authorization code
    const authCodeData = {
      sub: authData.user_id,
      email: authData.user?.emails?.[0]?.email,
      code_challenge: oauthRequest.code_challenge,
      client_id: oauthRequest.client_id,
      redirect_uri: oauthRequest.redirect_uri,
    };

    log(`Storing auth code data: ${JSON.stringify(authCodeData)}`);
    await c.env.OAUTH_KV.put(
      `auth_code:${authCode}`,
      JSON.stringify(authCodeData),
      { expirationTtl: 300 },
    );
    log("Successfully stored auth code data");

    log(`Using redirect URI from request: ${oauthRequest.redirect_uri}`);
    log(`Using state for redirect: ${state}`);

    const redirectURL = new URL(oauthRequest.redirect_uri);
    redirectURL.searchParams.set("code", authCode);
    redirectURL.searchParams.set("state", state);

    // Log the destination without its query string (which holds the code).
    log(`Redirecting to: ${redirectURL.origin}${redirectURL.pathname}`);
    return Response.redirect(redirectURL.toString(), 302);
  } catch (error) {
    console.error(`Error in processAuthCallback: ${error}`);
    return new Response(`Authentication processing error: ${error.message}`, {
      status: 500,
      headers: CORS,
    });
  }
}
"application/json", ...CORS }, 480 | }); 481 | } 482 | } else if (events.length > 1) { 483 | // Multiple SSE events - return as SSE stream 484 | log("Returning multiple SSE messages as stream"); 485 | return new Response(responseText, { 486 | status: response.status, 487 | headers: { 488 | "Content-Type": "text/event-stream", 489 | "Cache-Control": "no-cache", 490 | ...CORS, 491 | }, 492 | }); 493 | } 494 | } 495 | 496 | // Not SSE format - return as-is 497 | return new Response(responseText, { 498 | status: response.status, 499 | headers: { "Content-Type": "application/json", ...CORS }, 500 | }); 501 | } catch (error) { 502 | log(`Proxy fetch error: ${error.message}`); 503 | return new Response(JSON.stringify({ error: error.message }), { 504 | status: 502, 505 | headers: { "Content-Type": "application/json", ...CORS }, 506 | }); 507 | } 508 | } 509 | 510 | // Middleware for bearer token authentication (MCP server) 511 | const stytchBearerTokenAuthMiddleware = async (c, next) => { 512 | const authHeader = c.req.header("Authorization"); 513 | log(`Auth header present: ${!!authHeader}`); 514 | 515 | if (!authHeader || !authHeader.startsWith("Bearer ")) { 516 | return new Response("Missing or invalid access token", { 517 | status: 401, 518 | headers: CORS, 519 | }); 520 | } 521 | 522 | const accessToken = authHeader.substring(7); 523 | log(`Attempting to validate token: ${accessToken.substring(0, 10)}...`); 524 | 525 | try { 526 | // Add more detailed validation logging 527 | log("Starting token validation..."); 528 | const verifyResult = await validateToken(accessToken, c.env); 529 | log(`Token validation successful! 
${verifyResult.payload.sub}`); 530 | 531 | // Store user info in a variable that the handler can access 532 | c.env.userID = verifyResult.payload.sub; 533 | c.env.accessToken = accessToken; 534 | } catch (error) { 535 | log(`Token validation detailed error: ${error.code} ${error.message}`); 536 | return new Response(`Unauthorized: Invalid token - ${error.message}`, { 537 | status: 401, 538 | headers: CORS, 539 | }); 540 | } 541 | 542 | return next(); 543 | }; 544 | 545 | // Create our main app with Hono 546 | const app = new Hono(); 547 | 548 | // Configure the routes 549 | app 550 | // Error handler 551 | .onError((err, c) => { 552 | console.error(`Application error: ${err}`); 553 | return new Response("Server error", { 554 | status: 500, 555 | headers: CORS, 556 | }); 557 | }) 558 | 559 | // Handle CORS preflight requests 560 | .options("*", (c) => new Response(null, { status: 204, headers: CORS })) 561 | 562 | // Status endpoints 563 | .get("/status", (c) => { 564 | const REMOTE_MCP_SERVER_URL = 565 | c.env.REMOTE_MCP_SERVER_URL || "http://localhost:8000"; 566 | return new Response( 567 | JSON.stringify({ 568 | worker: "BioMCP-OAuth", 569 | remote: REMOTE_MCP_SERVER_URL, 570 | forwardPath: "/messages", 571 | resourceEndpoint: null, 572 | debug: DEBUG, 573 | }), 574 | { 575 | status: 200, 576 | headers: { "Content-Type": "application/json", ...CORS }, 577 | }, 578 | ); 579 | }) 580 | 581 | .get("/debug", (c) => { 582 | const REMOTE_MCP_SERVER_URL = 583 | c.env.REMOTE_MCP_SERVER_URL || "http://localhost:8000"; 584 | return new Response( 585 | JSON.stringify({ 586 | worker: "BioMCP-OAuth", 587 | remote: REMOTE_MCP_SERVER_URL, 588 | forwardPath: "/messages", 589 | resourceEndpoint: null, 590 | debug: DEBUG, 591 | }), 592 | { 593 | status: 200, 594 | headers: { "Content-Type": "application/json", ...CORS }, 595 | }, 596 | ); 597 | }) 598 | 599 | // OAuth server metadata endpoint 600 | .get("/.well-known/oauth-authorization-server", (c) => { 601 | const url = new 
URL(c.req.url); 602 | return new Response( 603 | JSON.stringify({ 604 | issuer: c.env.STYTCH_PROJECT_ID, 605 | authorization_endpoint: `${url.origin}/authorize`, 606 | token_endpoint: `${url.origin}/token`, 607 | registration_endpoint: getStytchUrl(c.env, "oauth2/register", true), 608 | scopes_supported: ["openid", "profile", "email", "offline_access"], 609 | response_types_supported: ["code"], 610 | response_modes_supported: ["query"], 611 | grant_types_supported: ["authorization_code", "refresh_token"], 612 | token_endpoint_auth_methods_supported: ["none"], 613 | code_challenge_methods_supported: ["S256"], 614 | }), 615 | { 616 | status: 200, 617 | headers: { "Content-Type": "application/json", ...CORS }, 618 | }, 619 | ); 620 | }) 621 | 622 | // OAuth redirect endpoint (redirects to Stytch's hosted UI) 623 | .get("/authorize", async (c) => { 624 | try { 625 | log("Authorize endpoint hit"); 626 | const url = new URL(c.req.url); 627 | log(`Full authorize URL: ${url.toString()}`); 628 | log( 629 | `Search params: ${JSON.stringify( 630 | Object.fromEntries(url.searchParams), 631 | )}`, 632 | ); 633 | 634 | const redirectUrl = new URL("/callback", url.origin).toString(); 635 | log(`Redirect URL: ${redirectUrl}`); 636 | 637 | // Extract and forward OAuth parameters 638 | const clientId = url.searchParams.get("client_id") || "unknown_client"; 639 | const redirectUri = url.searchParams.get("redirect_uri"); 640 | let state = url.searchParams.get("state"); 641 | const codeChallenge = url.searchParams.get("code_challenge"); 642 | const codeChallengeMethod = url.searchParams.get("code_challenge_method"); 643 | 644 | // Generate a state if one isn't provided 645 | if (!state) { 646 | state = crypto.randomUUID(); 647 | log(`Generated state parameter: ${state}`); 648 | } 649 | 650 | log("OAuth params:", { 651 | clientId, 652 | redirectUri, 653 | state, 654 | codeChallenge: !!codeChallenge, 655 | codeChallengeMethod, 656 | }); 657 | 658 | // Store OAuth request parameters in KV 
for use during callback 659 | const oauthRequestData = { 660 | client_id: clientId, 661 | redirect_uri: redirectUri, 662 | code_challenge: codeChallenge, 663 | code_challenge_method: codeChallengeMethod, 664 | original_state: state, // Store the original state explicitly 665 | }; 666 | 667 | // Also store a mapping from any state value to the original state 668 | // This is crucial for handling cases where Stytch modifies the state 669 | try { 670 | // Use a consistent key based on timestamp for lookups 671 | const timestamp = Date.now().toString(); 672 | await c.env.OAUTH_KV.put(`state_timestamp:${timestamp}`, state, { 673 | expirationTtl: 600, 674 | }); 675 | 676 | log(`Saving OAuth request data: ${JSON.stringify(oauthRequestData)}`); 677 | await c.env.OAUTH_KV.put( 678 | `oauth_request:${state}`, 679 | JSON.stringify(oauthRequestData), 680 | { expirationTtl: 600 }, 681 | ); 682 | 683 | // Also store timestamp for this state to allow fallback lookup 684 | await c.env.OAUTH_KV.put(`timestamp_for_state:${state}`, timestamp, { 685 | expirationTtl: 600, 686 | }); 687 | 688 | log("Successfully stored OAuth request data in KV"); 689 | } catch (kvError) { 690 | log(`Error storing OAuth data in KV: ${kvError}`); 691 | return new Response("Internal server error storing OAuth data", { 692 | status: 500, 693 | headers: CORS, 694 | }); 695 | } 696 | 697 | // Redirect to Stytch's hosted login UI 698 | const stytchLoginUrl = `${ 699 | c.env.STYTCH_OAUTH_URL || 700 | "https://test.stytch.com/v1/public/oauth/google/start" 701 | }?public_token=${ 702 | c.env.STYTCH_PUBLIC_TOKEN 703 | }&login_redirect_url=${encodeURIComponent( 704 | redirectUrl, 705 | )}&state=${encodeURIComponent(state)}`; 706 | 707 | log(`Redirecting to Stytch: ${stytchLoginUrl}`); 708 | return Response.redirect(stytchLoginUrl, 302); 709 | } catch (error) { 710 | console.error(`Error in authorize endpoint: ${error}`); 711 | return new Response(`Authorization error: ${error.message}`, { 712 | status: 500, 713 | 
headers: CORS, 714 | }); 715 | } 716 | }) 717 | 718 | // OAuth callback endpoint 719 | .get("/callback", async (c) => { 720 | try { 721 | log("Callback hit, logging all details"); 722 | const url = new URL(c.req.url); 723 | log(`Full URL: ${url.toString()}`); 724 | log( 725 | `Search params: ${JSON.stringify( 726 | Object.fromEntries(url.searchParams), 727 | )}`, 728 | ); 729 | 730 | // Stytch's callback format - get the token 731 | const token = 732 | url.searchParams.get("stytch_token_type") === "oauth" 733 | ? url.searchParams.get("token") 734 | : url.searchParams.get("token") || 735 | url.searchParams.get("stytch_token"); 736 | 737 | log(`Token type: ${url.searchParams.get("stytch_token_type")}`); 738 | log(`Token found: ${!!token}`); 739 | 740 | // We need a token to proceed 741 | if (!token) { 742 | log("Invalid callback - missing token"); 743 | return new Response("Invalid callback request: missing token", { 744 | status: 400, 745 | headers: CORS, 746 | }); 747 | } 748 | 749 | // Look for the most recent OAuth request 750 | let mostRecentState = null; 751 | let mostRecentTimestamp = null; 752 | try { 753 | // Find the most recent timestamp 754 | const timestamps = await c.env.OAUTH_KV.list({ 755 | prefix: "state_timestamp:", 756 | }); 757 | if (timestamps.keys.length > 0) { 758 | // Sort timestamps in descending order (most recent first) 759 | const sortedTimestamps = timestamps.keys.sort((a, b) => { 760 | const timeA = parseInt(a.name.replace("state_timestamp:", "")); 761 | const timeB = parseInt(b.name.replace("state_timestamp:", "")); 762 | return timeB - timeA; // descending order 763 | }); 764 | 765 | mostRecentTimestamp = sortedTimestamps[0].name; 766 | // Get the state associated with this timestamp 767 | mostRecentState = await c.env.OAUTH_KV.get(mostRecentTimestamp); 768 | log(`Found most recent state: ${mostRecentState}`); 769 | } 770 | } catch (error) { 771 | log(`Error finding recent state: ${error}`); 772 | } 773 | 774 | // If we have a state 
from the most recent OAuth request, use it 775 | let oauthRequest = null; 776 | let state = mostRecentState; 777 | 778 | if (state) { 779 | try { 780 | const oauthRequestJson = await c.env.OAUTH_KV.get( 781 | `oauth_request:${state}`, 782 | ); 783 | if (oauthRequestJson) { 784 | oauthRequest = JSON.parse(oauthRequestJson); 785 | log(`Found OAuth request for state: ${state}`); 786 | } 787 | } catch (error) { 788 | log(`Error getting OAuth request: ${error}`); 789 | } 790 | } 791 | 792 | // If we couldn't find the OAuth request, try other alternatives 793 | if (!oauthRequest) { 794 | log( 795 | "No OAuth request found for most recent state, checking other requests", 796 | ); 797 | 798 | try { 799 | // List all OAuth requests and use the most recent one 800 | const requests = await c.env.OAUTH_KV.list({ 801 | prefix: "oauth_request:", 802 | }); 803 | if (requests.keys.length > 0) { 804 | const oauthRequestJson = await c.env.OAUTH_KV.get( 805 | requests.keys[0].name, 806 | ); 807 | if (oauthRequestJson) { 808 | oauthRequest = JSON.parse(oauthRequestJson); 809 | // Extract the state from the key 810 | state = requests.keys[0].name.replace("oauth_request:", ""); 811 | log(`Using most recent OAuth request with state: ${state}`); 812 | } 813 | } 814 | } catch (error) { 815 | log(`Error finding alternative OAuth request: ${error}`); 816 | } 817 | } 818 | 819 | // Final fallback - use hardcoded values for Claude 820 | if (!oauthRequest) { 821 | log("No OAuth request found, using fallback values"); 822 | oauthRequest = { 823 | client_id: "biomcp-client", 824 | redirect_uri: "https://claude.ai/api/mcp/auth_callback", 825 | code_challenge: null, 826 | original_state: state || "unknown_state", 827 | }; 828 | } 829 | 830 | // If we have an original_state in the OAuth request, use that 831 | if (oauthRequest.original_state) { 832 | state = oauthRequest.original_state; 833 | log(`Using original state from OAuth request: ${state}`); 834 | } 835 | 836 | // Proceed with authentication 
837 | return processAuthCallback(c, token, state, oauthRequest); 838 | } catch (error) { 839 | console.error(`Callback error: ${error}`); 840 | return new Response( 841 | `Server error during authentication: ${error.message}`, 842 | { 843 | status: 500, 844 | headers: CORS, 845 | }, 846 | ); 847 | } 848 | }) 849 | 850 | // Token exchange endpoint 851 | .post("/token", async (c) => { 852 | try { 853 | log("Token endpoint hit"); 854 | const formData = await c.req.formData(); 855 | const grantType = formData.get("grant_type"); 856 | const code = formData.get("code"); 857 | const redirectUri = formData.get("redirect_uri"); 858 | const clientId = formData.get("client_id"); 859 | const codeVerifier = formData.get("code_verifier"); 860 | 861 | log("Token request params:", { 862 | grantType, 863 | code: !!code, 864 | redirectUri, 865 | clientId, 866 | codeVerifier: !!codeVerifier, 867 | }); 868 | 869 | if ( 870 | grantType !== "authorization_code" || 871 | !code || 872 | !redirectUri || 873 | !clientId || 874 | !codeVerifier 875 | ) { 876 | log("Invalid token request parameters"); 877 | return new Response(JSON.stringify({ error: "invalid_request" }), { 878 | status: 400, 879 | headers: { "Content-Type": "application/json", ...CORS }, 880 | }); 881 | } 882 | 883 | // Retrieve the stored authorization code data 884 | let authCodeJson; 885 | try { 886 | authCodeJson = await c.env.OAUTH_KV.get(`auth_code:${code}`); 887 | log(`Auth code data retrieved: ${!!authCodeJson}`); 888 | } catch (kvError) { 889 | log(`Error retrieving auth code data: ${kvError}`); 890 | return new Response(JSON.stringify({ error: "server_error" }), { 891 | status: 500, 892 | headers: { "Content-Type": "application/json", ...CORS }, 893 | }); 894 | } 895 | 896 | if (!authCodeJson) { 897 | log("Invalid or expired authorization code"); 898 | return new Response(JSON.stringify({ error: "invalid_grant" }), { 899 | status: 400, 900 | headers: { "Content-Type": "application/json", ...CORS }, 901 | }); 902 | } 
903 | 904 | let authCodeData; 905 | try { 906 | authCodeData = JSON.parse(authCodeJson); 907 | log(`Auth code data parsed: ${JSON.stringify(authCodeData)}`); 908 | } catch (parseError) { 909 | log(`Error parsing auth code data: ${parseError}`); 910 | return new Response(JSON.stringify({ error: "server_error" }), { 911 | status: 500, 912 | headers: { "Content-Type": "application/json", ...CORS }, 913 | }); 914 | } 915 | 916 | // Verify the code_verifier against the stored code_challenge 917 | if (authCodeData.code_challenge) { 918 | log("Verifying PKCE code challenge"); 919 | const encoder = new TextEncoder(); 920 | const data = encoder.encode(codeVerifier); 921 | const digest = await crypto.subtle.digest("SHA-256", data); 922 | 923 | // Convert to base64url encoding 924 | const base64Digest = btoa( 925 | String.fromCharCode(...new Uint8Array(digest)), 926 | ) 927 | .replace(/\+/g, "-") 928 | .replace(/\//g, "_") 929 | .replace(/=/g, ""); 930 | 931 | log("Code challenge comparison:", { 932 | stored: authCodeData.code_challenge, 933 | computed: base64Digest, 934 | match: base64Digest === authCodeData.code_challenge, 935 | }); 936 | 937 | if (base64Digest !== authCodeData.code_challenge) { 938 | log("PKCE verification failed"); 939 | return new Response(JSON.stringify({ error: "invalid_grant" }), { 940 | status: 400, 941 | headers: { "Content-Type": "application/json", ...CORS }, 942 | }); 943 | } 944 | } 945 | 946 | // Delete the used authorization code 947 | try { 948 | await c.env.OAUTH_KV.delete(`auth_code:${code}`); 949 | log("Used authorization code deleted"); 950 | } catch (deleteError) { 951 | log(`Error deleting used auth code: ${deleteError}`); 952 | // Continue anyway since this isn't critical 953 | } 954 | 955 | // Generate JWT access token instead of UUID 956 | const encoder = new TextEncoder(); 957 | const secret = encoder.encode( 958 | c.env.JWT_SECRET || "default-jwt-secret-key", 959 | ); 960 | 961 | // Create JWT payload 962 | const accessTokenPayload 
= { 963 | sub: authCodeData.sub, 964 | email: authCodeData.email, 965 | client_id: clientId, 966 | scope: "openid profile email", 967 | iss: c.env.STYTCH_PROJECT_ID, 968 | aud: clientId, 969 | exp: Math.floor(Date.now() / 1000) + 3600, // 1 hour expiry 970 | iat: Math.floor(Date.now() / 1000), 971 | }; 972 | 973 | // Sign JWT 974 | const accessToken = await new SignJWT(accessTokenPayload) 975 | .setProtectedHeader({ alg: "HS256" }) 976 | .setIssuedAt() 977 | .setExpirationTime("1h") 978 | .sign(secret); 979 | 980 | log(`Generated JWT access token: ${accessToken.substring(0, 20)}...`); 981 | 982 | // Generate refresh token (still using UUID for simplicity) 983 | const refreshToken = crypto.randomUUID(); 984 | 985 | // Store token information - use a hash of the token as the key to avoid length limits 986 | const tokenHash = await crypto.subtle.digest( 987 | "SHA-256", 988 | encoder.encode(accessToken), 989 | ); 990 | const tokenKey = btoa(String.fromCharCode(...new Uint8Array(tokenHash))) 991 | .replace(/\+/g, "-") 992 | .replace(/\//g, "_") 993 | .replace(/=/g, "") 994 | .substring(0, 32); // Use first 32 chars of hash 995 | 996 | try { 997 | log(`Storing access token with key: access_token:${tokenKey}`); 998 | await c.env.OAUTH_KV.put( 999 | `access_token:${tokenKey}`, 1000 | JSON.stringify({ 1001 | token: accessToken, 1002 | hash: tokenKey, 1003 | ...accessTokenPayload, 1004 | }), 1005 | { expirationTtl: 3600 }, 1006 | ); 1007 | 1008 | // Also store a mapping from the full token to the hash for validation 1009 | await c.env.OAUTH_KV.put(`token_hash:${tokenKey}`, accessToken, { 1010 | expirationTtl: 3600, 1011 | }); 1012 | 1013 | log("Storing refresh token"); 1014 | await c.env.OAUTH_KV.put( 1015 | `refresh_token:${refreshToken}`, 1016 | JSON.stringify({ 1017 | sub: authCodeData.sub, 1018 | client_id: clientId, 1019 | }), 1020 | { expirationTtl: 30 * 24 * 60 * 60 }, 1021 | ); 1022 | 1023 | log("Token data successfully stored"); 1024 | } catch (storeError) { 1025 | 
log(`Error storing token data: ${storeError}`); 1026 | return new Response(JSON.stringify({ error: "server_error" }), { 1027 | status: 500, 1028 | headers: { "Content-Type": "application/json", ...CORS }, 1029 | }); 1030 | } 1031 | 1032 | // Return the tokens 1033 | const tokenResponse = { 1034 | access_token: accessToken, 1035 | token_type: "Bearer", 1036 | expires_in: 3600, 1037 | refresh_token: refreshToken, 1038 | scope: "openid profile email", 1039 | }; 1040 | 1041 | log("Returning token response"); 1042 | return new Response(JSON.stringify(tokenResponse), { 1043 | status: 200, 1044 | headers: { "Content-Type": "application/json", ...CORS }, 1045 | }); 1046 | } catch (error) { 1047 | console.error(`Token endpoint error: ${error}`); 1048 | return new Response(JSON.stringify({ error: "server_error" }), { 1049 | status: 500, 1050 | headers: { "Content-Type": "application/json", ...CORS }, 1051 | }); 1052 | } 1053 | }) 1054 | 1055 | // Messages endpoint for all paths that start with /messages 1056 | .post("/messages*", async (c) => { 1057 | log("All messages endpoints hit"); 1058 | const REMOTE_MCP_SERVER_URL = 1059 | c.env.REMOTE_MCP_SERVER_URL || "http://localhost:8000"; 1060 | const sid = new URL(c.req.url).searchParams.get("session_id"); 1061 | 1062 | if (!sid) { 1063 | return new Response("Missing session_id", { 1064 | status: 400, 1065 | headers: CORS, 1066 | }); 1067 | } 1068 | 1069 | // Read the body 1070 | const body = await c.req.text(); 1071 | const authHeader = c.req.header("Authorization") || ""; 1072 | let userEmail = "unknown"; 1073 | 1074 | if (authHeader.startsWith("Bearer ")) { 1075 | const token = authHeader.slice(7); 1076 | const claims = decodeJwt(token); 1077 | userEmail = 1078 | claims.email || claims.preferred_username || claims.sub || "unknown"; 1079 | } 1080 | 1081 | log(`[Proxy] user=${userEmail} query=${body}`); 1082 | 1083 | let sendToBQ = false; 1084 | let parsed; 1085 | let domain = null; 1086 | let toolName = null; 1087 | let 
sanitizedBody = body; // Default to original body 1088 | 1089 | try { 1090 | parsed = JSON.parse(body); 1091 | const args = parsed.params?.arguments; 1092 | 1093 | // Check if this is a think tool call 1094 | toolName = parsed.params?.name; 1095 | if (toolName === "think") { 1096 | sendToBQ = false; 1097 | log("[BigQuery] Skipping think tool call"); 1098 | } else if (args && Object.keys(args).length > 0) { 1099 | // Extract domain from the arguments (for search/fetch tools) 1100 | domain = args.domain || null; 1101 | 1102 | // Skip logging if domain is "thinking" or "think" 1103 | if (domain === "thinking" || domain === "think") { 1104 | sendToBQ = false; 1105 | } else { 1106 | sendToBQ = true; 1107 | } 1108 | 1109 | // Sanitize sensitive data before logging to BigQuery 1110 | if (sendToBQ) { 1111 | // Use the comprehensive sanitization function 1112 | const sanitized = sanitizeObject(parsed); 1113 | sanitizedBody = JSON.stringify(sanitized); 1114 | 1115 | // Log if we actually sanitized something 1116 | if (JSON.stringify(parsed) !== sanitizedBody) { 1117 | log( 1118 | "[BigQuery] Sanitized sensitive fields from query before logging", 1119 | ); 1120 | } 1121 | } 1122 | } 1123 | } catch (e) { 1124 | console.log("[BigQuery] skipping insert—cannot parse JSON body", e); 1125 | } 1126 | 1127 | const { BQ_SA_KEY_JSON, BQ_PROJECT_ID, BQ_DATASET, BQ_TABLE } = c.env; 1128 | 1129 | if (sendToBQ && BQ_SA_KEY_JSON && BQ_PROJECT_ID && BQ_DATASET && BQ_TABLE) { 1130 | const eventRow = { 1131 | timestamp: new Date().toISOString(), 1132 | userEmail, 1133 | query: sanitizedBody, // Use sanitized body instead of original 1134 | }; 1135 | // fire & forget 1136 | c.executionCtx.waitUntil( 1137 | insertEvent(c.env, eventRow).catch((error) => { 1138 | console.error("[BigQuery] Insert failed:", error); 1139 | }), 1140 | ); 1141 | } else { 1142 | const missing = [ 1143 | !sendToBQ 1144 | ? toolName === "think" 1145 | ? 
"think tool" 1146 | : domain === "thinking" || domain === "think" 1147 | ? `domain is ${domain}` 1148 | : "no query args" 1149 | : null, 1150 | !BQ_SA_KEY_JSON && "BQ_SA_KEY_JSON", 1151 | !BQ_PROJECT_ID && "BQ_PROJECT_ID", 1152 | !BQ_DATASET && "BQ_DATASET", 1153 | !BQ_TABLE && "BQ_TABLE", 1154 | ].filter(Boolean); 1155 | console.log("[BigQuery] skipping insert—", missing.join(", ")); 1156 | } 1157 | 1158 | // Make a new Request object with the body we've already read 1159 | const newRequest = new Request(c.req.url, { 1160 | method: c.req.method, 1161 | headers: c.req.headers, 1162 | body: body, 1163 | }); 1164 | 1165 | // Forward everything to proxyPost like the auth-less version does 1166 | return proxyPost(newRequest, REMOTE_MCP_SERVER_URL, "/messages", sid); 1167 | }); 1168 | 1169 | // MCP endpoint (Streamable HTTP transport) - separate chain to avoid wildcard route issues 1170 | app 1171 | .on("HEAD", "/mcp", stytchBearerTokenAuthMiddleware, (c) => { 1172 | log("MCP HEAD endpoint hit - checking endpoint availability"); 1173 | // For Streamable HTTP, HEAD /mcp should return 204 to indicate the endpoint exists 1174 | return new Response(null, { 1175 | status: 204, 1176 | headers: CORS, 1177 | }); 1178 | }) 1179 | .get("/mcp", stytchBearerTokenAuthMiddleware, async (c) => { 1180 | log("MCP GET endpoint hit - Streamable HTTP transport"); 1181 | const REMOTE_MCP_SERVER_URL = 1182 | c.env.REMOTE_MCP_SERVER_URL || "http://localhost:8000"; 1183 | 1184 | // For Streamable HTTP, GET /mcp with session_id initiates event stream 1185 | const sessionId = new URL(c.req.url).searchParams.get("session_id"); 1186 | 1187 | if (!sessionId) { 1188 | // Without session_id, just return 204 to indicate endpoint exists 1189 | return new Response(null, { 1190 | status: 204, 1191 | headers: CORS, 1192 | }); 1193 | } 1194 | 1195 | // Proxy the GET request to the backend's /mcp endpoint for streaming 1196 | const targetUrl = `${REMOTE_MCP_SERVER_URL}/mcp?session_id=${encodeURIComponent( 
1197 | sessionId, 1198 | )}`; 1199 | log(`Proxying GET /mcp to: ${targetUrl}`); 1200 | 1201 | try { 1202 | const response = await fetch(targetUrl, { 1203 | method: "GET", 1204 | headers: { 1205 | Accept: "text/event-stream", 1206 | "User-Agent": "Claude/1.0", 1207 | }, 1208 | }); 1209 | 1210 | // For SSE, we need to stream the response 1211 | if (response.headers.get("content-type")?.includes("text/event-stream")) { 1212 | log("Streaming SSE response from backend"); 1213 | // Return the streamed response directly 1214 | return new Response(response.body, { 1215 | status: response.status, 1216 | headers: { 1217 | "Content-Type": "text/event-stream", 1218 | "Cache-Control": "no-cache", 1219 | Connection: "keep-alive", 1220 | ...CORS, 1221 | }, 1222 | }); 1223 | } else { 1224 | // Non-streaming response 1225 | const responseText = await response.text(); 1226 | return new Response(responseText, { 1227 | status: response.status, 1228 | headers: { 1229 | "Content-Type": 1230 | response.headers.get("content-type") || "text/plain", 1231 | ...CORS, 1232 | }, 1233 | }); 1234 | } 1235 | } catch (error) { 1236 | log(`Error proxying GET /mcp: ${error}`); 1237 | return new Response(`Proxy error: ${error.message}`, { 1238 | status: 502, 1239 | headers: CORS, 1240 | }); 1241 | } 1242 | }) 1243 | .post("/mcp", stytchBearerTokenAuthMiddleware, async (c) => { 1244 | log("MCP POST endpoint hit - Streamable HTTP transport"); 1245 | const REMOTE_MCP_SERVER_URL = 1246 | c.env.REMOTE_MCP_SERVER_URL || "http://localhost:8000"; 1247 | 1248 | // Extract and validate session ID 1249 | const rawSessionId = new URL(c.req.url).searchParams.get("session_id"); 1250 | const sessionId = validateSessionId(rawSessionId); 1251 | 1252 | // Get the request body 1253 | const bodyText = await c.req.text(); 1254 | log(`MCP POST request body: ${bodyText.substring(0, 200)}`); 1255 | 1256 | // Create new request for proxying 1257 | const newRequest = new Request(c.req.url, { 1258 | method: "POST", 1259 | 
headers: c.req.headers, 1260 | body: bodyText, 1261 | }); 1262 | 1263 | // Use the updated proxyPost function that handles SSE properly 1264 | return proxyPost(newRequest, REMOTE_MCP_SERVER_URL, "/mcp", sessionId); 1265 | }) 1266 | 1267 | // Default 404 response 1268 | .all( 1269 | "*", 1270 | () => 1271 | new Response("Not Found", { 1272 | status: 404, 1273 | headers: CORS, 1274 | }), 1275 | ); 1276 | 1277 | // Export the app as the main worker fetch handler 1278 | export default { 1279 | fetch: (request, env, ctx) => { 1280 | // Initialize DEBUG from environment variables 1281 | DEBUG = env.DEBUG === "true" || env.DEBUG === true; 1282 | 1283 | return app.fetch(request, env, ctx); 1284 | }, 1285 | }; 1286 | ``` -------------------------------------------------------------------------------- /src/biomcp/individual_tools.py: -------------------------------------------------------------------------------- ```python 1 | """Individual MCP tools for specific biomedical search and fetch operations. 2 | 3 | This module provides the original 9 individual tools that offer direct access 4 | to specific search and fetch functionality, complementing the unified tools. 
5 | """ 6 | 7 | import logging 8 | from typing import Annotated, Literal 9 | 10 | from pydantic import Field 11 | 12 | from biomcp.articles.fetch import _article_details 13 | from biomcp.articles.search import _article_searcher 14 | from biomcp.cbioportal_helper import ( 15 | get_cbioportal_summary_for_genes, 16 | get_variant_cbioportal_summary, 17 | ) 18 | from biomcp.core import ensure_list, mcp_app 19 | from biomcp.diseases.getter import _disease_details 20 | from biomcp.drugs.getter import _drug_details 21 | from biomcp.genes.getter import _gene_details 22 | from biomcp.metrics import track_performance 23 | from biomcp.trials.getter import ( 24 | _trial_locations, 25 | _trial_outcomes, 26 | _trial_protocol, 27 | _trial_references, 28 | ) 29 | from biomcp.trials.search import _trial_searcher 30 | from biomcp.variants.getter import _variant_details 31 | from biomcp.variants.search import _variant_searcher 32 | 33 | logger = logging.getLogger(__name__) 34 | 35 | 36 | # Article Tools 37 | @mcp_app.tool() 38 | @track_performance("biomcp.article_searcher") 39 | async def article_searcher( 40 | chemicals: Annotated[ 41 | list[str] | str | None, 42 | Field(description="Chemical/drug names to search for"), 43 | ] = None, 44 | diseases: Annotated[ 45 | list[str] | str | None, 46 | Field(description="Disease names to search for"), 47 | ] = None, 48 | genes: Annotated[ 49 | list[str] | str | None, 50 | Field(description="Gene symbols to search for"), 51 | ] = None, 52 | keywords: Annotated[ 53 | list[str] | str | None, 54 | Field(description="Free-text keywords to search for"), 55 | ] = None, 56 | variants: Annotated[ 57 | list[str] | str | None, 58 | Field( 59 | description="Variant strings to search for (e.g., 'V600E', 'p.D277Y')" 60 | ), 61 | ] = None, 62 | include_preprints: Annotated[ 63 | bool, 64 | Field(description="Include preprints from bioRxiv/medRxiv"), 65 | ] = True, 66 | include_cbioportal: Annotated[ 67 | bool, 68 | Field( 69 | description="Include 
cBioPortal cancer genomics summary when searching by gene" 70 | ), 71 | ] = True, 72 | page: Annotated[ 73 | int, 74 | Field(description="Page number (1-based)", ge=1), 75 | ] = 1, 76 | page_size: Annotated[ 77 | int, 78 | Field(description="Results per page", ge=1, le=100), 79 | ] = 10, 80 | ) -> str: 81 | """Search PubMed/PubTator3 for research articles and preprints. 82 | 83 | ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy! 84 | 85 | Use this tool to find scientific literature ABOUT genes, variants, diseases, or chemicals. 86 | Results include articles from PubMed and optionally preprints from bioRxiv/medRxiv. 87 | 88 | Important: This searches for ARTICLES ABOUT these topics, not database records. 89 | For genetic variant database records, use variant_searcher instead. 90 | 91 | Example usage: 92 | - Find articles about BRAF mutations in melanoma 93 | - Search for papers on a specific drug's effects 94 | - Locate research on gene-disease associations 95 | """ 96 | # Convert single values to lists 97 | chemicals = ensure_list(chemicals) if chemicals else None 98 | diseases = ensure_list(diseases) if diseases else None 99 | genes = ensure_list(genes) if genes else None 100 | keywords = ensure_list(keywords) if keywords else None 101 | variants = ensure_list(variants) if variants else None 102 | 103 | result = await _article_searcher( 104 | call_benefit="Direct article search for specific biomedical topics", 105 | chemicals=chemicals, 106 | diseases=diseases, 107 | genes=genes, 108 | keywords=keywords, 109 | variants=variants, 110 | include_preprints=include_preprints, 111 | include_cbioportal=include_cbioportal, 112 | ) 113 | 114 | # Add cBioPortal summary if searching by gene 115 | if include_cbioportal and genes: 116 | request_params = { 117 | "keywords": keywords, 118 | "diseases": diseases, 119 | "chemicals": chemicals, 120 | "variants": variants, 121 | } 122 | cbioportal_summary = await get_cbioportal_summary_for_genes( 123 | 
genes, request_params 124 | ) 125 | if cbioportal_summary: 126 | result = cbioportal_summary + "\n\n---\n\n" + result 127 | 128 | return result 129 | 130 | 131 | @mcp_app.tool() 132 | @track_performance("biomcp.article_getter") 133 | async def article_getter( 134 | pmid: Annotated[ 135 | str, 136 | Field( 137 | description="Article identifier - either a PubMed ID (e.g., '38768446' or 'PMC11193658') or DOI (e.g., '10.1101/2024.01.20.23288905')" 138 | ), 139 | ], 140 | ) -> str: 141 | """Fetch detailed information for a specific article. 142 | 143 | Retrieves the full abstract and available text for an article by its identifier. 144 | Supports: 145 | - PubMed IDs (PMID) for published articles 146 | - PMC IDs for articles in PubMed Central 147 | - DOIs for preprints from Europe PMC 148 | 149 | Returns formatted text including: 150 | - Title 151 | - Abstract 152 | - Full text (when available from PMC for published articles) 153 | - Source information (PubMed or Europe PMC) 154 | """ 155 | return await _article_details( 156 | call_benefit="Fetch detailed article information for analysis", 157 | pmid=pmid, 158 | ) 159 | 160 | 161 | # Trial Tools 162 | @mcp_app.tool() 163 | @track_performance("biomcp.trial_searcher") 164 | async def trial_searcher( 165 | conditions: Annotated[ 166 | list[str] | str | None, 167 | Field(description="Medical conditions to search for"), 168 | ] = None, 169 | interventions: Annotated[ 170 | list[str] | str | None, 171 | Field(description="Treatment interventions to search for"), 172 | ] = None, 173 | other_terms: Annotated[ 174 | list[str] | str | None, 175 | Field(description="Additional search terms"), 176 | ] = None, 177 | recruiting_status: Annotated[ 178 | Literal["OPEN", "CLOSED", "ANY"] | None, 179 | Field(description="Filter by recruiting status"), 180 | ] = None, 181 | phase: Annotated[ 182 | Literal[ 183 | "EARLY_PHASE1", 184 | "PHASE1", 185 | "PHASE2", 186 | "PHASE3", 187 | "PHASE4", 188 | "NOT_APPLICABLE", 189 | ] 190 | | None, 191 
| Field(description="Filter by clinical trial phase"), 192 | ] = None, 193 | location: Annotated[ 194 | str | None, 195 | Field(description="Location term for geographic filtering"), 196 | ] = None, 197 | lat: Annotated[ 198 | float | None, 199 | Field( 200 | description="Latitude for location-based search. AI agents should geocode city names before using.", 201 | ge=-90, 202 | le=90, 203 | ), 204 | ] = None, 205 | long: Annotated[ 206 | float | None, 207 | Field( 208 | description="Longitude for location-based search. AI agents should geocode city names before using.", 209 | ge=-180, 210 | le=180, 211 | ), 212 | ] = None, 213 | distance: Annotated[ 214 | int | None, 215 | Field( 216 | description="Distance in miles from lat/long coordinates", 217 | ge=1, 218 | ), 219 | ] = None, 220 | age_group: Annotated[ 221 | Literal["CHILD", "ADULT", "OLDER_ADULT"] | None, 222 | Field(description="Filter by age group"), 223 | ] = None, 224 | sex: Annotated[ 225 | Literal["FEMALE", "MALE", "ALL"] | None, 226 | Field(description="Filter by biological sex"), 227 | ] = None, 228 | healthy_volunteers: Annotated[ 229 | Literal["YES", "NO"] | None, 230 | Field(description="Filter by healthy volunteer eligibility"), 231 | ] = None, 232 | study_type: Annotated[ 233 | Literal["INTERVENTIONAL", "OBSERVATIONAL", "EXPANDED_ACCESS"] | None, 234 | Field(description="Filter by study type"), 235 | ] = None, 236 | funder_type: Annotated[ 237 | Literal["NIH", "OTHER_GOV", "INDUSTRY", "OTHER"] | None, 238 | Field(description="Filter by funding source"), 239 | ] = None, 240 | page: Annotated[ 241 | int, 242 | Field(description="Page number (1-based)", ge=1), 243 | ] = 1, 244 | page_size: Annotated[ 245 | int, 246 | Field(description="Results per page", ge=1, le=100), 247 | ] = 10, 248 | ) -> str: 249 | """Search ClinicalTrials.gov for clinical studies. 250 | 251 | ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy! 
252 | 253 | Comprehensive search tool for finding clinical trials based on multiple criteria. 254 | Supports filtering by conditions, interventions, location, phase, and eligibility. 255 | 256 | Location search notes: 257 | - Use either location term OR lat/long coordinates, not both 258 | - For city-based searches, AI agents should geocode to lat/long first 259 | - Distance parameter only works with lat/long coordinates 260 | 261 | Returns a formatted list of matching trials with key details. 262 | """ 263 | # Validate location parameters 264 | if location and (lat is not None or long is not None): 265 | raise ValueError( 266 | "Use either location term OR lat/long coordinates, not both" 267 | ) 268 | 269 | if (lat is not None and long is None) or ( 270 | lat is None and long is not None 271 | ): 272 | raise ValueError( 273 | "Both latitude and longitude must be provided together" 274 | ) 275 | 276 | if distance is not None and (lat is None or long is None): 277 | raise ValueError( 278 | "Distance parameter requires both latitude and longitude" 279 | ) 280 | 281 | # Convert single values to lists 282 | conditions = ensure_list(conditions) if conditions else None 283 | interventions = ensure_list(interventions) if interventions else None 284 | other_terms = ensure_list(other_terms) if other_terms else None 285 | 286 | return await _trial_searcher( 287 | call_benefit="Direct clinical trial search for specific criteria", 288 | conditions=conditions, 289 | interventions=interventions, 290 | terms=other_terms, 291 | recruiting_status=recruiting_status, 292 | phase=phase, 293 | lat=lat, 294 | long=long, 295 | distance=distance, 296 | age_group=age_group, 297 | study_type=study_type, 298 | page_size=page_size, 299 | ) 300 | 301 | 302 | @mcp_app.tool() 303 | @track_performance("biomcp.trial_getter") 304 | async def trial_getter( 305 | nct_id: Annotated[ 306 | str, 307 | Field(description="NCT ID (e.g., 'NCT06524388')"), 308 | ], 309 | ) -> str: 310 | """Fetch 
comprehensive details for a specific clinical trial. 311 | 312 | Retrieves all available information for a clinical trial by its NCT ID. 313 | This includes protocol details, locations, outcomes, and references. 314 | 315 | For specific sections only, use the specialized getter tools: 316 | - trial_protocol_getter: Core protocol information 317 | - trial_locations_getter: Site locations and contacts 318 | - trial_outcomes_getter: Primary/secondary outcomes and results 319 | - trial_references_getter: Publications and references 320 | """ 321 | results = [] 322 | 323 | # Get all sections 324 | protocol = await _trial_protocol( 325 | call_benefit="Fetch comprehensive trial details for analysis", 326 | nct_id=nct_id, 327 | ) 328 | if protocol: 329 | results.append(protocol) 330 | 331 | locations = await _trial_locations( 332 | call_benefit="Fetch comprehensive trial details for analysis", 333 | nct_id=nct_id, 334 | ) 335 | if locations: 336 | results.append(locations) 337 | 338 | outcomes = await _trial_outcomes( 339 | call_benefit="Fetch comprehensive trial details for analysis", 340 | nct_id=nct_id, 341 | ) 342 | if outcomes: 343 | results.append(outcomes) 344 | 345 | references = await _trial_references( 346 | call_benefit="Fetch comprehensive trial details for analysis", 347 | nct_id=nct_id, 348 | ) 349 | if references: 350 | results.append(references) 351 | 352 | return ( 353 | "\n\n".join(results) 354 | if results 355 | else f"No data found for trial {nct_id}" 356 | ) 357 | 358 | 359 | @mcp_app.tool() 360 | @track_performance("biomcp.trial_protocol_getter") 361 | async def trial_protocol_getter( 362 | nct_id: Annotated[ 363 | str, 364 | Field(description="NCT ID (e.g., 'NCT06524388')"), 365 | ], 366 | ) -> str: 367 | """Fetch core protocol information for a clinical trial. 
@mcp_app.tool()
@track_performance("biomcp.trial_references_getter")
async def trial_references_getter(
    nct_id: Annotated[
        str,
        Field(description="NCT ID (e.g., 'NCT06524388')"),
    ],
) -> str:
    """Fetch publications and references for a clinical trial.

    Retrieves all linked publications including:
    - Published results papers
    - Background literature
    - Protocol publications
    - Related analyses

    Includes PubMed IDs when available for easy cross-referencing.
    """
    # Thin wrapper: delegate to the references fetcher and hand its result
    # back unchanged.
    purpose = "Fetch trial publications and references for evidence review"
    references_report = await _trial_references(
        call_benefit=purpose,
        nct_id=nct_id,
    )
    return references_report
# Variant Tools
@mcp_app.tool()
@track_performance("biomcp.variant_searcher")
async def variant_searcher(
    gene: Annotated[
        str | None,
        Field(description="Gene symbol (e.g., 'BRAF', 'TP53')"),
    ] = None,
    hgvs: Annotated[
        str | None,
        Field(description="HGVS notation (genomic, coding, or protein)"),
    ] = None,
    hgvsp: Annotated[
        str | None,
        Field(description="Protein change in HGVS format (e.g., 'p.V600E')"),
    ] = None,
    hgvsc: Annotated[
        str | None,
        Field(description="Coding sequence change (e.g., 'c.1799T>A')"),
    ] = None,
    rsid: Annotated[
        str | None,
        Field(description="dbSNP rsID (e.g., 'rs113488022')"),
    ] = None,
    region: Annotated[
        str | None,
        Field(description="Genomic region (e.g., 'chr7:140753336-140753337')"),
    ] = None,
    significance: Annotated[
        Literal[
            "pathogenic",
            "likely_pathogenic",
            "uncertain_significance",
            "likely_benign",
            "benign",
            "conflicting",
        ]
        | None,
        Field(description="Clinical significance filter"),
    ] = None,
    frequency_min: Annotated[
        float | None,
        Field(description="Minimum allele frequency", ge=0, le=1),
    ] = None,
    frequency_max: Annotated[
        float | None,
        Field(description="Maximum allele frequency", ge=0, le=1),
    ] = None,
    consequence: Annotated[
        str | None,
        Field(description="Variant consequence (e.g., 'missense_variant')"),
    ] = None,
    cadd_score_min: Annotated[
        float | None,
        Field(description="Minimum CADD score for pathogenicity"),
    ] = None,
    sift_prediction: Annotated[
        Literal["deleterious", "tolerated"] | None,
        Field(description="SIFT functional prediction"),
    ] = None,
    polyphen_prediction: Annotated[
        Literal["probably_damaging", "possibly_damaging", "benign"] | None,
        Field(description="PolyPhen-2 functional prediction"),
    ] = None,
    include_cbioportal: Annotated[
        bool,
        Field(
            description="Include cBioPortal cancer genomics summary when searching by gene"
        ),
    ] = True,
    page: Annotated[
        int,
        Field(description="Page number (1-based)", ge=1),
    ] = 1,
    page_size: Annotated[
        int,
        Field(description="Results per page", ge=1, le=100),
    ] = 10,
) -> str:
    """Search MyVariant.info for genetic variant DATABASE RECORDS.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy!

    Important: This searches for variant DATABASE RECORDS (frequency, significance, etc.),
    NOT articles about variants. For articles about variants, use article_searcher.

    Searches the comprehensive variant database including:
    - Population frequencies (gnomAD, 1000 Genomes, etc.)
    - Clinical significance (ClinVar)
    - Functional predictions (SIFT, PolyPhen, CADD)
    - Gene and protein consequences

    Search by various identifiers or filter by clinical/functional criteria.
    """
    # NOTE(review): `hgvs` and `consequence` are accepted above but are not
    # forwarded to `_variant_searcher`, so supplying them currently has no
    # effect on the search. Confirm whether the backend can take them.

    # Translate the 1-based page into a record offset; page 1 (or anything
    # lower) starts at record 0.
    records_to_skip = max(page - 1, 0) * page_size

    # Map this tool's public parameter names onto the backend's keyword names.
    search_filters = {
        "gene": gene,
        "hgvsp": hgvsp,
        "hgvsc": hgvsc,
        "rsid": rsid,
        "region": region,
        "significance": significance,
        "min_frequency": frequency_min,
        "max_frequency": frequency_max,
        "cadd": cadd_score_min,
        "sift": sift_prediction,
        "polyphen": polyphen_prediction,
        "size": page_size,
        "offset": records_to_skip,
    }
    variant_report = await _variant_searcher(
        call_benefit="Direct variant database search for genetic analysis",
        **search_filters,
    )

    # The cBioPortal summary only applies to gene-based searches that opted in.
    if not (include_cbioportal and gene):
        return variant_report

    cancer_summary = await get_variant_cbioportal_summary(gene)
    if not cancer_summary:
        return variant_report

    # Summary goes first, separated from the variant records by a blank line.
    return "\n\n".join((cancer_summary, variant_report))
@mcp_app.tool()
@track_performance("biomcp.alphagenome_predictor")
async def alphagenome_predictor(
    chromosome: Annotated[
        str,
        Field(description="Chromosome (e.g., 'chr7', 'chrX')"),
    ],
    position: Annotated[
        int,
        Field(description="1-based genomic position of the variant"),
    ],
    reference: Annotated[
        str,
        Field(description="Reference allele(s) (e.g., 'A', 'ATG')"),
    ],
    alternate: Annotated[
        str,
        Field(description="Alternate allele(s) (e.g., 'T', 'A')"),
    ],
    interval_size: Annotated[
        int,
        Field(
            description="Size of genomic interval to analyze in bp (max 1,000,000)",
            ge=2000,
            le=1000000,
        ),
    ] = 131072,
    tissue_types: Annotated[
        list[str] | str | None,
        Field(
            description="UBERON ontology terms for tissue-specific predictions (e.g., 'UBERON:0002367' for external ear)"
        ),
    ] = None,
    significance_threshold: Annotated[
        float,
        Field(
            description="Threshold for significant log2 fold changes (default: 0.5)",
            ge=0.0,
            le=5.0,
        ),
    ] = 0.5,
    api_key: Annotated[
        str | None,
        Field(
            description="AlphaGenome API key. Check if user mentioned 'my AlphaGenome API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one."
        ),
    ] = None,
) -> str:
    """Predict variant effects on gene regulation using Google DeepMind's AlphaGenome.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your analysis strategy!

    AlphaGenome provides state-of-the-art predictions for how genetic variants
    affect gene regulation, including:
    - Gene expression changes (RNA-seq)
    - Chromatin accessibility impacts (ATAC-seq, DNase-seq)
    - Splicing alterations
    - Promoter activity changes (CAGE)

    This tool requires:
    1. AlphaGenome to be installed (see error message for instructions)
    2. An API key from https://deepmind.google.com/science/alphagenome

    API Key Options:
    - Provide directly via the api_key parameter
    - Or set ALPHAGENOME_API_KEY environment variable

    Example usage:
    - Predict regulatory effects of BRAF V600E mutation: chr7:140753336 A>T
    - Assess non-coding variant impact on gene expression
    - Evaluate promoter variants in specific tissues

    Note: This is an optional tool that enhances variant interpretation
    with AI predictions. Standard annotations remain available via variant_getter.
    """
    # Imported at call time rather than at module import time.
    from biomcp.variants.alphagenome import predict_variant_effects

    # A lone tissue string is normalised to a list; any falsy value (None,
    # "", []) is passed on as None.
    if tissue_types:
        tissues = ensure_list(tissue_types)
    else:
        tissues = None

    prediction_report = await predict_variant_effects(
        chromosome=chromosome,
        position=position,
        reference=reference,
        alternate=alternate,
        interval_size=interval_size,
        tissue_types=tissues,
        significance_threshold=significance_threshold,
        api_key=api_key,
    )
    return prediction_report
# Disease Tools
@mcp_app.tool()
@track_performance("biomcp.disease_getter")
async def disease_getter(
    disease_id_or_name: Annotated[
        str,
        Field(
            description="Disease name (e.g., 'melanoma', 'lung cancer') or ontology ID (e.g., 'MONDO:0016575', 'DOID:1909')"
        ),
    ],
) -> str:
    """Get detailed disease information from MyDisease.info.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to understand your research goal!

    Provides real-time disease annotations including:
    - Official disease name and definition
    - Disease synonyms and alternative names
    - Ontology mappings (MONDO, DOID, OMIM, etc.)
    - Associated phenotypes
    - Links to disease databases

    This tool fetches CURRENT disease information from MyDisease.info, ensuring
    you always have the latest ontology mappings and definitions.

    Example usage:
    - Get the definition of GIST (Gastrointestinal Stromal Tumor)
    - Look up synonyms for melanoma
    - Find the MONDO ID for a disease by name

    Note: For clinical trials about diseases, use trial_searcher. For articles about diseases, use article_searcher.
    """
    # Thin wrapper: the lookup itself happens in `_disease_details`, whose
    # result is returned unchanged.
    purpose = "Get up-to-date disease definitions and ontology information"
    disease_report = await _disease_details(
        call_benefit=purpose,
        disease_id_or_name=disease_id_or_name,
    )
    return disease_report
# NCI-Specific Tools
@mcp_app.tool()
@track_performance("biomcp.nci_organization_searcher")
async def nci_organization_searcher(
    name: Annotated[
        str | None,
        Field(
            description="Organization name to search for (partial match supported)"
        ),
    ] = None,
    organization_type: Annotated[
        str | None,
        Field(
            description="Type of organization (e.g., 'Academic', 'Industry', 'Government')"
        ),
    ] = None,
    city: Annotated[
        str | None,
        Field(
            description="City where organization is located. IMPORTANT: Always use with state to avoid API errors"
        ),
    ] = None,
    state: Annotated[
        str | None,
        Field(
            description="State/province code (e.g., 'CA', 'NY'). IMPORTANT: Always use with city to avoid API errors"
        ),
    ] = None,
    api_key: Annotated[
        str | None,
        Field(
            description="NCI API key. Check if user mentioned 'my NCI API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one."
        ),
    ] = None,
    page: Annotated[
        int,
        Field(description="Page number (1-based)", ge=1),
    ] = 1,
    page_size: Annotated[
        int,
        Field(description="Results per page", ge=1, le=100),
    ] = 20,
) -> str:
    """Search for organizations in the NCI Clinical Trials database.

    Searches the National Cancer Institute's curated database of organizations
    involved in cancer clinical trials. This includes:
    - Academic medical centers
    - Community hospitals
    - Industry sponsors
    - Government facilities
    - Research networks

    Requires NCI API key from: https://clinicaltrialsapi.cancer.gov/

    IMPORTANT: To avoid API errors, always use city AND state together when searching by location.
    The NCI API has limitations on broad searches.

    Example usage:
    - Find cancer centers in Boston, MA (city AND state)
    - Search for "MD Anderson" in Houston, TX
    - List academic organizations in Cleveland, OH
    - Search by organization name alone (without location)
    """
    from biomcp.integrations.cts_api import CTSAPIError
    from biomcp.organizations import search_organizations
    from biomcp.organizations.search import format_organization_results

    # Substrings of the error text that identify the backend's bucket-limit
    # failure (i.e. the query matched too much to aggregate).
    bucket_limit_markers = ("too_many_buckets_exception", "75000")

    try:
        matches = await search_organizations(
            name=name,
            org_type=organization_type,
            city=city,
            state=state,
            page_size=page_size,
            page=page,
            api_key=api_key,
        )
        return format_organization_results(matches)
    except CTSAPIError as api_error:
        detail = str(api_error)

        # Any other API failure propagates to the caller untouched.
        if not any(marker in detail for marker in bucket_limit_markers):
            raise

        # Bucket-limit failures are turned into guidance text instead of an
        # exception so the caller can retry with a narrower query.
        guidance_parts = [
            "⚠️ **Search Too Broad**\n\n",
            "The NCI API cannot process this search because it returns too many results.\n\n",
            "**To fix this, try:**\n",
            "1. **Always use city AND state together** for location searches\n",
            "2. Add an organization name (even partial) to narrow results\n",
            "3. Use multiple filters together (name + location, or name + type)\n\n",
            "**Examples that work:**\n",
            "- `nci_organization_searcher(city='Cleveland', state='OH')`\n",
            "- `nci_organization_searcher(name='Cleveland Clinic')`\n",
            "- `nci_organization_searcher(name='cancer', city='Boston', state='MA')`\n",
            "- `nci_organization_searcher(organization_type='Academic', city='Houston', state='TX')`",
        ]
        return "".join(guidance_parts)
930 | 931 | Retrieves comprehensive details about an organization including: 932 | - Full name and aliases 933 | - Address and contact information 934 | - Organization type and role 935 | - Associated clinical trials 936 | - Research focus areas 937 | 938 | Requires NCI API key from: https://clinicaltrialsapi.cancer.gov/ 939 | 940 | Example usage: 941 | - Get details about a specific cancer center 942 | - Find contact information for trial sponsors 943 | - View organization's trial portfolio 944 | """ 945 | from biomcp.organizations import get_organization 946 | from biomcp.organizations.getter import format_organization_details 947 | 948 | org_data = await get_organization( 949 | org_id=organization_id, 950 | api_key=api_key, 951 | ) 952 | 953 | return format_organization_details(org_data) 954 | 955 | 956 | @mcp_app.tool() 957 | @track_performance("biomcp.nci_intervention_searcher") 958 | async def nci_intervention_searcher( 959 | name: Annotated[ 960 | str | None, 961 | Field( 962 | description="Intervention name to search for (e.g., 'pembrolizumab')" 963 | ), 964 | ] = None, 965 | intervention_type: Annotated[ 966 | str | None, 967 | Field( 968 | description="Type of intervention: 'Drug', 'Device', 'Biological', 'Procedure', 'Radiation', 'Behavioral', 'Genetic', 'Dietary', 'Other'" 969 | ), 970 | ] = None, 971 | synonyms: Annotated[ 972 | bool, 973 | Field(description="Include synonym matches in search"), 974 | ] = True, 975 | api_key: Annotated[ 976 | str | None, 977 | Field( 978 | description="NCI API key. Check if user mentioned 'my NCI API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one." 979 | ), 980 | ] = None, 981 | page: Annotated[ 982 | int, 983 | Field(description="Page number (1-based)", ge=1), 984 | ] = 1, 985 | page_size: Annotated[ 986 | int | None, 987 | Field( 988 | description="Results per page. 
If not specified, returns all matching results.", 989 | ge=1, 990 | le=100, 991 | ), 992 | ] = None, 993 | ) -> str: 994 | """Search for interventions in the NCI Clinical Trials database. 995 | 996 | Searches the National Cancer Institute's curated database of interventions 997 | used in cancer clinical trials. This includes: 998 | - FDA-approved drugs 999 | - Investigational agents 1000 | - Medical devices 1001 | - Surgical procedures 1002 | - Radiation therapies 1003 | - Behavioral interventions 1004 | 1005 | Requires NCI API key from: https://clinicaltrialsapi.cancer.gov/ 1006 | 1007 | Example usage: 1008 | - Find all trials using pembrolizumab 1009 | - Search for CAR-T cell therapies 1010 | - List radiation therapy protocols 1011 | - Find dietary interventions 1012 | """ 1013 | from biomcp.integrations.cts_api import CTSAPIError 1014 | from biomcp.interventions import search_interventions 1015 | from biomcp.interventions.search import format_intervention_results 1016 | 1017 | try: 1018 | results = await search_interventions( 1019 | name=name, 1020 | intervention_type=intervention_type, 1021 | synonyms=synonyms, 1022 | page_size=page_size, 1023 | page=page, 1024 | api_key=api_key, 1025 | ) 1026 | return format_intervention_results(results) 1027 | except CTSAPIError as e: 1028 | # Check for Elasticsearch bucket limit error 1029 | error_msg = str(e) 1030 | if "too_many_buckets_exception" in error_msg or "75000" in error_msg: 1031 | return ( 1032 | "⚠️ **Search Too Broad**\n\n" 1033 | "The NCI API cannot process this search because it returns too many results.\n\n" 1034 | "**Try adding more specific filters:**\n" 1035 | "- Add an intervention name (even partial)\n" 1036 | "- Specify an intervention type (e.g., 'Drug', 'Device')\n" 1037 | "- Search for a specific drug or therapy name\n\n" 1038 | "**Example searches that work better:**\n" 1039 | "- Search for 'pembrolizumab' instead of all drugs\n" 1040 | "- Search for 'CAR-T' to find CAR-T cell therapies\n" 1041 | 
# Biomarker Tools
@mcp_app.tool()
@track_performance("biomcp.nci_biomarker_searcher")
async def nci_biomarker_searcher(
    name: Annotated[
        str | None,
        Field(
            description="Biomarker name to search for (e.g., 'PD-L1', 'EGFR mutation')"
        ),
    ] = None,
    biomarker_type: Annotated[
        str | None,
        Field(description="Type of biomarker ('reference_gene' or 'branch')"),
    ] = None,
    api_key: Annotated[
        str | None,
        Field(
            description="NCI API key. Check if user mentioned 'my NCI API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one."
        ),
    ] = None,
    page: Annotated[
        int,
        Field(description="Page number (1-based)", ge=1),
    ] = 1,
    page_size: Annotated[
        int,
        Field(description="Results per page", ge=1, le=100),
    ] = 20,
) -> str:
    """Search for biomarkers in the NCI Clinical Trials database.

    Searches for biomarkers used in clinical trial eligibility criteria.
    This is essential for precision medicine trials that select patients
    based on specific biomarker characteristics.

    Biomarker examples:
    - Gene mutations (e.g., BRAF V600E, EGFR T790M)
    - Protein expression (e.g., PD-L1 ≥ 50%, HER2 positive)
    - Gene fusions (e.g., ALK fusion, ROS1 fusion)
    - Other molecular markers (e.g., MSI-H, TMB-high)

    Requires NCI API key from: https://clinicaltrialsapi.cancer.gov/

    Note: Biomarker data availability may be limited in CTRP.
    Results focus on biomarkers used in trial eligibility criteria.

    Example usage:
    - Search for PD-L1 expression biomarkers
    - Find trials requiring EGFR mutations
    - Look up biomarkers tested by NGS
    - Search for HER2 expression markers
    """
    # Returns: the text produced by `format_biomarker_results`, or a
    #   "Search Too Broad" guidance message when the API reports its
    #   bucket-limit failure.
    # Raises: CTSAPIError for any other API failure (re-raised unchanged).
    from biomcp.biomarkers import search_biomarkers
    from biomcp.biomarkers.search import format_biomarker_results
    from biomcp.integrations.cts_api import CTSAPIError

    try:
        results = await search_biomarkers(
            name=name,
            biomarker_type=biomarker_type,
            page_size=page_size,
            page=page,
            api_key=api_key,
        )
        return format_biomarker_results(results)
    except CTSAPIError as e:
        # Check for Elasticsearch bucket limit error
        error_msg = str(e)
        if "too_many_buckets_exception" in error_msg or "75000" in error_msg:
            # The guidance must only mention arguments this tool accepts
            # (`name`, `biomarker_type`) and the documented type values
            # ('reference_gene', 'branch'). The earlier text suggested
            # `gene=` / `assay_type=` keywords and a 'mutation' type, which
            # a caller following the advice could not actually pass.
            return (
                "⚠️ **Search Too Broad**\n\n"
                "The NCI API cannot process this search because it returns too many results.\n\n"
                "**Try adding more specific filters:**\n"
                "- Add a biomarker name (even partial)\n"
                "- Specify a biomarker type ('reference_gene' or 'branch')\n"
                "- Combine a name with a biomarker type to narrow results further\n\n"
                "**Example searches that work:**\n"
                "- `nci_biomarker_searcher(name='PD-L1')`\n"
                "- `nci_biomarker_searcher(name='EGFR', biomarker_type='reference_gene')`\n"
                "- `nci_biomarker_searcher(name='HER2')`"
            )
        raise
None, 1190 | Field( 1191 | description="NCI API key. Check if user mentioned 'my NCI API key is...' in their message. If not provided here and no env var is set, user will be prompted to provide one." 1192 | ), 1193 | ] = None, 1194 | page: Annotated[ 1195 | int, 1196 | Field(description="Page number (1-based)", ge=1), 1197 | ] = 1, 1198 | page_size: Annotated[ 1199 | int, 1200 | Field(description="Results per page", ge=1, le=100), 1201 | ] = 20, 1202 | ) -> str: 1203 | """Search NCI's controlled vocabulary of cancer conditions. 1204 | 1205 | Searches the National Cancer Institute's curated database of cancer 1206 | conditions and diseases used in clinical trials. This is different from 1207 | the general disease_getter tool which uses MyDisease.info. 1208 | 1209 | NCI's disease vocabulary provides: 1210 | - Official cancer terminology used in trials 1211 | - Disease synonyms and alternative names 1212 | - Hierarchical disease classifications 1213 | - Standardized disease codes for trial matching 1214 | 1215 | Requires NCI API key from: https://clinicaltrialsapi.cancer.gov/ 1216 | 1217 | Example usage: 1218 | - Search for specific cancer types (e.g., "melanoma") 1219 | - Find all lung cancer subtypes 1220 | - Look up official names for disease synonyms 1221 | - Get standardized disease terms for trial searches 1222 | 1223 | Note: This is specifically for NCI's cancer disease vocabulary. 1224 | For general disease information, use the disease_getter tool. 
# OpenFDA Tools
@mcp_app.tool()
@track_performance("biomcp.openfda_adverse_searcher")
async def openfda_adverse_searcher(
    drug: Annotated[
        str | None,
        Field(description="Drug name to search for adverse events"),
    ] = None,
    reaction: Annotated[
        str | None,
        Field(description="Adverse reaction term to search for"),
    ] = None,
    serious: Annotated[
        bool | None,
        Field(description="Filter for serious events only"),
    ] = None,
    limit: Annotated[
        int,
        Field(description="Maximum number of results", ge=1, le=100),
    ] = 25,
    page: Annotated[
        int,
        Field(description="Page number (1-based)", ge=1),
    ] = 1,
    api_key: Annotated[
        str | None,
        Field(
            description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)"
        ),
    ] = None,
) -> str:
    """Search FDA adverse event reports (FAERS) for drug safety information.

    ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy!

    Searches FDA's Adverse Event Reporting System for:
    - Drug side effects and adverse reactions
    - Serious event reports (death, hospitalization, disability)
    - Safety signal patterns across patient populations

    Note: These reports do not establish causation - they are voluntary reports
    that may contain incomplete or unverified information.
    """
    from biomcp.openfda import search_adverse_events

    # `limit` doubles as the page size: page 1 skips nothing, page 2 skips
    # one full page of `limit` records, and so on.
    pages_before = page - 1
    records_to_skip = pages_before * limit

    adverse_event_report = await search_adverse_events(
        drug=drug,
        reaction=reaction,
        serious=serious,
        limit=limit,
        skip=records_to_skip,
        api_key=api_key,
    )
    return adverse_event_report
1330 | 1331 | Retrieves complete details including: 1332 | - Patient demographics and medical history 1333 | - All drugs involved and dosages 1334 | - Complete list of adverse reactions 1335 | - Event narrative and outcomes 1336 | - Reporter information 1337 | """ 1338 | from biomcp.openfda import get_adverse_event 1339 | 1340 | return await get_adverse_event(report_id, api_key=api_key) 1341 | 1342 | 1343 | @mcp_app.tool() 1344 | @track_performance("biomcp.openfda_label_searcher") 1345 | async def openfda_label_searcher( 1346 | name: Annotated[ 1347 | str | None, 1348 | Field(description="Drug name to search for"), 1349 | ] = None, 1350 | indication: Annotated[ 1351 | str | None, 1352 | Field(description="Search for drugs indicated for this condition"), 1353 | ] = None, 1354 | boxed_warning: Annotated[ 1355 | bool, 1356 | Field(description="Filter for drugs with boxed warnings"), 1357 | ] = False, 1358 | section: Annotated[ 1359 | str | None, 1360 | Field( 1361 | description="Specific label section (e.g., 'contraindications', 'warnings')" 1362 | ), 1363 | ] = None, 1364 | limit: Annotated[ 1365 | int, 1366 | Field(description="Maximum number of results", ge=1, le=100), 1367 | ] = 25, 1368 | page: Annotated[ 1369 | int, 1370 | Field(description="Page number (1-based)", ge=1), 1371 | ] = 1, 1372 | api_key: Annotated[ 1373 | str | None, 1374 | Field( 1375 | description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" 1376 | ), 1377 | ] = None, 1378 | ) -> str: 1379 | """Search FDA drug product labels (SPL) for prescribing information. 1380 | 1381 | ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy! 
1382 | 1383 | Searches official FDA drug labels for: 1384 | - Approved indications and usage 1385 | - Dosage and administration guidelines 1386 | - Contraindications and warnings 1387 | - Drug interactions and adverse reactions 1388 | - Special population considerations 1389 | 1390 | Label sections include: indications, dosage, contraindications, warnings, 1391 | adverse, interactions, pregnancy, pediatric, geriatric, overdose 1392 | """ 1393 | from biomcp.openfda import search_drug_labels 1394 | 1395 | skip = (page - 1) * limit 1396 | return await search_drug_labels( 1397 | name=name, 1398 | indication=indication, 1399 | boxed_warning=boxed_warning, 1400 | section=section, 1401 | limit=limit, 1402 | skip=skip, 1403 | api_key=api_key, 1404 | ) 1405 | 1406 | 1407 | @mcp_app.tool() 1408 | @track_performance("biomcp.openfda_label_getter") 1409 | async def openfda_label_getter( 1410 | set_id: Annotated[ 1411 | str, 1412 | Field(description="Label set ID"), 1413 | ], 1414 | sections: Annotated[ 1415 | list[str] | None, 1416 | Field( 1417 | description="Specific sections to retrieve (default: key sections)" 1418 | ), 1419 | ] = None, 1420 | api_key: Annotated[ 1421 | str | None, 1422 | Field( 1423 | description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" 1424 | ), 1425 | ] = None, 1426 | ) -> str: 1427 | """Get complete FDA drug label information by set ID. 1428 | 1429 | Retrieves the full prescribing information including: 1430 | - Complete indications and usage text 1431 | - Detailed dosing instructions 1432 | - All warnings and precautions 1433 | - Clinical pharmacology and studies 1434 | - Manufacturing and storage information 1435 | 1436 | Specify sections to retrieve specific parts, or leave empty for default key sections. 
1437 | """ 1438 | from biomcp.openfda import get_drug_label 1439 | 1440 | return await get_drug_label(set_id, sections, api_key=api_key) 1441 | 1442 | 1443 | @mcp_app.tool() 1444 | @track_performance("biomcp.openfda_device_searcher") 1445 | async def openfda_device_searcher( 1446 | device: Annotated[ 1447 | str | None, 1448 | Field(description="Device name to search for"), 1449 | ] = None, 1450 | manufacturer: Annotated[ 1451 | str | None, 1452 | Field(description="Manufacturer name"), 1453 | ] = None, 1454 | problem: Annotated[ 1455 | str | None, 1456 | Field(description="Device problem description"), 1457 | ] = None, 1458 | product_code: Annotated[ 1459 | str | None, 1460 | Field(description="FDA product code"), 1461 | ] = None, 1462 | genomics_only: Annotated[ 1463 | bool, 1464 | Field(description="Filter to genomic/diagnostic devices only"), 1465 | ] = True, 1466 | limit: Annotated[ 1467 | int, 1468 | Field(description="Maximum number of results", ge=1, le=100), 1469 | ] = 25, 1470 | page: Annotated[ 1471 | int, 1472 | Field(description="Page number (1-based)", ge=1), 1473 | ] = 1, 1474 | api_key: Annotated[ 1475 | str | None, 1476 | Field( 1477 | description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" 1478 | ), 1479 | ] = None, 1480 | ) -> str: 1481 | """Search FDA device adverse event reports (MAUDE) for medical device issues. 1482 | 1483 | ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy! 1484 | 1485 | Searches FDA's device adverse event database for: 1486 | - Device malfunctions and failures 1487 | - Patient injuries related to devices 1488 | - Genomic test and diagnostic device issues 1489 | 1490 | By default, filters to genomic/diagnostic devices relevant to precision medicine. 1491 | Set genomics_only=False to search all medical devices. 
1492 | """ 1493 | from biomcp.openfda import search_device_events 1494 | 1495 | skip = (page - 1) * limit 1496 | return await search_device_events( 1497 | device=device, 1498 | manufacturer=manufacturer, 1499 | problem=problem, 1500 | product_code=product_code, 1501 | genomics_only=genomics_only, 1502 | limit=limit, 1503 | skip=skip, 1504 | api_key=api_key, 1505 | ) 1506 | 1507 | 1508 | @mcp_app.tool() 1509 | @track_performance("biomcp.openfda_device_getter") 1510 | async def openfda_device_getter( 1511 | mdr_report_key: Annotated[ 1512 | str, 1513 | Field(description="MDR report key"), 1514 | ], 1515 | api_key: Annotated[ 1516 | str | None, 1517 | Field( 1518 | description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" 1519 | ), 1520 | ] = None, 1521 | ) -> str: 1522 | """Get detailed information for a specific FDA device event report. 1523 | 1524 | Retrieves complete device event details including: 1525 | - Device identification and specifications 1526 | - Complete event narrative 1527 | - Patient outcomes and impacts 1528 | - Manufacturer analysis and actions 1529 | - Remedial actions taken 1530 | """ 1531 | from biomcp.openfda import get_device_event 1532 | 1533 | return await get_device_event(mdr_report_key, api_key=api_key) 1534 | 1535 | 1536 | @mcp_app.tool() 1537 | @track_performance("biomcp.openfda_approval_searcher") 1538 | async def openfda_approval_searcher( 1539 | drug: Annotated[ 1540 | str | None, 1541 | Field(description="Drug name (brand or generic) to search for"), 1542 | ] = None, 1543 | application_number: Annotated[ 1544 | str | None, 1545 | Field(description="NDA or BLA application number"), 1546 | ] = None, 1547 | approval_year: Annotated[ 1548 | str | None, 1549 | Field(description="Year of approval (YYYY format)"), 1550 | ] = None, 1551 | limit: Annotated[ 1552 | int, 1553 | Field(description="Maximum number of results", ge=1, le=100), 1554 | ] = 25, 1555 | page: Annotated[ 1556 | int, 1557 | Field(description="Page number 
(1-based)", ge=1), 1558 | ] = 1, 1559 | api_key: Annotated[ 1560 | str | None, 1561 | Field( 1562 | description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" 1563 | ), 1564 | ] = None, 1565 | ) -> str: 1566 | """Search FDA drug approval records from Drugs@FDA database. 1567 | 1568 | ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy! 1569 | 1570 | Returns information about: 1571 | - Application numbers and sponsors 1572 | - Brand and generic names 1573 | - Product formulations and strengths 1574 | - Marketing status and approval dates 1575 | - Submission history 1576 | 1577 | Useful for verifying if a drug is FDA-approved and when. 1578 | """ 1579 | from biomcp.openfda import search_drug_approvals 1580 | 1581 | skip = (page - 1) * limit 1582 | return await search_drug_approvals( 1583 | drug=drug, 1584 | application_number=application_number, 1585 | approval_year=approval_year, 1586 | limit=limit, 1587 | skip=skip, 1588 | api_key=api_key, 1589 | ) 1590 | 1591 | 1592 | @mcp_app.tool() 1593 | @track_performance("biomcp.openfda_approval_getter") 1594 | async def openfda_approval_getter( 1595 | application_number: Annotated[ 1596 | str, 1597 | Field(description="NDA or BLA application number"), 1598 | ], 1599 | api_key: Annotated[ 1600 | str | None, 1601 | Field( 1602 | description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" 1603 | ), 1604 | ] = None, 1605 | ) -> str: 1606 | """Get detailed FDA drug approval information for a specific application. 
1607 | 1608 | Returns comprehensive approval details including: 1609 | - Full product list with dosage forms and strengths 1610 | - Complete submission history 1611 | - Marketing status timeline 1612 | - Therapeutic equivalence codes 1613 | - Pharmacologic class information 1614 | """ 1615 | from biomcp.openfda import get_drug_approval 1616 | 1617 | return await get_drug_approval(application_number, api_key=api_key) 1618 | 1619 | 1620 | @mcp_app.tool() 1621 | @track_performance("biomcp.openfda_recall_searcher") 1622 | async def openfda_recall_searcher( 1623 | drug: Annotated[ 1624 | str | None, 1625 | Field(description="Drug name to search for recalls"), 1626 | ] = None, 1627 | recall_class: Annotated[ 1628 | str | None, 1629 | Field( 1630 | description="Recall classification (1=most serious, 2=moderate, 3=least serious)" 1631 | ), 1632 | ] = None, 1633 | status: Annotated[ 1634 | str | None, 1635 | Field(description="Recall status (ongoing, completed, terminated)"), 1636 | ] = None, 1637 | reason: Annotated[ 1638 | str | None, 1639 | Field(description="Search text in recall reason"), 1640 | ] = None, 1641 | since_date: Annotated[ 1642 | str | None, 1643 | Field(description="Show recalls after this date (YYYYMMDD format)"), 1644 | ] = None, 1645 | limit: Annotated[ 1646 | int, 1647 | Field(description="Maximum number of results", ge=1, le=100), 1648 | ] = 25, 1649 | page: Annotated[ 1650 | int, 1651 | Field(description="Page number (1-based)", ge=1), 1652 | ] = 1, 1653 | api_key: Annotated[ 1654 | str | None, 1655 | Field( 1656 | description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" 1657 | ), 1658 | ] = None, 1659 | ) -> str: 1660 | """Search FDA drug recall records from the Enforcement database. 1661 | 1662 | ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy! 
1663 | 1664 | Returns recall information including: 1665 | - Classification (Class I, II, or III) 1666 | - Recall reason and description 1667 | - Product identification 1668 | - Distribution information 1669 | - Recalling firm details 1670 | - Current status 1671 | 1672 | Class I = most serious (death/serious harm) 1673 | Class II = moderate (temporary/reversible harm) 1674 | Class III = least serious (unlikely to cause harm) 1675 | """ 1676 | from biomcp.openfda import search_drug_recalls 1677 | 1678 | skip = (page - 1) * limit 1679 | return await search_drug_recalls( 1680 | drug=drug, 1681 | recall_class=recall_class, 1682 | status=status, 1683 | reason=reason, 1684 | since_date=since_date, 1685 | limit=limit, 1686 | skip=skip, 1687 | api_key=api_key, 1688 | ) 1689 | 1690 | 1691 | @mcp_app.tool() 1692 | @track_performance("biomcp.openfda_recall_getter") 1693 | async def openfda_recall_getter( 1694 | recall_number: Annotated[ 1695 | str, 1696 | Field(description="FDA recall number"), 1697 | ], 1698 | api_key: Annotated[ 1699 | str | None, 1700 | Field( 1701 | description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" 1702 | ), 1703 | ] = None, 1704 | ) -> str: 1705 | """Get detailed FDA drug recall information for a specific recall. 
1706 | 1707 | Returns complete recall details including: 1708 | - Full product description and code information 1709 | - Complete reason for recall 1710 | - Distribution pattern and locations 1711 | - Quantity of product recalled 1712 | - Firm information and actions taken 1713 | - Timeline of recall events 1714 | """ 1715 | from biomcp.openfda import get_drug_recall 1716 | 1717 | return await get_drug_recall(recall_number, api_key=api_key) 1718 | 1719 | 1720 | @mcp_app.tool() 1721 | @track_performance("biomcp.openfda_shortage_searcher") 1722 | async def openfda_shortage_searcher( 1723 | drug: Annotated[ 1724 | str | None, 1725 | Field(description="Drug name (generic or brand) to search"), 1726 | ] = None, 1727 | status: Annotated[ 1728 | str | None, 1729 | Field(description="Shortage status (current or resolved)"), 1730 | ] = None, 1731 | therapeutic_category: Annotated[ 1732 | str | None, 1733 | Field( 1734 | description="Therapeutic category (e.g., Oncology, Anti-infective)" 1735 | ), 1736 | ] = None, 1737 | limit: Annotated[ 1738 | int, 1739 | Field(description="Maximum number of results", ge=1, le=100), 1740 | ] = 25, 1741 | page: Annotated[ 1742 | int, 1743 | Field(description="Page number (1-based)", ge=1), 1744 | ] = 1, 1745 | api_key: Annotated[ 1746 | str | None, 1747 | Field( 1748 | description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" 1749 | ), 1750 | ] = None, 1751 | ) -> str: 1752 | """Search FDA drug shortage records. 1753 | 1754 | ⚠️ PREREQUISITE: Use the 'think' tool FIRST to plan your research strategy! 1755 | 1756 | Returns shortage information including: 1757 | - Current shortage status 1758 | - Shortage start and resolution dates 1759 | - Reason for shortage 1760 | - Therapeutic category 1761 | - Manufacturer information 1762 | - Estimated resolution timeline 1763 | 1764 | Note: Shortage data is cached and updated periodically. 1765 | Check FDA.gov for most current information. 
1766 | """ 1767 | from biomcp.openfda import search_drug_shortages 1768 | 1769 | skip = (page - 1) * limit 1770 | return await search_drug_shortages( 1771 | drug=drug, 1772 | status=status, 1773 | therapeutic_category=therapeutic_category, 1774 | limit=limit, 1775 | skip=skip, 1776 | api_key=api_key, 1777 | ) 1778 | 1779 | 1780 | @mcp_app.tool() 1781 | @track_performance("biomcp.openfda_shortage_getter") 1782 | async def openfda_shortage_getter( 1783 | drug: Annotated[ 1784 | str, 1785 | Field(description="Drug name (generic or brand)"), 1786 | ], 1787 | api_key: Annotated[ 1788 | str | None, 1789 | Field( 1790 | description="Optional OpenFDA API key (overrides OPENFDA_API_KEY env var)" 1791 | ), 1792 | ] = None, 1793 | ) -> str: 1794 | """Get detailed FDA drug shortage information for a specific drug. 1795 | 1796 | Returns comprehensive shortage details including: 1797 | - Complete timeline of shortage 1798 | - Detailed reason for shortage 1799 | - All affected manufacturers 1800 | - Alternative products if available 1801 | - Resolution status and estimates 1802 | - Additional notes and recommendations 1803 | 1804 | Data is updated periodically from FDA shortage database. 1805 | """ 1806 | from biomcp.openfda import get_drug_shortage 1807 | 1808 | return await get_drug_shortage(drug, api_key=api_key) 1809 | ```