This is page 16 of 19. Use http://codebase.md/genomoncology/biomcp?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .github │ ├── actions │ │ └── setup-python-env │ │ └── action.yml │ ├── dependabot.yml │ └── workflows │ ├── ci.yml │ ├── deploy-docs.yml │ ├── main.yml.disabled │ ├── on-release-main.yml │ └── validate-codecov-config.yml ├── .gitignore ├── .pre-commit-config.yaml ├── BIOMCP_DATA_FLOW.md ├── CHANGELOG.md ├── CNAME ├── codecov.yaml ├── docker-compose.yml ├── Dockerfile ├── docs │ ├── apis │ │ ├── error-codes.md │ │ ├── overview.md │ │ └── python-sdk.md │ ├── assets │ │ ├── biomcp-cursor-locations.png │ │ ├── favicon.ico │ │ ├── icon.png │ │ ├── logo.png │ │ ├── mcp_architecture.txt │ │ └── remote-connection │ │ ├── 00_connectors.png │ │ ├── 01_add_custom_connector.png │ │ ├── 02_connector_enabled.png │ │ ├── 03_connect_to_biomcp.png │ │ ├── 04_select_google_oauth.png │ │ └── 05_success_connect.png │ ├── backend-services-reference │ │ ├── 01-overview.md │ │ ├── 02-biothings-suite.md │ │ ├── 03-cbioportal.md │ │ ├── 04-clinicaltrials-gov.md │ │ ├── 05-nci-cts-api.md │ │ ├── 06-pubtator3.md │ │ └── 07-alphagenome.md │ ├── blog │ │ ├── ai-assisted-clinical-trial-search-analysis.md │ │ ├── images │ │ │ ├── deep-researcher-video.png │ │ │ ├── researcher-announce.png │ │ │ ├── researcher-drop-down.png │ │ │ ├── researcher-prompt.png │ │ │ ├── trial-search-assistant.png │ │ │ └── what_is_biomcp_thumbnail.png │ │ └── researcher-persona-resource.md │ ├── changelog.md │ ├── CNAME │ ├── concepts │ │ ├── 01-what-is-biomcp.md │ │ ├── 02-the-deep-researcher-persona.md │ │ └── 03-sequential-thinking-with-the-think-tool.md │ ├── developer-guides │ │ ├── 01-server-deployment.md │ │ ├── 02-contributing-and-testing.md │ │ ├── 03-third-party-endpoints.md │ │ ├── 04-transport-protocol.md │ │ ├── 05-error-handling.md │ │ ├── 06-http-client-and-caching.md │ │ ├── 07-performance-optimizations.md │ │ └── generate_endpoints.py │ ├── faq-condensed.md │ 
├── FDA_SECURITY.md │ ├── genomoncology.md │ ├── getting-started │ │ ├── 01-quickstart-cli.md │ │ ├── 02-claude-desktop-integration.md │ │ └── 03-authentication-and-api-keys.md │ ├── how-to-guides │ │ ├── 01-find-articles-and-cbioportal-data.md │ │ ├── 02-find-trials-with-nci-and-biothings.md │ │ ├── 03-get-comprehensive-variant-annotations.md │ │ ├── 04-predict-variant-effects-with-alphagenome.md │ │ ├── 05-logging-and-monitoring-with-bigquery.md │ │ └── 06-search-nci-organizations-and-interventions.md │ ├── index.md │ ├── policies.md │ ├── reference │ │ ├── architecture-diagrams.md │ │ ├── quick-architecture.md │ │ ├── quick-reference.md │ │ └── visual-architecture.md │ ├── robots.txt │ ├── stylesheets │ │ ├── announcement.css │ │ └── extra.css │ ├── troubleshooting.md │ ├── tutorials │ │ ├── biothings-prompts.md │ │ ├── claude-code-biomcp-alphagenome.md │ │ ├── nci-prompts.md │ │ ├── openfda-integration.md │ │ ├── openfda-prompts.md │ │ ├── pydantic-ai-integration.md │ │ └── remote-connection.md │ ├── user-guides │ │ ├── 01-command-line-interface.md │ │ ├── 02-mcp-tools-reference.md │ │ └── 03-integrating-with-ides-and-clients.md │ └── workflows │ └── all-workflows.md ├── example_scripts │ ├── mcp_integration.py │ └── python_sdk.py ├── glama.json ├── LICENSE ├── lzyank.toml ├── Makefile ├── mkdocs.yml ├── package-lock.json ├── package.json ├── pyproject.toml ├── README.md ├── scripts │ ├── check_docs_in_mkdocs.py │ ├── check_http_imports.py │ └── generate_endpoints_doc.py ├── smithery.yaml ├── src │ └── biomcp │ ├── __init__.py │ ├── __main__.py │ ├── articles │ │ ├── __init__.py │ │ ├── autocomplete.py │ │ ├── fetch.py │ │ ├── preprints.py │ │ ├── search_optimized.py │ │ ├── search.py │ │ └── unified.py │ ├── biomarkers │ │ ├── __init__.py │ │ └── search.py │ ├── cbioportal_helper.py │ ├── circuit_breaker.py │ ├── cli │ │ ├── __init__.py │ │ ├── articles.py │ │ ├── biomarkers.py │ │ ├── diseases.py │ │ ├── health.py │ │ ├── interventions.py │ │ ├── main.py │ │ 
├── openfda.py │ │ ├── organizations.py │ │ ├── server.py │ │ ├── trials.py │ │ └── variants.py │ ├── connection_pool.py │ ├── constants.py │ ├── core.py │ ├── diseases │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── domain_handlers.py │ ├── drugs │ │ ├── __init__.py │ │ └── getter.py │ ├── exceptions.py │ ├── genes │ │ ├── __init__.py │ │ └── getter.py │ ├── http_client_simple.py │ ├── http_client.py │ ├── individual_tools.py │ ├── integrations │ │ ├── __init__.py │ │ ├── biothings_client.py │ │ └── cts_api.py │ ├── interventions │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── logging_filter.py │ ├── metrics_handler.py │ ├── metrics.py │ ├── openfda │ │ ├── __init__.py │ │ ├── adverse_events_helpers.py │ │ ├── adverse_events.py │ │ ├── cache.py │ │ ├── constants.py │ │ ├── device_events_helpers.py │ │ ├── device_events.py │ │ ├── drug_approvals.py │ │ ├── drug_labels_helpers.py │ │ ├── drug_labels.py │ │ ├── drug_recalls_helpers.py │ │ ├── drug_recalls.py │ │ ├── drug_shortages_detail_helpers.py │ │ ├── drug_shortages_helpers.py │ │ ├── drug_shortages.py │ │ ├── exceptions.py │ │ ├── input_validation.py │ │ ├── rate_limiter.py │ │ ├── utils.py │ │ └── validation.py │ ├── organizations │ │ ├── __init__.py │ │ ├── getter.py │ │ └── search.py │ ├── parameter_parser.py │ ├── prefetch.py │ ├── query_parser.py │ ├── query_router.py │ ├── rate_limiter.py │ ├── render.py │ ├── request_batcher.py │ ├── resources │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── instructions.md │ │ └── researcher.md │ ├── retry.py │ ├── router_handlers.py │ ├── router.py │ ├── shared_context.py │ ├── thinking │ │ ├── __init__.py │ │ ├── sequential.py │ │ └── session.py │ ├── thinking_tool.py │ ├── thinking_tracker.py │ ├── trials │ │ ├── __init__.py │ │ ├── getter.py │ │ ├── nci_getter.py │ │ ├── nci_search.py │ │ └── search.py │ ├── utils │ │ ├── __init__.py │ │ ├── cancer_types_api.py │ │ ├── cbio_http_adapter.py │ │ ├── endpoint_registry.py │ │ ├── 
gene_validator.py │ │ ├── metrics.py │ │ ├── mutation_filter.py │ │ ├── query_utils.py │ │ ├── rate_limiter.py │ │ └── request_cache.py │ ├── variants │ │ ├── __init__.py │ │ ├── alphagenome.py │ │ ├── cancer_types.py │ │ ├── cbio_external_client.py │ │ ├── cbioportal_mutations.py │ │ ├── cbioportal_search_helpers.py │ │ ├── cbioportal_search.py │ │ ├── constants.py │ │ ├── external.py │ │ ├── filters.py │ │ ├── getter.py │ │ ├── links.py │ │ └── search.py │ └── workers │ ├── __init__.py │ ├── worker_entry_stytch.js │ ├── worker_entry.js │ └── worker.py ├── tests │ ├── bdd │ │ ├── cli_help │ │ │ ├── help.feature │ │ │ └── test_help.py │ │ ├── conftest.py │ │ ├── features │ │ │ └── alphagenome_integration.feature │ │ ├── fetch_articles │ │ │ ├── fetch.feature │ │ │ └── test_fetch.py │ │ ├── get_trials │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── get_variants │ │ │ ├── get.feature │ │ │ └── test_get.py │ │ ├── search_articles │ │ │ ├── autocomplete.feature │ │ │ ├── search.feature │ │ │ ├── test_autocomplete.py │ │ │ └── test_search.py │ │ ├── search_trials │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ ├── search_variants │ │ │ ├── search.feature │ │ │ └── test_search.py │ │ └── steps │ │ └── test_alphagenome_steps.py │ ├── config │ │ └── test_smithery_config.py │ ├── conftest.py │ ├── data │ │ ├── ct_gov │ │ │ ├── clinical_trials_api_v2.yaml │ │ │ ├── trials_NCT04280705.json │ │ │ └── trials_NCT04280705.txt │ │ ├── myvariant │ │ │ ├── myvariant_api.yaml │ │ │ ├── myvariant_field_descriptions.csv │ │ │ ├── variants_full_braf_v600e.json │ │ │ ├── variants_full_braf_v600e.txt │ │ │ └── variants_part_braf_v600_multiple.json │ │ ├── openfda │ │ │ ├── drugsfda_detail.json │ │ │ ├── drugsfda_search.json │ │ │ ├── enforcement_detail.json │ │ │ └── enforcement_search.json │ │ └── pubtator │ │ ├── pubtator_autocomplete.json │ │ └── pubtator3_paper.txt │ ├── integration │ │ ├── test_openfda_integration.py │ │ ├── test_preprints_integration.py │ │ ├── 
test_simple.py │ │ └── test_variants_integration.py │ ├── tdd │ │ ├── articles │ │ │ ├── test_autocomplete.py │ │ │ ├── test_cbioportal_integration.py │ │ │ ├── test_fetch.py │ │ │ ├── test_preprints.py │ │ │ ├── test_search.py │ │ │ └── test_unified.py │ │ ├── conftest.py │ │ ├── drugs │ │ │ ├── __init__.py │ │ │ └── test_drug_getter.py │ │ ├── openfda │ │ │ ├── __init__.py │ │ │ ├── test_adverse_events.py │ │ │ ├── test_device_events.py │ │ │ ├── test_drug_approvals.py │ │ │ ├── test_drug_labels.py │ │ │ ├── test_drug_recalls.py │ │ │ ├── test_drug_shortages.py │ │ │ └── test_security.py │ │ ├── test_biothings_integration_real.py │ │ ├── test_biothings_integration.py │ │ ├── test_circuit_breaker.py │ │ ├── test_concurrent_requests.py │ │ ├── test_connection_pool.py │ │ ├── test_domain_handlers.py │ │ ├── test_drug_approvals.py │ │ ├── test_drug_recalls.py │ │ ├── test_drug_shortages.py │ │ ├── test_endpoint_documentation.py │ │ ├── test_error_scenarios.py │ │ ├── test_europe_pmc_fetch.py │ │ ├── test_mcp_integration.py │ │ ├── test_mcp_tools.py │ │ ├── test_metrics.py │ │ ├── test_nci_integration.py │ │ ├── test_nci_mcp_tools.py │ │ ├── test_network_policies.py │ │ ├── test_offline_mode.py │ │ ├── test_openfda_unified.py │ │ ├── test_pten_r173_search.py │ │ ├── test_render.py │ │ ├── test_request_batcher.py.disabled │ │ ├── test_retry.py │ │ ├── test_router.py │ │ ├── test_shared_context.py.disabled │ │ ├── test_unified_biothings.py │ │ ├── thinking │ │ │ ├── __init__.py │ │ │ └── test_sequential.py │ │ ├── trials │ │ │ ├── test_backward_compatibility.py │ │ │ ├── test_getter.py │ │ │ └── test_search.py │ │ ├── utils │ │ │ ├── test_gene_validator.py │ │ │ ├── test_mutation_filter.py │ │ │ ├── test_rate_limiter.py │ │ │ └── test_request_cache.py │ │ ├── variants │ │ │ ├── constants.py │ │ │ ├── test_alphagenome_api_key.py │ │ │ ├── test_alphagenome_comprehensive.py │ │ │ ├── test_alphagenome.py │ │ │ ├── test_cbioportal_mutations.py │ │ │ ├── 
test_cbioportal_search.py │ │ │ ├── test_external_integration.py │ │ │ ├── test_external.py │ │ │ ├── test_extract_gene_aa_change.py │ │ │ ├── test_filters.py │ │ │ ├── test_getter.py │ │ │ ├── test_links.py │ │ │ └── test_search.py │ │ └── workers │ │ └── test_worker_sanitization.js │ └── test_pydantic_ai_integration.py ├── THIRD_PARTY_ENDPOINTS.md ├── tox.ini ├── uv.lock └── wrangler.toml ``` # Files -------------------------------------------------------------------------------- /tests/data/myvariant/myvariant_field_descriptions.csv: -------------------------------------------------------------------------------- ``` 1 | field,"description" 2 | "cadd._license","License information URL for the CADD data source." 3 | "cadd.alt","Alternate allele for the variant in CADD." 4 | "cadd.anc","Ancestral allele according to CADD analysis." 5 | "cadd.annotype","Annotation type (e.g., CodingTranscript) from CADD." 6 | "cadd.bstatistic","B-statistic score from CADD, related to conservation." 7 | "cadd.chmm.bivflnk","ChromHMM state score: Flanking Bivalent TSS/Enh." 8 | "cadd.chmm.enh","ChromHMM state score: Active Enhancer." 9 | "cadd.chmm.enhbiv","ChromHMM state score: Bivalent Enhancer." 10 | "cadd.chmm.het","ChromHMM state score: Heterochromatin." 11 | "cadd.chmm.quies","ChromHMM state score: Quiescent/Low activity." 12 | "cadd.chmm.reprpc","ChromHMM state score: Repressed Polycomb." 13 | "cadd.chmm.reprpcwk","ChromHMM state score: Weak Repressed Polycomb." 14 | "cadd.chmm.tssa","ChromHMM state score: Active TSS." 15 | "cadd.chmm.tssaflnk","ChromHMM state score: Flanking Active TSS." 16 | "cadd.chmm.tssbiv","ChromHMM state score: Bivalent TSS." 17 | "cadd.chmm.tx","ChromHMM state score: Strong transcription." 18 | "cadd.chmm.txflnk","ChromHMM state score: Transcribed at gene 5' and 3'." 19 | "cadd.chmm.txwk","ChromHMM state score: Weak transcription." 20 | "cadd.chmm.znfrpts","ChromHMM state score: ZNF genes & repeats." 
21 | "cadd.chrom","Chromosome number for the variant in CADD." 22 | "cadd.consdetail","Detailed consequence of the variant (e.g., missense) from CADD." 23 | "cadd.consequence","General consequence category (e.g., NON_SYNONYMOUS) from CADD." 24 | "cadd.consscore","Conservation score from CADD." 25 | "cadd.cpg","Indicator if the variant is in a CpG island (0 or 1)." 26 | "cadd.dna.helt","DNA physical property: Helix twist value." 27 | "cadd.dna.mgw","DNA physical property: Minor groove width value." 28 | "cadd.dna.prot","DNA physical property: Propeller twist value." 29 | "cadd.dna.roll","DNA physical property: Roll value." 30 | "cadd.encode.exp","ENCODE gene expression value." 31 | "cadd.encode.h3k27ac","ENCODE histone modification H3K27ac signal value." 32 | "cadd.encode.h3k4me1","ENCODE histone modification H3K4me1 signal value." 33 | "cadd.encode.h3k4me3","ENCODE histone modification H3K4me3 signal value." 34 | "cadd.encode.nucleo","ENCODE nucleosome occupancy signal value." 35 | "cadd.exon","Exon number and total exons (e.g., 15/18)." 36 | "cadd.fitcons","FitCons score indicating functional impact based on evolutionary data." 37 | "cadd.gc","GC content in the surrounding region." 38 | "cadd.gene.ccds_id","Consensus CDS (CCDS) identifier for the gene." 39 | "cadd.gene.cds.cdna_pos","Position of the variant within the cDNA sequence." 40 | "cadd.gene.cds.cds_pos","Position of the variant within the coding sequence (CDS)." 41 | "cadd.gene.cds.rel_cdna_pos","Relative position within the cDNA sequence." 42 | "cadd.gene.cds.rel_cds_pos","Relative position within the coding sequence (CDS)." 43 | "cadd.gene.feature_id","Ensembl transcript identifier (ENST)." 44 | "cadd.gene.gene_id","Ensembl gene identifier (ENSG)." 45 | "cadd.gene.genename","Gene symbol (e.g., BRAF)." 46 | "cadd.gene.prot.domain","Protein domain affected by the variant." 47 | "cadd.gene.prot.protpos","Amino acid position within the protein." 
48 | "cadd.gene.prot.rel_prot_pos","Relative position within the protein sequence." 49 | "cadd.gerp.n","GERP++ Neutral rate score." 50 | "cadd.gerp.rs","GERP++ Rejected Substitutions score (conservation score)." 51 | "cadd.gerp.rs_pval","P-value associated with the GERP++ RS score." 52 | "cadd.gerp.s","GERP++ S score, equivalent to RS score." 53 | "cadd.grantham","Grantham score measuring physicochemical difference between amino acids." 54 | "cadd.isderived","Indicates if the alternate allele is derived (TRUE/FALSE)." 55 | "cadd.isknownvariant","Indicates if the variant is known in dbSNP (TRUE/FALSE)." 56 | "cadd.istv","Indicates if the variant is a transversion (TRUE/FALSE)." 57 | "cadd.length","Length of the variant (0 for SNVs)." 58 | "cadd.mapability.20bp","Mapability score based on 20bp reads." 59 | "cadd.mapability.35bp","Mapability score based on 35bp reads." 60 | "cadd.min_dist_tse","Minimum distance to the nearest transcribed sequence end (TSE)." 61 | "cadd.min_dist_tss","Minimum distance to the nearest transcription start site (TSS)." 62 | "cadd.mutindex","Mutation index score from CADD." 63 | "cadd.naa","New amino acid resulting from the variant." 64 | "cadd.oaa","Original amino acid affected by the variant." 65 | "cadd.phast_cons.mammalian","PhastCons conservation score across mammals." 66 | "cadd.phast_cons.primate","PhastCons conservation score across primates." 67 | "cadd.phast_cons.vertebrate","PhastCons conservation score across vertebrates." 68 | "cadd.phred","CADD PHRED-like scaled score indicating deleteriousness." 69 | "cadd.phylop.mammalian","PhyloP conservation score across mammals." 70 | "cadd.phylop.primate","PhyloP conservation score across primates." 71 | "cadd.phylop.vertebrate","PhyloP conservation score across vertebrates." 72 | "cadd.polyphen.cat","PolyPhen-2 qualitative prediction (e.g., probably_damaging)." 73 | "cadd.polyphen.val","PolyPhen-2 quantitative score (0 to 1)." 74 | "cadd.pos","Genomic position of the variant (hg19)." 
75 | "cadd.rawscore","CADD raw score before scaling." 76 | "cadd.ref","Reference allele for the variant in CADD." 77 | "cadd.segway","Segway annotation for the genomic region." 78 | "cadd.sift.cat","SIFT qualitative prediction (e.g., deleterious)." 79 | "cadd.sift.val","SIFT quantitative score (0 to 1)." 80 | "cadd.type","Type of variant (e.g., SNV)." 81 | "cgi._license","License information URL for the CGI data source." 82 | "cgi.association","Drug association type (Responsive, Resistant) from CGI." 83 | "cgi.cdna","cDNA change notation (e.g., c.1799T>A) from CGI." 84 | "cgi.drug","Drug name associated with the variant from CGI." 85 | "cgi.evidence_level","Level of evidence for the drug association from CGI." 86 | "cgi.gene","Gene symbol associated with the variant from CGI." 87 | "cgi.primary_tumor_type","Primary tumor type associated with the CGI entry." 88 | "cgi.protein_change","Protein change notation (e.g., BRAF:V600E) from CGI." 89 | "cgi.region","Genomic region description from CGI." 90 | "cgi.source","Source identifier (e.g., PubMed ID, ASCO abstract) from CGI." 91 | "cgi.transcript","Transcript identifier associated with the CGI entry." 92 | "chrom","Chromosome number for the variant." 93 | "civic._license","License information URL for the CIViC data source." 94 | "civic.alleleRegistryId","Allele Registry ID associated with the variant in CIViC." 95 | "civic.clinvarIds","List of associated ClinVar Variation IDs." 96 | "civic.comments.totalCount","Total number of comments associated with the CIViC variant entry." 97 | "civic.contributors.curators.lastActionDate","Timestamp of the last action by a CIViC curator." 98 | "civic.contributors.curators.totalActionCount","Total number of actions performed by a CIViC curator." 99 | "civic.contributors.curators.uniqueActions.action","Type of action performed by a CIViC curator (e.g., REVISION_SUGGESTED)." 100 | "civic.contributors.curators.uniqueActions.count","Count of a specific unique action by a CIViC curator." 
101 | "civic.contributors.curators.user.id","User ID of the CIViC curator." 102 | "civic.contributors.editors.lastActionDate","Timestamp of the last action by a CIViC editor." 103 | "civic.contributors.editors.totalActionCount","Total number of actions performed by a CIViC editor." 104 | "civic.contributors.editors.uniqueActions.action","Type of action performed by a CIViC editor (e.g., REVISION_ACCEPTED)." 105 | "civic.contributors.editors.uniqueActions.count","Count of a specific unique action by a CIViC editor." 106 | "civic.contributors.editors.user.id","User ID of the CIViC editor." 107 | "civic.coordinates.chromosome","Chromosome for the variant according to CIViC coordinates." 108 | "civic.coordinates.coordinateType","Type of coordinate system used in CIViC (e.g., GENE_VARIANT_COORDINATE)." 109 | "civic.coordinates.ensemblVersion","Ensembl version used for CIViC coordinates." 110 | "civic.coordinates.referenceBases","Reference bases for the variant in CIViC coordinates." 111 | "civic.coordinates.referenceBuild","Reference genome build used for CIViC coordinates (e.g., GRCH37)." 112 | "civic.coordinates.representativeTranscript","Representative transcript ID used for CIViC coordinates." 113 | "civic.coordinates.start","Start position of the variant in CIViC coordinates." 114 | "civic.coordinates.stop","Stop position of the variant in CIViC coordinates." 115 | "civic.coordinates.variantBases","Variant bases for the variant in CIViC coordinates." 116 | "civic.creationActivity.createdAt","Timestamp when the CIViC variant entry was created." 117 | "civic.creationActivity.user.displayName","Display name of the user who created the CIViC entry." 118 | "civic.creationActivity.user.id","User ID of the creator of the CIViC entry." 119 | "civic.creationActivity.user.role","Role of the user who created the CIViC entry (e.g., ADMIN)." 120 | "civic.deprecated","Boolean indicating if the CIViC variant entry is deprecated." 
121 | "civic.feature.deprecated","Boolean indicating if the associated CIViC feature (gene) is deprecated." 122 | "civic.feature.flagged","Boolean indicating if the associated CIViC feature (gene) is flagged." 123 | "civic.feature.id","Internal CIViC ID for the associated feature (gene)." 124 | "civic.feature.link","URL link to the associated CIViC feature page." 125 | "civic.feature.name","Name of the associated CIViC feature (gene symbol)." 126 | "civic.flags.totalCount","Total number of flags associated with the CIViC variant entry." 127 | "civic.hgvsDescriptions","List of HGVS descriptions for the variant from CIViC." 128 | "civic.id","Internal CIViC ID for the variant." 129 | "civic.lastAcceptedRevisionEvent.originatingUser.displayName","Display name of the user whose revision was last accepted." 130 | "civic.lastAcceptedRevisionEvent.originatingUser.id","User ID of the user whose revision was last accepted." 131 | "civic.lastAcceptedRevisionEvent.originatingUser.role","Role of the user whose revision was last accepted." 132 | "civic.lastSubmittedRevisionEvent.originatingUser.displayName","Display name of the user who last submitted a revision." 133 | "civic.lastSubmittedRevisionEvent.originatingUser.id","User ID of the user who last submitted a revision." 134 | "civic.lastSubmittedRevisionEvent.originatingUser.role","Role of the user who last submitted a revision." 135 | "civic.maneSelectTranscript","MANE Select transcript HGVS description from CIViC." 136 | "civic.molecularProfiles.evidenceItems.description","Textual description of the evidence item in CIViC." 137 | "civic.molecularProfiles.evidenceItems.disease.diseaseAliases","List of aliases for the associated disease in CIViC." 138 | "civic.molecularProfiles.evidenceItems.disease.diseaseUrl","URL link to the disease ontology page." 139 | "civic.molecularProfiles.evidenceItems.disease.displayName","Display name of the associated disease in CIViC." 
140 | "civic.molecularProfiles.evidenceItems.disease.doid","Disease Ontology ID (DOID) for the associated disease." 141 | "civic.molecularProfiles.evidenceItems.disease.id","Internal CIViC ID for the associated disease." 142 | "civic.molecularProfiles.evidenceItems.disease.link","URL link to the associated CIViC disease page." 143 | "civic.molecularProfiles.evidenceItems.disease.myDiseaseInfo.doDef","Disease Ontology definition for the associated disease." 144 | "civic.molecularProfiles.evidenceItems.disease.myDiseaseInfo.icd10","ICD-10 code(s) for the associated disease." 145 | "civic.molecularProfiles.evidenceItems.disease.myDiseaseInfo.icdo","ICD-O code for the associated disease." 146 | "civic.molecularProfiles.evidenceItems.disease.myDiseaseInfo.mesh","MeSH ID(s) for the associated disease." 147 | "civic.molecularProfiles.evidenceItems.disease.myDiseaseInfo.mondoId","Mondo Disease Ontology ID for the associated disease." 148 | "civic.molecularProfiles.evidenceItems.disease.myDiseaseInfo.ncit","NCI Thesaurus code(s) for the associated disease." 149 | "civic.molecularProfiles.evidenceItems.disease.name","Name of the associated disease in CIViC." 150 | "civic.molecularProfiles.evidenceItems.evidenceDirection","Direction of evidence (SUPPORTS, DOES_NOT_SUPPORT) in CIViC." 151 | "civic.molecularProfiles.evidenceItems.evidenceLevel","Level of evidence (A, B, C, D, E) in CIViC." 152 | "civic.molecularProfiles.evidenceItems.evidenceRating","Rating of the evidence (1-5 stars) in CIViC." 153 | "civic.molecularProfiles.evidenceItems.evidenceType","Type of evidence (e.g., PREDICTIVE, DIAGNOSTIC) in CIViC." 154 | "civic.molecularProfiles.evidenceItems.flagged","Boolean indicating if the CIViC evidence item is flagged." 155 | "civic.molecularProfiles.evidenceItems.id","Internal CIViC ID for the evidence item." 156 | "civic.molecularProfiles.evidenceItems.molecularProfile.id","Internal CIViC ID for the associated molecular profile." 
157 | "civic.molecularProfiles.evidenceItems.name","Name of the evidence item (e.g., EID79)." 158 | "civic.molecularProfiles.evidenceItems.significance","Clinical significance of the evidence (e.g., SENSITIVITYRESPONSE, RESISTANCE) in CIViC." 159 | "civic.molecularProfiles.evidenceItems.source.abstract","Abstract of the source publication from CIViC." 160 | "civic.molecularProfiles.evidenceItems.source.authorString","Author list from the source publication." 161 | "civic.molecularProfiles.evidenceItems.source.citation","Short citation format for the source publication." 162 | "civic.molecularProfiles.evidenceItems.source.citationId","PubMed ID (PMID) or ASCO ID for the source." 163 | "civic.molecularProfiles.evidenceItems.source.id","Internal CIViC ID for the source." 164 | "civic.molecularProfiles.evidenceItems.source.journal","Journal name of the source publication." 165 | "civic.molecularProfiles.evidenceItems.source.link","URL link to the associated CIViC source page." 166 | "civic.molecularProfiles.evidenceItems.source.name","Formatted name of the source (e.g., PubMed: Howell et al., 2011)." 167 | "civic.molecularProfiles.evidenceItems.source.openAccess","Boolean indicating if the source is open access." 168 | "civic.molecularProfiles.evidenceItems.source.pmcId","PubMed Central ID (PMCID) if available." 169 | "civic.molecularProfiles.evidenceItems.source.publicationDate","Publication date of the source." 170 | "civic.molecularProfiles.evidenceItems.source.retracted","Boolean indicating if the source has been retracted." 171 | "civic.molecularProfiles.evidenceItems.source.retractionDate","Date the source was retracted, if applicable." 172 | "civic.molecularProfiles.evidenceItems.source.retractionNature","Nature of the retraction, if applicable." 173 | "civic.molecularProfiles.evidenceItems.source.retractionReasons","Reason(s) for retraction, if applicable." 174 | "civic.molecularProfiles.evidenceItems.source.sourceType","Type of source (e.g., PUBMED, ASCO)." 
175 | "civic.molecularProfiles.evidenceItems.source.sourceUrl","URL link to the original source." 176 | "civic.molecularProfiles.evidenceItems.source.title","Title of the source publication." 177 | "civic.molecularProfiles.evidenceItems.therapies.deprecated","Boolean indicating if the therapy entry is deprecated in CIViC." 178 | "civic.molecularProfiles.evidenceItems.therapies.id","Internal CIViC ID for the therapy." 179 | "civic.molecularProfiles.evidenceItems.therapies.link","URL link to the associated CIViC therapy page." 180 | "civic.molecularProfiles.evidenceItems.therapies.name","Name of the therapy in CIViC." 181 | "civic.molecularProfiles.evidenceItems.variantOrigin","Origin of the variant (SOMATIC, GERMLINE, NA) for the evidence item." 182 | "civic.molecularProfiles.id","Internal CIViC ID for the molecular profile." 183 | "civic.molecularProfiles.molecularProfileAliases","List of aliases for the molecular profile in CIViC." 184 | "civic.molecularProfiles.molecularProfileScore","Score associated with the molecular profile in CIViC." 185 | "civic.molecularProfiles.name","Name of the molecular profile in CIViC." 186 | "civic.molecularProfiles.variants.id","Internal CIViC ID for the variant within the profile." 187 | "civic.molecularProfiles.variants.link","URL link to the associated CIViC variant page." 188 | "civic.molecularProfiles.variants.name","Name of the variant within the profile." 189 | "civic.name","Name of the variant in CIViC (e.g., V600E)." 190 | "civic.openCravatUrl","URL link to the OpenCRAVAT report for the variant." 191 | "civic.openRevisionCount","Number of open revisions for the CIViC variant entry." 192 | "civic.revisions.totalCount","Total number of revisions for the CIViC variant entry." 193 | "civic.variantAliases","List of aliases for the variant in CIViC." 194 | "civic.variantTypes.id","Internal CIViC ID for the variant type." 195 | "civic.variantTypes.link","URL link to the associated CIViC variant type page." 
196 | "civic.variantTypes.name","Name of the variant type (e.g., Missense Variant)." 197 | "civic.variantTypes.soid","Sequence Ontology ID for the variant type." 198 | "clinvar._license","License information URL for the ClinVar data source." 199 | "clinvar.allele_id","ClinVar Allele ID." 200 | "clinvar.alt","Alternate allele in ClinVar." 201 | "clinvar.chrom","Chromosome number in ClinVar." 202 | "clinvar.cytogenic","Cytogenetic location (e.g., 7q34)." 203 | "clinvar.gene.id","Entrez Gene ID associated with the ClinVar record." 204 | "clinvar.gene.symbol","Gene symbol associated with the ClinVar record." 205 | "clinvar.hg19.end","End position of the variant in hg19 assembly." 206 | "clinvar.hg19.start","Start position of the variant in hg19 assembly." 207 | "clinvar.hg38.end","End position of the variant in hg38 assembly." 208 | "clinvar.hg38.start","Start position of the variant in hg38 assembly." 209 | "clinvar.hgvs.coding","List of HGVS coding sequence notations." 210 | "clinvar.hgvs.genomic","List of HGVS genomic sequence notations." 211 | "clinvar.hgvs.protein","List of HGVS protein sequence notations." 212 | "clinvar.omim","Associated Online Mendelian Inheritance in Man (OMIM) ID(s)." 213 | "clinvar.rcv.accession","ClinVar RCV accession number (identifies a submitted interpretation)." 214 | "clinvar.rcv.clinical_significance","Clinical significance assertion for the RCV record." 215 | "clinvar.rcv.conditions.identifiers.human_phenotype_ontology","Associated Human Phenotype Ontology (HPO) IDs." 216 | "clinvar.rcv.conditions.identifiers.medgen","Associated MedGen Concept Unique Identifier (CUI)." 217 | "clinvar.rcv.conditions.identifiers.mesh","Associated Medical Subject Headings (MeSH) ID(s)." 218 | "clinvar.rcv.conditions.identifiers.mondo","Associated Mondo Disease Ontology ID(s)." 219 | "clinvar.rcv.conditions.identifiers.omim","Associated OMIM ID(s) for the condition." 220 | "clinvar.rcv.conditions.identifiers.orphanet","Associated Orphanet ID(s)." 
221 | "clinvar.rcv.conditions.name","Name of the condition associated with the RCV record." 222 | "clinvar.rcv.conditions.synonyms","Synonyms for the condition associated with the RCV record." 223 | "clinvar.rcv.last_evaluated","Date the RCV record was last evaluated by the submitter." 224 | "clinvar.rcv.number_submitters","Number of submitters for this interpretation." 225 | "clinvar.rcv.origin","Origin of the allele (somatic, germline, etc.)." 226 | "clinvar.rcv.preferred_name","Submitter's preferred name for the variant." 227 | "clinvar.rcv.review_status","Review status of the ClinVar RCV record." 228 | "clinvar.ref","Reference allele in ClinVar." 229 | "clinvar.rsid","Associated dbSNP Reference SNP (rs) identifier." 230 | "clinvar.type","Type of variant (e.g., single nucleotide variant)." 231 | "clinvar.variant_id","ClinVar Variation ID." 232 | "cosmic._license","License information URL for the COSMIC data source." 233 | "cosmic.alt","Alternate allele in COSMIC (relative to reference)." 234 | "cosmic.chrom","Chromosome number in COSMIC." 235 | "cosmic.cosmic_id","COSMIC mutation identifier (e.g., COSM476)." 236 | "cosmic.hg19.end","End position of the variant in hg19 assembly (COSMIC)." 237 | "cosmic.hg19.start","Start position of the variant in hg19 assembly (COSMIC)." 238 | "cosmic.mut_freq","Mutation frequency reported in COSMIC samples (?). UNKNOWN." 239 | "cosmic.mut_nt","Nucleotide change reported in COSMIC (e.g., T>A)." 240 | "cosmic.ref","Reference allele in COSMIC." 241 | "cosmic.tumor_site","Primary tumor site where the mutation was observed in COSMIC." 242 | "dbnsfp._license","License information URL for the dbNSFP data source." 243 | "dbnsfp.aa.alt","Alternate amino acid predicted by dbNSFP." 244 | "dbnsfp.aa.codon_degeneracy","Codon degeneracy value. UNKNOWN significance here." 245 | "dbnsfp.aa.codonpos","Position within the codon (1, 2, or 3)." 246 | "dbnsfp.aa.pos","Amino acid position in the protein sequence (for different transcripts)." 
247 | "dbnsfp.aa.ref","Reference amino acid from dbNSFP." 248 | "dbnsfp.aa.refcodon","Reference codon sequence(s)." 249 | "dbnsfp.alphamissense.pred","AlphaMissense prediction (Pathogenic/Benign) for different transcripts." 250 | "dbnsfp.alphamissense.rankscore","AlphaMissense rank score (0-1, higher is more pathogenic)." 251 | "dbnsfp.alphamissense.score","AlphaMissense raw score for different transcripts." 252 | "dbnsfp.alt","Alternate allele in dbNSFP." 253 | "dbnsfp.ancestral_allele","Predicted ancestral allele from dbNSFP." 254 | "dbnsfp.appris","APPRIS annotation for the transcript (e.g., principal, alternative)." 255 | "dbnsfp.bayesdel.add_af.pred","BayesDel prediction (Deleterious/Tolerated) incorporating allele frequency." 256 | "dbnsfp.bayesdel.add_af.rankscore","BayesDel rank score incorporating allele frequency." 257 | "dbnsfp.bayesdel.add_af.score","BayesDel score incorporating allele frequency." 258 | "dbnsfp.bayesdel.no_af.pred","BayesDel prediction (Deleterious/Tolerated) without allele frequency." 259 | "dbnsfp.bayesdel.no_af.rankscore","BayesDel rank score without allele frequency." 260 | "dbnsfp.bayesdel.no_af.score","BayesDel score without allele frequency." 261 | "dbnsfp.bstatistic.converted_rankscore","BStatistic converted rank score." 262 | "dbnsfp.bstatistic.score","BStatistic raw score." 263 | "dbnsfp.chrom","Chromosome number in dbNSFP." 264 | "dbnsfp.clinpred.pred","ClinPred prediction (Deleterious/Benign)." 265 | "dbnsfp.clinpred.rankscore","ClinPred rank score." 266 | "dbnsfp.clinpred.score","ClinPred raw score." 267 | "dbnsfp.clinvar.clinvar_id","Associated ClinVar Variation ID in dbNSFP." 268 | "dbnsfp.clinvar.clnsig","ClinVar clinical significance assertions from dbNSFP." 269 | "dbnsfp.clinvar.hgvs","HGVS genomic notation from ClinVar via dbNSFP." 270 | "dbnsfp.clinvar.medgen","Associated MedGen CUIs from ClinVar via dbNSFP." 271 | "dbnsfp.clinvar.omim","Associated OMIM IDs from ClinVar via dbNSFP." 
272 | "dbnsfp.clinvar.orphanet","Associated Orphanet IDs from ClinVar via dbNSFP." 273 | "dbnsfp.clinvar.review","ClinVar review status from dbNSFP." 274 | "dbnsfp.clinvar.trait","Associated traits/diseases from ClinVar via dbNSFP." 275 | "dbnsfp.clinvar.var_source","Sources cited for the ClinVar entry via dbNSFP." 276 | "dbnsfp.dann.rankscore","DANN rank score." 277 | "dbnsfp.dann.score","DANN raw score for predicting deleteriousness." 278 | "dbnsfp.deogen2.pred","DEOGEN2 prediction (Deleterious/Tolerated)." 279 | "dbnsfp.deogen2.rankscore","DEOGEN2 rank score." 280 | "dbnsfp.deogen2.score","DEOGEN2 raw score." 281 | "dbnsfp.eigen.phred_coding","Eigen Phred-scaled score for coding variants." 282 | "dbnsfp.eigen.raw_coding","Eigen raw score for coding variants." 283 | "dbnsfp.eigen.raw_coding_rankscore","Eigen rank score for coding variants." 284 | "dbnsfp.eigen-pc.phred_coding","Eigen-PC Phred-scaled score for coding variants (principal components)." 285 | "dbnsfp.eigen-pc.raw_coding","Eigen-PC raw score for coding variants (principal components)." 286 | "dbnsfp.eigen-pc.raw_coding_rankscore","Eigen-PC rank score for coding variants (principal components)." 287 | "dbnsfp.ensembl.geneid","Ensembl Gene ID from dbNSFP." 288 | "dbnsfp.ensembl.proteinid","Ensembl Protein ID from dbNSFP." 289 | "dbnsfp.ensembl.transcriptid","Ensembl Transcript ID from dbNSFP." 290 | "dbnsfp.esm1b.pred","ESM-1b prediction (Deleterious/Benign)." 291 | "dbnsfp.esm1b.rankscore","ESM-1b rank score." 292 | "dbnsfp.esm1b.score","ESM-1b raw score." 293 | "dbnsfp.eve.class10_pred","EVE prediction class (Pathogenic/Benign/Uncertain) at 10% FDR." 294 | "dbnsfp.eve.class20_pred","EVE prediction class at 20% FDR." 295 | "dbnsfp.eve.class25_pred","EVE prediction class at 25% FDR." 296 | "dbnsfp.eve.class30_pred","EVE prediction class at 30% FDR." 297 | "dbnsfp.eve.class40_pred","EVE prediction class at 40% FDR." 298 | "dbnsfp.eve.class50_pred","EVE prediction class at 50% FDR." 
299 | "dbnsfp.eve.class60_pred","EVE prediction class at 60% FDR." 300 | "dbnsfp.eve.class70_pred","EVE prediction class at 70% FDR." 301 | "dbnsfp.eve.class75_pred","EVE prediction class at 75% FDR." 302 | "dbnsfp.eve.class80_pred","EVE prediction class at 80% FDR." 303 | "dbnsfp.eve.class90_pred","EVE prediction class at 90% FDR." 304 | "dbnsfp.eve.rankscore","EVE rank score." 305 | "dbnsfp.eve.score","EVE raw score." 306 | "dbnsfp.exac.ac","Allele count in ExAC database from dbNSFP." 307 | "dbnsfp.exac.adj_ac","Adjusted allele count in ExAC from dbNSFP." 308 | "dbnsfp.exac.adj_af","Adjusted allele frequency in ExAC from dbNSFP." 309 | "dbnsfp.exac.af","Allele frequency in ExAC database from dbNSFP." 310 | "dbnsfp.exac.afr.ac","Allele count in ExAC African population." 311 | "dbnsfp.exac.afr.af","Allele frequency in ExAC African population." 312 | "dbnsfp.exac.amr.ac","Allele count in ExAC American population." 313 | "dbnsfp.exac.amr.af","Allele frequency in ExAC American population." 314 | "dbnsfp.exac.eas.ac","Allele count in ExAC East Asian population." 315 | "dbnsfp.exac.eas.af","Allele frequency in ExAC East Asian population." 316 | "dbnsfp.exac.fin.ac","Allele count in ExAC Finnish population." 317 | "dbnsfp.exac.fin.af","Allele frequency in ExAC Finnish population." 318 | "dbnsfp.exac.nfe.ac","Allele count in ExAC Non-Finnish European population." 319 | "dbnsfp.exac.nfe.af","Allele frequency in ExAC Non-Finnish European population." 320 | "dbnsfp.exac.sas.ac","Allele count in ExAC South Asian population." 321 | "dbnsfp.exac.sas.af","Allele frequency in ExAC South Asian population." 322 | "dbnsfp.exac_nonpsych.ac","Allele count in ExAC non-psychiatric subset." 323 | "dbnsfp.exac_nonpsych.adj_ac","Adjusted allele count in ExAC non-psychiatric subset." 324 | "dbnsfp.exac_nonpsych.adj_af","Adjusted allele frequency in ExAC non-psychiatric subset." 325 | "dbnsfp.exac_nonpsych.af","Allele frequency in ExAC non-psychiatric subset." 
326 | "dbnsfp.exac_nonpsych.afr.ac","Allele count in ExAC non-psych African population." 327 | "dbnsfp.exac_nonpsych.afr.af","Allele frequency in ExAC non-psych African population." 328 | "dbnsfp.exac_nonpsych.amr.ac","Allele count in ExAC non-psych American population." 329 | "dbnsfp.exac_nonpsych.amr.af","Allele frequency in ExAC non-psych American population." 330 | "dbnsfp.exac_nonpsych.eas.ac","Allele count in ExAC non-psych East Asian population." 331 | "dbnsfp.exac_nonpsych.eas.af","Allele frequency in ExAC non-psych East Asian population." 332 | "dbnsfp.exac_nonpsych.fin.ac","Allele count in ExAC non-psych Finnish population." 333 | "dbnsfp.exac_nonpsych.fin.af","Allele frequency in ExAC non-psych Finnish population." 334 | "dbnsfp.exac_nonpsych.nfe.ac","Allele count in ExAC non-psych Non-Finnish European population." 335 | "dbnsfp.exac_nonpsych.nfe.af","Allele frequency in ExAC non-psych Non-Finnish European population." 336 | "dbnsfp.exac_nonpsych.sas.ac","Allele count in ExAC non-psych South Asian population." 337 | "dbnsfp.exac_nonpsych.sas.af","Allele frequency in ExAC non-psych South Asian population." 338 | "dbnsfp.exac_nontcga.ac","Allele count in ExAC non-TCGA subset." 339 | "dbnsfp.exac_nontcga.adj_ac","Adjusted allele count in ExAC non-TCGA subset." 340 | "dbnsfp.exac_nontcga.adj_af","Adjusted allele frequency in ExAC non-TCGA subset." 341 | "dbnsfp.exac_nontcga.af","Allele frequency in ExAC non-TCGA subset." 342 | "dbnsfp.exac_nontcga.afr.ac","Allele count in ExAC non-TCGA African population." 343 | "dbnsfp.exac_nontcga.afr.af","Allele frequency in ExAC non-TCGA African population." 344 | "dbnsfp.exac_nontcga.amr.ac","Allele count in ExAC non-TCGA American population." 345 | "dbnsfp.exac_nontcga.amr.af","Allele frequency in ExAC non-TCGA American population." 346 | "dbnsfp.exac_nontcga.eas.ac","Allele count in ExAC non-TCGA East Asian population." 347 | "dbnsfp.exac_nontcga.eas.af","Allele frequency in ExAC non-TCGA East Asian population." 
348 | "dbnsfp.exac_nontcga.fin.ac","Allele count in ExAC non-TCGA Finnish population." 349 | "dbnsfp.exac_nontcga.fin.af","Allele frequency in ExAC non-TCGA Finnish population." 350 | "dbnsfp.exac_nontcga.nfe.ac","Allele count in ExAC non-TCGA Non-Finnish European population." 351 | "dbnsfp.exac_nontcga.nfe.af","Allele frequency in ExAC non-TCGA Non-Finnish European population." 352 | "dbnsfp.exac_nontcga.sas.ac","Allele count in ExAC non-TCGA South Asian population." 353 | "dbnsfp.exac_nontcga.sas.af","Allele frequency in ExAC non-TCGA South Asian population." 354 | "dbnsfp.fathmm-mkl.coding_group","FATHMM-MKL coding group assignment." 355 | "dbnsfp.fathmm-mkl.coding_pred","FATHMM-MKL prediction (Deleterious/Neutral) for coding variants." 356 | "dbnsfp.fathmm-mkl.coding_rankscore","FATHMM-MKL rank score for coding variants." 357 | "dbnsfp.fathmm-mkl.coding_score","FATHMM-MKL raw score for coding variants." 358 | "dbnsfp.fathmm-xf.coding_pred","FATHMM-XF prediction (Deleterious/Neutral) for coding variants." 359 | "dbnsfp.fathmm-xf.coding_rankscore","FATHMM-XF rank score for coding variants." 360 | "dbnsfp.fathmm-xf.coding_score","FATHMM-XF raw score for coding variants." 361 | "dbnsfp.fitcons.gm12878.confidence_value","FitCons confidence value in GM12878 cell line." 362 | "dbnsfp.fitcons.gm12878.rankscore","FitCons rank score in GM12878 cell line." 363 | "dbnsfp.fitcons.gm12878.score","FitCons raw score in GM12878 cell line." 364 | "dbnsfp.fitcons.h1-hesc.confidence_value","FitCons confidence value in H1-hESC cell line." 365 | "dbnsfp.fitcons.h1-hesc.rankscore","FitCons rank score in H1-hESC cell line." 366 | "dbnsfp.fitcons.h1-hesc.score","FitCons raw score in H1-hESC cell line." 367 | "dbnsfp.fitcons.huvec.confidence_value","FitCons confidence value in HUVEC cell line." 368 | "dbnsfp.fitcons.huvec.rankscore","FitCons rank score in HUVEC cell line." 369 | "dbnsfp.fitcons.huvec.score","FitCons raw score in HUVEC cell line." 
370 | "dbnsfp.fitcons.integrated.confidence_value","Integrated FitCons confidence value across cell lines." 371 | "dbnsfp.fitcons.integrated.rankscore","Integrated FitCons rank score across cell lines." 372 | "dbnsfp.fitcons.integrated.score","Integrated FitCons raw score across cell lines." 373 | "dbnsfp.gencode_basic","Indicates if transcript is part of GENCODE basic set (Y/N)." 374 | "dbnsfp.genename","Gene name(s) from dbNSFP." 375 | "dbnsfp.genocanyon.rankscore","GenoCanyon rank score." 376 | "dbnsfp.genocanyon.score","GenoCanyon raw score for functional prediction." 377 | "dbnsfp.gerp++.nr","GERP++ Neutral Rate score." 378 | "dbnsfp.gerp++.rs","GERP++ Rejected Substitutions score (conservation)." 379 | "dbnsfp.gerp++.rs_rankscore","GERP++ RS rank score." 380 | "dbnsfp.gmvp.rankscore","GMVP (Genome-Wide Missense Variant Pathogenicity) rank score." 381 | "dbnsfp.gmvp.score","GMVP raw score." 382 | "dbnsfp.hg18.end","End position in hg18 assembly." 383 | "dbnsfp.hg18.start","Start position in hg18 assembly." 384 | "dbnsfp.hg19.end","End position in hg19 assembly." 385 | "dbnsfp.hg19.start","Start position in hg19 assembly." 386 | "dbnsfp.hg38.end","End position in hg38 assembly." 387 | "dbnsfp.hg38.start","Start position in hg38 assembly." 388 | "dbnsfp.hgvsc","HGVS coding sequence notation(s) from dbNSFP." 389 | "dbnsfp.hgvsp","HGVS protein sequence notation(s) from dbNSFP." 390 | "dbnsfp.interpro.domain","InterPro protein domain annotation(s)." 391 | "dbnsfp.list-s2.pred","LIST-S2 prediction (Tolerated/Damaging)." 392 | "dbnsfp.list-s2.rankscore","LIST-S2 rank score." 393 | "dbnsfp.list-s2.score","LIST-S2 raw score." 394 | "dbnsfp.lrt.converted_rankscore","LRT converted rank score." 395 | "dbnsfp.lrt.omega","LRT omega value (dN/dS ratio)." 396 | "dbnsfp.lrt.pred","LRT prediction (Deleterious/Neutral/Unknown)." 397 | "dbnsfp.lrt.score","LRT raw score (likelihood ratio test)." 398 | "dbnsfp.m-cap.pred","M-CAP prediction (Deleterious/Tolerated)." 
399 | "dbnsfp.m-cap.rankscore","M-CAP rank score." 400 | "dbnsfp.m-cap.score","M-CAP raw score." 401 | "dbnsfp.metalr.pred","MetaLR prediction (Tolerated/Damaging)." 402 | "dbnsfp.metalr.rankscore","MetaLR rank score." 403 | "dbnsfp.metalr.score","MetaLR raw score." 404 | "dbnsfp.metarnn.pred","MetaRNN prediction (Deleterious/Benign)." 405 | "dbnsfp.metarnn.rankscore","MetaRNN rank score." 406 | "dbnsfp.metarnn.score","MetaRNN raw score." 407 | "dbnsfp.metasvm.pred","MetaSVM prediction (Tolerated/Damaging)." 408 | "dbnsfp.metasvm.rankscore","MetaSVM rank score." 409 | "dbnsfp.metasvm.score","MetaSVM raw score." 410 | "dbnsfp.mpc.rankscore","MPC (Missense badness, PolyPhen-2, and Constraint) rank score." 411 | "dbnsfp.mpc.score","MPC raw score." 412 | "dbnsfp.mutationassessor.pred","MutationAssessor prediction (high/medium/low/neutral functional impact)." 413 | "dbnsfp.mutationassessor.rankscore","MutationAssessor rank score." 414 | "dbnsfp.mutationassessor.score","MutationAssessor raw score (functional impact score)." 415 | "dbnsfp.mutationtaster.aae","Amino acid change predicted by MutationTaster." 416 | "dbnsfp.mutationtaster.converted_rankscore","MutationTaster converted rank score." 417 | "dbnsfp.mutationtaster.model","MutationTaster model used for prediction." 418 | "dbnsfp.mutationtaster.pred","MutationTaster prediction (disease_causing_automatic/polymorphism_automatic)." 419 | "dbnsfp.mutationtaster.score","MutationTaster raw score (probability of being deleterious)." 420 | "dbnsfp.mutformer.rankscore","MutFormer rank score." 421 | "dbnsfp.mutformer.score","MutFormer raw score." 422 | "dbnsfp.mutpred.aa_change","Amino acid change considered by MutPred." 423 | "dbnsfp.mutpred.accession","UniProt accession used by MutPred." 424 | "dbnsfp.mutpred.pred.mechanism","Molecular mechanism predicted by MutPred to be affected." 425 | "dbnsfp.mutpred.pred.p_val","P-value associated with the MutPred mechanism prediction." 
426 | "dbnsfp.mutpred.rankscore","MutPred rank score." 427 | "dbnsfp.mutpred.score","MutPred raw score (probability of being deleterious)." 428 | "dbnsfp.mvp.rankscore","MVP (Missense Variant Pathogenicity) rank score." 429 | "dbnsfp.mvp.score","MVP raw score." 430 | "dbnsfp.phactboost.rankscore","phACTboost rank score." 431 | "dbnsfp.phactboost.score","phACTboost raw score." 432 | "dbnsfp.phastcons.100way_vertebrate.rankscore","PhastCons 100-way vertebrate conservation rank score." 433 | "dbnsfp.phastcons.100way_vertebrate.score","PhastCons 100-way vertebrate conservation score." 434 | "dbnsfp.phastcons.17way_primate.rankscore","PhastCons 17-way primate conservation rank score." 435 | "dbnsfp.phastcons.17way_primate.score","PhastCons 17-way primate conservation score." 436 | "dbnsfp.phastcons.470way_mammalian.rankscore","PhastCons 470-way mammalian conservation rank score." 437 | "dbnsfp.phastcons.470way_mammalian.score","PhastCons 470-way mammalian conservation score." 438 | "dbnsfp.phylop.100way_vertebrate.rankscore","PhyloP 100-way vertebrate conservation rank score." 439 | "dbnsfp.phylop.100way_vertebrate.score","PhyloP 100-way vertebrate conservation score." 440 | "dbnsfp.phylop.17way_primate.rankscore","PhyloP 17-way primate conservation rank score." 441 | "dbnsfp.phylop.17way_primate.score","PhyloP 17-way primate conservation score." 442 | "dbnsfp.phylop.470way_mammalian.rankscore","PhyloP 470-way mammalian conservation rank score." 443 | "dbnsfp.phylop.470way_mammalian.score","PhyloP 470-way mammalian conservation score." 444 | "dbnsfp.polyphen2.hdiv.pred","PolyPhen-2 HDIV prediction (Probably_damaging/Possibly_damaging/Benign)." 445 | "dbnsfp.polyphen2.hdiv.rankscore","PolyPhen-2 HDIV rank score." 446 | "dbnsfp.polyphen2.hdiv.score","PolyPhen-2 HDIV raw score." 447 | "dbnsfp.polyphen2.hvar.pred","PolyPhen-2 HVAR prediction (Probably_damaging/Possibly_damaging/Benign)." 448 | "dbnsfp.polyphen2.hvar.rankscore","PolyPhen-2 HVAR rank score." 
449 | "dbnsfp.polyphen2.hvar.score","PolyPhen-2 HVAR raw score." 450 | "dbnsfp.primateai.pred","PrimateAI prediction (Deleterious/Tolerated)." 451 | "dbnsfp.primateai.rankscore","PrimateAI rank score." 452 | "dbnsfp.primateai.score","PrimateAI raw score." 453 | "dbnsfp.ref","Reference allele in dbNSFP." 454 | "dbnsfp.reliability_index","Number of observed component scores used for MetaSVM and MetaLR (1-10; higher means more reliable predictions)." 455 | "dbnsfp.revel.rankscore","REVEL (Rare Exome Variant Ensemble Learner) rank score." 456 | "dbnsfp.revel.score","REVEL raw score." 457 | "dbnsfp.rsid","Associated dbSNP rsID from dbNSFP." 458 | "dbnsfp.siphy_29way.logodds_rankscore","SiPhy 29-way log-odds rank score." 459 | "dbnsfp.siphy_29way.logodds_score","SiPhy 29-way log-odds conservation score." 460 | "dbnsfp.siphy_29way.pi.a","SiPhy estimated probability of A at this position." 461 | "dbnsfp.siphy_29way.pi.c","SiPhy estimated probability of C at this position." 462 | "dbnsfp.siphy_29way.pi.g","SiPhy estimated probability of G at this position." 463 | "dbnsfp.siphy_29way.pi.t","SiPhy estimated probability of T at this position." 464 | "dbnsfp.tsl","Transcript Support Level from Ensembl." 465 | "dbnsfp.uniprot.acc","UniProt accession number(s)." 466 | "dbnsfp.uniprot.entry","UniProt entry name(s)." 467 | "dbnsfp.varity.er.rankscore","VARITY_ER (model for extremely rare variants) rank score." 468 | "dbnsfp.varity.er.score","VARITY_ER raw score." 469 | "dbnsfp.varity.er_loo.rankscore","VARITY_ER_LOO (leave-one-out) rank score." 470 | "dbnsfp.varity.er_loo.score","VARITY_ER_LOO raw score." 471 | "dbnsfp.varity.r.rankscore","VARITY_R (model for rare variants) rank score." 472 | "dbnsfp.varity.r.score","VARITY_R raw score." 473 | "dbnsfp.varity.r_loo.rankscore","VARITY_R_LOO (leave-one-out) rank score." 474 | "dbnsfp.varity.r_loo.score","VARITY_R_LOO raw score." 475 | "dbnsfp.vep_canonical","Indicates if the transcript is the VEP canonical transcript (YES/NO)." 476 | "dbsnp._license","License information URL for the dbSNP data source."
477 | "dbsnp.alleles.allele","Allele base (A, C, G, or T)." 478 | "dbsnp.alleles.freq.exac","Allele frequency in ExAC as reported by dbSNP." 479 | "dbsnp.alleles.freq.gnomad_exomes","Allele frequency in gnomAD exomes as reported by dbSNP." 480 | "dbsnp.alt","Alternate allele(s) in dbSNP." 481 | "dbsnp.chrom","Chromosome number in dbSNP." 482 | "dbsnp.citations","List of PubMed IDs citing this dbSNP entry." 483 | "dbsnp.dbsnp_build","dbSNP build number when the information was extracted." 484 | "dbsnp.gene.geneid","Entrez Gene ID associated with the dbSNP record." 485 | "dbsnp.gene.is_pseudo","Boolean indicating if the associated gene is a pseudogene." 486 | "dbsnp.gene.name","Full name of the associated gene." 487 | "dbsnp.gene.rnas.codon_aligned_transcript_change.deleted_sequence","Deleted sequence in codon-aligned transcript context." 488 | "dbsnp.gene.rnas.codon_aligned_transcript_change.inserted_sequence","Inserted sequence in codon-aligned transcript context." 489 | "dbsnp.gene.rnas.codon_aligned_transcript_change.position","Position of change in codon-aligned transcript context." 490 | "dbsnp.gene.rnas.codon_aligned_transcript_change.seq_id","Sequence ID for codon-aligned transcript context." 491 | "dbsnp.gene.rnas.hgvs","HGVS notation for the specific RNA transcript." 492 | "dbsnp.gene.rnas.protein.variant.spdi.deleted_sequence","Deleted sequence in SPDI protein context." 493 | "dbsnp.gene.rnas.protein.variant.spdi.inserted_sequence","Inserted sequence in SPDI protein context." 494 | "dbsnp.gene.rnas.protein.variant.spdi.position","Position of change in SPDI protein context." 495 | "dbsnp.gene.rnas.protein.variant.spdi.seq_id","Sequence ID for SPDI protein context." 496 | "dbsnp.gene.rnas.protein_product.refseq","RefSeq protein product identifier (NP_)." 497 | "dbsnp.gene.rnas.refseq","RefSeq RNA transcript identifier (NM_ or XM_)." 498 | "dbsnp.gene.rnas.so.accession","Sequence Ontology term accession (SO:...)." 
499 | "dbsnp.gene.rnas.so.name","Sequence Ontology term name (e.g., coding_sequence_variant)." 500 | "dbsnp.gene.strand","Gene strand (+ or -)." 501 | "dbsnp.gene.symbol","Gene symbol (e.g., BRAF)." 502 | "dbsnp.hg19.end","End position in hg19 assembly (dbSNP)." 503 | "dbsnp.hg19.start","Start position in hg19 assembly (dbSNP)." 504 | "dbsnp.ref","Reference allele in dbSNP." 505 | "dbsnp.rsid","dbSNP Reference SNP (rs) identifier." 506 | "dbsnp.vartype","Type of variation (e.g., snv)." 507 | "docm.aa_change","Amino acid change notation (e.g., p.V600E) from DOCM." 508 | "docm.all_domains","All protein domains overlapping the variant position from DOCM." 509 | "docm.alt","Alternate allele in DOCM." 510 | "docm.c_position","cDNA position notation (e.g., c.1799) from DOCM." 511 | "docm.chrom","Chromosome number in DOCM." 512 | "docm.default_gene_name","Default gene name used in DOCM." 513 | "docm.deletion_substructures","Substructure information for deletions (often '-'). UNKNOWN." 514 | "docm.disease","Disease associated with the variant in DOCM." 515 | "docm.doid","Disease Ontology ID (DOID) associated with the variant in DOCM." 516 | "docm.domain","Specific protein domain containing the variant from DOCM." 517 | "docm.ensembl_gene_id","Ensembl gene ID from DOCM." 518 | "docm.genename","Gene name from DOCM." 519 | "docm.genename_source","Source of the gene name (e.g., HGNC) in DOCM." 520 | "docm.hg19.end","End position in hg19 assembly (DOCM)." 521 | "docm.hg19.start","Start position in hg19 assembly (DOCM)." 522 | "docm.primary","Indicates if this is the primary transcript used (?). UNKNOWN." 523 | "docm.pubmed_id","Associated PubMed IDs from DOCM." 524 | "docm.ref","Reference allele in DOCM." 525 | "docm.source","Original data source cited by DOCM (e.g., MyCancerGenome)." 526 | "docm.strand","Genomic strand (+ or -) in DOCM." 527 | "docm.transcript_error","Indicates errors found during transcript mapping in DOCM." 
528 | "docm.transcript_name","Transcript name used for annotation in DOCM." 529 | "docm.transcript_source","Source of the transcript information (e.g., ensembl) in DOCM." 530 | "docm.transcript_species","Species of the transcript (e.g., human) in DOCM." 531 | "docm.transcript_status","Status of the transcript (e.g., known) in DOCM." 532 | "docm.transcript_version","Version of the transcript used in DOCM." 533 | "docm.trv_type","Type of transcript variation (e.g., missense) in DOCM." 534 | "docm.type","Type of variant (e.g., SNP) in DOCM." 535 | "docm.ucsc_cons","UCSC conservation score (?). UNKNOWN." 536 | "docm.url","URL link to the source entry in DOCM." 537 | "emv._license","License information URL for the EMV data source." 538 | "emv.egl_classification","EGL classification of the variant (e.g., Pathogenic)." 539 | "emv.egl_classification_date","Date of the EGL classification." 540 | "emv.egl_protein","Protein change notation used by EGL." 541 | "emv.egl_variant","Variant notation used by EGL (often HGVS coding)." 542 | "emv.exon","Exon number containing the variant from EMV." 543 | "emv.gene","Gene symbol from EMV." 544 | "emv.hgvs","List of HGVS notations associated with the variant in EMV." 545 | "emv.variant_id","Internal EMV variant identifier." 546 | "exac._license","License information URL for the ExAC data source." 547 | "exac.ac.ac","Total allele count in ExAC." 548 | "exac.ac.ac_adj","Adjusted total allele count in ExAC (after filtering)." 549 | "exac.ac.ac_afr","Allele count in ExAC African/African American population." 550 | "exac.ac.ac_amr","Allele count in ExAC American population." 551 | "exac.ac.ac_eas","Allele count in ExAC East Asian population." 552 | "exac.ac.ac_female","Allele count in ExAC female population." 553 | "exac.ac.ac_fin","Allele count in ExAC Finnish population." 554 | "exac.ac.ac_het","Heterozygous allele count in ExAC." 555 | "exac.ac.ac_hom","Homozygous allele count in ExAC." 
556 | "exac.ac.ac_male","Allele count in ExAC male population." 557 | "exac.ac.ac_nfe","Allele count in ExAC Non-Finnish European population." 558 | "exac.ac.ac_oth","Allele count in ExAC Other population." 559 | "exac.ac.ac_sas","Allele count in ExAC South Asian population." 560 | "exac.af","Allele frequency in ExAC." 561 | "exac.alleles","Alternate allele(s) observed in ExAC." 562 | "exac.alt","Alternate allele in ExAC format." 563 | "exac.an.an","Total number of alleles genotyped in ExAC." 564 | "exac.an.an_adj","Adjusted total number of alleles in ExAC (after filtering)." 565 | "exac.an.an_afr","Number of alleles in ExAC African/African American population." 566 | "exac.an.an_amr","Number of alleles in ExAC American population." 567 | "exac.an.an_eas","Number of alleles in ExAC East Asian population." 568 | "exac.an.an_female","Number of alleles in ExAC female population." 569 | "exac.an.an_fin","Number of alleles in ExAC Finnish population." 570 | "exac.an.an_male","Number of alleles in ExAC male population." 571 | "exac.an.an_nfe","Number of alleles in ExAC Non-Finnish European population." 572 | "exac.an.an_oth","Number of alleles in ExAC Other population." 573 | "exac.an.an_sas","Number of alleles in ExAC South Asian population." 574 | "exac.baseqranksum","ExAC BaseQRankSum test statistic (base quality difference ref vs alt)." 575 | "exac.chrom","Chromosome number in ExAC." 576 | "exac.clippingranksum","ExAC ClippingRankSum test statistic." 577 | "exac.culprit","ExAC VQSR culprit annotation." 578 | "exac.fs","ExAC FisherStrand bias score." 579 | "exac.het.het_afr","Heterozygous count in ExAC African/African American population." 580 | "exac.het.het_amr","Heterozygous count in ExAC American population." 581 | "exac.het.het_eas","Heterozygous count in ExAC East Asian population." 582 | "exac.het.het_fin","Heterozygous count in ExAC Finnish population." 583 | "exac.het.het_nfe","Heterozygous count in ExAC Non-Finnish European population."
584 | "exac.het.het_oth","Heterozygous count in ExAC Other population." 585 | "exac.het.het_sas","Heterozygous count in ExAC South Asian population." 586 | "exac.hom.hom_afr","Homozygous count in ExAC African/African American population." 587 | "exac.hom.hom_amr","Homozygous count in ExAC American population." 588 | "exac.hom.hom_eas","Homozygous count in ExAC East Asian population." 589 | "exac.hom.hom_fin","Homozygous count in ExAC Finnish population." 590 | "exac.hom.hom_nfe","Homozygous count in ExAC Non-Finnish European population." 591 | "exac.hom.hom_oth","Homozygous count in ExAC Other population." 592 | "exac.hom.hom_sas","Homozygous count in ExAC South Asian population." 593 | "exac.inbreedingcoeff","ExAC Inbreeding Coefficient." 594 | "exac.mq.mq","ExAC root mean square Mapping Quality." 595 | "exac.mq.mq0","ExAC count of reads with mapping quality 0." 596 | "exac.mq.mqranksum","ExAC MQRankSum test statistic (mapping quality difference ref vs alt)." 597 | "exac.ncc","ExAC number of no-called samples (NCC) at this site." 598 | "exac.pos","Genomic position in ExAC (hg19)." 599 | "exac.qd","ExAC Quality by Depth score." 600 | "exac.readposranksum","ExAC ReadPosRankSum test statistic (position bias)." 601 | "exac.ref","Reference allele in ExAC format." 602 | "exac.type","Variant type in ExAC (e.g., snp)." 603 | "exac.vqslod","ExAC Variant Quality Score Log-Odds." 604 | "exac_nontcga._license","License information URL for the ExAC non-TCGA data source." 605 | "exac_nontcga.ac.ac","Total allele count in ExAC non-TCGA subset." 606 | "exac_nontcga.ac.ac_adj","Adjusted total allele count in ExAC non-TCGA subset." 607 | "exac_nontcga.ac.ac_afr","Allele count in ExAC non-TCGA African/African American population." 608 | "exac_nontcga.ac.ac_amr","Allele count in ExAC non-TCGA American population." 609 | "exac_nontcga.ac.ac_eas","Allele count in ExAC non-TCGA East Asian population."
610 | "exac_nontcga.ac.ac_female","Allele count in ExAC non-TCGA female population." 611 | "exac_nontcga.ac.ac_fin","Allele count in ExAC non-TCGA Finnish population." 612 | "exac_nontcga.ac.ac_het","Heterozygous allele count in ExAC non-TCGA subset." 613 | "exac_nontcga.ac.ac_hom","Homozygous allele count in ExAC non-TCGA subset." 614 | "exac_nontcga.ac.ac_male","Allele count in ExAC non-TCGA male population." 615 | "exac_nontcga.ac.ac_nfe","Allele count in ExAC non-TCGA Non-Finnish European population." 616 | "exac_nontcga.ac.ac_oth","Allele count in ExAC non-TCGA Other population." 617 | "exac_nontcga.ac.ac_sas","Allele count in ExAC non-TCGA South Asian population." 618 | "exac_nontcga.af","Allele frequency in ExAC non-TCGA subset." 619 | "exac_nontcga.alleles","Alternate allele(s) observed in ExAC non-TCGA subset." 620 | "exac_nontcga.alt","Alternate allele in ExAC non-TCGA format." 621 | "exac_nontcga.an.an","Total number of alleles genotyped in ExAC non-TCGA subset." 622 | "exac_nontcga.an.an_adj","Adjusted total number of alleles in ExAC non-TCGA subset." 623 | "exac_nontcga.an.an_afr","Number of alleles in ExAC non-TCGA African/African American population." 624 | "exac_nontcga.an.an_amr","Number of alleles in ExAC non-TCGA American population." 625 | "exac_nontcga.an.an_eas","Number of alleles in ExAC non-TCGA East Asian population." 626 | "exac_nontcga.an.an_female","Number of alleles in ExAC non-TCGA female population." 627 | "exac_nontcga.an.an_fin","Number of alleles in ExAC non-TCGA Finnish population." 628 | "exac_nontcga.an.an_male","Number of alleles in ExAC non-TCGA male population." 629 | "exac_nontcga.an.an_nfe","Number of alleles in ExAC non-TCGA Non-Finnish European population." 630 | "exac_nontcga.an.an_oth","Number of alleles in ExAC non-TCGA Other population." 631 | "exac_nontcga.an.an_sas","Number of alleles in ExAC non-TCGA South Asian population." 632 | "exac_nontcga.baseqranksum","ExAC non-TCGA BaseQRankSum test statistic." 
633 | "exac_nontcga.chrom","Chromosome number in ExAC non-TCGA subset." 634 | "exac_nontcga.clippingranksum","ExAC non-TCGA ClippingRankSum test statistic." 635 | "exac_nontcga.culprit","ExAC non-TCGA VQSR culprit annotation." 636 | "exac_nontcga.fs","ExAC non-TCGA FisherStrand bias score." 637 | "exac_nontcga.het.het_afr","Heterozygous count in ExAC non-TCGA African/African American population." 638 | "exac_nontcga.het.het_amr","Heterozygous count in ExAC non-TCGA American population." 639 | "exac_nontcga.het.het_eas","Heterozygous count in ExAC non-TCGA East Asian population." 640 | "exac_nontcga.het.het_fin","Heterozygous count in ExAC non-TCGA Finnish population." 641 | "exac_nontcga.het.het_nfe","Heterozygous count in ExAC non-TCGA Non-Finnish European population." 642 | "exac_nontcga.het.het_oth","Heterozygous count in ExAC non-TCGA Other population." 643 | "exac_nontcga.het.het_sas","Heterozygous count in ExAC non-TCGA South Asian population." 644 | "exac_nontcga.hom.hom_afr","Homozygous count in ExAC non-TCGA African/African American population." 645 | "exac_nontcga.hom.hom_amr","Homozygous count in ExAC non-TCGA American population." 646 | "exac_nontcga.hom.hom_eas","Homozygous count in ExAC non-TCGA East Asian population." 647 | "exac_nontcga.hom.hom_fin","Homozygous count in ExAC non-TCGA Finnish population." 648 | "exac_nontcga.hom.hom_nfe","Homozygous count in ExAC non-TCGA Non-Finnish European population." 649 | "exac_nontcga.hom.hom_oth","Homozygous count in ExAC non-TCGA Other population." 650 | "exac_nontcga.hom.hom_sas","Homozygous count in ExAC non-TCGA South Asian population." 651 | "exac_nontcga.inbreedingcoeff","ExAC non-TCGA Inbreeding Coefficient." 652 | "exac_nontcga.mq.mq","ExAC non-TCGA root mean square Mapping Quality." 653 | "exac_nontcga.mq.mq0","ExAC non-TCGA count of reads with mapping quality 0." 654 | "exac_nontcga.mq.mqranksum","ExAC non-TCGA MQRankSum test statistic." 
655 | "exac_nontcga.ncc","ExAC non-TCGA number of no-called samples (NCC) at this site." 656 | "exac_nontcga.pos","Genomic position in ExAC non-TCGA (hg19)." 657 | "exac_nontcga.qd","ExAC non-TCGA Quality by Depth score." 658 | "exac_nontcga.readposranksum","ExAC non-TCGA ReadPosRankSum test statistic." 659 | "exac_nontcga.ref","Reference allele in ExAC non-TCGA format." 660 | "exac_nontcga.type","Variant type in ExAC non-TCGA (e.g., snp)." 661 | "exac_nontcga.vqslod","ExAC non-TCGA Variant Quality Score Log-Odds." 662 | "gnomad_exome._license","License information URL for the gnomAD exome data source." 663 | "gnomad_exome.ac.ac","Total allele count in gnomAD exomes." 664 | "gnomad_exome.ac.ac_afr","Allele count in gnomAD exomes African/African American population." 665 | "gnomad_exome.ac.ac_afr_female","Allele count in gnomAD exomes African/African American female population." 666 | "gnomad_exome.ac.ac_afr_male","Allele count in gnomAD exomes African/African American male population." 667 | "gnomad_exome.ac.ac_amr","Allele count in gnomAD exomes American population." 668 | "gnomad_exome.ac.ac_amr_female","Allele count in gnomAD exomes American female population." 669 | "gnomad_exome.ac.ac_amr_male","Allele count in gnomAD exomes American male population." 670 | "gnomad_exome.ac.ac_asj","Allele count in gnomAD exomes Ashkenazi Jewish population." 671 | "gnomad_exome.ac.ac_asj_female","Allele count in gnomAD exomes Ashkenazi Jewish female population." 672 | "gnomad_exome.ac.ac_asj_male","Allele count in gnomAD exomes Ashkenazi Jewish male population." 673 | "gnomad_exome.ac.ac_eas","Allele count in gnomAD exomes East Asian population." 674 | "gnomad_exome.ac.ac_eas_female","Allele count in gnomAD exomes East Asian female population." 675 | "gnomad_exome.ac.ac_eas_jpn","Allele count in gnomAD exomes East Asian Japanese population." 676 | "gnomad_exome.ac.ac_eas_kor","Allele count in gnomAD exomes East Asian Korean population."
677 | "gnomad_exome.ac.ac_eas_male","Allele count in gnomAD exomes East Asian male population." 678 | "gnomad_exome.ac.ac_eas_oea","Allele count in gnomAD exomes East Asian Other population." 679 | "gnomad_exome.ac.ac_female","Total allele count in gnomAD exomes female population." 680 | "gnomad_exome.ac.ac_fin","Allele count in gnomAD exomes Finnish population." 681 | "gnomad_exome.ac.ac_fin_female","Allele count in gnomAD exomes Finnish female population." 682 | "gnomad_exome.ac.ac_fin_male","Allele count in gnomAD exomes Finnish male population." 683 | "gnomad_exome.ac.ac_male","Total allele count in gnomAD exomes male population." 684 | "gnomad_exome.ac.ac_nfe","Allele count in gnomAD exomes Non-Finnish European population." 685 | "gnomad_exome.ac.ac_nfe_bgr","Allele count in gnomAD exomes NFE Bulgarian population." 686 | "gnomad_exome.ac.ac_nfe_est","Allele count in gnomAD exomes NFE Estonian population." 687 | "gnomad_exome.ac.ac_nfe_female","Allele count in gnomAD exomes NFE female population." 688 | "gnomad_exome.ac.ac_nfe_male","Allele count in gnomAD exomes NFE male population." 689 | "gnomad_exome.ac.ac_nfe_nwe","Allele count in gnomAD exomes NFE North-Western European population." 690 | "gnomad_exome.ac.ac_nfe_onf","Allele count in gnomAD exomes NFE Other Non-Finnish European population." 691 | "gnomad_exome.ac.ac_nfe_seu","Allele count in gnomAD exomes NFE Southern European population." 692 | "gnomad_exome.ac.ac_nfe_swe","Allele count in gnomAD exomes NFE Swedish population." 693 | "gnomad_exome.ac.ac_oth","Allele count in gnomAD exomes Other population." 694 | "gnomad_exome.ac.ac_oth_female","Allele count in gnomAD exomes Other female population." 695 | "gnomad_exome.ac.ac_oth_male","Allele count in gnomAD exomes Other male population." 696 | "gnomad_exome.ac.ac_sas","Allele count in gnomAD exomes South Asian population." 697 | "gnomad_exome.ac.ac_sas_female","Allele count in gnomAD exomes South Asian female population." 
698 | "gnomad_exome.ac.ac_sas_male","Allele count in gnomAD exomes South Asian male population." 699 | "gnomad_exome.af.af","Overall allele frequency in gnomAD exomes." 700 | "gnomad_exome.af.af_afr","Allele frequency in gnomAD exomes African/African American population." 701 | "gnomad_exome.af.af_afr_female","Allele frequency in gnomAD exomes African/African American female population." 702 | "gnomad_exome.af.af_afr_male","Allele frequency in gnomAD exomes African/African American male population." 703 | "gnomad_exome.af.af_amr","Allele frequency in gnomAD exomes American population." 704 | "gnomad_exome.af.af_amr_female","Allele frequency in gnomAD exomes American female population." 705 | "gnomad_exome.af.af_amr_male","Allele frequency in gnomAD exomes American male population." 706 | "gnomad_exome.af.af_asj","Allele frequency in gnomAD exomes Ashkenazi Jewish population." 707 | "gnomad_exome.af.af_asj_female","Allele frequency in gnomAD exomes Ashkenazi Jewish female population." 708 | "gnomad_exome.af.af_asj_male","Allele frequency in gnomAD exomes Ashkenazi Jewish male population." 709 | "gnomad_exome.af.af_eas","Allele frequency in gnomAD exomes East Asian population." 710 | "gnomad_exome.af.af_eas_female","Allele frequency in gnomAD exomes East Asian female population." 711 | "gnomad_exome.af.af_eas_jpn","Allele frequency in gnomAD exomes East Asian Japanese population." 712 | "gnomad_exome.af.af_eas_kor","Allele frequency in gnomAD exomes East Asian Korean population." 713 | "gnomad_exome.af.af_eas_male","Allele frequency in gnomAD exomes East Asian male population." 714 | "gnomad_exome.af.af_eas_oea","Allele frequency in gnomAD exomes East Asian Other population." 715 | "gnomad_exome.af.af_female","Overall allele frequency in gnomAD exomes female population." 716 | "gnomad_exome.af.af_fin","Allele frequency in gnomAD exomes Finnish population." 717 | "gnomad_exome.af.af_fin_female","Allele frequency in gnomAD exomes Finnish female population." 
718 | "gnomad_exome.af.af_fin_male","Allele frequency in gnomAD exomes Finnish male population." 719 | "gnomad_exome.af.af_male","Overall allele frequency in gnomAD exomes male population." 720 | "gnomad_exome.af.af_nfe","Allele frequency in gnomAD exomes Non-Finnish European population." 721 | "gnomad_exome.af.af_nfe_bgr","Allele frequency in gnomAD exomes NFE Bulgarian population." 722 | "gnomad_exome.af.af_nfe_est","Allele frequency in gnomAD exomes NFE Estonian population." 723 | "gnomad_exome.af.af_nfe_female","Allele frequency in gnomAD exomes NFE female population." 724 | "gnomad_exome.af.af_nfe_male","Allele frequency in gnomAD exomes NFE male population." 725 | "gnomad_exome.af.af_nfe_nwe","Allele frequency in gnomAD exomes NFE North-Western European population." 726 | "gnomad_exome.af.af_nfe_onf","Allele frequency in gnomAD exomes NFE Other Non-Finnish European population." 727 | "gnomad_exome.af.af_nfe_seu","Allele frequency in gnomAD exomes NFE Southern European population." 728 | "gnomad_exome.af.af_nfe_swe","Allele frequency in gnomAD exomes NFE Swedish population." 729 | "gnomad_exome.af.af_oth","Allele frequency in gnomAD exomes Other population." 730 | "gnomad_exome.af.af_oth_female","Allele frequency in gnomAD exomes Other female population." 731 | "gnomad_exome.af.af_oth_male","Allele frequency in gnomAD exomes Other male population." 732 | "gnomad_exome.af.af_sas","Allele frequency in gnomAD exomes South Asian population." 733 | "gnomad_exome.af.af_sas_female","Allele frequency in gnomAD exomes South Asian female population." 734 | "gnomad_exome.af.af_sas_male","Allele frequency in gnomAD exomes South Asian male population." 735 | "gnomad_exome.alleles","Alternate allele(s) observed in gnomAD exomes." 736 | "gnomad_exome.alt","Alternate allele in gnomAD exome format." 737 | "gnomad_exome.an.an","Total number of alleles genotyped in gnomAD exomes." 
738 | "gnomad_exome.an.an_afr","Number of alleles in gnomAD exomes African/African American population." 739 | "gnomad_exome.an.an_afr_female","Number of alleles in gnomAD exomes African/African American female population." 740 | "gnomad_exome.an.an_afr_male","Number of alleles in gnomAD exomes African/African American male population." 741 | "gnomad_exome.an.an_amr","Number of alleles in gnomAD exomes American population." 742 | "gnomad_exome.an.an_amr_female","Number of alleles in gnomAD exomes American female population." 743 | "gnomad_exome.an.an_amr_male","Number of alleles in gnomAD exomes American male population." 744 | "gnomad_exome.an.an_asj","Number of alleles in gnomAD exomes Ashkenazi Jewish population." 745 | "gnomad_exome.an.an_asj_female","Number of alleles in gnomAD exomes Ashkenazi Jewish female population." 746 | "gnomad_exome.an.an_asj_male","Number of alleles in gnomAD exomes Ashkenazi Jewish male population." 747 | "gnomad_exome.an.an_eas","Number of alleles in gnomAD exomes East Asian population." 748 | "gnomad_exome.an.an_eas_female","Number of alleles in gnomAD exomes East Asian female population." 749 | "gnomad_exome.an.an_eas_jpn","Number of alleles in gnomAD exomes East Asian Japanese population." 750 | "gnomad_exome.an.an_eas_kor","Number of alleles in gnomAD exomes East Asian Korean population." 751 | "gnomad_exome.an.an_eas_male","Number of alleles in gnomAD exomes East Asian male population." 752 | "gnomad_exome.an.an_eas_oea","Number of alleles in gnomAD exomes East Asian Other population." 753 | "gnomad_exome.an.an_female","Total number of alleles in gnomAD exomes female population." 754 | "gnomad_exome.an.an_fin","Number of alleles in gnomAD exomes Finnish population." 755 | "gnomad_exome.an.an_fin_female","Number of alleles in gnomAD exomes Finnish female population." 756 | "gnomad_exome.an.an_fin_male","Number of alleles in gnomAD exomes Finnish male population." 
757 | "gnomad_exome.an.an_male","Total number of alleles in gnomAD exomes male population." 758 | "gnomad_exome.an.an_nfe","Number of alleles in gnomAD exomes Non-Finnish European population." 759 | "gnomad_exome.an.an_nfe_bgr","Number of alleles in gnomAD exomes NFE Bulgarian population." 760 | "gnomad_exome.an.an_nfe_est","Number of alleles in gnomAD exomes NFE Estonian population." 761 | "gnomad_exome.an.an_nfe_female","Number of alleles in gnomAD exomes NFE female population." 762 | "gnomad_exome.an.an_nfe_male","Number of alleles in gnomAD exomes NFE male population." 763 | "gnomad_exome.an.an_nfe_nwe","Number of alleles in gnomAD exomes NFE North-Western European population." 764 | "gnomad_exome.an.an_nfe_onf","Number of alleles in gnomAD exomes NFE Other Non-Finnish European population." 765 | "gnomad_exome.an.an_nfe_seu","Number of alleles in gnomAD exomes NFE Southern European population." 766 | "gnomad_exome.an.an_nfe_swe","Number of alleles in gnomAD exomes NFE Swedish population." 767 | "gnomad_exome.an.an_oth","Number of alleles in gnomAD exomes Other population." 768 | "gnomad_exome.an.an_oth_female","Number of alleles in gnomAD exomes Other female population." 769 | "gnomad_exome.an.an_oth_male","Number of alleles in gnomAD exomes Other male population." 770 | "gnomad_exome.an.an_sas","Number of alleles in gnomAD exomes South Asian population." 771 | "gnomad_exome.an.an_sas_female","Number of alleles in gnomAD exomes South Asian female population." 772 | "gnomad_exome.an.an_sas_male","Number of alleles in gnomAD exomes South Asian male population." 773 | "gnomad_exome.baseqranksum","gnomAD exome BaseQRankSum test statistic." 774 | "gnomad_exome.chrom","Chromosome number in gnomAD exomes." 775 | "gnomad_exome.clippingranksum","gnomAD exome ClippingRankSum test statistic." 776 | "gnomad_exome.dp","Total read depth at the variant position in gnomAD exomes." 777 | "gnomad_exome.fs","gnomAD exome FisherStrand bias score." 
778 | "gnomad_exome.hom.hom","Total homozygous count in gnomAD exomes." 779 | "gnomad_exome.hom.hom_afr","Homozygous count in gnomAD exomes African/African American population." 780 | "gnomad_exome.hom.hom_afr_female","Homozygous count in gnomAD exomes African/African American female population." 781 | "gnomad_exome.hom.hom_afr_male","Homozygous count in gnomAD exomes African/African American male population." 782 | "gnomad_exome.hom.hom_amr","Homozygous count in gnomAD exomes American population." 783 | "gnomad_exome.hom.hom_amr_female","Homozygous count in gnomAD exomes American female population." 784 | "gnomad_exome.hom.hom_amr_male","Homozygous count in gnomAD exomes American male population." 785 | "gnomad_exome.hom.hom_asj","Homozygous count in gnomAD exomes Ashkenazi Jewish population." 786 | "gnomad_exome.hom.hom_asj_female","Homozygous count in gnomAD exomes Ashkenazi Jewish female population." 787 | "gnomad_exome.hom.hom_asj_male","Homozygous count in gnomAD exomes Ashkenazi Jewish male population." 788 | "gnomad_exome.hom.hom_eas","Homozygous count in gnomAD exomes East Asian population." 789 | "gnomad_exome.hom.hom_eas_female","Homozygous count in gnomAD exomes East Asian female population." 790 | "gnomad_exome.hom.hom_eas_jpn","Homozygous count in gnomAD exomes East Asian Japanese population." 791 | "gnomad_exome.hom.hom_eas_kor","Homozygous count in gnomAD exomes East Asian Korean population." 792 | "gnomad_exome.hom.hom_eas_male","Homozygous count in gnomAD exomes East Asian male population." 793 | "gnomad_exome.hom.hom_eas_oea","Homozygous count in gnomAD exomes East Asian Other population." 794 | "gnomad_exome.hom.hom_female","Total homozygous count in gnomAD exomes female population." 795 | "gnomad_exome.hom.hom_fin","Homozygous count in gnomAD exomes Finnish population." 796 | "gnomad_exome.hom.hom_fin_female","Homozygous count in gnomAD exomes Finnish female population." 
797 | "gnomad_exome.hom.hom_fin_male","Homozygous count in gnomAD exomes Finnish male population." 798 | "gnomad_exome.hom.hom_male","Total homozygous count in gnomAD exomes male population." 799 | "gnomad_exome.hom.hom_nfe","Homozygous count in gnomAD exomes Non-Finnish European population." 800 | "gnomad_exome.hom.hom_nfe_bgr","Homozygous count in gnomAD exomes NFE Bulgarian population." 801 | "gnomad_exome.hom.hom_nfe_est","Homozygous count in gnomAD exomes NFE Estonian population." 802 | "gnomad_exome.hom.hom_nfe_female","Homozygous count in gnomAD exomes NFE female population." 803 | "gnomad_exome.hom.hom_nfe_male","Homozygous count in gnomAD exomes NFE male population." 804 | "gnomad_exome.hom.hom_nfe_nwe","Homozygous count in gnomAD exomes NFE North-Western European population." 805 | "gnomad_exome.hom.hom_nfe_onf","Homozygous count in gnomAD exomes NFE Other Non-Finnish European population." 806 | "gnomad_exome.hom.hom_nfe_seu","Homozygous count in gnomAD exomes NFE Southern European population." 807 | "gnomad_exome.hom.hom_nfe_swe","Homozygous count in gnomAD exomes NFE Swedish population." 808 | "gnomad_exome.hom.hom_oth","Homozygous count in gnomAD exomes Other population." 809 | "gnomad_exome.hom.hom_oth_female","Homozygous count in gnomAD exomes Other female population." 810 | "gnomad_exome.hom.hom_oth_male","Homozygous count in gnomAD exomes Other male population." 811 | "gnomad_exome.hom.hom_sas","Homozygous count in gnomAD exomes South Asian population." 812 | "gnomad_exome.hom.hom_sas_female","Homozygous count in gnomAD exomes South Asian female population." 813 | "gnomad_exome.hom.hom_sas_male","Homozygous count in gnomAD exomes South Asian male population." 814 | "gnomad_exome.inbreedingcoeff","gnomAD exome Inbreeding Coefficient." 815 | "gnomad_exome.mq.mq","gnomAD exome root mean square Mapping Quality." 816 | "gnomad_exome.mq.mqranksum","gnomAD exome MQRankSum test statistic." 
817 | "gnomad_exome.pab_max","Maximum P(AB) value from gnomAD exomes. UNKNOWN significance." 818 | "gnomad_exome.pos","Genomic position in gnomAD exomes (hg19)." 819 | "gnomad_exome.qd","gnomAD exome Quality by Depth score." 820 | "gnomad_exome.readposranksum","gnomAD exome ReadPosRankSum test statistic." 821 | "gnomad_exome.ref","Reference allele in gnomAD exome format." 822 | "gnomad_exome.rf","Random Forest probability score from gnomAD exomes. UNKNOWN usage." 823 | "gnomad_exome.rsid","Associated dbSNP rsID from gnomAD exomes." 824 | "gnomad_exome.sor","gnomAD exome Strand Odds Ratio score." 825 | "gnomad_exome.type","Variant type in gnomAD exomes (e.g., snp)." 826 | "gnomad_exome.vqslod","gnomAD exome Variant Quality Score Log-Odds." 827 | "gnomad_exome.vqsr_culprit","gnomAD exome VQSR culprit annotation." 828 | "hg19.end","End position in hg19 assembly." 829 | "hg19.start","Start position in hg19 assembly." 830 | "mutdb._license","License information URL for the MutDB data source." 831 | "mutdb.alt","Alternate allele in MutDB." 832 | "mutdb.chrom","Chromosome number in MutDB." 833 | "mutdb.cosmic_id","Associated COSMIC ID(s) from MutDB." 834 | "mutdb.hg19.end","End position in hg19 assembly (MutDB)." 835 | "mutdb.hg19.start","Start position in hg19 assembly (MutDB)." 836 | "mutdb.mutpred_score","MutPred score reported by MutDB." 837 | "mutdb.ref","Reference allele in MutDB." 838 | "mutdb.rsid","Associated dbSNP rsID from MutDB." 839 | "mutdb.strand","Genomic strand reported by MutDB (m indicates '-')." 840 | "mutdb.uniprot_id","Associated UniProt variant ID from MutDB." 841 | "observed","Boolean indicating if the variant is observed in aggregated datasets." 842 | "snpeff._license","License information URL for the SnpEff data source." 843 | "snpeff.ann.cdna.length","Length of the cDNA sequence for the annotated transcript." 844 | "snpeff.ann.cdna.position","Position of the variant within the cDNA sequence." 
845 | "snpeff.ann.cds.length","Length of the coding sequence (CDS) for the annotated transcript." 846 | "snpeff.ann.cds.position","Position of the variant within the coding sequence (CDS)." 847 | "snpeff.ann.effect","Predicted sequence ontology effect of the variant (e.g., missense_variant)." 848 | "snpeff.ann.feature_id","Feature ID (usually transcript ID like NM_004333.4) for the annotation." 849 | "snpeff.ann.feature_type","Type of feature annotated (e.g., transcript)." 850 | "snpeff.ann.gene_id","Gene symbol or ID associated with the annotation." 851 | "snpeff.ann.genename","Gene name associated with the annotation." 852 | "snpeff.ann.hgvs_c","HGVS coding sequence notation from SnpEff." 853 | "snpeff.ann.hgvs_p","HGVS protein sequence notation from SnpEff." 854 | "snpeff.ann.protein.length","Length of the protein sequence for the annotated transcript." 855 | "snpeff.ann.protein.position","Position of the amino acid change within the protein." 856 | "snpeff.ann.putative_impact","SnpEff predicted impact category (e.g., MODERATE, HIGH)." 857 | "snpeff.ann.rank","Rank of the annotation (exon/intron rank)." 858 | "snpeff.ann.total","Total number of exons/introns in the transcript." 859 | "snpeff.ann.transcript_biotype","Biotype of the transcript (e.g., protein_coding)." 860 | "vcf.alt","Alternate allele in VCF format." 861 | "vcf.position","Position of the variant in VCF format (hg19)." 862 | "vcf.ref","Reference allele in VCF format." 863 | ``` -------------------------------------------------------------------------------- /src/biomcp/router.py: -------------------------------------------------------------------------------- ```python 1 | """Unified search and fetch tools for BioMCP. 2 | 3 | This module provides the main MCP tools for searching and fetching biomedical data 4 | across different domains (articles, trials, variants) with integrated sequential 5 | thinking capabilities. 
6 | """ 7 | 8 | import json 9 | import logging 10 | from typing import Annotated, Any, Literal 11 | 12 | from pydantic import Field 13 | 14 | from biomcp.constants import ( 15 | DEFAULT_PAGE_NUMBER, 16 | DEFAULT_PAGE_SIZE, 17 | DEFAULT_TITLE, 18 | ERROR_DOMAIN_REQUIRED, 19 | ESTIMATED_ADDITIONAL_RESULTS, 20 | MAX_RESULTS_PER_DOMAIN_DEFAULT, 21 | TRIAL_DETAIL_SECTIONS, 22 | VALID_DOMAINS, 23 | ) 24 | from biomcp.core import mcp_app 25 | from biomcp.domain_handlers import get_domain_handler 26 | from biomcp.exceptions import ( 27 | InvalidDomainError, 28 | InvalidParameterError, 29 | QueryParsingError, 30 | ResultParsingError, 31 | SearchExecutionError, 32 | ) 33 | from biomcp.integrations.biothings_client import BioThingsClient 34 | from biomcp.metrics import track_performance 35 | from biomcp.parameter_parser import ParameterParser 36 | from biomcp.query_parser import QueryParser 37 | from biomcp.query_router import QueryRouter, execute_routing_plan 38 | from biomcp.thinking_tracker import get_thinking_reminder 39 | from biomcp.trials import getter as trial_getter 40 | 41 | logger = logging.getLogger(__name__) 42 | 43 | 44 | def format_results( 45 | results: list[dict], domain: str, page: int, page_size: int, total: int 46 | ) -> dict: 47 | """Format search results according to OpenAI MCP search semantics. 48 | 49 | Converts domain-specific result formats into a standardized structure with: 50 | - id: Unique identifier for the result (required) 51 | - title: Human-readable title (required) 52 | - text: Brief preview or summary of the content (required) 53 | - url: Link to the full resource (optional but recommended for citations) 54 | 55 | Note: The OpenAI MCP specification does NOT require metadata in search results. 56 | Metadata should only be included in fetch results. 
57 | 58 | Args: 59 | results: Raw results from domain-specific search 60 | domain: Type of results ('article', 'trial', or 'variant') 61 | page: Current page number (for internal tracking only) 62 | page_size: Number of results per page (for internal tracking only) 63 | total: Total number of results available (for internal tracking only) 64 | 65 | Returns: 66 | Dictionary with results array following OpenAI MCP format: 67 | {"results": [{"id", "title", "text", "url"}, ...]} 68 | 69 | Raises: 70 | InvalidDomainError: If domain is not recognized 71 | """ 72 | logger.debug(f"Formatting {len(results)} results for domain: {domain}") 73 | 74 | formatted_data = [] 75 | 76 | # Get the appropriate handler 77 | try: 78 | handler_class = get_domain_handler(domain) 79 | except ValueError: 80 | raise InvalidDomainError(domain, VALID_DOMAINS) from None 81 | 82 | # Format each result 83 | for result in results: 84 | try: 85 | formatted_result = handler_class.format_result(result) 86 | # Ensure the result has the required OpenAI MCP fields 87 | openai_result = { 88 | "id": formatted_result.get("id", ""), 89 | "title": formatted_result.get("title", DEFAULT_TITLE), 90 | "text": formatted_result.get( 91 | "snippet", formatted_result.get("text", "") 92 | ), 93 | "url": formatted_result.get("url", ""), 94 | } 95 | # Note: OpenAI MCP spec doesn't require metadata in search results 96 | # Only include it if explicitly needed for enhanced functionality 97 | formatted_data.append(openai_result) 98 | except Exception as e: 99 | logger.warning(f"Failed to format result in domain {domain}: {e}") 100 | # Skip malformed results 101 | continue 102 | 103 | # Add thinking reminder if needed (as first result) 104 | reminder = get_thinking_reminder() 105 | if reminder and formatted_data: 106 | reminder_result = { 107 | "id": "thinking-reminder", 108 | "title": "⚠️ Research Best Practice Reminder", 109 | "text": reminder, 110 | "url": "", 111 | } 112 | formatted_data.insert(0, reminder_result) 113 | 
114 | # Return OpenAI MCP compliant format 115 | return {"results": formatted_data} 116 | 117 | 118 | # ──────────────────────────── 119 | # Unified SEARCH tool 120 | # ──────────────────────────── 121 | @mcp_app.tool() 122 | @track_performance("biomcp.search") 123 | async def search( # noqa: C901 124 | query: Annotated[ 125 | str, 126 | "Unified search query (e.g., 'gene:BRAF AND trials.condition:melanoma'). If provided, other parameters are ignored.", 127 | ], 128 | call_benefit: Annotated[ 129 | str | None, 130 | Field( 131 | description="Brief explanation of why this search is being performed and expected benefit. Helps improve search accuracy and provides context for analytics. Highly recommended for better results." 132 | ), 133 | ] = None, 134 | domain: Annotated[ 135 | Literal[ 136 | "article", 137 | "trial", 138 | "variant", 139 | "gene", 140 | "drug", 141 | "disease", 142 | "nci_organization", 143 | "nci_intervention", 144 | "nci_biomarker", 145 | "nci_disease", 146 | "fda_adverse", 147 | "fda_label", 148 | "fda_device", 149 | "fda_approval", 150 | "fda_recall", 151 | "fda_shortage", 152 | ] 153 | | None, 154 | Field( 155 | description="Domain to search: 'article' for papers/literature ABOUT genes/variants/diseases, 'trial' for clinical studies, 'variant' for genetic variant DATABASE RECORDS, 'gene' for gene information from MyGene.info, 'drug' for drug/chemical information from MyChem.info, 'disease' for disease information from MyDisease.info, 'nci_organization' for NCI cancer centers/sponsors, 'nci_intervention' for NCI drugs/devices/procedures, 'nci_biomarker' for NCI trial eligibility biomarkers, 'nci_disease' for NCI cancer vocabulary, 'fda_adverse' for FDA adverse event reports, 'fda_label' for FDA drug labels, 'fda_device' for FDA device events, 'fda_approval' for FDA drug approvals, 'fda_recall' for FDA drug recalls, 'fda_shortage' for FDA drug shortages" 156 | ), 157 | ] = None, 158 | genes: Annotated[list[str] | str | None, "Gene symbols"] = 
None, 159 | diseases: Annotated[list[str] | str | None, "Disease terms"] = None, 160 | variants: Annotated[list[str] | str | None, "Variant strings"] = None, 161 | chemicals: Annotated[list[str] | str | None, "Drug/chemical terms"] = None, 162 | keywords: Annotated[list[str] | str | None, "Free-text keywords"] = None, 163 | conditions: Annotated[list[str] | str | None, "Trial conditions"] = None, 164 | interventions: Annotated[ 165 | list[str] | str | None, "Trial interventions" 166 | ] = None, 167 | recruiting_status: Annotated[ 168 | str | None, "Trial status filter (OPEN, CLOSED, or ANY)" 169 | ] = None, 170 | phase: Annotated[str | None, "Trial phase filter"] = None, 171 | significance: Annotated[ 172 | str | None, "Variant clinical significance" 173 | ] = None, 174 | lat: Annotated[ 175 | float | None, 176 | "Latitude for trial location search. AI agents should geocode city names (e.g., 'Cleveland' → 41.4993) before using.", 177 | ] = None, 178 | long: Annotated[ 179 | float | None, 180 | "Longitude for trial location search. AI agents should geocode city names (e.g., 'Cleveland' → -81.6944) before using.", 181 | ] = None, 182 | distance: Annotated[ 183 | int | None, 184 | "Distance in miles from lat/long for trial search (default: 50 miles if lat/long provided)", 185 | ] = None, 186 | page: Annotated[int, "Page number (minimum: 1)"] = DEFAULT_PAGE_NUMBER, 187 | page_size: Annotated[int, "Results per page (1-100)"] = DEFAULT_PAGE_SIZE, 188 | max_results_per_domain: Annotated[ 189 | int | None, "Max results per domain (unified search only)" 190 | ] = None, 191 | explain_query: Annotated[ 192 | bool, "Return query explanation (unified search only)" 193 | ] = False, 194 | get_schema: Annotated[ 195 | bool, "Return searchable fields schema instead of results" 196 | ] = False, 197 | api_key: Annotated[ 198 | str | None, 199 | Field( 200 | description="NCI API key for searching NCI domains (nci_organization, nci_intervention, nci_biomarker, nci_disease). 
Required for NCI searches. Get a free key at: https://clinicaltrialsapi.cancer.gov/" 201 | ), 202 | ] = None, 203 | ) -> dict: 204 | """Search biomedical literature, clinical trials, genetic variants, genes, drugs, and diseases. 205 | 206 | ⚠️ IMPORTANT: Have you used the 'think' tool first? If not, STOP and use it NOW! 207 | The 'think' tool is REQUIRED for proper research planning and should be your FIRST step. 208 | 209 | This tool provides access to biomedical data from PubMed/PubTator3, ClinicalTrials.gov, 210 | MyVariant.info, and the BioThings suite (MyGene.info, MyChem.info, MyDisease.info). 211 | It supports two search modes: 212 | 213 | ## 1. UNIFIED QUERY LANGUAGE 214 | Use the 'query' parameter with field-based syntax for precise cross-domain searches. 215 | 216 | Syntax: 217 | - Basic: "gene:BRAF" 218 | - AND logic: "gene:BRAF AND disease:melanoma" 219 | - OR logic: "gene:PTEN AND (R173 OR Arg173 OR 'position 173')" 220 | - Domain-specific: "trials.condition:melanoma AND trials.phase:3" 221 | 222 | Common fields: 223 | - Cross-domain: gene, disease, variant, chemical/drug 224 | - Articles: pmid, title, abstract, journal, author 225 | - Trials: trials.condition, trials.intervention, trials.phase, trials.status 226 | - Variants: variants.hgvs, variants.rsid, variants.significance 227 | 228 | Example: 229 | ``` 230 | await search( 231 | query="gene:BRAF AND disease:melanoma AND trials.phase:3", 232 | max_results_per_domain=20 233 | ) 234 | ``` 235 | 236 | ## 2. DOMAIN-SPECIFIC SEARCH 237 | Use the 'domain' parameter with specific filters for targeted searches. 238 | 239 | Domains: 240 | - "article": Search PubMed/PubTator3 for research articles and preprints ABOUT genes, variants, diseases, or chemicals 241 | - "trial": Search ClinicalTrials.gov for clinical studies 242 | - "variant": Search MyVariant.info for genetic variant DATABASE RECORDS (population frequency, clinical significance, etc.) - NOT for articles about variants! 
243 | - "gene": Search MyGene.info for gene information (symbol, name, function, aliases) 244 | - "drug": Search MyChem.info for drug/chemical information (names, formulas, indications) 245 | - "disease": Search MyDisease.info for disease information (names, definitions, synonyms) 246 | - "nci_organization": Search NCI database for cancer centers, hospitals, and research sponsors (requires API key) 247 | - "nci_intervention": Search NCI database for drugs, devices, procedures used in cancer trials (requires API key) 248 | - "nci_biomarker": Search NCI database for biomarkers used in trial eligibility criteria (requires API key) 249 | - "nci_disease": Search NCI controlled vocabulary for cancer conditions and terms (requires API key) 250 | 251 | Example: 252 | ``` 253 | await search( 254 | domain="article", 255 | genes=["BRAF", "NRAS"], 256 | diseases=["melanoma"], 257 | page_size=50 258 | ) 259 | ``` 260 | 261 | ## DOMAIN SELECTION EXAMPLES: 262 | - To find ARTICLES about BRAF V600E mutation: domain="article", genes=["BRAF"], variants=["V600E"] 263 | - To find VARIANT DATA for BRAF mutations: domain="variant", genes=["BRAF"] 264 | - To find articles about ERBB2 p.D277Y: domain="article", genes=["ERBB2"], variants=["p.D277Y"] 265 | - Common mistake: Using domain="variant" when you want articles about a variant 266 | 267 | ## IMPORTANT NOTES: 268 | - For complex research questions, use the separate 'think' tool for systematic analysis 269 | - The tool returns results in OpenAI MCP format: {"results": [{"id", "title", "text", "url"}, ...]} 270 | - Search results do NOT include metadata (per OpenAI MCP specification) 271 | - Use the fetch tool to get detailed metadata for specific records 272 | - Use get_schema=True to explore available search fields 273 | - Use explain_query=True to understand query parsing (unified mode) 274 | - Domain-specific searches use AND logic for multiple values 275 | - For OR logic, use the unified query language 276 | - NEW: Article search 
keywords support OR with pipe separator: "R173|Arg173|p.R173" 277 | - Remember: domain="article" finds LITERATURE, domain="variant" finds DATABASE RECORDS 278 | 279 | ## RETURN FORMAT: 280 | All search modes return results in this format: 281 | ```json 282 | { 283 | "results": [ 284 | { 285 | "id": "unique_identifier", 286 | "title": "Human-readable title", 287 | "text": "Summary or snippet of content", 288 | "url": "Link to full resource" 289 | } 290 | ] 291 | } 292 | ``` 293 | """ 294 | logger.info(f"Search called with domain={domain}, query={query}") 295 | 296 | # Return schema if requested 297 | if get_schema: 298 | parser = QueryParser() 299 | return parser.get_schema() 300 | 301 | # Determine search mode 302 | if query and query.strip(): 303 | # Check if this is a unified query (contains field syntax like "gene:" or "AND") 304 | is_unified_query = any( 305 | marker in query for marker in [":", " AND ", " OR "] 306 | ) 307 | 308 | # Check if this is an NCI domain 309 | nci_domains = [ 310 | "nci_biomarker", 311 | "nci_organization", 312 | "nci_intervention", 313 | "nci_disease", 314 | ] 315 | is_nci_domain = domain in nci_domains if domain else False 316 | 317 | if not domain or (domain and is_unified_query and not is_nci_domain): 318 | # Use unified query mode if: 319 | # 1. No domain specified, OR 320 | # 2. 
Domain specified but query has field syntax AND it's not an NCI domain 321 | logger.info(f"Using unified query mode: {query}") 322 | return await _unified_search( 323 | query=query, 324 | max_results_per_domain=max_results_per_domain 325 | or MAX_RESULTS_PER_DOMAIN_DEFAULT, 326 | domains=None, 327 | explain_query=explain_query, 328 | ) 329 | elif domain: 330 | # Domain-specific search with query as keyword 331 | logger.info( 332 | f"Domain-specific search with query as keyword: domain={domain}, query={query}" 333 | ) 334 | # Convert query to keywords parameter for domain-specific search 335 | keywords = [query] 336 | 337 | # Legacy domain-based search 338 | if not domain: 339 | raise InvalidParameterError( 340 | "query or domain", None, ERROR_DOMAIN_REQUIRED 341 | ) 342 | 343 | # Validate pagination parameters 344 | try: 345 | page, page_size = ParameterParser.validate_page_params(page, page_size) 346 | except InvalidParameterError as e: 347 | logger.error(f"Invalid pagination parameters: {e}") 348 | raise 349 | 350 | # Parse parameters using ParameterParser 351 | genes = ParameterParser.parse_list_param(genes, "genes") 352 | diseases = ParameterParser.parse_list_param(diseases, "diseases") 353 | variants = ParameterParser.parse_list_param(variants, "variants") 354 | chemicals = ParameterParser.parse_list_param(chemicals, "chemicals") 355 | keywords = ParameterParser.parse_list_param(keywords, "keywords") 356 | conditions = ParameterParser.parse_list_param(conditions, "conditions") 357 | interventions = ParameterParser.parse_list_param( 358 | interventions, "interventions" 359 | ) 360 | 361 | logger.debug( 362 | f"Parsed parameters for domain {domain}: " 363 | f"genes={genes}, diseases={diseases}, variants={variants}" 364 | ) 365 | 366 | if domain == "article": 367 | from .router_handlers import handle_article_search 368 | 369 | items, total = await handle_article_search( 370 | genes=genes, 371 | diseases=diseases, 372 | variants=variants, 373 | 
chemicals=chemicals, 374 | keywords=keywords, 375 | page=page, 376 | page_size=page_size, 377 | ) 378 | 379 | return format_results( 380 | items, 381 | domain="article", 382 | page=page, 383 | page_size=page_size, 384 | total=total, 385 | ) 386 | 387 | elif domain == "trial": 388 | logger.info("Executing trial search") 389 | # Build the trial search parameters 390 | search_params: dict[str, Any] = {} 391 | if conditions: 392 | search_params["conditions"] = conditions 393 | if interventions: 394 | search_params["interventions"] = interventions 395 | if recruiting_status: 396 | search_params["recruiting_status"] = recruiting_status 397 | if phase: 398 | try: 399 | search_params["phase"] = ParameterParser.normalize_phase(phase) 400 | except InvalidParameterError: 401 | raise 402 | if keywords: 403 | search_params["keywords"] = keywords 404 | if lat is not None: 405 | search_params["lat"] = lat 406 | if long is not None: 407 | search_params["long"] = long 408 | if distance is not None: 409 | search_params["distance"] = distance 410 | 411 | try: 412 | from biomcp.trials.search import TrialQuery, search_trials 413 | 414 | # Convert search_params to TrialQuery 415 | trial_query = TrialQuery(**search_params, page_size=page_size) 416 | result_str = await search_trials(trial_query, output_json=True) 417 | except Exception as e: 418 | logger.error(f"Trial search failed: {e}") 419 | raise SearchExecutionError("trial", e) from e 420 | 421 | # Parse the JSON results 422 | try: 423 | results = json.loads(result_str) 424 | except (json.JSONDecodeError, TypeError) as e: 425 | logger.error(f"Failed to parse trial results: {e}") 426 | raise ResultParsingError("trial", e) from e 427 | 428 | # Handle different response formats from the trials API 429 | # The API can return either a dict with 'studies' key or a direct list 430 | if isinstance(results, dict): 431 | # ClinicalTrials.gov API v2 format with studies array 432 | if "studies" in results: 433 | items = results["studies"] 434 | 
total = len(items) # API doesn't provide total count 435 | # Legacy format or error 436 | elif "error" in results: 437 | logger.warning( 438 | f"Trial API returned error: {results.get('error')}" 439 | ) 440 | return format_results( 441 | [], domain="trial", page=page, page_size=page_size, total=0 442 | ) 443 | else: 444 | # Assume the dict itself is a single result 445 | items = [results] 446 | total = 1 447 | elif isinstance(results, list): 448 | # Direct list of results 449 | items = results 450 | total = len(items) 451 | else: 452 | items = [] 453 | total = 0 454 | 455 | logger.info(f"Trial search returned {total} total results") 456 | 457 | return format_results( 458 | items, domain="trial", page=page, page_size=page_size, total=total 459 | ) 460 | 461 | elif domain == "variant": 462 | logger.info("Executing variant search") 463 | # Build the variant search parameters 464 | # Note: variant searcher expects single gene, not list 465 | gene = genes[0] if genes else None 466 | 467 | # Use keywords to search for significance if provided 468 | keyword_list = keywords or [] 469 | if significance: 470 | keyword_list.append(significance) 471 | 472 | try: 473 | from biomcp.variants.search import VariantQuery, search_variants 474 | 475 | variant_query = VariantQuery( 476 | gene=gene, 477 | significance=significance, 478 | size=page_size, 479 | offset=(page - 1) * page_size, 480 | ) 481 | result_str = await search_variants(variant_query, output_json=True) 482 | except Exception as e: 483 | logger.error(f"Variant search failed: {e}") 484 | raise SearchExecutionError("variant", e) from e 485 | 486 | # Parse the JSON results 487 | try: 488 | all_results = json.loads(result_str) 489 | except (json.JSONDecodeError, TypeError) as e: 490 | logger.error(f"Failed to parse variant results: {e}") 491 | raise ResultParsingError("variant", e) from e 492 | 493 | # For variants, the results are already paginated by the API 494 | # We need to estimate total based on whether we got a full 
page 495 | items = all_results if isinstance(all_results, list) else [] 496 | # Rough estimate: if we got a full page, there might be more 497 | total = len(items) + ( 498 | ESTIMATED_ADDITIONAL_RESULTS if len(items) == page_size else 0 499 | ) 500 | 501 | logger.info(f"Variant search returned {len(items)} results") 502 | 503 | return format_results( 504 | items, 505 | domain="variant", 506 | page=page, 507 | page_size=page_size, 508 | total=total, 509 | ) 510 | 511 | elif domain == "gene": 512 | logger.info("Executing gene search") 513 | # Build the gene search query 514 | query_str = keywords[0] if keywords else genes[0] if genes else "" 515 | 516 | if not query_str: 517 | raise InvalidParameterError( 518 | "keywords or genes", None, "a gene symbol or search term" 519 | ) 520 | 521 | try: 522 | client = BioThingsClient() 523 | # For search, query by symbol/name 524 | results = await client._query_gene(query_str) 525 | 526 | if not results: 527 | items = [] 528 | total = 0 529 | else: 530 | # Fetch full details for each result (limited by page_size) 531 | items = [] 532 | for result in results[:page_size]: 533 | gene_id = result.get("_id") 534 | if gene_id: 535 | full_gene = await client._get_gene_by_id(gene_id) 536 | if full_gene: 537 | items.append(full_gene.model_dump()) 538 | 539 | total = len(results) 540 | 541 | except Exception as e: 542 | logger.error(f"Gene search failed: {e}") 543 | raise SearchExecutionError("gene", e) from e 544 | 545 | logger.info(f"Gene search returned {len(items)} results") 546 | 547 | return format_results( 548 | items, 549 | domain="gene", 550 | page=page, 551 | page_size=page_size, 552 | total=total, 553 | ) 554 | 555 | elif domain == "drug": 556 | logger.info("Executing drug search") 557 | # Build the drug search query 558 | query_str = ( 559 | keywords[0] if keywords else chemicals[0] if chemicals else "" 560 | ) 561 | 562 | if not query_str: 563 | raise InvalidParameterError( 564 | "keywords or chemicals", None, "a drug name 
or search term" 565 | ) 566 | 567 | try: 568 | client = BioThingsClient() 569 | # For search, query by name 570 | results = await client._query_drug(query_str) 571 | 572 | if not results: 573 | items = [] 574 | total = 0 575 | else: 576 | # Fetch full details for each result (limited by page_size) 577 | items = [] 578 | for result in results[:page_size]: 579 | drug_id = result.get("_id") 580 | if drug_id: 581 | full_drug = await client._get_drug_by_id(drug_id) 582 | if full_drug: 583 | items.append(full_drug.model_dump(by_alias=True)) 584 | 585 | total = len(results) 586 | 587 | except Exception as e: 588 | logger.error(f"Drug search failed: {e}") 589 | raise SearchExecutionError("drug", e) from e 590 | 591 | logger.info(f"Drug search returned {len(items)} results") 592 | 593 | return format_results( 594 | items, 595 | domain="drug", 596 | page=page, 597 | page_size=page_size, 598 | total=total, 599 | ) 600 | 601 | elif domain == "disease": 602 | logger.info("Executing disease search") 603 | # Build the disease search query 604 | query_str = ( 605 | keywords[0] if keywords else diseases[0] if diseases else "" 606 | ) 607 | 608 | if not query_str: 609 | raise InvalidParameterError( 610 | "keywords or diseases", None, "a disease name or search term" 611 | ) 612 | 613 | try: 614 | client = BioThingsClient() 615 | # For search, query by name 616 | results = await client._query_disease(query_str) 617 | 618 | if not results: 619 | items = [] 620 | total = 0 621 | else: 622 | # Fetch full details for each result (limited by page_size) 623 | items = [] 624 | for result in results[:page_size]: 625 | disease_id = result.get("_id") 626 | if disease_id: 627 | full_disease = await client._get_disease_by_id( 628 | disease_id 629 | ) 630 | if full_disease: 631 | items.append( 632 | full_disease.model_dump(by_alias=True) 633 | ) 634 | 635 | total = len(results) 636 | 637 | except Exception as e: 638 | logger.error(f"Disease search failed: {e}") 639 | raise 
SearchExecutionError("disease", e) from e 640 | 641 | logger.info(f"Disease search returned {len(items)} results") 642 | 643 | return format_results( 644 | items, 645 | domain="disease", 646 | page=page, 647 | page_size=page_size, 648 | total=total, 649 | ) 650 | 651 | elif domain == "nci_organization": 652 | from .router_handlers import handle_nci_organization_search 653 | 654 | # Extract NCI-specific parameters 655 | organization_type = keywords[0] if keywords else None 656 | city = None 657 | state = None 658 | name = keywords[0] if keywords else None 659 | 660 | # Try to parse location from keywords 661 | if keywords and len(keywords) >= 2: 662 | # Assume last two keywords might be city, state 663 | city = keywords[-2] 664 | state = keywords[-1] 665 | if len(state) == 2 and state.isupper(): 666 | # Likely a state code 667 | name = " ".join(keywords[:-2]) if len(keywords) > 2 else None 668 | else: 669 | # Not a state code, use all as name 670 | city = None 671 | state = None 672 | name = " ".join(keywords) 673 | 674 | items, total = await handle_nci_organization_search( 675 | name=name, 676 | organization_type=organization_type, 677 | city=city, 678 | state=state, 679 | api_key=api_key, 680 | page=page, 681 | page_size=page_size, 682 | ) 683 | 684 | return format_results( 685 | items, 686 | domain="nci_organization", 687 | page=page, 688 | page_size=page_size, 689 | total=total, 690 | ) 691 | 692 | elif domain == "nci_intervention": 693 | from .router_handlers import handle_nci_intervention_search 694 | 695 | # Extract parameters 696 | name = keywords[0] if keywords else None 697 | intervention_type = None # Could be parsed from additional params 698 | 699 | items, total = await handle_nci_intervention_search( 700 | name=name, 701 | intervention_type=intervention_type, 702 | synonyms=True, 703 | api_key=api_key, 704 | page=page, 705 | page_size=page_size, 706 | ) 707 | 708 | return format_results( 709 | items, 710 | domain="nci_intervention", 711 | page=page, 
712 | page_size=page_size, 713 | total=total, 714 | ) 715 | 716 | elif domain == "nci_biomarker": 717 | from .router_handlers import handle_nci_biomarker_search 718 | 719 | # Extract parameters 720 | name = keywords[0] if keywords else None 721 | gene = genes[0] if genes else None 722 | 723 | items, total = await handle_nci_biomarker_search( 724 | name=name, 725 | gene=gene, 726 | biomarker_type=None, 727 | assay_type=None, 728 | api_key=api_key, 729 | page=page, 730 | page_size=page_size, 731 | ) 732 | 733 | return format_results( 734 | items, 735 | domain="nci_biomarker", 736 | page=page, 737 | page_size=page_size, 738 | total=total, 739 | ) 740 | 741 | elif domain == "nci_disease": 742 | from .router_handlers import handle_nci_disease_search 743 | 744 | # Extract parameters 745 | name = diseases[0] if diseases else keywords[0] if keywords else None 746 | 747 | items, total = await handle_nci_disease_search( 748 | name=name, 749 | include_synonyms=True, 750 | category=None, 751 | api_key=api_key, 752 | page=page, 753 | page_size=page_size, 754 | ) 755 | 756 | return format_results( 757 | items, 758 | domain="nci_disease", 759 | page=page, 760 | page_size=page_size, 761 | total=total, 762 | ) 763 | 764 | # OpenFDA domains 765 | elif domain == "fda_adverse": 766 | from biomcp.openfda import search_adverse_events 767 | 768 | drug_name = ( 769 | chemicals[0] if chemicals else keywords[0] if keywords else None 770 | ) 771 | skip = (page - 1) * page_size 772 | fda_result = await search_adverse_events( 773 | drug=drug_name, 774 | limit=page_size, 775 | skip=skip, 776 | api_key=api_key, 777 | ) 778 | # Parse the markdown result to extract items 779 | # For simplicity, return the result as a single item 780 | return {"results": [{"content": fda_result}]} 781 | 782 | elif domain == "fda_label": 783 | from biomcp.openfda import search_drug_labels 784 | 785 | drug_name = ( 786 | chemicals[0] if chemicals else keywords[0] if keywords else None 787 | ) 788 | skip = (page - 1) 
* page_size 789 | fda_result = await search_drug_labels( 790 | name=drug_name, 791 | limit=page_size, 792 | skip=skip, 793 | api_key=api_key, 794 | ) 795 | return {"results": [{"content": fda_result}]} 796 | 797 | elif domain == "fda_device": 798 | from biomcp.openfda import search_device_events 799 | 800 | device_name = keywords[0] if keywords else None 801 | skip = (page - 1) * page_size 802 | fda_result = await search_device_events( 803 | device=device_name, 804 | limit=page_size, 805 | skip=skip, 806 | api_key=api_key, 807 | ) 808 | return {"results": [{"content": fda_result}]} 809 | 810 | elif domain == "fda_approval": 811 | from biomcp.openfda import search_drug_approvals 812 | 813 | drug_name = ( 814 | chemicals[0] if chemicals else keywords[0] if keywords else None 815 | ) 816 | skip = (page - 1) * page_size 817 | fda_result = await search_drug_approvals( 818 | drug=drug_name, 819 | limit=page_size, 820 | skip=skip, 821 | api_key=api_key, 822 | ) 823 | return {"results": [{"content": fda_result}]} 824 | 825 | elif domain == "fda_recall": 826 | from biomcp.openfda import search_drug_recalls 827 | 828 | drug_name = ( 829 | chemicals[0] if chemicals else keywords[0] if keywords else None 830 | ) 831 | skip = (page - 1) * page_size 832 | fda_result = await search_drug_recalls( 833 | drug=drug_name, 834 | limit=page_size, 835 | skip=skip, 836 | api_key=api_key, 837 | ) 838 | return {"results": [{"content": fda_result}]} 839 | 840 | elif domain == "fda_shortage": 841 | from biomcp.openfda import search_drug_shortages 842 | 843 | drug_name = ( 844 | chemicals[0] if chemicals else keywords[0] if keywords else None 845 | ) 846 | skip = (page - 1) * page_size 847 | fda_result = await search_drug_shortages( 848 | drug=drug_name, 849 | limit=page_size, 850 | skip=skip, 851 | api_key=api_key, 852 | ) 853 | return {"results": [{"content": fda_result}]} 854 | 855 | else: 856 | raise InvalidDomainError(domain, VALID_DOMAINS) 857 | 858 | 859 | # ──────────────────────────── 
860 | # Unified FETCH tool 861 | # ──────────────────────────── 862 | @mcp_app.tool() 863 | @track_performance("biomcp.fetch") 864 | async def fetch( # noqa: C901 865 | id: Annotated[ # noqa: A002 866 | str, 867 | "PMID / NCT ID / Variant ID / DOI / Gene ID / Drug ID / Disease ID / NCI Organization ID / NCI Intervention ID / NCI Disease ID / FDA Report ID / FDA Set ID / FDA MDR Key / FDA Application Number / FDA Recall Number", 868 | ], 869 | domain: Annotated[ 870 | Literal[ 871 | "article", 872 | "trial", 873 | "variant", 874 | "gene", 875 | "drug", 876 | "disease", 877 | "nci_organization", 878 | "nci_intervention", 879 | "nci_biomarker", 880 | "nci_disease", 881 | "fda_adverse", 882 | "fda_label", 883 | "fda_device", 884 | "fda_approval", 885 | "fda_recall", 886 | "fda_shortage", 887 | ] 888 | | None, 889 | Field( 890 | description="Domain of the record (auto-detected if not provided)" 891 | ), 892 | ] = None, 893 | call_benefit: Annotated[ 894 | str | None, 895 | Field( 896 | description="Brief explanation of why this fetch is being performed and expected benefit. Helps provide context for analytics and improves result relevance." 897 | ), 898 | ] = None, 899 | detail: Annotated[ 900 | Literal[ 901 | "protocol", "locations", "outcomes", "references", "all", "full" 902 | ] 903 | | None, 904 | "Specific section to retrieve (trials) or 'full' (articles)", 905 | ] = None, 906 | api_key: Annotated[ 907 | str | None, 908 | Field( 909 | description="NCI API key for fetching NCI records (nci_organization, nci_intervention, nci_disease). Required for NCI fetches. Get a free key at: https://clinicaltrialsapi.cancer.gov/" 910 | ), 911 | ] = None, 912 | ) -> dict: 913 | """Fetch comprehensive details for a specific biomedical record. 914 | 915 | This tool retrieves full information for articles, clinical trials, genetic variants, 916 | genes, drugs, or diseases using their unique identifiers. 
It returns data in a 917 | standardized format suitable for detailed analysis and research. 918 | 919 | ## IDENTIFIER FORMATS: 920 | - Articles: PMID (PubMed ID) - e.g., "35271234" OR DOI - e.g., "10.1101/2024.01.20.23288905" 921 | - Trials: NCT ID (ClinicalTrials.gov ID) - e.g., "NCT04280705" 922 | - Variants: HGVS notation or dbSNP ID - e.g., "chr7:g.140453136A>T" or "rs121913254" 923 | - Genes: Gene symbol or Entrez ID - e.g., "BRAF" or "673" 924 | - Drugs: Drug name or ID - e.g., "imatinib" or "DB00619" 925 | - Diseases: Disease name or ID - e.g., "melanoma" or "MONDO:0005105" 926 | - NCI Organizations: NCI organization ID - e.g., "NCI-2011-03337" 927 | - NCI Interventions: NCI intervention ID - e.g., "INT123456" 928 | - NCI Diseases: NCI disease ID - e.g., "C4872" 929 | 930 | The domain is automatically detected from the ID format if not provided: 931 | - NCT* → trial 932 | - Contains "/" with numeric prefix (DOI) → article 933 | - Pure numeric → article (PMID) 934 | - rs* or contains ':' or 'g.' 
→ variant 935 | - For genes, drugs, diseases: manual specification recommended 936 | 937 | ## DOMAIN-SPECIFIC OPTIONS: 938 | 939 | ### Articles (domain="article"): 940 | - Returns full article metadata, abstract, and full text when available 941 | - Supports both PubMed articles (via PMID) and Europe PMC preprints (via DOI) 942 | - Includes annotations for genes, diseases, chemicals, and variants (PubMed only) 943 | - detail="full" attempts to retrieve full text content (PubMed only) 944 | 945 | ### Clinical Trials (domain="trial"): 946 | - detail=None or "protocol": Core study information 947 | - detail="locations": Study sites and contact information 948 | - detail="outcomes": Primary/secondary outcomes and results 949 | - detail="references": Related publications and citations 950 | - detail="all": Complete trial record with all sections 951 | 952 | ### Variants (domain="variant"): 953 | - Returns comprehensive variant information including: 954 | - Clinical significance and interpretations 955 | - Population frequencies 956 | - Gene/protein effects 957 | - External database links 958 | - detail parameter is ignored (always returns full data) 959 | 960 | ### Genes (domain="gene"): 961 | - Returns gene information from MyGene.info including: 962 | - Gene symbol, name, and type 963 | - Entrez ID and Ensembl IDs 964 | - Gene summary and aliases 965 | - RefSeq information 966 | - detail parameter is ignored (always returns full data) 967 | 968 | ### Drugs (domain="drug"): 969 | - Returns drug/chemical information from MyChem.info including: 970 | - Drug name and trade names 971 | - Chemical formula and structure IDs 972 | - Clinical indications 973 | - Mechanism of action 974 | - External database links (DrugBank, PubChem, ChEMBL) 975 | - detail parameter is ignored (always returns full data) 976 | 977 | ### Diseases (domain="disease"): 978 | - Returns disease information from MyDisease.info including: 979 | - Disease name and definition 980 | - MONDO ontology ID 
981 | - Disease synonyms 982 | - Cross-references to other databases 983 | - Associated phenotypes 984 | - detail parameter is ignored (always returns full data) 985 | 986 | ### NCI Organizations (domain="nci_organization"): 987 | - Returns organization information from NCI database including: 988 | - Organization name and type 989 | - Full address and contact information 990 | - Research focus areas 991 | - Associated clinical trials 992 | - Requires NCI API key 993 | - detail parameter is ignored (always returns full data) 994 | 995 | ### NCI Interventions (domain="nci_intervention"): 996 | - Returns intervention information from NCI database including: 997 | - Intervention name and type 998 | - Synonyms and alternative names 999 | - Mechanism of action (for drugs) 1000 | - FDA approval status 1001 | - Associated clinical trials 1002 | - Requires NCI API key 1003 | - detail parameter is ignored (always returns full data) 1004 | 1005 | ### NCI Diseases (domain="nci_disease"): 1006 | - Returns disease information from NCI controlled vocabulary including: 1007 | - Preferred disease name 1008 | - Disease category and classification 1009 | - All known synonyms 1010 | - Cross-reference codes (ICD, SNOMED) 1011 | - Requires NCI API key 1012 | - detail parameter is ignored (always returns full data) 1013 | 1014 | ## RETURN FORMAT: 1015 | All fetch operations return a standardized format: 1016 | ```json 1017 | { 1018 | "id": "unique_identifier", 1019 | "title": "Record title or name", 1020 | "text": "Full content or comprehensive description", 1021 | "url": "Link to original source", 1022 | "metadata": { 1023 | // Domain-specific additional fields 1024 | } 1025 | } 1026 | ``` 1027 | 1028 | ## EXAMPLES: 1029 | 1030 | Fetch article by PMID (domain auto-detected): 1031 | ``` 1032 | await fetch(id="35271234") 1033 | ``` 1034 | 1035 | Fetch article by DOI (domain auto-detected): 1036 | ``` 1037 | await fetch(id="10.1101/2024.01.20.23288905") 1038 | ``` 1039 | 1040 | Fetch 
complete trial information (domain auto-detected): 1041 | ``` 1042 | await fetch( 1043 | id="NCT04280705", 1044 | detail="all" 1045 | ) 1046 | ``` 1047 | 1048 | Fetch variant with clinical interpretations: 1049 | ``` 1050 | await fetch(id="rs121913254") 1051 | ``` 1052 | 1053 | Explicitly specify domain (optional): 1054 | ``` 1055 | await fetch( 1056 | domain="variant", 1057 | id="chr7:g.140453136A>T" 1058 | ) 1059 | ``` 1060 | """ 1061 | # Auto-detect domain if not provided 1062 | if domain is None: 1063 | # Try to infer domain from ID format 1064 | if id.upper().startswith("NCT"): 1065 | domain = "trial" 1066 | logger.info(f"Auto-detected domain 'trial' from NCT ID: {id}") 1067 | elif "/" in id and id.split("/")[0].replace(".", "").isdigit(): 1068 | # DOI format (e.g., 10.1038/nature12373) - treat as article 1069 | domain = "article" 1070 | logger.info(f"Auto-detected domain 'article' from DOI: {id}") 1071 | elif id.isdigit(): 1072 | # Numeric ID - likely PMID 1073 | domain = "article" 1074 | logger.info( 1075 | f"Auto-detected domain 'article' from numeric ID: {id}" 1076 | ) 1077 | elif id.startswith("rs") or ":" in id or "g." 
in id: 1078 | # rsID or HGVS notation 1079 | domain = "variant" 1080 | logger.info(f"Auto-detected domain 'variant' from ID format: {id}") 1081 | else: 1082 | # Default to article if we can't determine 1083 | domain = "article" 1084 | logger.warning( 1085 | f"Could not auto-detect domain for ID '{id}', defaulting to 'article'" 1086 | ) 1087 | 1088 | logger.info(f"Fetch called for {domain} with id={id}, detail={detail}") 1089 | 1090 | if domain == "article": 1091 | logger.debug("Fetching article details") 1092 | try: 1093 | from biomcp.articles.fetch import _article_details 1094 | 1095 | # The _article_details function handles both PMIDs and DOIs 1096 | result_str = await _article_details( 1097 | call_benefit=call_benefit 1098 | or "Fetching article details via MCP tool", 1099 | pmid=id, 1100 | ) 1101 | except Exception as e: 1102 | logger.error(f"Article fetch failed: {e}") 1103 | raise SearchExecutionError("article", e) from e 1104 | 1105 | # Parse and return the first article 1106 | try: 1107 | articles = ( 1108 | json.loads(result_str) 1109 | if isinstance(result_str, str) 1110 | else result_str 1111 | ) 1112 | except (json.JSONDecodeError, TypeError) as e: 1113 | logger.error(f"Failed to parse article fetch results: {e}") 1114 | raise ResultParsingError("article", e) from e 1115 | 1116 | if not articles: 1117 | return {"error": "Article not found"} 1118 | 1119 | article = articles[0] 1120 | 1121 | # Check if the article is actually an error response 1122 | if "error" in article: 1123 | return {"error": article["error"]} 1124 | 1125 | # Format according to OpenAI MCP standard 1126 | full_text = article.get("full_text", "") 1127 | abstract = article.get("abstract", "") 1128 | text_content = full_text if full_text else abstract 1129 | 1130 | return { 1131 | "id": str(article.get("pmid", id)), 1132 | "title": article.get("title", DEFAULT_TITLE), 1133 | "text": text_content, 1134 | "url": article.get( 1135 | "url", f"https://pubmed.ncbi.nlm.nih.gov/{id}/" 1136 | ), 
1137 | "metadata": { 1138 | "pmid": article.get("pmid"), 1139 | "journal": article.get("journal"), 1140 | "authors": article.get("authors"), 1141 | "year": article.get("year"), 1142 | "doi": article.get("doi"), 1143 | "annotations": article.get("annotations", {}), 1144 | "is_preprint": article.get("is_preprint", False), 1145 | "preprint_source": article.get("preprint_source"), 1146 | }, 1147 | } 1148 | 1149 | elif domain == "trial": 1150 | logger.debug(f"Fetching trial details for section: {detail}") 1151 | 1152 | # Validate detail parameter 1153 | if detail is not None and detail not in TRIAL_DETAIL_SECTIONS: 1154 | raise InvalidParameterError( 1155 | "detail", 1156 | detail, 1157 | f"one of: {', '.join(TRIAL_DETAIL_SECTIONS)} or None", 1158 | ) 1159 | 1160 | try: 1161 | # Always fetch protocol for basic info - get JSON format 1162 | protocol_json = await trial_getter.get_trial( 1163 | nct_id=id, 1164 | module=trial_getter.Module.PROTOCOL, 1165 | output_json=True, 1166 | ) 1167 | 1168 | # Parse the JSON response 1169 | try: 1170 | protocol_data = json.loads(protocol_json) 1171 | except json.JSONDecodeError as e: 1172 | logger.error(f"Failed to parse protocol JSON for {id}: {e}") 1173 | return { 1174 | "id": id, 1175 | "title": f"Clinical Trial {id}", 1176 | "text": f"Error parsing trial data: {e}", 1177 | "url": f"https://clinicaltrials.gov/study/{id}", 1178 | "metadata": { 1179 | "nct_id": id, 1180 | "error": f"JSON parse error: {e}", 1181 | }, 1182 | } 1183 | 1184 | # Check for errors in the response 1185 | if "error" in protocol_data: 1186 | return { 1187 | "id": id, 1188 | "title": f"Clinical Trial {id}", 1189 | "text": protocol_data.get( 1190 | "details", 1191 | protocol_data.get("error", "Trial not found"), 1192 | ), 1193 | "url": f"https://clinicaltrials.gov/study/{id}", 1194 | "metadata": { 1195 | "nct_id": id, 1196 | "error": protocol_data.get("error"), 1197 | }, 1198 | } 1199 | 1200 | # Build comprehensive text description 1201 | text_parts = [] 1202 | 
1203 | # Extract protocol section data from the API response 1204 | protocol_section = protocol_data.get("protocolSection", {}) 1205 | 1206 | # Extract basic info from the protocol section 1207 | id_module = protocol_section.get("identificationModule", {}) 1208 | status_module = protocol_section.get("statusModule", {}) 1209 | desc_module = protocol_section.get("descriptionModule", {}) 1210 | conditions_module = protocol_section.get("conditionsModule", {}) 1211 | design_module = protocol_section.get("designModule", {}) 1212 | arms_module = protocol_section.get("armsInterventionsModule", {}) 1213 | 1214 | # Add basic protocol info to text 1215 | title = id_module.get("briefTitle", f"Clinical Trial {id}") 1216 | text_parts.append(f"Study Title: {title}") 1217 | 1218 | # Conditions 1219 | conditions = conditions_module.get("conditions", []) 1220 | if conditions: 1221 | text_parts.append(f"\nConditions: {', '.join(conditions)}") 1222 | 1223 | # Interventions 1224 | interventions = [] 1225 | for intervention in arms_module.get("interventions", []): 1226 | interventions.append(intervention.get("name", "")) 1227 | if interventions: 1228 | text_parts.append(f"Interventions: {', '.join(interventions)}") 1229 | 1230 | # Phase 1231 | phases = design_module.get("phases", []) 1232 | if phases: 1233 | text_parts.append(f"Phase: {', '.join(phases)}") 1234 | 1235 | # Status 1236 | overall_status = status_module.get("overallStatus", "N/A") 1237 | text_parts.append(f"Status: {overall_status}") 1238 | 1239 | # Summary 1240 | brief_summary = desc_module.get( 1241 | "briefSummary", "No summary available" 1242 | ) 1243 | text_parts.append(f"\nSummary: {brief_summary}") 1244 | 1245 | # Prepare metadata 1246 | metadata = {"nct_id": id, "protocol": protocol_data} 1247 | 1248 | if detail in ("all", "locations", "outcomes", "references"): 1249 | # Fetch additional sections as needed 1250 | if detail == "all" or detail == "locations": 1251 | try: 1252 | locations_json = await 
trial_getter.get_trial( 1253 | nct_id=id, 1254 | module=trial_getter.Module.LOCATIONS, 1255 | output_json=True, 1256 | ) 1257 | locations_data = json.loads(locations_json) 1258 | if "error" not in locations_data: 1259 | # Extract locations from the protocol section 1260 | locations_module = locations_data.get( 1261 | "protocolSection", {} 1262 | ).get("contactsLocationsModule", {}) 1263 | locations_list = locations_module.get( 1264 | "locations", [] 1265 | ) 1266 | metadata["locations"] = locations_list 1267 | if locations_list: 1268 | text_parts.append( 1269 | f"\n\nLocations: {len(locations_list)} study sites" 1270 | ) 1271 | except Exception as e: 1272 | logger.warning( 1273 | f"Failed to fetch locations for {id}: {e}" 1274 | ) 1275 | metadata["locations"] = [] 1276 | 1277 | if detail == "all" or detail == "outcomes": 1278 | try: 1279 | outcomes_json = await trial_getter.get_trial( 1280 | nct_id=id, 1281 | module=trial_getter.Module.OUTCOMES, 1282 | output_json=True, 1283 | ) 1284 | outcomes_data = json.loads(outcomes_json) 1285 | if "error" not in outcomes_data: 1286 | # Extract outcomes from the protocol section 1287 | outcomes_module = outcomes_data.get( 1288 | "protocolSection", {} 1289 | ).get("outcomesModule", {}) 1290 | primary_outcomes = outcomes_module.get( 1291 | "primaryOutcomes", [] 1292 | ) 1293 | secondary_outcomes = outcomes_module.get( 1294 | "secondaryOutcomes", [] 1295 | ) 1296 | metadata["outcomes"] = { 1297 | "primary_outcomes": primary_outcomes, 1298 | "secondary_outcomes": secondary_outcomes, 1299 | } 1300 | if primary_outcomes: 1301 | text_parts.append( 1302 | f"\n\nPrimary Outcomes: {len(primary_outcomes)} measures" 1303 | ) 1304 | except Exception as e: 1305 | logger.warning( 1306 | f"Failed to fetch outcomes for {id}: {e}" 1307 | ) 1308 | metadata["outcomes"] = {} 1309 | 1310 | if detail == "all" or detail == "references": 1311 | try: 1312 | references_json = await trial_getter.get_trial( 1313 | nct_id=id, 1314 | 
module=trial_getter.Module.REFERENCES, 1315 | output_json=True, 1316 | ) 1317 | references_data = json.loads(references_json) 1318 | if "error" not in references_data: 1319 | # Extract references from the protocol section 1320 | references_module = references_data.get( 1321 | "protocolSection", {} 1322 | ).get("referencesModule", {}) 1323 | references_list = references_module.get( 1324 | "references", [] 1325 | ) 1326 | metadata["references"] = references_list 1327 | if references_list: 1328 | text_parts.append( 1329 | f"\n\nReferences: {len(references_list)} publications" 1330 | ) 1331 | except Exception as e: 1332 | logger.warning( 1333 | f"Failed to fetch references for {id}: {e}" 1334 | ) 1335 | metadata["references"] = [] 1336 | 1337 | # Return OpenAI MCP compliant format 1338 | return { 1339 | "id": id, 1340 | "title": title, 1341 | "text": "\n".join(text_parts), 1342 | "url": f"https://clinicaltrials.gov/study/{id}", 1343 | "metadata": metadata, 1344 | } 1345 | 1346 | except Exception as e: 1347 | logger.error(f"Trial fetch failed: {e}") 1348 | raise SearchExecutionError("trial", e) from e 1349 | 1350 | elif domain == "variant": 1351 | logger.debug("Fetching variant details") 1352 | try: 1353 | from biomcp.variants.getter import get_variant 1354 | 1355 | result_str = await get_variant( 1356 | variant_id=id, 1357 | output_json=True, 1358 | include_external=True, 1359 | ) 1360 | except Exception as e: 1361 | logger.error(f"Variant fetch failed: {e}") 1362 | raise SearchExecutionError("variant", e) from e 1363 | 1364 | try: 1365 | variant_response = ( 1366 | json.loads(result_str) 1367 | if isinstance(result_str, str) 1368 | else result_str 1369 | ) 1370 | except (json.JSONDecodeError, TypeError) as e: 1371 | logger.error(f"Failed to parse variant fetch results: {e}") 1372 | raise ResultParsingError("variant", e) from e 1373 | 1374 | # get_variant returns a list, extract the first variant 1375 | if isinstance(variant_response, list) and variant_response: 1376 | 
variant_data = variant_response[0] 1377 | elif isinstance(variant_response, dict): 1378 | variant_data = variant_response 1379 | else: 1380 | return {"error": "Variant not found"} 1381 | 1382 | # Build comprehensive text description 1383 | text_parts = [] 1384 | 1385 | # Basic variant info 1386 | text_parts.append(f"Variant: {variant_data.get('_id', id)}") 1387 | 1388 | # Gene information 1389 | if variant_data.get("gene"): 1390 | gene_info = variant_data["gene"] 1391 | text_parts.append( 1392 | f"\nGene: {gene_info.get('symbol', 'Unknown')} ({gene_info.get('name', '')})" 1393 | ) 1394 | 1395 | # Clinical significance 1396 | if variant_data.get("clinvar"): 1397 | clinvar = variant_data["clinvar"] 1398 | if clinvar.get("clinical_significance"): 1399 | text_parts.append( 1400 | f"\nClinical Significance: {clinvar['clinical_significance']}" 1401 | ) 1402 | if clinvar.get("review_status"): 1403 | text_parts.append(f"Review Status: {clinvar['review_status']}") 1404 | 1405 | # dbSNP info 1406 | if variant_data.get("dbsnp"): 1407 | dbsnp = variant_data["dbsnp"] 1408 | if dbsnp.get("rsid"): 1409 | text_parts.append(f"\ndbSNP: {dbsnp['rsid']}") 1410 | 1411 | # CADD scores 1412 | if variant_data.get("cadd"): 1413 | cadd = variant_data["cadd"] 1414 | if cadd.get("phred"): 1415 | text_parts.append(f"\nCADD Score: {cadd['phred']}") 1416 | 1417 | # Allele frequencies 1418 | if variant_data.get("gnomad_exome"): 1419 | gnomad = variant_data["gnomad_exome"] 1420 | if gnomad.get("af", {}).get("af"): 1421 | text_parts.append( 1422 | f"\nGnomAD Allele Frequency: {gnomad['af']['af']:.6f}" 1423 | ) 1424 | 1425 | # External links 1426 | if variant_data.get("external_links"): 1427 | links = variant_data["external_links"] 1428 | text_parts.append( 1429 | f"\n\nExternal Resources: {len(links)} database links available" 1430 | ) 1431 | 1432 | # Check for external data indicators 1433 | if variant_data.get("tcga"): 1434 | text_parts.append("\n\nTCGA Data: Available") 1435 | if 
variant_data.get("1000genomes"): 1436 | text_parts.append("\n1000 Genomes Data: Available") 1437 | 1438 | # Determine best URL 1439 | url = variant_data.get("url", "") 1440 | if not url and variant_data.get("dbsnp", {}).get("rsid"): 1441 | url = f"https://www.ncbi.nlm.nih.gov/snp/{variant_data['dbsnp']['rsid']}" 1442 | elif not url: 1443 | url = f"https://myvariant.info/v1/variant/{id}" 1444 | 1445 | # Return OpenAI MCP compliant format 1446 | return { 1447 | "id": variant_data.get("_id", id), 1448 | "title": f"Variant {variant_data.get('_id', id)}", 1449 | "text": "\n".join(text_parts), 1450 | "url": url, 1451 | "metadata": variant_data, 1452 | } 1453 | 1454 | elif domain == "gene": 1455 | logger.debug("Fetching gene details") 1456 | try: 1457 | client = BioThingsClient() 1458 | gene_info = await client.get_gene_info(id) 1459 | 1460 | if not gene_info: 1461 | return {"error": f"Gene {id} not found"} 1462 | 1463 | # Build comprehensive text description 1464 | text_parts = [] 1465 | text_parts.append(f"Gene: {gene_info.symbol} ({gene_info.name})") 1466 | 1467 | if gene_info.entrezgene: 1468 | text_parts.append(f"\nEntrez ID: {gene_info.entrezgene}") 1469 | 1470 | if gene_info.type_of_gene: 1471 | text_parts.append(f"Type: {gene_info.type_of_gene}") 1472 | 1473 | if gene_info.summary: 1474 | text_parts.append(f"\nSummary: {gene_info.summary}") 1475 | 1476 | if gene_info.alias: 1477 | text_parts.append(f"\nAliases: {', '.join(gene_info.alias)}") 1478 | 1479 | # URL 1480 | url = ( 1481 | f"https://www.genenames.org/data/gene-symbol-report/#!/symbol/{gene_info.symbol}" 1482 | if gene_info.symbol 1483 | else "" 1484 | ) 1485 | 1486 | # Return OpenAI MCP compliant format 1487 | return { 1488 | "id": str(gene_info.gene_id), 1489 | "title": f"{gene_info.symbol}: {gene_info.name}" 1490 | if gene_info.symbol and gene_info.name 1491 | else gene_info.symbol or gene_info.name or DEFAULT_TITLE, 1492 | "text": "\n".join(text_parts), 1493 | "url": url, 1494 | "metadata": 
gene_info.model_dump(), 1495 | } 1496 | 1497 | except Exception as e: 1498 | logger.error(f"Gene fetch failed: {e}") 1499 | raise SearchExecutionError("gene", e) from e 1500 | 1501 | elif domain == "drug": 1502 | logger.debug("Fetching drug details") 1503 | try: 1504 | client = BioThingsClient() 1505 | drug_info = await client.get_drug_info(id) 1506 | 1507 | if not drug_info: 1508 | return {"error": f"Drug {id} not found"} 1509 | 1510 | # Build comprehensive text description 1511 | text_parts = [] 1512 | text_parts.append(f"Drug: {drug_info.name}") 1513 | 1514 | if drug_info.drugbank_id: 1515 | text_parts.append(f"\nDrugBank ID: {drug_info.drugbank_id}") 1516 | 1517 | if drug_info.formula: 1518 | text_parts.append(f"Formula: {drug_info.formula}") 1519 | 1520 | if drug_info.tradename: 1521 | text_parts.append( 1522 | f"\nTrade Names: {', '.join(drug_info.tradename)}" 1523 | ) 1524 | 1525 | if drug_info.description: 1526 | text_parts.append(f"\nDescription: {drug_info.description}") 1527 | 1528 | if drug_info.indication: 1529 | text_parts.append(f"\nIndication: {drug_info.indication}") 1530 | 1531 | if drug_info.mechanism_of_action: 1532 | text_parts.append( 1533 | f"\nMechanism of Action: {drug_info.mechanism_of_action}" 1534 | ) 1535 | 1536 | # URL 1537 | url = "" 1538 | if drug_info.drugbank_id: 1539 | url = f"https://www.drugbank.ca/drugs/{drug_info.drugbank_id}" 1540 | elif drug_info.pubchem_cid: 1541 | url = f"https://pubchem.ncbi.nlm.nih.gov/compound/{drug_info.pubchem_cid}" 1542 | 1543 | # Return OpenAI MCP compliant format 1544 | return { 1545 | "id": drug_info.drug_id, 1546 | "title": drug_info.name or drug_info.drug_id or DEFAULT_TITLE, 1547 | "text": "\n".join(text_parts), 1548 | "url": url, 1549 | "metadata": drug_info.model_dump(), 1550 | } 1551 | 1552 | except Exception as e: 1553 | logger.error(f"Drug fetch failed: {e}") 1554 | raise SearchExecutionError("drug", e) from e 1555 | 1556 | elif domain == "disease": 1557 | logger.debug("Fetching disease 
details") 1558 | try: 1559 | client = BioThingsClient() 1560 | disease_info = await client.get_disease_info(id) 1561 | 1562 | if not disease_info: 1563 | return {"error": f"Disease {id} not found"} 1564 | 1565 | # Build comprehensive text description 1566 | text_parts = [] 1567 | text_parts.append(f"Disease: {disease_info.name}") 1568 | 1569 | if disease_info.mondo and isinstance(disease_info.mondo, dict): 1570 | mondo_id = disease_info.mondo.get("id") 1571 | if mondo_id: 1572 | text_parts.append(f"\nMONDO ID: {mondo_id}") 1573 | 1574 | if disease_info.definition: 1575 | text_parts.append(f"\nDefinition: {disease_info.definition}") 1576 | 1577 | if disease_info.synonyms: 1578 | text_parts.append( 1579 | f"\nSynonyms: {', '.join(disease_info.synonyms[:5])}" 1580 | ) 1581 | if len(disease_info.synonyms) > 5: 1582 | text_parts.append( 1583 | f" ... and {len(disease_info.synonyms) - 5} more" 1584 | ) 1585 | 1586 | if disease_info.phenotypes: 1587 | text_parts.append( 1588 | f"\nAssociated Phenotypes: {len(disease_info.phenotypes)}" 1589 | ) 1590 | 1591 | # URL 1592 | url = "" 1593 | if disease_info.mondo and isinstance(disease_info.mondo, dict): 1594 | mondo_id = disease_info.mondo.get("id") 1595 | if mondo_id: 1596 | url = f"https://monarchinitiative.org/disease/{mondo_id}" 1597 | 1598 | # Return OpenAI MCP compliant format 1599 | return { 1600 | "id": disease_info.disease_id, 1601 | "title": disease_info.name 1602 | or disease_info.disease_id 1603 | or DEFAULT_TITLE, 1604 | "text": "\n".join(text_parts), 1605 | "url": url, 1606 | "metadata": disease_info.model_dump(), 1607 | } 1608 | 1609 | except Exception as e: 1610 | logger.error(f"Disease fetch failed: {e}") 1611 | raise SearchExecutionError("disease", e) from e 1612 | 1613 | elif domain == "nci_organization": 1614 | logger.debug("Fetching NCI organization details") 1615 | try: 1616 | from biomcp.organizations import get_organization 1617 | from biomcp.organizations.getter import format_organization_details 1618 
| 1619 | org_data = await get_organization( 1620 | org_id=id, 1621 | api_key=api_key, 1622 | ) 1623 | 1624 | # Format the details 1625 | formatted_text = format_organization_details(org_data) 1626 | 1627 | # Return OpenAI MCP compliant format 1628 | return { 1629 | "id": id, 1630 | "title": org_data.get("name", "Unknown Organization"), 1631 | "text": formatted_text, 1632 | "url": "", # NCI doesn't provide direct URLs 1633 | "metadata": org_data, 1634 | } 1635 | 1636 | except Exception as e: 1637 | logger.error(f"NCI organization fetch failed: {e}") 1638 | raise SearchExecutionError("nci_organization", e) from e 1639 | 1640 | elif domain == "nci_intervention": 1641 | logger.debug("Fetching NCI intervention details") 1642 | try: 1643 | from biomcp.interventions import get_intervention 1644 | from biomcp.interventions.getter import format_intervention_details 1645 | 1646 | intervention_data = await get_intervention( 1647 | intervention_id=id, 1648 | api_key=api_key, 1649 | ) 1650 | 1651 | # Format the details 1652 | formatted_text = format_intervention_details(intervention_data) 1653 | 1654 | # Return OpenAI MCP compliant format 1655 | return { 1656 | "id": id, 1657 | "title": intervention_data.get("name", "Unknown Intervention"), 1658 | "text": formatted_text, 1659 | "url": "", # NCI doesn't provide direct URLs 1660 | "metadata": intervention_data, 1661 | } 1662 | 1663 | except Exception as e: 1664 | logger.error(f"NCI intervention fetch failed: {e}") 1665 | raise SearchExecutionError("nci_intervention", e) from e 1666 | 1667 | elif domain == "nci_disease": 1668 | logger.debug("Fetching NCI disease details") 1669 | try: 1670 | from biomcp.diseases import get_disease_by_id 1671 | 1672 | disease_data = await get_disease_by_id( 1673 | disease_id=id, 1674 | api_key=api_key, 1675 | ) 1676 | 1677 | # Build text description 1678 | text_parts = [] 1679 | text_parts.append( 1680 | f"Disease: {disease_data.get('name', 'Unknown Disease')}" 1681 | ) 1682 | 1683 | if 
disease_data.get("category"): 1684 | text_parts.append(f"\nCategory: {disease_data['category']}") 1685 | 1686 | if disease_data.get("synonyms"): 1687 | synonyms = disease_data["synonyms"] 1688 | if isinstance(synonyms, list) and synonyms: 1689 | text_parts.append(f"\nSynonyms: {', '.join(synonyms[:5])}") 1690 | if len(synonyms) > 5: 1691 | text_parts.append( 1692 | f" ... and {len(synonyms) - 5} more" 1693 | ) 1694 | 1695 | if disease_data.get("codes"): 1696 | codes = disease_data["codes"] 1697 | if isinstance(codes, dict): 1698 | code_items = [ 1699 | f"{system}: {code}" for system, code in codes.items() 1700 | ] 1701 | if code_items: 1702 | text_parts.append(f"\nCodes: {', '.join(code_items)}") 1703 | 1704 | # Return OpenAI MCP compliant format 1705 | return { 1706 | "id": id, 1707 | "title": disease_data.get( 1708 | "name", 1709 | disease_data.get("preferred_name", "Unknown Disease"), 1710 | ), 1711 | "text": "\n".join(text_parts), 1712 | "url": "", # NCI doesn't provide direct URLs 1713 | "metadata": disease_data, 1714 | } 1715 | 1716 | except Exception as e: 1717 | logger.error(f"NCI disease fetch failed: {e}") 1718 | raise SearchExecutionError("nci_disease", e) from e 1719 | 1720 | # Note: nci_biomarker doesn't support fetching by ID, only searching 1721 | 1722 | # OpenFDA domains 1723 | elif domain == "fda_adverse": 1724 | from biomcp.openfda import get_adverse_event 1725 | 1726 | result = await get_adverse_event(id, api_key=api_key) 1727 | return { 1728 | "title": f"FDA Adverse Event Report {id}", 1729 | "text": result, 1730 | "url": "", 1731 | "metadata": {"report_id": id, "domain": "fda_adverse"}, 1732 | } 1733 | 1734 | elif domain == "fda_label": 1735 | from biomcp.openfda import get_drug_label 1736 | 1737 | result = await get_drug_label(id, api_key=api_key) 1738 | return { 1739 | "title": f"FDA Drug Label {id}", 1740 | "text": result, 1741 | "url": "", 1742 | "metadata": {"set_id": id, "domain": "fda_label"}, 1743 | } 1744 | 1745 | elif domain == 
def _filter_tools_by_domains(tools, domains):
    """Return the tools whose name matches one of the requested domains.

    Only the "articles", "trials" and "variants" filters are recognised: a
    tool is kept when its name contains the singular marker ("article",
    "trial", "variant") and the matching plural is present in ``domains``.
    """
    marker_to_domain = (
        ("article", "articles"),
        ("trial", "trials"),
        ("variant", "variants"),
    )
    return [
        tool
        for tool in tools
        if any(
            marker in tool and domain in domains
            for marker, domain in marker_to_domain
        )
    ]


def _extract_items(data, max_results_per_domain):
    """Split a decoded domain payload into (items, cbioportal_summary).

    A list is truncated to ``max_results_per_domain``. A dict carrying an
    "articles" key is the article format that may also hold a
    "cbioportal_summary". Any other dict is treated as a single item.
    Anything else yields no items.
    """
    if isinstance(data, list):
        return data[:max_results_per_domain], None
    if isinstance(data, dict):
        if "articles" in data:
            return (
                data["articles"][:max_results_per_domain],
                data.get("cbioportal_summary"),
            )
        return [data], None
    return [], None


def _build_cbioportal_result(cbioportal_summary, gene_name):
    """Wrap a cBioPortal summary in the id/title/text/url result shape.

    When ``gene_name`` is empty, the name is recovered from a
    "Summary for <GENE>" phrase in the summary text if one is present.
    """
    import re

    if not gene_name and "Summary for " in cbioportal_summary:
        match = re.search(r"Summary for (\w+)", cbioportal_summary)
        if match:
            gene_name = match.group(1)

    return {
        "id": f"cbioportal_summary_{gene_name or 'gene'}",
        "title": f"cBioPortal Summary for {gene_name or 'Gene'}",
        "text": cbioportal_summary[:5000],  # Limit text length
        "url": f"https://www.cbioportal.org/results?gene_list={gene_name}"
        if gene_name
        else "",
    }


def _to_openai_result(formatted_result):
    """Project a handler-formatted result onto the id/title/text/url keys.

    "snippet" is preferred over "text" for the text field.
    """
    return {
        "id": formatted_result.get("id", ""),
        "title": formatted_result.get("title", DEFAULT_TITLE),
        "text": formatted_result.get(
            "snippet", formatted_result.get("text", "")
        ),
        "url": formatted_result.get("url", ""),
    }


# Internal function for unified search
async def _unified_search(  # noqa: C901
    query: str,
    max_results_per_domain: int = MAX_RESULTS_PER_DOMAIN_DEFAULT,
    domains: list[str] | None = None,
    explain_query: bool = False,
) -> dict:
    """Internal unified search implementation.

    Parses the unified query language and routes to appropriate domain tools.
    Supports field-based syntax like 'gene:BRAF AND trials.phase:3'.

    Args:
        query: Unified query string with field syntax
        max_results_per_domain: Limit results per domain
        domains: Optional list to filter which domains to search; only
            "articles", "trials" and "variants" are recognised by the filter
        explain_query: If True, return query parsing explanation instead of
            executing the search

    Returns:
        When ``explain_query`` is True, a dictionary with the keys
        "original_query", "parsed_structure", "routing_plan" and "schema".
        Otherwise ``{"results": [...]}``: a single flat list in which every
        entry has "id", "title", "text" and "url" keys. Domains that report
        an error or whose payload cannot be parsed are logged and skipped.

    Raises:
        QueryParsingError: If query cannot be parsed
        SearchExecutionError: If search execution fails
    """
    logger.info(f"Unified search with query: {query}")

    # Parse the query
    try:
        parser = QueryParser()
        parsed = parser.parse(query)
    except Exception as e:
        logger.error(f"Failed to parse query: {e}")
        raise QueryParsingError(query, e) from e

    # Route to appropriate tools (routing errors propagate unwrapped)
    router = QueryRouter()
    plan = router.route(parsed)

    # Filter domains if specified
    if domains:
        plan.tools_to_call = _filter_tools_by_domains(
            plan.tools_to_call, domains
        )

    # Return explanation if requested
    if explain_query:
        return {
            "original_query": query,
            "parsed_structure": {
                "cross_domain_fields": parsed.cross_domain_fields,
                "domain_specific_fields": parsed.domain_specific_fields,
                "terms": [
                    {
                        "field": term.field,
                        "operator": term.operator.value,
                        "value": term.value,
                        "domain": term.domain,
                    }
                    for term in parsed.terms
                ],
            },
            "routing_plan": {
                "tools_to_call": plan.tools_to_call,
                "field_mappings": plan.field_mappings,
            },
            "schema": parser.get_schema(),
        }

    # Execute the search plan
    try:
        results = await execute_routing_plan(plan, output_json=True)
    except Exception as e:
        logger.error(f"Failed to execute search plan: {e}")
        raise SearchExecutionError("unified", e) from e

    # Format unified results - collect all results into a single array
    all_results = []

    for domain, raw_result in results.items():
        try:
            data = (
                json.loads(raw_result)
                if isinstance(raw_result, str)
                else raw_result
            )

            # Check for an error payload *after* decoding so that errors
            # delivered as JSON strings are skipped too, not only errors
            # that arrive as ready-made dicts.
            if isinstance(data, dict) and "error" in data:
                logger.warning(f"Error in domain {domain}: {data['error']}")
                continue

            # Result keys are plural ("articles"); handlers are looked up by
            # the singular name. removesuffix drops exactly one trailing
            # "s", whereas rstrip("s") would strip every trailing "s".
            handler_class = get_domain_handler(domain.removesuffix("s"))

            # Handle both list format and dict format
            # (for articles with cBioPortal data)
            items_to_process, cbioportal_summary = _extract_items(
                data, max_results_per_domain
            )

            # Add cBioPortal summary as first result if available
            if cbioportal_summary and domain == "articles":
                try:
                    all_results.append(
                        _build_cbioportal_result(
                            cbioportal_summary,
                            parsed.cross_domain_fields.get("gene", ""),
                        )
                    )
                except Exception as e:
                    # Best effort: a bad summary must not drop the articles.
                    logger.warning(f"Failed to format cBioPortal summary: {e}")

            for item in items_to_process:
                try:
                    # Ensure OpenAI MCP format
                    all_results.append(
                        _to_openai_result(handler_class.format_result(item))
                    )
                except Exception as e:
                    # One malformed item should not discard its siblings.
                    logger.warning(
                        f"Failed to format result in domain {domain}: {e}"
                    )
                    continue

        except (json.JSONDecodeError, TypeError, ValueError) as e:
            logger.warning(f"Failed to parse results for domain {domain}: {e}")
            continue

    logger.info(
        f"Unified search completed with {len(all_results)} total results"
    )

    # Return OpenAI MCP compliant format
    return {"results": all_results}