This is page 17 of 20. Use http://codebase.md/genomoncology/biomcp?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .github
│ ├── actions
│ │ └── setup-python-env
│ │ └── action.yml
│ ├── dependabot.yml
│ └── workflows
│ ├── ci.yml
│ ├── deploy-docs.yml
│ ├── main.yml.disabled
│ ├── on-release-main.yml
│ └── validate-codecov-config.yml
├── .gitignore
├── .pre-commit-config.yaml
├── BIOMCP_DATA_FLOW.md
├── CHANGELOG.md
├── CNAME
├── codecov.yaml
├── docker-compose.yml
├── Dockerfile
├── docs
│ ├── apis
│ │ ├── error-codes.md
│ │ ├── overview.md
│ │ └── python-sdk.md
│ ├── assets
│ │ ├── biomcp-cursor-locations.png
│ │ ├── favicon.ico
│ │ ├── icon.png
│ │ ├── logo.png
│ │ ├── mcp_architecture.txt
│ │ └── remote-connection
│ │ ├── 00_connectors.png
│ │ ├── 01_add_custom_connector.png
│ │ ├── 02_connector_enabled.png
│ │ ├── 03_connect_to_biomcp.png
│ │ ├── 04_select_google_oauth.png
│ │ └── 05_success_connect.png
│ ├── backend-services-reference
│ │ ├── 01-overview.md
│ │ ├── 02-biothings-suite.md
│ │ ├── 03-cbioportal.md
│ │ ├── 04-clinicaltrials-gov.md
│ │ ├── 05-nci-cts-api.md
│ │ ├── 06-pubtator3.md
│ │ └── 07-alphagenome.md
│ ├── blog
│ │ ├── ai-assisted-clinical-trial-search-analysis.md
│ │ ├── images
│ │ │ ├── deep-researcher-video.png
│ │ │ ├── researcher-announce.png
│ │ │ ├── researcher-drop-down.png
│ │ │ ├── researcher-prompt.png
│ │ │ ├── trial-search-assistant.png
│ │ │ └── what_is_biomcp_thumbnail.png
│ │ └── researcher-persona-resource.md
│ ├── changelog.md
│ ├── CNAME
│ ├── concepts
│ │ ├── 01-what-is-biomcp.md
│ │ ├── 02-the-deep-researcher-persona.md
│ │ └── 03-sequential-thinking-with-the-think-tool.md
│ ├── developer-guides
│ │ ├── 01-server-deployment.md
│ │ ├── 02-contributing-and-testing.md
│ │ ├── 03-third-party-endpoints.md
│ │ ├── 04-transport-protocol.md
│ │ ├── 05-error-handling.md
│ │ ├── 06-http-client-and-caching.md
│ │ ├── 07-performance-optimizations.md
│ │ └── generate_endpoints.py
│ ├── faq-condensed.md
│ ├── FDA_SECURITY.md
│ ├── genomoncology.md
│ ├── getting-started
│ │ ├── 01-quickstart-cli.md
│ │ ├── 02-claude-desktop-integration.md
│ │ └── 03-authentication-and-api-keys.md
│ ├── how-to-guides
│ │ ├── 01-find-articles-and-cbioportal-data.md
│ │ ├── 02-find-trials-with-nci-and-biothings.md
│ │ ├── 03-get-comprehensive-variant-annotations.md
│ │ ├── 04-predict-variant-effects-with-alphagenome.md
│ │ ├── 05-logging-and-monitoring-with-bigquery.md
│ │ └── 06-search-nci-organizations-and-interventions.md
│ ├── index.md
│ ├── policies.md
│ ├── reference
│ │ ├── architecture-diagrams.md
│ │ ├── quick-architecture.md
│ │ ├── quick-reference.md
│ │ └── visual-architecture.md
│ ├── robots.txt
│ ├── stylesheets
│ │ ├── announcement.css
│ │ └── extra.css
│ ├── troubleshooting.md
│ ├── tutorials
│ │ ├── biothings-prompts.md
│ │ ├── claude-code-biomcp-alphagenome.md
│ │ ├── nci-prompts.md
│ │ ├── openfda-integration.md
│ │ ├── openfda-prompts.md
│ │ ├── pydantic-ai-integration.md
│ │ └── remote-connection.md
│ ├── user-guides
│ │ ├── 01-command-line-interface.md
│ │ ├── 02-mcp-tools-reference.md
│ │ └── 03-integrating-with-ides-and-clients.md
│ └── workflows
│ └── all-workflows.md
├── example_scripts
│ ├── mcp_integration.py
│ └── python_sdk.py
├── glama.json
├── LICENSE
├── lzyank.toml
├── Makefile
├── mkdocs.yml
├── package-lock.json
├── package.json
├── pyproject.toml
├── README.md
├── scripts
│ ├── check_docs_in_mkdocs.py
│ ├── check_http_imports.py
│ └── generate_endpoints_doc.py
├── smithery.yaml
├── src
│ └── biomcp
│ ├── __init__.py
│ ├── __main__.py
│ ├── articles
│ │ ├── __init__.py
│ │ ├── autocomplete.py
│ │ ├── fetch.py
│ │ ├── preprints.py
│ │ ├── search_optimized.py
│ │ ├── search.py
│ │ └── unified.py
│ ├── biomarkers
│ │ ├── __init__.py
│ │ └── search.py
│ ├── cbioportal_helper.py
│ ├── circuit_breaker.py
│ ├── cli
│ │ ├── __init__.py
│ │ ├── articles.py
│ │ ├── biomarkers.py
│ │ ├── diseases.py
│ │ ├── health.py
│ │ ├── interventions.py
│ │ ├── main.py
│ │ ├── openfda.py
│ │ ├── organizations.py
│ │ ├── server.py
│ │ ├── trials.py
│ │ └── variants.py
│ ├── connection_pool.py
│ ├── constants.py
│ ├── core.py
│ ├── diseases
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ └── search.py
│ ├── domain_handlers.py
│ ├── drugs
│ │ ├── __init__.py
│ │ └── getter.py
│ ├── exceptions.py
│ ├── genes
│ │ ├── __init__.py
│ │ └── getter.py
│ ├── http_client_simple.py
│ ├── http_client.py
│ ├── individual_tools.py
│ ├── integrations
│ │ ├── __init__.py
│ │ ├── biothings_client.py
│ │ └── cts_api.py
│ ├── interventions
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ └── search.py
│ ├── logging_filter.py
│ ├── metrics_handler.py
│ ├── metrics.py
│ ├── oncokb_helper.py
│ ├── openfda
│ │ ├── __init__.py
│ │ ├── adverse_events_helpers.py
│ │ ├── adverse_events.py
│ │ ├── cache.py
│ │ ├── constants.py
│ │ ├── device_events_helpers.py
│ │ ├── device_events.py
│ │ ├── drug_approvals.py
│ │ ├── drug_labels_helpers.py
│ │ ├── drug_labels.py
│ │ ├── drug_recalls_helpers.py
│ │ ├── drug_recalls.py
│ │ ├── drug_shortages_detail_helpers.py
│ │ ├── drug_shortages_helpers.py
│ │ ├── drug_shortages.py
│ │ ├── exceptions.py
│ │ ├── input_validation.py
│ │ ├── rate_limiter.py
│ │ ├── utils.py
│ │ └── validation.py
│ ├── organizations
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ └── search.py
│ ├── parameter_parser.py
│ ├── query_parser.py
│ ├── query_router.py
│ ├── rate_limiter.py
│ ├── render.py
│ ├── request_batcher.py
│ ├── resources
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ ├── instructions.md
│ │ └── researcher.md
│ ├── retry.py
│ ├── router_handlers.py
│ ├── router.py
│ ├── shared_context.py
│ ├── thinking
│ │ ├── __init__.py
│ │ ├── sequential.py
│ │ └── session.py
│ ├── thinking_tool.py
│ ├── thinking_tracker.py
│ ├── trials
│ │ ├── __init__.py
│ │ ├── getter.py
│ │ ├── nci_getter.py
│ │ ├── nci_search.py
│ │ └── search.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── cancer_types_api.py
│ │ ├── cbio_http_adapter.py
│ │ ├── endpoint_registry.py
│ │ ├── gene_validator.py
│ │ ├── metrics.py
│ │ ├── mutation_filter.py
│ │ ├── query_utils.py
│ │ ├── rate_limiter.py
│ │ └── request_cache.py
│ ├── variants
│ │ ├── __init__.py
│ │ ├── alphagenome.py
│ │ ├── cancer_types.py
│ │ ├── cbio_external_client.py
│ │ ├── cbioportal_mutations.py
│ │ ├── cbioportal_search_helpers.py
│ │ ├── cbioportal_search.py
│ │ ├── constants.py
│ │ ├── external.py
│ │ ├── filters.py
│ │ ├── getter.py
│ │ ├── links.py
│ │ ├── oncokb_client.py
│ │ ├── oncokb_models.py
│ │ └── search.py
│ └── workers
│ ├── __init__.py
│ ├── worker_entry_stytch.js
│ ├── worker_entry.js
│ └── worker.py
├── tests
│ ├── bdd
│ │ ├── cli_help
│ │ │ ├── help.feature
│ │ │ └── test_help.py
│ │ ├── conftest.py
│ │ ├── features
│ │ │ └── alphagenome_integration.feature
│ │ ├── fetch_articles
│ │ │ ├── fetch.feature
│ │ │ └── test_fetch.py
│ │ ├── get_trials
│ │ │ ├── get.feature
│ │ │ └── test_get.py
│ │ ├── get_variants
│ │ │ ├── get.feature
│ │ │ └── test_get.py
│ │ ├── search_articles
│ │ │ ├── autocomplete.feature
│ │ │ ├── search.feature
│ │ │ ├── test_autocomplete.py
│ │ │ └── test_search.py
│ │ ├── search_trials
│ │ │ ├── search.feature
│ │ │ └── test_search.py
│ │ ├── search_variants
│ │ │ ├── search.feature
│ │ │ └── test_search.py
│ │ └── steps
│ │ └── test_alphagenome_steps.py
│ ├── config
│ │ └── test_smithery_config.py
│ ├── conftest.py
│ ├── data
│ │ ├── ct_gov
│ │ │ ├── clinical_trials_api_v2.yaml
│ │ │ ├── trials_NCT04280705.json
│ │ │ └── trials_NCT04280705.txt
│ │ ├── myvariant
│ │ │ ├── myvariant_api.yaml
│ │ │ ├── myvariant_field_descriptions.csv
│ │ │ ├── variants_full_braf_v600e.json
│ │ │ ├── variants_full_braf_v600e.txt
│ │ │ └── variants_part_braf_v600_multiple.json
│ │ ├── oncokb_mock_responses.json
│ │ ├── openfda
│ │ │ ├── drugsfda_detail.json
│ │ │ ├── drugsfda_search.json
│ │ │ ├── enforcement_detail.json
│ │ │ └── enforcement_search.json
│ │ └── pubtator
│ │ ├── pubtator_autocomplete.json
│ │ └── pubtator3_paper.txt
│ ├── integration
│ │ ├── test_oncokb_integration.py
│ │ ├── test_openfda_integration.py
│ │ ├── test_preprints_integration.py
│ │ ├── test_simple.py
│ │ └── test_variants_integration.py
│ ├── tdd
│ │ ├── articles
│ │ │ ├── test_autocomplete.py
│ │ │ ├── test_cbioportal_integration.py
│ │ │ ├── test_fetch.py
│ │ │ ├── test_preprints.py
│ │ │ ├── test_search.py
│ │ │ └── test_unified.py
│ │ ├── conftest.py
│ │ ├── drugs
│ │ │ ├── __init__.py
│ │ │ └── test_drug_getter.py
│ │ ├── openfda
│ │ │ ├── __init__.py
│ │ │ ├── test_adverse_events.py
│ │ │ ├── test_device_events.py
│ │ │ ├── test_drug_approvals.py
│ │ │ ├── test_drug_labels.py
│ │ │ ├── test_drug_recalls.py
│ │ │ ├── test_drug_shortages.py
│ │ │ └── test_security.py
│ │ ├── test_biothings_integration_real.py
│ │ ├── test_biothings_integration.py
│ │ ├── test_circuit_breaker.py
│ │ ├── test_concurrent_requests.py
│ │ ├── test_connection_pool.py
│ │ ├── test_domain_handlers.py
│ │ ├── test_drug_approvals.py
│ │ ├── test_drug_recalls.py
│ │ ├── test_drug_shortages.py
│ │ ├── test_endpoint_documentation.py
│ │ ├── test_error_scenarios.py
│ │ ├── test_europe_pmc_fetch.py
│ │ ├── test_mcp_integration.py
│ │ ├── test_mcp_tools.py
│ │ ├── test_metrics.py
│ │ ├── test_nci_integration.py
│ │ ├── test_nci_mcp_tools.py
│ │ ├── test_network_policies.py
│ │ ├── test_offline_mode.py
│ │ ├── test_openfda_unified.py
│ │ ├── test_pten_r173_search.py
│ │ ├── test_render.py
│ │ ├── test_request_batcher.py.disabled
│ │ ├── test_retry.py
│ │ ├── test_router.py
│ │ ├── test_shared_context.py.disabled
│ │ ├── test_unified_biothings.py
│ │ ├── thinking
│ │ │ ├── __init__.py
│ │ │ └── test_sequential.py
│ │ ├── trials
│ │ │ ├── test_backward_compatibility.py
│ │ │ ├── test_getter.py
│ │ │ └── test_search.py
│ │ ├── utils
│ │ │ ├── test_gene_validator.py
│ │ │ ├── test_mutation_filter.py
│ │ │ ├── test_rate_limiter.py
│ │ │ └── test_request_cache.py
│ │ ├── variants
│ │ │ ├── constants.py
│ │ │ ├── test_alphagenome_api_key.py
│ │ │ ├── test_alphagenome_comprehensive.py
│ │ │ ├── test_alphagenome.py
│ │ │ ├── test_cbioportal_mutations.py
│ │ │ ├── test_cbioportal_search.py
│ │ │ ├── test_external_integration.py
│ │ │ ├── test_external.py
│ │ │ ├── test_extract_gene_aa_change.py
│ │ │ ├── test_filters.py
│ │ │ ├── test_getter.py
│ │ │ ├── test_links.py
│ │ │ ├── test_oncokb_client.py
│ │ │ ├── test_oncokb_helper.py
│ │ │ └── test_search.py
│ │ └── workers
│ │ └── test_worker_sanitization.js
│ └── test_pydantic_ai_integration.py
├── THIRD_PARTY_ENDPOINTS.md
├── tox.ini
├── uv.lock
└── wrangler.toml
```
# Files
--------------------------------------------------------------------------------
/tests/data/myvariant/myvariant_field_descriptions.csv:
--------------------------------------------------------------------------------
```
1 | field,"description"
2 | "cadd._license","License information URL for the CADD data source."
3 | "cadd.alt","Alternate allele for the variant in CADD."
4 | "cadd.anc","Ancestral allele according to CADD analysis."
5 | "cadd.annotype","Annotation type (e.g., CodingTranscript) from CADD."
6 | "cadd.bstatistic","B-statistic score from CADD, related to conservation."
7 | "cadd.chmm.bivflnk","ChromHMM state score: Flanking Bivalent TSS/Enh."
8 | "cadd.chmm.enh","ChromHMM state score: Active Enhancer."
9 | "cadd.chmm.enhbiv","ChromHMM state score: Bivalent Enhancer."
10 | "cadd.chmm.het","ChromHMM state score: Heterochromatin."
11 | "cadd.chmm.quies","ChromHMM state score: Quiescent/Low activity."
12 | "cadd.chmm.reprpc","ChromHMM state score: Repressed Polycomb."
13 | "cadd.chmm.reprpcwk","ChromHMM state score: Weak Repressed Polycomb."
14 | "cadd.chmm.tssa","ChromHMM state score: Active TSS."
15 | "cadd.chmm.tssaflnk","ChromHMM state score: Flanking Active TSS."
16 | "cadd.chmm.tssbiv","ChromHMM state score: Bivalent TSS."
17 | "cadd.chmm.tx","ChromHMM state score: Strong transcription."
18 | "cadd.chmm.txflnk","ChromHMM state score: Transcribed at gene 5' and 3'."
19 | "cadd.chmm.txwk","ChromHMM state score: Weak transcription."
20 | "cadd.chmm.znfrpts","ChromHMM state score: ZNF genes & repeats."
21 | "cadd.chrom","Chromosome number for the variant in CADD."
22 | "cadd.consdetail","Detailed consequence of the variant (e.g., missense) from CADD."
23 | "cadd.consequence","General consequence category (e.g., NON_SYNONYMOUS) from CADD."
24 | "cadd.consscore","Conservation score from CADD."
25 | "cadd.cpg","Indicator if the variant is in a CpG island (0 or 1)."
26 | "cadd.dna.helt","DNA physical property: Helix twist value."
27 | "cadd.dna.mgw","DNA physical property: Minor groove width value."
28 | "cadd.dna.prot","DNA physical property: Propeller twist value."
29 | "cadd.dna.roll","DNA physical property: Roll value."
30 | "cadd.encode.exp","ENCODE gene expression value."
31 | "cadd.encode.h3k27ac","ENCODE histone modification H3K27ac signal value."
32 | "cadd.encode.h3k4me1","ENCODE histone modification H3K4me1 signal value."
33 | "cadd.encode.h3k4me3","ENCODE histone modification H3K4me3 signal value."
34 | "cadd.encode.nucleo","ENCODE nucleosome occupancy signal value."
35 | "cadd.exon","Exon number and total exons (e.g., 15/18)."
36 | "cadd.fitcons","FitCons score indicating functional impact based on evolutionary data."
37 | "cadd.gc","GC content in the surrounding region."
38 | "cadd.gene.ccds_id","Consensus CDS (CCDS) identifier for the gene."
39 | "cadd.gene.cds.cdna_pos","Position of the variant within the cDNA sequence."
40 | "cadd.gene.cds.cds_pos","Position of the variant within the coding sequence (CDS)."
41 | "cadd.gene.cds.rel_cdna_pos","Relative position within the cDNA sequence."
42 | "cadd.gene.cds.rel_cds_pos","Relative position within the coding sequence (CDS)."
43 | "cadd.gene.feature_id","Ensembl transcript identifier (ENST)."
44 | "cadd.gene.gene_id","Ensembl gene identifier (ENSG)."
45 | "cadd.gene.genename","Gene symbol (e.g., BRAF)."
46 | "cadd.gene.prot.domain","Protein domain affected by the variant."
47 | "cadd.gene.prot.protpos","Amino acid position within the protein."
48 | "cadd.gene.prot.rel_prot_pos","Relative position within the protein sequence."
49 | "cadd.gerp.n","GERP++ Neutral rate score."
50 | "cadd.gerp.rs","GERP++ Rejected Substitutions score (conservation score)."
51 | "cadd.gerp.rs_pval","P-value associated with the GERP++ RS score."
52 | "cadd.gerp.s","GERP++ S score, equivalent to RS score."
53 | "cadd.grantham","Grantham score measuring physicochemical difference between amino acids."
54 | "cadd.isderived","Indicates if the alternate allele is derived (TRUE/FALSE)."
55 | "cadd.isknownvariant","Indicates if the variant is known in dbSNP (TRUE/FALSE)."
56 | "cadd.istv","Indicates if the variant is a transversion (TRUE/FALSE)."
57 | "cadd.length","Length of the variant (0 for SNVs)."
58 | "cadd.mapability.20bp","Mapability score based on 20bp reads."
59 | "cadd.mapability.35bp","Mapability score based on 35bp reads."
60 | "cadd.min_dist_tse","Minimum distance to the nearest transcribed sequence end (TSE)."
61 | "cadd.min_dist_tss","Minimum distance to the nearest transcription start site (TSS)."
62 | "cadd.mutindex","Mutation index score from CADD."
63 | "cadd.naa","New amino acid resulting from the variant."
64 | "cadd.oaa","Original amino acid affected by the variant."
65 | "cadd.phast_cons.mammalian","PhastCons conservation score across mammals."
66 | "cadd.phast_cons.primate","PhastCons conservation score across primates."
67 | "cadd.phast_cons.vertebrate","PhastCons conservation score across vertebrates."
68 | "cadd.phred","CADD PHRED-like scaled score indicating deleteriousness."
69 | "cadd.phylop.mammalian","PhyloP conservation score across mammals."
70 | "cadd.phylop.primate","PhyloP conservation score across primates."
71 | "cadd.phylop.vertebrate","PhyloP conservation score across vertebrates."
72 | "cadd.polyphen.cat","PolyPhen-2 qualitative prediction (e.g., probably_damaging)."
73 | "cadd.polyphen.val","PolyPhen-2 quantitative score (0 to 1)."
74 | "cadd.pos","Genomic position of the variant (hg19)."
75 | "cadd.rawscore","CADD raw score before scaling."
76 | "cadd.ref","Reference allele for the variant in CADD."
77 | "cadd.segway","Segway annotation for the genomic region."
78 | "cadd.sift.cat","SIFT qualitative prediction (e.g., deleterious)."
79 | "cadd.sift.val","SIFT quantitative score (0 to 1)."
80 | "cadd.type","Type of variant (e.g., SNV)."
81 | "cgi._license","License information URL for the CGI data source."
82 | "cgi.association","Drug association type (Responsive, Resistant) from CGI."
83 | "cgi.cdna","cDNA change notation (e.g., c.1799T>A) from CGI."
84 | "cgi.drug","Drug name associated with the variant from CGI."
85 | "cgi.evidence_level","Level of evidence for the drug association from CGI."
86 | "cgi.gene","Gene symbol associated with the variant from CGI."
87 | "cgi.primary_tumor_type","Primary tumor type associated with the CGI entry."
88 | "cgi.protein_change","Protein change notation (e.g., BRAF:V600E) from CGI."
89 | "cgi.region","Genomic region description from CGI."
90 | "cgi.source","Source identifier (e.g., PubMed ID, ASCO abstract) from CGI."
91 | "cgi.transcript","Transcript identifier associated with the CGI entry."
92 | "chrom","Chromosome number for the variant."
93 | "civic._license","License information URL for the CIViC data source."
94 | "civic.alleleRegistryId","Allele Registry ID associated with the variant in CIViC."
95 | "civic.clinvarIds","List of associated ClinVar Variation IDs."
96 | "civic.comments.totalCount","Total number of comments associated with the CIViC variant entry."
97 | "civic.contributors.curators.lastActionDate","Timestamp of the last action by a CIViC curator."
98 | "civic.contributors.curators.totalActionCount","Total number of actions performed by a CIViC curator."
99 | "civic.contributors.curators.uniqueActions.action","Type of action performed by a CIViC curator (e.g., REVISION_SUGGESTED)."
100 | "civic.contributors.curators.uniqueActions.count","Count of a specific unique action by a CIViC curator."
101 | "civic.contributors.curators.user.id","User ID of the CIViC curator."
102 | "civic.contributors.editors.lastActionDate","Timestamp of the last action by a CIViC editor."
103 | "civic.contributors.editors.totalActionCount","Total number of actions performed by a CIViC editor."
104 | "civic.contributors.editors.uniqueActions.action","Type of action performed by a CIViC editor (e.g., REVISION_ACCEPTED)."
105 | "civic.contributors.editors.uniqueActions.count","Count of a specific unique action by a CIViC editor."
106 | "civic.contributors.editors.user.id","User ID of the CIViC editor."
107 | "civic.coordinates.chromosome","Chromosome for the variant according to CIViC coordinates."
108 | "civic.coordinates.coordinateType","Type of coordinate system used in CIViC (e.g., GENE_VARIANT_COORDINATE)."
109 | "civic.coordinates.ensemblVersion","Ensembl version used for CIViC coordinates."
110 | "civic.coordinates.referenceBases","Reference bases for the variant in CIViC coordinates."
111 | "civic.coordinates.referenceBuild","Reference genome build used for CIViC coordinates (e.g., GRCH37)."
112 | "civic.coordinates.representativeTranscript","Representative transcript ID used for CIViC coordinates."
113 | "civic.coordinates.start","Start position of the variant in CIViC coordinates."
114 | "civic.coordinates.stop","Stop position of the variant in CIViC coordinates."
115 | "civic.coordinates.variantBases","Variant bases for the variant in CIViC coordinates."
116 | "civic.creationActivity.createdAt","Timestamp when the CIViC variant entry was created."
117 | "civic.creationActivity.user.displayName","Display name of the user who created the CIViC entry."
118 | "civic.creationActivity.user.id","User ID of the creator of the CIViC entry."
119 | "civic.creationActivity.user.role","Role of the user who created the CIViC entry (e.g., ADMIN)."
120 | "civic.deprecated","Boolean indicating if the CIViC variant entry is deprecated."
121 | "civic.feature.deprecated","Boolean indicating if the associated CIViC feature (gene) is deprecated."
122 | "civic.feature.flagged","Boolean indicating if the associated CIViC feature (gene) is flagged."
123 | "civic.feature.id","Internal CIViC ID for the associated feature (gene)."
124 | "civic.feature.link","URL link to the associated CIViC feature page."
125 | "civic.feature.name","Name of the associated CIViC feature (gene symbol)."
126 | "civic.flags.totalCount","Total number of flags associated with the CIViC variant entry."
127 | "civic.hgvsDescriptions","List of HGVS descriptions for the variant from CIViC."
128 | "civic.id","Internal CIViC ID for the variant."
129 | "civic.lastAcceptedRevisionEvent.originatingUser.displayName","Display name of the user whose revision was last accepted."
130 | "civic.lastAcceptedRevisionEvent.originatingUser.id","User ID of the user whose revision was last accepted."
131 | "civic.lastAcceptedRevisionEvent.originatingUser.role","Role of the user whose revision was last accepted."
132 | "civic.lastSubmittedRevisionEvent.originatingUser.displayName","Display name of the user who last submitted a revision."
133 | "civic.lastSubmittedRevisionEvent.originatingUser.id","User ID of the user who last submitted a revision."
134 | "civic.lastSubmittedRevisionEvent.originatingUser.role","Role of the user who last submitted a revision."
135 | "civic.maneSelectTranscript","MANE Select transcript HGVS description from CIViC."
136 | "civic.molecularProfiles.evidenceItems.description","Textual description of the evidence item in CIViC."
137 | "civic.molecularProfiles.evidenceItems.disease.diseaseAliases","List of aliases for the associated disease in CIViC."
138 | "civic.molecularProfiles.evidenceItems.disease.diseaseUrl","URL link to the disease ontology page."
139 | "civic.molecularProfiles.evidenceItems.disease.displayName","Display name of the associated disease in CIViC."
140 | "civic.molecularProfiles.evidenceItems.disease.doid","Disease Ontology ID (DOID) for the associated disease."
141 | "civic.molecularProfiles.evidenceItems.disease.id","Internal CIViC ID for the associated disease."
142 | "civic.molecularProfiles.evidenceItems.disease.link","URL link to the associated CIViC disease page."
143 | "civic.molecularProfiles.evidenceItems.disease.myDiseaseInfo.doDef","Disease Ontology definition for the associated disease."
144 | "civic.molecularProfiles.evidenceItems.disease.myDiseaseInfo.icd10","ICD-10 code(s) for the associated disease."
145 | "civic.molecularProfiles.evidenceItems.disease.myDiseaseInfo.icdo","ICD-O code for the associated disease."
146 | "civic.molecularProfiles.evidenceItems.disease.myDiseaseInfo.mesh","MeSH ID(s) for the associated disease."
147 | "civic.molecularProfiles.evidenceItems.disease.myDiseaseInfo.mondoId","Mondo Disease Ontology ID for the associated disease."
148 | "civic.molecularProfiles.evidenceItems.disease.myDiseaseInfo.ncit","NCI Thesaurus code(s) for the associated disease."
149 | "civic.molecularProfiles.evidenceItems.disease.name","Name of the associated disease in CIViC."
150 | "civic.molecularProfiles.evidenceItems.evidenceDirection","Direction of evidence (SUPPORTS, DOES_NOT_SUPPORT) in CIViC."
151 | "civic.molecularProfiles.evidenceItems.evidenceLevel","Level of evidence (A, B, C, D, E) in CIViC."
152 | "civic.molecularProfiles.evidenceItems.evidenceRating","Rating of the evidence (1-5 stars) in CIViC."
153 | "civic.molecularProfiles.evidenceItems.evidenceType","Type of evidence (e.g., PREDICTIVE, DIAGNOSTIC) in CIViC."
154 | "civic.molecularProfiles.evidenceItems.flagged","Boolean indicating if the CIViC evidence item is flagged."
155 | "civic.molecularProfiles.evidenceItems.id","Internal CIViC ID for the evidence item."
156 | "civic.molecularProfiles.evidenceItems.molecularProfile.id","Internal CIViC ID for the associated molecular profile."
157 | "civic.molecularProfiles.evidenceItems.name","Name of the evidence item (e.g., EID79)."
158 | "civic.molecularProfiles.evidenceItems.significance","Clinical significance of the evidence (e.g., SENSITIVITYRESPONSE, RESISTANCE) in CIViC."
159 | "civic.molecularProfiles.evidenceItems.source.abstract","Abstract of the source publication from CIViC."
160 | "civic.molecularProfiles.evidenceItems.source.authorString","Author list from the source publication."
161 | "civic.molecularProfiles.evidenceItems.source.citation","Short citation format for the source publication."
162 | "civic.molecularProfiles.evidenceItems.source.citationId","PubMed ID (PMID) or ASCO ID for the source."
163 | "civic.molecularProfiles.evidenceItems.source.id","Internal CIViC ID for the source."
164 | "civic.molecularProfiles.evidenceItems.source.journal","Journal name of the source publication."
165 | "civic.molecularProfiles.evidenceItems.source.link","URL link to the associated CIViC source page."
166 | "civic.molecularProfiles.evidenceItems.source.name","Formatted name of the source (e.g., PubMed: Howell et al., 2011)."
167 | "civic.molecularProfiles.evidenceItems.source.openAccess","Boolean indicating if the source is open access."
168 | "civic.molecularProfiles.evidenceItems.source.pmcId","PubMed Central ID (PMCID) if available."
169 | "civic.molecularProfiles.evidenceItems.source.publicationDate","Publication date of the source."
170 | "civic.molecularProfiles.evidenceItems.source.retracted","Boolean indicating if the source has been retracted."
171 | "civic.molecularProfiles.evidenceItems.source.retractionDate","Date the source was retracted, if applicable."
172 | "civic.molecularProfiles.evidenceItems.source.retractionNature","Nature of the retraction, if applicable."
173 | "civic.molecularProfiles.evidenceItems.source.retractionReasons","Reason(s) for retraction, if applicable."
174 | "civic.molecularProfiles.evidenceItems.source.sourceType","Type of source (e.g., PUBMED, ASCO)."
175 | "civic.molecularProfiles.evidenceItems.source.sourceUrl","URL link to the original source."
176 | "civic.molecularProfiles.evidenceItems.source.title","Title of the source publication."
177 | "civic.molecularProfiles.evidenceItems.therapies.deprecated","Boolean indicating if the therapy entry is deprecated in CIViC."
178 | "civic.molecularProfiles.evidenceItems.therapies.id","Internal CIViC ID for the therapy."
179 | "civic.molecularProfiles.evidenceItems.therapies.link","URL link to the associated CIViC therapy page."
180 | "civic.molecularProfiles.evidenceItems.therapies.name","Name of the therapy in CIViC."
181 | "civic.molecularProfiles.evidenceItems.variantOrigin","Origin of the variant (SOMATIC, GERMLINE, NA) for the evidence item."
182 | "civic.molecularProfiles.id","Internal CIViC ID for the molecular profile."
183 | "civic.molecularProfiles.molecularProfileAliases","List of aliases for the molecular profile in CIViC."
184 | "civic.molecularProfiles.molecularProfileScore","Score associated with the molecular profile in CIViC."
185 | "civic.molecularProfiles.name","Name of the molecular profile in CIViC."
186 | "civic.molecularProfiles.variants.id","Internal CIViC ID for the variant within the profile."
187 | "civic.molecularProfiles.variants.link","URL link to the associated CIViC variant page."
188 | "civic.molecularProfiles.variants.name","Name of the variant within the profile."
189 | "civic.name","Name of the variant in CIViC (e.g., V600E)."
190 | "civic.openCravatUrl","URL link to the OpenCRAVAT report for the variant."
191 | "civic.openRevisionCount","Number of open revisions for the CIViC variant entry."
192 | "civic.revisions.totalCount","Total number of revisions for the CIViC variant entry."
193 | "civic.variantAliases","List of aliases for the variant in CIViC."
194 | "civic.variantTypes.id","Internal CIViC ID for the variant type."
195 | "civic.variantTypes.link","URL link to the associated CIViC variant type page."
196 | "civic.variantTypes.name","Name of the variant type (e.g., Missense Variant)."
197 | "civic.variantTypes.soid","Sequence Ontology ID for the variant type."
198 | "clinvar._license","License information URL for the ClinVar data source."
199 | "clinvar.allele_id","ClinVar Allele ID."
200 | "clinvar.alt","Alternate allele in ClinVar."
201 | "clinvar.chrom","Chromosome number in ClinVar."
202 | "clinvar.cytogenic","Cytogenetic location (e.g., 7q34)."
203 | "clinvar.gene.id","Entrez Gene ID associated with the ClinVar record."
204 | "clinvar.gene.symbol","Gene symbol associated with the ClinVar record."
205 | "clinvar.hg19.end","End position of the variant in hg19 assembly."
206 | "clinvar.hg19.start","Start position of the variant in hg19 assembly."
207 | "clinvar.hg38.end","End position of the variant in hg38 assembly."
208 | "clinvar.hg38.start","Start position of the variant in hg38 assembly."
209 | "clinvar.hgvs.coding","List of HGVS coding sequence notations."
210 | "clinvar.hgvs.genomic","List of HGVS genomic sequence notations."
211 | "clinvar.hgvs.protein","List of HGVS protein sequence notations."
212 | "clinvar.omim","Associated Online Mendelian Inheritance in Man (OMIM) ID(s)."
213 | "clinvar.rcv.accession","ClinVar RCV accession number (identifies a submitted interpretation)."
214 | "clinvar.rcv.clinical_significance","Clinical significance assertion for the RCV record."
215 | "clinvar.rcv.conditions.identifiers.human_phenotype_ontology","Associated Human Phenotype Ontology (HPO) IDs."
216 | "clinvar.rcv.conditions.identifiers.medgen","Associated MedGen Concept Unique Identifier (CUI)."
217 | "clinvar.rcv.conditions.identifiers.mesh","Associated Medical Subject Headings (MeSH) ID(s)."
218 | "clinvar.rcv.conditions.identifiers.mondo","Associated Mondo Disease Ontology ID(s)."
219 | "clinvar.rcv.conditions.identifiers.omim","Associated OMIM ID(s) for the condition."
220 | "clinvar.rcv.conditions.identifiers.orphanet","Associated Orphanet ID(s)."
221 | "clinvar.rcv.conditions.name","Name of the condition associated with the RCV record."
222 | "clinvar.rcv.conditions.synonyms","Synonyms for the condition associated with the RCV record."
223 | "clinvar.rcv.last_evaluated","Date the RCV record was last evaluated by the submitter."
224 | "clinvar.rcv.number_submitters","Number of submitters for this interpretation."
225 | "clinvar.rcv.origin","Origin of the allele (somatic, germline, etc.)."
226 | "clinvar.rcv.preferred_name","Submitter's preferred name for the variant."
227 | "clinvar.rcv.review_status","Review status of the ClinVar RCV record."
228 | "clinvar.ref","Reference allele in ClinVar."
229 | "clinvar.rsid","Associated dbSNP Reference SNP (rs) identifier."
230 | "clinvar.type","Type of variant (e.g., single nucleotide variant)."
231 | "clinvar.variant_id","ClinVar Variation ID."
232 | "cosmic._license","License information URL for the COSMIC data source."
233 | "cosmic.alt","Alternate allele in COSMIC (relative to reference)."
234 | "cosmic.chrom","Chromosome number in COSMIC."
235 | "cosmic.cosmic_id","COSMIC mutation identifier (e.g., COSM476)."
236 | "cosmic.hg19.end","End position of the variant in hg19 assembly (COSMIC)."
237 | "cosmic.hg19.start","Start position of the variant in hg19 assembly (COSMIC)."
238 | "cosmic.mut_freq","Mutation frequency reported in COSMIC samples (?). UNKNOWN."
239 | "cosmic.mut_nt","Nucleotide change reported in COSMIC (e.g., T>A)."
240 | "cosmic.ref","Reference allele in COSMIC."
241 | "cosmic.tumor_site","Primary tumor site where the mutation was observed in COSMIC."
242 | "dbnsfp._license","License information URL for the dbNSFP data source."
243 | "dbnsfp.aa.alt","Alternate amino acid predicted by dbNSFP."
244 | "dbnsfp.aa.codon_degeneracy","Codon degeneracy value. UNKNOWN significance here."
245 | "dbnsfp.aa.codonpos","Position within the codon (1, 2, or 3)."
246 | "dbnsfp.aa.pos","Amino acid position in the protein sequence (for different transcripts)."
247 | "dbnsfp.aa.ref","Reference amino acid from dbNSFP."
248 | "dbnsfp.aa.refcodon","Reference codon sequence(s)."
249 | "dbnsfp.alphamissense.pred","AlphaMissense prediction (Pathogenic/Benign) for different transcripts."
250 | "dbnsfp.alphamissense.rankscore","AlphaMissense rank score (0-1, higher is more pathogenic)."
251 | "dbnsfp.alphamissense.score","AlphaMissense raw score for different transcripts."
252 | "dbnsfp.alt","Alternate allele in dbNSFP."
253 | "dbnsfp.ancestral_allele","Predicted ancestral allele from dbNSFP."
254 | "dbnsfp.appris","APPRIS annotation for the transcript (e.g., principal, alternative)."
255 | "dbnsfp.bayesdel.add_af.pred","BayesDel prediction (Deleterious/Tolerated) incorporating allele frequency."
256 | "dbnsfp.bayesdel.add_af.rankscore","BayesDel rank score incorporating allele frequency."
257 | "dbnsfp.bayesdel.add_af.score","BayesDel score incorporating allele frequency."
258 | "dbnsfp.bayesdel.no_af.pred","BayesDel prediction (Deleterious/Tolerated) without allele frequency."
259 | "dbnsfp.bayesdel.no_af.rankscore","BayesDel rank score without allele frequency."
260 | "dbnsfp.bayesdel.no_af.score","BayesDel score without allele frequency."
261 | "dbnsfp.bstatistic.converted_rankscore","BStatistic converted rank score."
262 | "dbnsfp.bstatistic.score","BStatistic raw score."
263 | "dbnsfp.chrom","Chromosome number in dbNSFP."
264 | "dbnsfp.clinpred.pred","ClinPred prediction (Deleterious/Benign)."
265 | "dbnsfp.clinpred.rankscore","ClinPred rank score."
266 | "dbnsfp.clinpred.score","ClinPred raw score."
267 | "dbnsfp.clinvar.clinvar_id","Associated ClinVar Variation ID in dbNSFP."
268 | "dbnsfp.clinvar.clnsig","ClinVar clinical significance assertions from dbNSFP."
269 | "dbnsfp.clinvar.hgvs","HGVS genomic notation from ClinVar via dbNSFP."
270 | "dbnsfp.clinvar.medgen","Associated MedGen CUIs from ClinVar via dbNSFP."
271 | "dbnsfp.clinvar.omim","Associated OMIM IDs from ClinVar via dbNSFP."
272 | "dbnsfp.clinvar.orphanet","Associated Orphanet IDs from ClinVar via dbNSFP."
273 | "dbnsfp.clinvar.review","ClinVar review status from dbNSFP."
274 | "dbnsfp.clinvar.trait","Associated traits/diseases from ClinVar via dbNSFP."
275 | "dbnsfp.clinvar.var_source","Sources cited for the ClinVar entry via dbNSFP."
276 | "dbnsfp.dann.rankscore","DANN rank score."
277 | "dbnsfp.dann.score","DANN raw score for predicting deleteriousness."
278 | "dbnsfp.deogen2.pred","DEOGEN2 prediction (Deleterious/Tolerated)."
279 | "dbnsfp.deogen2.rankscore","DEOGEN2 rank score."
280 | "dbnsfp.deogen2.score","DEOGEN2 raw score."
281 | "dbnsfp.eigen.phred_coding","Eigen Phred-scaled score for coding variants."
282 | "dbnsfp.eigen.raw_coding","Eigen raw score for coding variants."
283 | "dbnsfp.eigen.raw_coding_rankscore","Eigen rank score for coding variants."
284 | "dbnsfp.eigen-pc.phred_coding","Eigen-PC Phred-scaled score for coding variants (principal components)."
285 | "dbnsfp.eigen-pc.raw_coding","Eigen-PC raw score for coding variants (principal components)."
286 | "dbnsfp.eigen-pc.raw_coding_rankscore","Eigen-PC rank score for coding variants (principal components)."
287 | "dbnsfp.ensembl.geneid","Ensembl Gene ID from dbNSFP."
288 | "dbnsfp.ensembl.proteinid","Ensembl Protein ID from dbNSFP."
289 | "dbnsfp.ensembl.transcriptid","Ensembl Transcript ID from dbNSFP."
290 | "dbnsfp.esm1b.pred","ESM-1b prediction (Deleterious/Benign)."
291 | "dbnsfp.esm1b.rankscore","ESM-1b rank score."
292 | "dbnsfp.esm1b.score","ESM-1b raw score."
293 | "dbnsfp.eve.class10_pred","EVE prediction class (Pathogenic/Benign/Uncertain) when 10% of variants are set as uncertain."
294 | "dbnsfp.eve.class20_pred","EVE prediction class when 20% of variants are set as uncertain."
295 | "dbnsfp.eve.class25_pred","EVE prediction class when 25% of variants are set as uncertain."
296 | "dbnsfp.eve.class30_pred","EVE prediction class when 30% of variants are set as uncertain."
297 | "dbnsfp.eve.class40_pred","EVE prediction class when 40% of variants are set as uncertain."
298 | "dbnsfp.eve.class50_pred","EVE prediction class when 50% of variants are set as uncertain."
299 | "dbnsfp.eve.class60_pred","EVE prediction class when 60% of variants are set as uncertain."
300 | "dbnsfp.eve.class70_pred","EVE prediction class when 70% of variants are set as uncertain."
301 | "dbnsfp.eve.class75_pred","EVE prediction class when 75% of variants are set as uncertain."
302 | "dbnsfp.eve.class80_pred","EVE prediction class when 80% of variants are set as uncertain."
303 | "dbnsfp.eve.class90_pred","EVE prediction class when 90% of variants are set as uncertain."
304 | "dbnsfp.eve.rankscore","EVE rank score."
305 | "dbnsfp.eve.score","EVE raw score."
306 | "dbnsfp.exac.ac","Allele count in ExAC database from dbNSFP."
307 | "dbnsfp.exac.adj_ac","Adjusted allele count in ExAC from dbNSFP."
308 | "dbnsfp.exac.adj_af","Adjusted allele frequency in ExAC from dbNSFP."
309 | "dbnsfp.exac.af","Allele frequency in ExAC database from dbNSFP."
310 | "dbnsfp.exac.afr.ac","Allele count in ExAC African population."
311 | "dbnsfp.exac.afr.af","Allele frequency in ExAC African population."
312 | "dbnsfp.exac.amr.ac","Allele count in ExAC American population."
313 | "dbnsfp.exac.amr.af","Allele frequency in ExAC American population."
314 | "dbnsfp.exac.eas.ac","Allele count in ExAC East Asian population."
315 | "dbnsfp.exac.eas.af","Allele frequency in ExAC East Asian population."
316 | "dbnsfp.exac.fin.ac","Allele count in ExAC Finnish population."
317 | "dbnsfp.exac.fin.af","Allele frequency in ExAC Finnish population."
318 | "dbnsfp.exac.nfe.ac","Allele count in ExAC Non-Finnish European population."
319 | "dbnsfp.exac.nfe.af","Allele frequency in ExAC Non-Finnish European population."
320 | "dbnsfp.exac.sas.ac","Allele count in ExAC South Asian population."
321 | "dbnsfp.exac.sas.af","Allele frequency in ExAC South Asian population."
322 | "dbnsfp.exac_nonpsych.ac","Allele count in ExAC non-psychiatric subset."
323 | "dbnsfp.exac_nonpsych.adj_ac","Adjusted allele count in ExAC non-psychiatric subset."
324 | "dbnsfp.exac_nonpsych.adj_af","Adjusted allele frequency in ExAC non-psychiatric subset."
325 | "dbnsfp.exac_nonpsych.af","Allele frequency in ExAC non-psychiatric subset."
326 | "dbnsfp.exac_nonpsych.afr.ac","Allele count in ExAC non-psych African population."
327 | "dbnsfp.exac_nonpsych.afr.af","Allele frequency in ExAC non-psych African population."
328 | "dbnsfp.exac_nonpsych.amr.ac","Allele count in ExAC non-psych American population."
329 | "dbnsfp.exac_nonpsych.amr.af","Allele frequency in ExAC non-psych American population."
330 | "dbnsfp.exac_nonpsych.eas.ac","Allele count in ExAC non-psych East Asian population."
331 | "dbnsfp.exac_nonpsych.eas.af","Allele frequency in ExAC non-psych East Asian population."
332 | "dbnsfp.exac_nonpsych.fin.ac","Allele count in ExAC non-psych Finnish population."
333 | "dbnsfp.exac_nonpsych.fin.af","Allele frequency in ExAC non-psych Finnish population."
334 | "dbnsfp.exac_nonpsych.nfe.ac","Allele count in ExAC non-psych Non-Finnish European population."
335 | "dbnsfp.exac_nonpsych.nfe.af","Allele frequency in ExAC non-psych Non-Finnish European population."
336 | "dbnsfp.exac_nonpsych.sas.ac","Allele count in ExAC non-psych South Asian population."
337 | "dbnsfp.exac_nonpsych.sas.af","Allele frequency in ExAC non-psych South Asian population."
338 | "dbnsfp.exac_nontcga.ac","Allele count in ExAC non-TCGA subset."
339 | "dbnsfp.exac_nontcga.adj_ac","Adjusted allele count in ExAC non-TCGA subset."
340 | "dbnsfp.exac_nontcga.adj_af","Adjusted allele frequency in ExAC non-TCGA subset."
341 | "dbnsfp.exac_nontcga.af","Allele frequency in ExAC non-TCGA subset."
342 | "dbnsfp.exac_nontcga.afr.ac","Allele count in ExAC non-TCGA African population."
343 | "dbnsfp.exac_nontcga.afr.af","Allele frequency in ExAC non-TCGA African population."
344 | "dbnsfp.exac_nontcga.amr.ac","Allele count in ExAC non-TCGA American population."
345 | "dbnsfp.exac_nontcga.amr.af","Allele frequency in ExAC non-TCGA American population."
346 | "dbnsfp.exac_nontcga.eas.ac","Allele count in ExAC non-TCGA East Asian population."
347 | "dbnsfp.exac_nontcga.eas.af","Allele frequency in ExAC non-TCGA East Asian population."
348 | "dbnsfp.exac_nontcga.fin.ac","Allele count in ExAC non-TCGA Finnish population."
349 | "dbnsfp.exac_nontcga.fin.af","Allele frequency in ExAC non-TCGA Finnish population."
350 | "dbnsfp.exac_nontcga.nfe.ac","Allele count in ExAC non-TCGA Non-Finnish European population."
351 | "dbnsfp.exac_nontcga.nfe.af","Allele frequency in ExAC non-TCGA Non-Finnish European population."
352 | "dbnsfp.exac_nontcga.sas.ac","Allele count in ExAC non-TCGA South Asian population."
353 | "dbnsfp.exac_nontcga.sas.af","Allele frequency in ExAC non-TCGA South Asian population."
354 | "dbnsfp.fathmm-mkl.coding_group","FATHMM-MKL coding group assignment."
355 | "dbnsfp.fathmm-mkl.coding_pred","FATHMM-MKL prediction (Deleterious/Neutral) for coding variants."
356 | "dbnsfp.fathmm-mkl.coding_rankscore","FATHMM-MKL rank score for coding variants."
357 | "dbnsfp.fathmm-mkl.coding_score","FATHMM-MKL raw score for coding variants."
358 | "dbnsfp.fathmm-xf.coding_pred","FATHMM-XF prediction (Deleterious/Neutral) for coding variants."
359 | "dbnsfp.fathmm-xf.coding_rankscore","FATHMM-XF rank score for coding variants."
360 | "dbnsfp.fathmm-xf.coding_score","FATHMM-XF raw score for coding variants."
361 | "dbnsfp.fitcons.gm12878.confidence_value","FitCons confidence value in GM12878 cell line."
362 | "dbnsfp.fitcons.gm12878.rankscore","FitCons rank score in GM12878 cell line."
363 | "dbnsfp.fitcons.gm12878.score","FitCons raw score in GM12878 cell line."
364 | "dbnsfp.fitcons.h1-hesc.confidence_value","FitCons confidence value in H1-hESC cell line."
365 | "dbnsfp.fitcons.h1-hesc.rankscore","FitCons rank score in H1-hESC cell line."
366 | "dbnsfp.fitcons.h1-hesc.score","FitCons raw score in H1-hESC cell line."
367 | "dbnsfp.fitcons.huvec.confidence_value","FitCons confidence value in HUVEC cell line."
368 | "dbnsfp.fitcons.huvec.rankscore","FitCons rank score in HUVEC cell line."
369 | "dbnsfp.fitcons.huvec.score","FitCons raw score in HUVEC cell line."
370 | "dbnsfp.fitcons.integrated.confidence_value","Integrated FitCons confidence value across cell lines."
371 | "dbnsfp.fitcons.integrated.rankscore","Integrated FitCons rank score across cell lines."
372 | "dbnsfp.fitcons.integrated.score","Integrated FitCons raw score across cell lines."
373 | "dbnsfp.gencode_basic","Indicates if transcript is part of GENCODE basic set (Y/N)."
374 | "dbnsfp.genename","Gene name(s) from dbNSFP."
375 | "dbnsfp.genocanyon.rankscore","GenoCanyon rank score."
376 | "dbnsfp.genocanyon.score","GenoCanyon raw score for functional prediction."
377 | "dbnsfp.gerp++.nr","GERP++ Neutral Rate score."
378 | "dbnsfp.gerp++.rs","GERP++ Rejected Substitutions score (conservation)."
379 | "dbnsfp.gerp++.rs_rankscore","GERP++ RS rank score."
380 | "dbnsfp.gmvp.rankscore","gMVP (graph attention neural network model for missense variant pathogenicity) rank score."
381 | "dbnsfp.gmvp.score","GMVP raw score."
382 | "dbnsfp.hg18.end","End position in hg18 assembly."
383 | "dbnsfp.hg18.start","Start position in hg18 assembly."
384 | "dbnsfp.hg19.end","End position in hg19 assembly."
385 | "dbnsfp.hg19.start","Start position in hg19 assembly."
386 | "dbnsfp.hg38.end","End position in hg38 assembly."
387 | "dbnsfp.hg38.start","Start position in hg38 assembly."
388 | "dbnsfp.hgvsc","HGVS coding sequence notation(s) from dbNSFP."
389 | "dbnsfp.hgvsp","HGVS protein sequence notation(s) from dbNSFP."
390 | "dbnsfp.interpro.domain","InterPro protein domain annotation(s)."
391 | "dbnsfp.list-s2.pred","LIST-S2 prediction (Tolerated/Damaging)."
392 | "dbnsfp.list-s2.rankscore","LIST-S2 rank score."
393 | "dbnsfp.list-s2.score","LIST-S2 raw score."
394 | "dbnsfp.lrt.converted_rankscore","LRT converted rank score."
395 | "dbnsfp.lrt.omega","LRT omega value (dN/dS ratio)."
396 | "dbnsfp.lrt.pred","LRT prediction (Deleterious/Neutral/Unknown)."
397 | "dbnsfp.lrt.score","LRT raw score (likelihood ratio test)."
398 | "dbnsfp.m-cap.pred","M-CAP prediction (Deleterious/Tolerated)."
399 | "dbnsfp.m-cap.rankscore","M-CAP rank score."
400 | "dbnsfp.m-cap.score","M-CAP raw score."
401 | "dbnsfp.metalr.pred","MetaLR prediction (Tolerated/Damaging)."
402 | "dbnsfp.metalr.rankscore","MetaLR rank score."
403 | "dbnsfp.metalr.score","MetaLR raw score."
404 | "dbnsfp.metarnn.pred","MetaRNN prediction (Deleterious/Benign)."
405 | "dbnsfp.metarnn.rankscore","MetaRNN rank score."
406 | "dbnsfp.metarnn.score","MetaRNN raw score."
407 | "dbnsfp.metasvm.pred","MetaSVM prediction (Tolerated/Damaging)."
408 | "dbnsfp.metasvm.rankscore","MetaSVM rank score."
409 | "dbnsfp.metasvm.score","MetaSVM raw score."
410 | "dbnsfp.mpc.rankscore","MPC (Missense badness, PolyPhen-2, and Constraint) rank score."
411 | "dbnsfp.mpc.score","MPC raw score."
412 | "dbnsfp.mutationassessor.pred","MutationAssessor prediction (high/medium/low/neutral functional impact)."
413 | "dbnsfp.mutationassessor.rankscore","MutationAssessor rank score."
414 | "dbnsfp.mutationassessor.score","MutationAssessor raw score (functional impact score)."
415 | "dbnsfp.mutationtaster.aae","Amino acid change predicted by MutationTaster."
416 | "dbnsfp.mutationtaster.converted_rankscore","MutationTaster converted rank score."
417 | "dbnsfp.mutationtaster.model","MutationTaster model used for prediction."
418 | "dbnsfp.mutationtaster.pred","MutationTaster prediction (disease_causing_automatic/polymorphism_automatic)."
419 | "dbnsfp.mutationtaster.score","MutationTaster raw score (probability of being deleterious)."
420 | "dbnsfp.mutformer.rankscore","MutFormer rank score."
421 | "dbnsfp.mutformer.score","MutFormer raw score."
422 | "dbnsfp.mutpred.aa_change","Amino acid change considered by MutPred."
423 | "dbnsfp.mutpred.accession","UniProt accession used by MutPred."
424 | "dbnsfp.mutpred.pred.mechanism","Molecular mechanism predicted by MutPred to be affected."
425 | "dbnsfp.mutpred.pred.p_val","P-value associated with the MutPred mechanism prediction."
426 | "dbnsfp.mutpred.rankscore","MutPred rank score."
427 | "dbnsfp.mutpred.score","MutPred raw score (probability of being deleterious)."
428 | "dbnsfp.mvp.rankscore","MVP (Missense Variant Pathogenicity) rank score."
429 | "dbnsfp.mvp.score","MVP raw score."
430 | "dbnsfp.phactboost.rankscore","phACTboost rank score."
431 | "dbnsfp.phactboost.score","phACTboost raw score."
432 | "dbnsfp.phastcons.100way_vertebrate.rankscore","PhastCons 100-way vertebrate conservation rank score."
433 | "dbnsfp.phastcons.100way_vertebrate.score","PhastCons 100-way vertebrate conservation score."
434 | "dbnsfp.phastcons.17way_primate.rankscore","PhastCons 17-way primate conservation rank score."
435 | "dbnsfp.phastcons.17way_primate.score","PhastCons 17-way primate conservation score."
436 | "dbnsfp.phastcons.470way_mammalian.rankscore","PhastCons 470-way mammalian conservation rank score."
437 | "dbnsfp.phastcons.470way_mammalian.score","PhastCons 470-way mammalian conservation score."
438 | "dbnsfp.phylop.100way_vertebrate.rankscore","PhyloP 100-way vertebrate conservation rank score."
439 | "dbnsfp.phylop.100way_vertebrate.score","PhyloP 100-way vertebrate conservation score."
440 | "dbnsfp.phylop.17way_primate.rankscore","PhyloP 17-way primate conservation rank score."
441 | "dbnsfp.phylop.17way_primate.score","PhyloP 17-way primate conservation score."
442 | "dbnsfp.phylop.470way_mammalian.rankscore","PhyloP 470-way mammalian conservation rank score."
443 | "dbnsfp.phylop.470way_mammalian.score","PhyloP 470-way mammalian conservation score."
444 | "dbnsfp.polyphen2.hdiv.pred","PolyPhen-2 HDIV prediction (Probably_damaging/Possibly_damaging/Benign)."
445 | "dbnsfp.polyphen2.hdiv.rankscore","PolyPhen-2 HDIV rank score."
446 | "dbnsfp.polyphen2.hdiv.score","PolyPhen-2 HDIV raw score."
447 | "dbnsfp.polyphen2.hvar.pred","PolyPhen-2 HVAR prediction (Probably_damaging/Possibly_damaging/Benign)."
448 | "dbnsfp.polyphen2.hvar.rankscore","PolyPhen-2 HVAR rank score."
449 | "dbnsfp.polyphen2.hvar.score","PolyPhen-2 HVAR raw score."
450 | "dbnsfp.primateai.pred","PrimateAI prediction (Deleterious/Tolerated)."
451 | "dbnsfp.primateai.rankscore","PrimateAI rank score."
452 | "dbnsfp.primateai.score","PrimateAI raw score."
453 | "dbnsfp.ref","Reference allele in dbNSFP."
454 | "dbnsfp.reliability_index","Number of observed component scores used for MetaSVM and MetaLR (1-10); higher values mean fewer imputed inputs and more reliable predictions."
455 | "dbnsfp.revel.rankscore","REVEL (Rare Exome Variant Ensemble Learner) rank score."
456 | "dbnsfp.revel.score","REVEL raw score."
457 | "dbnsfp.rsid","Associated dbSNP rsID from dbNSFP."
458 | "dbnsfp.siphy_29way.logodds_rankscore","SiPhy 29-way log-odds rank score."
459 | "dbnsfp.siphy_29way.logodds_score","SiPhy 29-way log-odds conservation score."
460 | "dbnsfp.siphy_29way.pi.a","SiPhy estimated probability of A at this position."
461 | "dbnsfp.siphy_29way.pi.c","SiPhy estimated probability of C at this position."
462 | "dbnsfp.siphy_29way.pi.g","SiPhy estimated probability of G at this position."
463 | "dbnsfp.siphy_29way.pi.t","SiPhy estimated probability of T at this position."
464 | "dbnsfp.tsl","Transcript Support Level from Ensembl."
465 | "dbnsfp.uniprot.acc","UniProt accession number(s)."
466 | "dbnsfp.uniprot.entry","UniProt entry name(s)."
467 | "dbnsfp.varity.er.rankscore","VARITY_ER (model trained for extremely rare variants) rank score."
468 | "dbnsfp.varity.er.score","VARITY_ER raw score."
469 | "dbnsfp.varity.er_loo.rankscore","VARITY_ER_LOO (leave-one-out) rank score."
470 | "dbnsfp.varity.er_loo.score","VARITY_ER_LOO raw score."
471 | "dbnsfp.varity.r.rankscore","VARITY_R (model trained for rare variants) rank score."
472 | "dbnsfp.varity.r.score","VARITY_R raw score."
473 | "dbnsfp.varity.r_loo.rankscore","VARITY_R_LOO (leave-one-out) rank score."
474 | "dbnsfp.varity.r_loo.score","VARITY_R_LOO raw score."
475 | "dbnsfp.vep_canonical","Indicates if the transcript is the VEP canonical transcript (YES/NO)."
476 | "dbsnp._license","License information URL for the dbSNP data source."
477 | "dbsnp.alleles.allele","Allele base (A, C, G, or T)."
478 | "dbsnp.alleles.freq.exac","Allele frequency in ExAC as reported by dbSNP."
479 | "dbsnp.alleles.freq.gnomad_exomes","Allele frequency in gnomAD exomes as reported by dbSNP."
480 | "dbsnp.alt","Alternate allele(s) in dbSNP."
481 | "dbsnp.chrom","Chromosome number in dbSNP."
482 | "dbsnp.citations","List of PubMed IDs citing this dbSNP entry."
483 | "dbsnp.dbsnp_build","dbSNP build number when the information was extracted."
484 | "dbsnp.gene.geneid","Entrez Gene ID associated with the dbSNP record."
485 | "dbsnp.gene.is_pseudo","Boolean indicating if the associated gene is a pseudogene."
486 | "dbsnp.gene.name","Full name of the associated gene."
487 | "dbsnp.gene.rnas.codon_aligned_transcript_change.deleted_sequence","Deleted sequence in codon-aligned transcript context."
488 | "dbsnp.gene.rnas.codon_aligned_transcript_change.inserted_sequence","Inserted sequence in codon-aligned transcript context."
489 | "dbsnp.gene.rnas.codon_aligned_transcript_change.position","Position of change in codon-aligned transcript context."
490 | "dbsnp.gene.rnas.codon_aligned_transcript_change.seq_id","Sequence ID for codon-aligned transcript context."
491 | "dbsnp.gene.rnas.hgvs","HGVS notation for the specific RNA transcript."
492 | "dbsnp.gene.rnas.protein.variant.spdi.deleted_sequence","Deleted sequence in SPDI protein context."
493 | "dbsnp.gene.rnas.protein.variant.spdi.inserted_sequence","Inserted sequence in SPDI protein context."
494 | "dbsnp.gene.rnas.protein.variant.spdi.position","Position of change in SPDI protein context."
495 | "dbsnp.gene.rnas.protein.variant.spdi.seq_id","Sequence ID for SPDI protein context."
496 | "dbsnp.gene.rnas.protein_product.refseq","RefSeq protein product identifier (NP_)."
497 | "dbsnp.gene.rnas.refseq","RefSeq RNA transcript identifier (NM_ or XM_)."
498 | "dbsnp.gene.rnas.so.accession","Sequence Ontology term accession (SO:...)."
499 | "dbsnp.gene.rnas.so.name","Sequence Ontology term name (e.g., coding_sequence_variant)."
500 | "dbsnp.gene.strand","Gene strand (+ or -)."
501 | "dbsnp.gene.symbol","Gene symbol (e.g., BRAF)."
502 | "dbsnp.hg19.end","End position in hg19 assembly (dbSNP)."
503 | "dbsnp.hg19.start","Start position in hg19 assembly (dbSNP)."
504 | "dbsnp.ref","Reference allele in dbSNP."
505 | "dbsnp.rsid","dbSNP Reference SNP (rs) identifier."
506 | "dbsnp.vartype","Type of variation (e.g., snv)."
507 | "docm.aa_change","Amino acid change notation (e.g., p.V600E) from DOCM."
508 | "docm.all_domains","All protein domains overlapping the variant position from DOCM."
509 | "docm.alt","Alternate allele in DOCM."
510 | "docm.c_position","cDNA position notation (e.g., c.1799) from DOCM."
511 | "docm.chrom","Chromosome number in DOCM."
512 | "docm.default_gene_name","Default gene name used in DOCM."
513 | "docm.deletion_substructures","Substructure information for deletions (often '-'). UNKNOWN."
514 | "docm.disease","Disease associated with the variant in DOCM."
515 | "docm.doid","Disease Ontology ID (DOID) associated with the variant in DOCM."
516 | "docm.domain","Specific protein domain containing the variant from DOCM."
517 | "docm.ensembl_gene_id","Ensembl gene ID from DOCM."
518 | "docm.genename","Gene name from DOCM."
519 | "docm.genename_source","Source of the gene name (e.g., HGNC) in DOCM."
520 | "docm.hg19.end","End position in hg19 assembly (DOCM)."
521 | "docm.hg19.start","Start position in hg19 assembly (DOCM)."
522 | "docm.primary","Indicates if this is the primary transcript used (?). UNKNOWN."
523 | "docm.pubmed_id","Associated PubMed IDs from DOCM."
524 | "docm.ref","Reference allele in DOCM."
525 | "docm.source","Original data source cited by DOCM (e.g., MyCancerGenome)."
526 | "docm.strand","Genomic strand (+ or -) in DOCM."
527 | "docm.transcript_error","Indicates errors found during transcript mapping in DOCM."
528 | "docm.transcript_name","Transcript name used for annotation in DOCM."
529 | "docm.transcript_source","Source of the transcript information (e.g., ensembl) in DOCM."
530 | "docm.transcript_species","Species of the transcript (e.g., human) in DOCM."
531 | "docm.transcript_status","Status of the transcript (e.g., known) in DOCM."
532 | "docm.transcript_version","Version of the transcript used in DOCM."
533 | "docm.trv_type","Type of transcript variation (e.g., missense) in DOCM."
534 | "docm.type","Type of variant (e.g., SNP) in DOCM."
535 | "docm.ucsc_cons","UCSC conservation score (?). UNKNOWN."
536 | "docm.url","URL link to the source entry in DOCM."
537 | "emv._license","License information URL for the EMV data source."
538 | "emv.egl_classification","EGL classification of the variant (e.g., Pathogenic)."
539 | "emv.egl_classification_date","Date of the EGL classification."
540 | "emv.egl_protein","Protein change notation used by EGL."
541 | "emv.egl_variant","Variant notation used by EGL (often HGVS coding)."
542 | "emv.exon","Exon number containing the variant from EMV."
543 | "emv.gene","Gene symbol from EMV."
544 | "emv.hgvs","List of HGVS notations associated with the variant in EMV."
545 | "emv.variant_id","Internal EMV variant identifier."
546 | "exac._license","License information URL for the ExAC data source."
547 | "exac.ac.ac","Total allele count in ExAC."
548 | "exac.ac.ac_adj","Adjusted total allele count in ExAC (after filtering)."
549 | "exac.ac.ac_afr","Allele count in ExAC African/African American population."
550 | "exac.ac.ac_amr","Allele count in ExAC American population."
551 | "exac.ac.ac_eas","Allele count in ExAC East Asian population."
552 | "exac.ac.ac_female","Allele count in ExAC female population."
553 | "exac.ac.ac_fin","Allele count in ExAC Finnish population."
554 | "exac.ac.ac_het","Heterozygous allele count in ExAC."
555 | "exac.ac.ac_hom","Homozygous allele count in ExAC."
556 | "exac.ac.ac_male","Allele count in ExAC male population."
557 | "exac.ac.ac_nfe","Allele count in ExAC Non-Finnish European population."
558 | "exac.ac.ac_oth","Allele count in ExAC Other population."
559 | "exac.ac.ac_sas","Allele count in ExAC South Asian population."
560 | "exac.af","Allele frequency in ExAC."
561 | "exac.alleles","Alternate allele(s) observed in ExAC."
562 | "exac.alt","Alternate allele in ExAC format."
563 | "exac.an.an","Total number of alleles genotyped in ExAC."
564 | "exac.an.an_adj","Adjusted total number of alleles in ExAC (after filtering)."
565 | "exac.an.an_afr","Number of alleles in ExAC African/African American population."
566 | "exac.an.an_amr","Number of alleles in ExAC American population."
567 | "exac.an.an_eas","Number of alleles in ExAC East Asian population."
568 | "exac.an.an_female","Number of alleles in ExAC female population."
569 | "exac.an.an_fin","Number of alleles in ExAC Finnish population."
570 | "exac.an.an_male","Number of alleles in ExAC male population."
571 | "exac.an.an_nfe","Number of alleles in ExAC Non-Finnish European population."
572 | "exac.an.an_oth","Number of alleles in ExAC Other population."
573 | "exac.an.an_sas","Number of alleles in ExAC South Asian population."
574 | "exac.baseqranksum","ExAC BaseQRankSum test statistic (base quality difference ref vs alt)."
575 | "exac.chrom","Chromosome number in ExAC."
576 | "exac.clippingranksum","ExAC ClippingRankSum test statistic."
577 | "exac.culprit","ExAC VQSR culprit annotation."
578 | "exac.fs","ExAC FisherStrand bias score."
579 | "exac.het.het_afr","Heterozygous count in ExAC African/African American population."
580 | "exac.het.het_amr","Heterozygous count in ExAC American population."
581 | "exac.het.het_eas","Heterozygous count in ExAC East Asian population."
582 | "exac.het.het_fin","Heterozygous count in ExAC Finnish population."
583 | "exac.het.het_nfe","Heterozygous count in ExAC Non-Finnish European population."
584 | "exac.het.het_oth","Heterozygous count in ExAC Other population."
585 | "exac.het.het_sas","Heterozygous count in ExAC South Asian population."
586 | "exac.hom.hom_afr","Homozygous count in ExAC African/African American population."
587 | "exac.hom.hom_amr","Homozygous count in ExAC American population."
588 | "exac.hom.hom_eas","Homozygous count in ExAC East Asian population."
589 | "exac.hom.hom_fin","Homozygous count in ExAC Finnish population."
590 | "exac.hom.hom_nfe","Homozygous count in ExAC Non-Finnish European population."
591 | "exac.hom.hom_oth","Homozygous count in ExAC Other population."
592 | "exac.hom.hom_sas","Homozygous count in ExAC South Asian population."
593 | "exac.inbreedingcoeff","ExAC Inbreeding Coefficient."
594 | "exac.mq.mq","ExAC root mean square Mapping Quality."
595 | "exac.mq.mq0","ExAC count of reads with mapping quality 0."
596 | "exac.mq.mqranksum","ExAC MQRankSum test statistic (mapping quality difference ref vs alt)."
597 | "exac.ncc","ExAC number of no-called samples (GATK NCC annotation)."
598 | "exac.pos","Genomic position in ExAC (hg19)."
599 | "exac.qd","ExAC Quality by Depth score."
600 | "exac.readposranksum","ExAC ReadPosRankSum test statistic (position bias)."
601 | "exac.ref","Reference allele in ExAC format."
602 | "exac.type","Variant type in ExAC (e.g., snp)."
603 | "exac.vqslod","ExAC Variant Quality Score Log-Odds."
604 | "exac_nontcga._license","License information URL for the ExAC non-TCGA data source."
605 | "exac_nontcga.ac.ac","Total allele count in ExAC non-TCGA subset."
606 | "exac_nontcga.ac.ac_adj","Adjusted total allele count in ExAC non-TCGA subset."
607 | "exac_nontcga.ac.ac_afr","Allele count in ExAC non-TCGA African/African American population."
608 | "exac_nontcga.ac.ac_amr","Allele count in ExAC non-TCGA American population."
609 | "exac_nontcga.ac.ac_eas","Allele count in ExAC non-TCGA East Asian population."
610 | "exac_nontcga.ac.ac_female","Allele count in ExAC non-TCGA female population."
611 | "exac_nontcga.ac.ac_fin","Allele count in ExAC non-TCGA Finnish population."
612 | "exac_nontcga.ac.ac_het","Heterozygous allele count in ExAC non-TCGA subset."
613 | "exac_nontcga.ac.ac_hom","Homozygous allele count in ExAC non-TCGA subset."
614 | "exac_nontcga.ac.ac_male","Allele count in ExAC non-TCGA male population."
615 | "exac_nontcga.ac.ac_nfe","Allele count in ExAC non-TCGA Non-Finnish European population."
616 | "exac_nontcga.ac.ac_oth","Allele count in ExAC non-TCGA Other population."
617 | "exac_nontcga.ac.ac_sas","Allele count in ExAC non-TCGA South Asian population."
618 | "exac_nontcga.af","Allele frequency in ExAC non-TCGA subset."
619 | "exac_nontcga.alleles","Alternate allele(s) observed in ExAC non-TCGA subset."
620 | "exac_nontcga.alt","Alternate allele in ExAC non-TCGA format."
621 | "exac_nontcga.an.an","Total number of alleles genotyped in ExAC non-TCGA subset."
622 | "exac_nontcga.an.an_adj","Adjusted total number of alleles in ExAC non-TCGA subset."
623 | "exac_nontcga.an.an_afr","Number of alleles in ExAC non-TCGA African/African American population."
624 | "exac_nontcga.an.an_amr","Number of alleles in ExAC non-TCGA American population."
625 | "exac_nontcga.an.an_eas","Number of alleles in ExAC non-TCGA East Asian population."
626 | "exac_nontcga.an.an_female","Number of alleles in ExAC non-TCGA female population."
627 | "exac_nontcga.an.an_fin","Number of alleles in ExAC non-TCGA Finnish population."
628 | "exac_nontcga.an.an_male","Number of alleles in ExAC non-TCGA male population."
629 | "exac_nontcga.an.an_nfe","Number of alleles in ExAC non-TCGA Non-Finnish European population."
630 | "exac_nontcga.an.an_oth","Number of alleles in ExAC non-TCGA Other population."
631 | "exac_nontcga.an.an_sas","Number of alleles in ExAC non-TCGA South Asian population."
632 | "exac_nontcga.baseqranksum","ExAC non-TCGA BaseQRankSum test statistic."
633 | "exac_nontcga.chrom","Chromosome number in ExAC non-TCGA subset."
634 | "exac_nontcga.clippingranksum","ExAC non-TCGA ClippingRankSum test statistic."
635 | "exac_nontcga.culprit","ExAC non-TCGA VQSR culprit annotation."
636 | "exac_nontcga.fs","ExAC non-TCGA FisherStrand bias score."
637 | "exac_nontcga.het.het_afr","Heterozygous count in ExAC non-TCGA African/African American population."
638 | "exac_nontcga.het.het_amr","Heterozygous count in ExAC non-TCGA American population."
639 | "exac_nontcga.het.het_eas","Heterozygous count in ExAC non-TCGA East Asian population."
640 | "exac_nontcga.het.het_fin","Heterozygous count in ExAC non-TCGA Finnish population."
641 | "exac_nontcga.het.het_nfe","Heterozygous count in ExAC non-TCGA Non-Finnish European population."
642 | "exac_nontcga.het.het_oth","Heterozygous count in ExAC non-TCGA Other population."
643 | "exac_nontcga.het.het_sas","Heterozygous count in ExAC non-TCGA South Asian population."
644 | "exac_nontcga.hom.hom_afr","Homozygous count in ExAC non-TCGA African/African American population."
645 | "exac_nontcga.hom.hom_amr","Homozygous count in ExAC non-TCGA American population."
646 | "exac_nontcga.hom.hom_eas","Homozygous count in ExAC non-TCGA East Asian population."
647 | "exac_nontcga.hom.hom_fin","Homozygous count in ExAC non-TCGA Finnish population."
648 | "exac_nontcga.hom.hom_nfe","Homozygous count in ExAC non-TCGA Non-Finnish European population."
649 | "exac_nontcga.hom.hom_oth","Homozygous count in ExAC non-TCGA Other population."
650 | "exac_nontcga.hom.hom_sas","Homozygous count in ExAC non-TCGA South Asian population."
651 | "exac_nontcga.inbreedingcoeff","ExAC non-TCGA Inbreeding Coefficient."
652 | "exac_nontcga.mq.mq","ExAC non-TCGA root mean square Mapping Quality."
653 | "exac_nontcga.mq.mq0","ExAC non-TCGA count of reads with mapping quality 0."
654 | "exac_nontcga.mq.mqranksum","ExAC non-TCGA MQRankSum test statistic."
655 | "exac_nontcga.ncc","ExAC non-TCGA number of no-called samples (GATK NCC annotation)."
656 | "exac_nontcga.pos","Genomic position in ExAC non-TCGA (hg19)."
657 | "exac_nontcga.qd","ExAC non-TCGA Quality by Depth score."
658 | "exac_nontcga.readposranksum","ExAC non-TCGA ReadPosRankSum test statistic."
659 | "exac_nontcga.ref","Reference allele in ExAC non-TCGA format."
660 | "exac_nontcga.type","Variant type in ExAC non-TCGA (e.g., snp)."
661 | "exac_nontcga.vqslod","ExAC non-TCGA Variant Quality Score Log-Odds."
662 | "gnomad_exome._license","License information URL for the gnomAD exome data source."
663 | "gnomad_exome.ac.ac","Total allele count in gnomAD exomes."
664 | "gnomad_exome.ac.ac_afr","Allele count in gnomAD exomes African/African American population."
665 | "gnomad_exome.ac.ac_afr_female","Allele count in gnomAD exomes African/African American female population."
666 | "gnomad_exome.ac.ac_afr_male","Allele count in gnomAD exomes African/African American male population."
667 | "gnomad_exome.ac.ac_amr","Allele count in gnomAD exomes American population."
668 | "gnomad_exome.ac.ac_amr_female","Allele count in gnomAD exomes American female population."
669 | "gnomad_exome.ac.ac_amr_male","Allele count in gnomAD exomes American male population."
670 | "gnomad_exome.ac.ac_asj","Allele count in gnomAD exomes Ashkenazi Jewish population."
671 | "gnomad_exome.ac.ac_asj_female","Allele count in gnomAD exomes Ashkenazi Jewish female population."
672 | "gnomad_exome.ac.ac_asj_male","Allele count in gnomAD exomes Ashkenazi Jewish male population."
673 | "gnomad_exome.ac.ac_eas","Allele count in gnomAD exomes East Asian population."
674 | "gnomad_exome.ac.ac_eas_female","Allele count in gnomAD exomes East Asian female population."
675 | "gnomad_exome.ac.ac_eas_jpn","Allele count in gnomAD exomes East Asian Japanese population."
676 | "gnomad_exome.ac.ac_eas_kor","Allele count in gnomAD exomes East Asian Korean population."
677 | "gnomad_exome.ac.ac_eas_male","Allele count in gnomAD exomes East Asian male population."
678 | "gnomad_exome.ac.ac_eas_oea","Allele count in gnomAD exomes East Asian Other population."
679 | "gnomad_exome.ac.ac_female","Total allele count in gnomAD exomes female population."
680 | "gnomad_exome.ac.ac_fin","Allele count in gnomAD exomes Finnish population."
681 | "gnomad_exome.ac.ac_fin_female","Allele count in gnomAD exomes Finnish female population."
682 | "gnomad_exome.ac.ac_fin_male","Allele count in gnomAD exomes Finnish male population."
683 | "gnomad_exome.ac.ac_male","Total allele count in gnomAD exomes male population."
684 | "gnomad_exome.ac.ac_nfe","Allele count in gnomAD exomes Non-Finnish European population."
685 | "gnomad_exome.ac.ac_nfe_bgr","Allele count in gnomAD exomes NFE Bulgarian population."
686 | "gnomad_exome.ac.ac_nfe_est","Allele count in gnomAD exomes NFE Estonian population."
687 | "gnomad_exome.ac.ac_nfe_female","Allele count in gnomAD exomes NFE female population."
688 | "gnomad_exome.ac.ac_nfe_male","Allele count in gnomAD exomes NFE male population."
689 | "gnomad_exome.ac.ac_nfe_nwe","Allele count in gnomAD exomes NFE North-Western European population."
690 | "gnomad_exome.ac.ac_nfe_onf","Allele count in gnomAD exomes NFE Other Non-Finnish European population."
691 | "gnomad_exome.ac.ac_nfe_seu","Allele count in gnomAD exomes NFE Southern European population."
692 | "gnomad_exome.ac.ac_nfe_swe","Allele count in gnomAD exomes NFE Swedish population."
693 | "gnomad_exome.ac.ac_oth","Allele count in gnomAD exomes Other population."
694 | "gnomad_exome.ac.ac_oth_female","Allele count in gnomAD exomes Other female population."
695 | "gnomad_exome.ac.ac_oth_male","Allele count in gnomAD exomes Other male population."
696 | "gnomad_exome.ac.ac_sas","Allele count in gnomAD exomes South Asian population."
697 | "gnomad_exome.ac.ac_sas_female","Allele count in gnomAD exomes South Asian female population."
698 | "gnomad_exome.ac.ac_sas_male","Allele count in gnomAD exomes South Asian male population."
699 | "gnomad_exome.af.af","Overall allele frequency in gnomAD exomes."
700 | "gnomad_exome.af.af_afr","Allele frequency in gnomAD exomes African/African American population."
701 | "gnomad_exome.af.af_afr_female","Allele frequency in gnomAD exomes African/African American female population."
702 | "gnomad_exome.af.af_afr_male","Allele frequency in gnomAD exomes African/African American male population."
703 | "gnomad_exome.af.af_amr","Allele frequency in gnomAD exomes American population."
704 | "gnomad_exome.af.af_amr_female","Allele frequency in gnomAD exomes American female population."
705 | "gnomad_exome.af.af_amr_male","Allele frequency in gnomAD exomes American male population."
706 | "gnomad_exome.af.af_asj","Allele frequency in gnomAD exomes Ashkenazi Jewish population."
707 | "gnomad_exome.af.af_asj_female","Allele frequency in gnomAD exomes Ashkenazi Jewish female population."
708 | "gnomad_exome.af.af_asj_male","Allele frequency in gnomAD exomes Ashkenazi Jewish male population."
709 | "gnomad_exome.af.af_eas","Allele frequency in gnomAD exomes East Asian population."
710 | "gnomad_exome.af.af_eas_female","Allele frequency in gnomAD exomes East Asian female population."
711 | "gnomad_exome.af.af_eas_jpn","Allele frequency in gnomAD exomes East Asian Japanese population."
712 | "gnomad_exome.af.af_eas_kor","Allele frequency in gnomAD exomes East Asian Korean population."
713 | "gnomad_exome.af.af_eas_male","Allele frequency in gnomAD exomes East Asian male population."
714 | "gnomad_exome.af.af_eas_oea","Allele frequency in gnomAD exomes East Asian Other population."
715 | "gnomad_exome.af.af_female","Overall allele frequency in gnomAD exomes female population."
716 | "gnomad_exome.af.af_fin","Allele frequency in gnomAD exomes Finnish population."
717 | "gnomad_exome.af.af_fin_female","Allele frequency in gnomAD exomes Finnish female population."
718 | "gnomad_exome.af.af_fin_male","Allele frequency in gnomAD exomes Finnish male population."
719 | "gnomad_exome.af.af_male","Overall allele frequency in gnomAD exomes male population."
720 | "gnomad_exome.af.af_nfe","Allele frequency in gnomAD exomes Non-Finnish European population."
721 | "gnomad_exome.af.af_nfe_bgr","Allele frequency in gnomAD exomes NFE Bulgarian population."
722 | "gnomad_exome.af.af_nfe_est","Allele frequency in gnomAD exomes NFE Estonian population."
723 | "gnomad_exome.af.af_nfe_female","Allele frequency in gnomAD exomes NFE female population."
724 | "gnomad_exome.af.af_nfe_male","Allele frequency in gnomAD exomes NFE male population."
725 | "gnomad_exome.af.af_nfe_nwe","Allele frequency in gnomAD exomes NFE North-Western European population."
726 | "gnomad_exome.af.af_nfe_onf","Allele frequency in gnomAD exomes NFE Other Non-Finnish European population."
727 | "gnomad_exome.af.af_nfe_seu","Allele frequency in gnomAD exomes NFE Southern European population."
728 | "gnomad_exome.af.af_nfe_swe","Allele frequency in gnomAD exomes NFE Swedish population."
729 | "gnomad_exome.af.af_oth","Allele frequency in gnomAD exomes Other population."
730 | "gnomad_exome.af.af_oth_female","Allele frequency in gnomAD exomes Other female population."
731 | "gnomad_exome.af.af_oth_male","Allele frequency in gnomAD exomes Other male population."
732 | "gnomad_exome.af.af_sas","Allele frequency in gnomAD exomes South Asian population."
733 | "gnomad_exome.af.af_sas_female","Allele frequency in gnomAD exomes South Asian female population."
734 | "gnomad_exome.af.af_sas_male","Allele frequency in gnomAD exomes South Asian male population."
735 | "gnomad_exome.alleles","Alternate allele(s) observed in gnomAD exomes."
736 | "gnomad_exome.alt","Alternate allele in gnomAD exome format."
737 | "gnomad_exome.an.an","Total number of alleles genotyped in gnomAD exomes."
738 | "gnomad_exome.an.an_afr","Number of alleles in gnomAD exomes African/African American population."
739 | "gnomad_exome.an.an_afr_female","Number of alleles in gnomAD exomes African/African American female population."
740 | "gnomad_exome.an.an_afr_male","Number of alleles in gnomAD exomes African/African American male population."
741 | "gnomad_exome.an.an_amr","Number of alleles in gnomAD exomes American population."
742 | "gnomad_exome.an.an_amr_female","Number of alleles in gnomAD exomes American female population."
743 | "gnomad_exome.an.an_amr_male","Number of alleles in gnomAD exomes American male population."
744 | "gnomad_exome.an.an_asj","Number of alleles in gnomAD exomes Ashkenazi Jewish population."
745 | "gnomad_exome.an.an_asj_female","Number of alleles in gnomAD exomes Ashkenazi Jewish female population."
746 | "gnomad_exome.an.an_asj_male","Number of alleles in gnomAD exomes Ashkenazi Jewish male population."
747 | "gnomad_exome.an.an_eas","Number of alleles in gnomAD exomes East Asian population."
748 | "gnomad_exome.an.an_eas_female","Number of alleles in gnomAD exomes East Asian female population."
749 | "gnomad_exome.an.an_eas_jpn","Number of alleles in gnomAD exomes East Asian Japanese population."
750 | "gnomad_exome.an.an_eas_kor","Number of alleles in gnomAD exomes East Asian Korean population."
751 | "gnomad_exome.an.an_eas_male","Number of alleles in gnomAD exomes East Asian male population."
752 | "gnomad_exome.an.an_eas_oea","Number of alleles in gnomAD exomes East Asian Other population."
753 | "gnomad_exome.an.an_female","Total number of alleles in gnomAD exomes female population."
754 | "gnomad_exome.an.an_fin","Number of alleles in gnomAD exomes Finnish population."
755 | "gnomad_exome.an.an_fin_female","Number of alleles in gnomAD exomes Finnish female population."
756 | "gnomad_exome.an.an_fin_male","Number of alleles in gnomAD exomes Finnish male population."
757 | "gnomad_exome.an.an_male","Total number of alleles in gnomAD exomes male population."
758 | "gnomad_exome.an.an_nfe","Number of alleles in gnomAD exomes Non-Finnish European population."
759 | "gnomad_exome.an.an_nfe_bgr","Number of alleles in gnomAD exomes NFE Bulgarian population."
760 | "gnomad_exome.an.an_nfe_est","Number of alleles in gnomAD exomes NFE Estonian population."
761 | "gnomad_exome.an.an_nfe_female","Number of alleles in gnomAD exomes NFE female population."
762 | "gnomad_exome.an.an_nfe_male","Number of alleles in gnomAD exomes NFE male population."
763 | "gnomad_exome.an.an_nfe_nwe","Number of alleles in gnomAD exomes NFE North-Western European population."
764 | "gnomad_exome.an.an_nfe_onf","Number of alleles in gnomAD exomes NFE Other Non-Finnish European population."
765 | "gnomad_exome.an.an_nfe_seu","Number of alleles in gnomAD exomes NFE Southern European population."
766 | "gnomad_exome.an.an_nfe_swe","Number of alleles in gnomAD exomes NFE Swedish population."
767 | "gnomad_exome.an.an_oth","Number of alleles in gnomAD exomes Other population."
768 | "gnomad_exome.an.an_oth_female","Number of alleles in gnomAD exomes Other female population."
769 | "gnomad_exome.an.an_oth_male","Number of alleles in gnomAD exomes Other male population."
770 | "gnomad_exome.an.an_sas","Number of alleles in gnomAD exomes South Asian population."
771 | "gnomad_exome.an.an_sas_female","Number of alleles in gnomAD exomes South Asian female population."
772 | "gnomad_exome.an.an_sas_male","Number of alleles in gnomAD exomes South Asian male population."
773 | "gnomad_exome.baseqranksum","gnomAD exome BaseQRankSum test statistic."
774 | "gnomad_exome.chrom","Chromosome number in gnomAD exomes."
775 | "gnomad_exome.clippingranksum","gnomAD exome ClippingRankSum test statistic."
776 | "gnomad_exome.dp","Total read depth at the variant position in gnomAD exomes."
777 | "gnomad_exome.fs","gnomAD exome FisherStrand bias score."
778 | "gnomad_exome.hom.hom","Total homozygous count in gnomAD exomes."
779 | "gnomad_exome.hom.hom_afr","Homozygous count in gnomAD exomes African/African American population."
780 | "gnomad_exome.hom.hom_afr_female","Homozygous count in gnomAD exomes African/African American female population."
781 | "gnomad_exome.hom.hom_afr_male","Homozygous count in gnomAD exomes African/African American male population."
782 | "gnomad_exome.hom.hom_amr","Homozygous count in gnomAD exomes American population."
783 | "gnomad_exome.hom.hom_amr_female","Homozygous count in gnomAD exomes American female population."
784 | "gnomad_exome.hom.hom_amr_male","Homozygous count in gnomAD exomes American male population."
785 | "gnomad_exome.hom.hom_asj","Homozygous count in gnomAD exomes Ashkenazi Jewish population."
786 | "gnomad_exome.hom.hom_asj_female","Homozygous count in gnomAD exomes Ashkenazi Jewish female population."
787 | "gnomad_exome.hom.hom_asj_male","Homozygous count in gnomAD exomes Ashkenazi Jewish male population."
788 | "gnomad_exome.hom.hom_eas","Homozygous count in gnomAD exomes East Asian population."
789 | "gnomad_exome.hom.hom_eas_female","Homozygous count in gnomAD exomes East Asian female population."
790 | "gnomad_exome.hom.hom_eas_jpn","Homozygous count in gnomAD exomes East Asian Japanese population."
791 | "gnomad_exome.hom.hom_eas_kor","Homozygous count in gnomAD exomes East Asian Korean population."
792 | "gnomad_exome.hom.hom_eas_male","Homozygous count in gnomAD exomes East Asian male population."
793 | "gnomad_exome.hom.hom_eas_oea","Homozygous count in gnomAD exomes East Asian Other population."
794 | "gnomad_exome.hom.hom_female","Total homozygous count in gnomAD exomes female population."
795 | "gnomad_exome.hom.hom_fin","Homozygous count in gnomAD exomes Finnish population."
796 | "gnomad_exome.hom.hom_fin_female","Homozygous count in gnomAD exomes Finnish female population."
797 | "gnomad_exome.hom.hom_fin_male","Homozygous count in gnomAD exomes Finnish male population."
798 | "gnomad_exome.hom.hom_male","Total homozygous count in gnomAD exomes male population."
799 | "gnomad_exome.hom.hom_nfe","Homozygous count in gnomAD exomes Non-Finnish European population."
800 | "gnomad_exome.hom.hom_nfe_bgr","Homozygous count in gnomAD exomes NFE Bulgarian population."
801 | "gnomad_exome.hom.hom_nfe_est","Homozygous count in gnomAD exomes NFE Estonian population."
802 | "gnomad_exome.hom.hom_nfe_female","Homozygous count in gnomAD exomes NFE female population."
803 | "gnomad_exome.hom.hom_nfe_male","Homozygous count in gnomAD exomes NFE male population."
804 | "gnomad_exome.hom.hom_nfe_nwe","Homozygous count in gnomAD exomes NFE North-Western European population."
805 | "gnomad_exome.hom.hom_nfe_onf","Homozygous count in gnomAD exomes NFE Other Non-Finnish European population."
806 | "gnomad_exome.hom.hom_nfe_seu","Homozygous count in gnomAD exomes NFE Southern European population."
807 | "gnomad_exome.hom.hom_nfe_swe","Homozygous count in gnomAD exomes NFE Swedish population."
808 | "gnomad_exome.hom.hom_oth","Homozygous count in gnomAD exomes Other population."
809 | "gnomad_exome.hom.hom_oth_female","Homozygous count in gnomAD exomes Other female population."
810 | "gnomad_exome.hom.hom_oth_male","Homozygous count in gnomAD exomes Other male population."
811 | "gnomad_exome.hom.hom_sas","Homozygous count in gnomAD exomes South Asian population."
812 | "gnomad_exome.hom.hom_sas_female","Homozygous count in gnomAD exomes South Asian female population."
813 | "gnomad_exome.hom.hom_sas_male","Homozygous count in gnomAD exomes South Asian male population."
814 | "gnomad_exome.inbreedingcoeff","gnomAD exome Inbreeding Coefficient."
815 | "gnomad_exome.mq.mq","gnomAD exome root mean square Mapping Quality."
816 | "gnomad_exome.mq.mqranksum","gnomAD exome MQRankSum test statistic."
817 | "gnomad_exome.pab_max","Maximum p-value over the gnomAD exomes callset for the binomial test of observed allele balance at heterozygous genotypes (expected AB = 0.5)."
818 | "gnomad_exome.pos","Genomic position in gnomAD exomes (hg19)."
819 | "gnomad_exome.qd","gnomAD exome Quality by Depth score."
820 | "gnomad_exome.readposranksum","gnomAD exome ReadPosRankSum test statistic."
821 | "gnomad_exome.ref","Reference allele in gnomAD exome format."
822 | "gnomad_exome.rf","Random Forest probability score from gnomAD exomes. UNKNOWN usage."
823 | "gnomad_exome.rsid","Associated dbSNP rsID from gnomAD exomes."
824 | "gnomad_exome.sor","gnomAD exome Strand Odds Ratio score."
825 | "gnomad_exome.type","Variant type in gnomAD exomes (e.g., snp)."
826 | "gnomad_exome.vqslod","gnomAD exome Variant Quality Score Log-Odds."
827 | "gnomad_exome.vqsr_culprit","gnomAD exome VQSR culprit annotation."
828 | "hg19.end","End position in hg19 assembly."
829 | "hg19.start","Start position in hg19 assembly."
830 | "mutdb._license","License information URL for the MutDB data source."
831 | "mutdb.alt","Alternate allele in MutDB."
832 | "mutdb.chrom","Chromosome number in MutDB."
833 | "mutdb.cosmic_id","Associated COSMIC ID(s) from MutDB."
834 | "mutdb.hg19.end","End position in hg19 assembly (MutDB)."
835 | "mutdb.hg19.start","Start position in hg19 assembly (MutDB)."
836 | "mutdb.mutpred_score","MutPred score reported by MutDB."
837 | "mutdb.ref","Reference allele in MutDB."
838 | "mutdb.rsid","Associated dbSNP rsID from MutDB."
839 | "mutdb.strand","Genomic strand reported by MutDB (m indicates '-')."
840 | "mutdb.uniprot_id","Associated UniProt variant ID from MutDB."
841 | "observed","Boolean indicating if the variant is observed in aggregated datasets."
842 | "snpeff._license","License information URL for the SnpEff data source."
843 | "snpeff.ann.cdna.length","Length of the cDNA sequence for the annotated transcript."
844 | "snpeff.ann.cdna.position","Position of the variant within the cDNA sequence."
845 | "snpeff.ann.cds.length","Length of the coding sequence (CDS) for the annotated transcript."
846 | "snpeff.ann.cds.position","Position of the variant within the coding sequence (CDS)."
847 | "snpeff.ann.effect","Predicted sequence ontology effect of the variant (e.g., missense_variant)."
848 | "snpeff.ann.feature_id","Feature ID (usually transcript ID like NM_004333.4) for the annotation."
849 | "snpeff.ann.feature_type","Type of feature annotated (e.g., transcript)."
850 | "snpeff.ann.gene_id","Gene symbol or ID associated with the annotation."
851 | "snpeff.ann.genename","Gene name associated with the annotation."
852 | "snpeff.ann.hgvs_c","HGVS coding sequence notation from SnpEff."
853 | "snpeff.ann.hgvs_p","HGVS protein sequence notation from SnpEff."
854 | "snpeff.ann.protein.length","Length of the protein sequence for the annotated transcript."
855 | "snpeff.ann.protein.position","Position of the amino acid change within the protein."
856 | "snpeff.ann.putative_impact","SnpEff predicted impact category (e.g., MODERATE, HIGH)."
857 | "snpeff.ann.rank","Rank of the annotation (exon/intron rank)."
858 | "snpeff.ann.total","Total number of exons/introns in the transcript."
859 | "snpeff.ann.transcript_biotype","Biotype of the transcript (e.g., protein_coding)."
860 | "vcf.alt","Alternate allele in VCF format."
861 | "vcf.position","Position of the variant in VCF format (hg19)."
862 | "vcf.ref","Reference allele in VCF format."
863 |
```
--------------------------------------------------------------------------------
/src/biomcp/router.py:
--------------------------------------------------------------------------------
```python
1 | """Unified search and fetch tools for BioMCP.
2 |
3 | This module provides the main MCP tools for searching and fetching biomedical data
4 | across different domains (articles, trials, variants) with integrated sequential
5 | thinking capabilities.
6 | """
7 |
8 | import json
9 | import logging
10 | from typing import Annotated, Any, Literal
11 |
12 | from pydantic import Field
13 |
14 | from biomcp.constants import (
15 | DEFAULT_PAGE_NUMBER,
16 | DEFAULT_PAGE_SIZE,
17 | DEFAULT_TITLE,
18 | ERROR_DOMAIN_REQUIRED,
19 | ESTIMATED_ADDITIONAL_RESULTS,
20 | MAX_RESULTS_PER_DOMAIN_DEFAULT,
21 | TRIAL_DETAIL_SECTIONS,
22 | VALID_DOMAINS,
23 | )
24 | from biomcp.core import mcp_app
25 | from biomcp.domain_handlers import get_domain_handler
26 | from biomcp.exceptions import (
27 | InvalidDomainError,
28 | InvalidParameterError,
29 | QueryParsingError,
30 | ResultParsingError,
31 | SearchExecutionError,
32 | )
33 | from biomcp.integrations.biothings_client import BioThingsClient
34 | from biomcp.metrics import track_performance
35 | from biomcp.parameter_parser import ParameterParser
36 | from biomcp.query_parser import QueryParser
37 | from biomcp.query_router import QueryRouter, execute_routing_plan
38 | from biomcp.thinking_tracker import get_thinking_reminder
39 | from biomcp.trials import getter as trial_getter
40 |
41 | logger = logging.getLogger(__name__)
42 |
43 |
def format_results(
    results: list[dict], domain: str, page: int, page_size: int, total: int
) -> dict:
    """Convert raw domain search hits into the OpenAI MCP search payload.

    Each raw hit is passed to the domain handler's ``format_result`` and then
    reduced to the four fields the OpenAI MCP search contract uses:

    - id: identifier of the record (empty string when missing)
    - title: display title (``DEFAULT_TITLE`` when missing)
    - text: the handler's ``snippet`` if present, otherwise its ``text``
    - url: link to the full record (empty string when missing)

    Metadata is intentionally left out of search results; it belongs in
    fetch results only.

    Args:
        results: Raw results returned by a domain-specific search.
        domain: Name of the domain whose handler should format the results.
        page: Current page number (accepted for tracking; not used here).
        page_size: Results per page (accepted for tracking; not used here).
        total: Total available results (accepted for tracking; not used here).

    Returns:
        ``{"results": [{"id", "title", "text", "url"}, ...]}``. When
        ``get_thinking_reminder()`` returns a truthy value and at least one
        result was formatted, a reminder entry is placed first in the list.

    Raises:
        InvalidDomainError: If no handler is registered for ``domain``.
    """
    logger.debug(f"Formatting {len(results)} results for domain: {domain}")

    # Resolve the handler up front so an unknown domain fails before any work.
    try:
        handler = get_domain_handler(domain)
    except ValueError:
        raise InvalidDomainError(domain, VALID_DOMAINS) from None

    entries: list[dict] = []
    for raw in results:
        try:
            item = handler.format_result(raw)
            # Keep only the fields required by the OpenAI MCP search format.
            entry = {
                "id": item.get("id", ""),
                "title": item.get("title", DEFAULT_TITLE),
                "text": item.get("snippet", item.get("text", "")),
                "url": item.get("url", ""),
            }
        except Exception as e:
            # Best effort: one malformed hit must not sink the whole page.
            logger.warning(f"Failed to format result in domain {domain}: {e}")
        else:
            entries.append(entry)

    # The reminder is only surfaced alongside real results, and always first.
    reminder = get_thinking_reminder()
    if reminder and entries:
        entries = [
            {
                "id": "thinking-reminder",
                "title": "⚠️ Research Best Practice Reminder",
                "text": reminder,
                "url": "",
            },
            *entries,
        ]

    return {"results": entries}
116 |
117 |
118 | # ────────────────────────────
119 | # Unified SEARCH tool
120 | # ────────────────────────────
121 | @mcp_app.tool()
122 | @track_performance("biomcp.search")
123 | async def search( # noqa: C901
124 | query: Annotated[
125 | str,
126 | "Unified search query (e.g., 'gene:BRAF AND trials.condition:melanoma'). If provided, other parameters are ignored.",
127 | ],
128 | call_benefit: Annotated[
129 | str | None,
130 | Field(
131 | description="Brief explanation of why this search is being performed and expected benefit. Helps improve search accuracy and provides context for analytics. Highly recommended for better results."
132 | ),
133 | ] = None,
134 | domain: Annotated[
135 | Literal[
136 | "article",
137 | "trial",
138 | "variant",
139 | "gene",
140 | "drug",
141 | "disease",
142 | "nci_organization",
143 | "nci_intervention",
144 | "nci_biomarker",
145 | "nci_disease",
146 | "fda_adverse",
147 | "fda_label",
148 | "fda_device",
149 | "fda_approval",
150 | "fda_recall",
151 | "fda_shortage",
152 | ]
153 | | None,
154 | Field(
155 | description="Domain to search: 'article' for papers/literature ABOUT genes/variants/diseases, 'trial' for clinical studies, 'variant' for genetic variant DATABASE RECORDS, 'gene' for gene information from MyGene.info, 'drug' for drug/chemical information from MyChem.info, 'disease' for disease information from MyDisease.info, 'nci_organization' for NCI cancer centers/sponsors, 'nci_intervention' for NCI drugs/devices/procedures, 'nci_biomarker' for NCI trial eligibility biomarkers, 'nci_disease' for NCI cancer vocabulary, 'fda_adverse' for FDA adverse event reports, 'fda_label' for FDA drug labels, 'fda_device' for FDA device events, 'fda_approval' for FDA drug approvals, 'fda_recall' for FDA drug recalls, 'fda_shortage' for FDA drug shortages"
156 | ),
157 | ] = None,
158 | genes: Annotated[list[str] | str | None, "Gene symbols"] = None,
159 | diseases: Annotated[list[str] | str | None, "Disease terms"] = None,
160 | variants: Annotated[list[str] | str | None, "Variant strings"] = None,
161 | chemicals: Annotated[list[str] | str | None, "Drug/chemical terms"] = None,
162 | keywords: Annotated[list[str] | str | None, "Free-text keywords"] = None,
163 | conditions: Annotated[list[str] | str | None, "Trial conditions"] = None,
164 | interventions: Annotated[
165 | list[str] | str | None, "Trial interventions"
166 | ] = None,
167 | recruiting_status: Annotated[
168 | str | None, "Trial status filter (OPEN, CLOSED, or ANY)"
169 | ] = None,
170 | phase: Annotated[str | None, "Trial phase filter"] = None,
171 | significance: Annotated[
172 | str | None, "Variant clinical significance"
173 | ] = None,
174 | lat: Annotated[
175 | float | None,
176 | "Latitude for trial location search. AI agents should geocode city names (e.g., 'Cleveland' → 41.4993) before using.",
177 | ] = None,
178 | long: Annotated[
179 | float | None,
180 | "Longitude for trial location search. AI agents should geocode city names (e.g., 'Cleveland' → -81.6944) before using.",
181 | ] = None,
182 | distance: Annotated[
183 | int | None,
184 | "Distance in miles from lat/long for trial search (default: 50 miles if lat/long provided)",
185 | ] = None,
186 | page: Annotated[int, "Page number (minimum: 1)"] = DEFAULT_PAGE_NUMBER,
187 | page_size: Annotated[int, "Results per page (1-100)"] = DEFAULT_PAGE_SIZE,
188 | max_results_per_domain: Annotated[
189 | int | None, "Max results per domain (unified search only)"
190 | ] = None,
191 | explain_query: Annotated[
192 | bool, "Return query explanation (unified search only)"
193 | ] = False,
194 | get_schema: Annotated[
195 | bool, "Return searchable fields schema instead of results"
196 | ] = False,
197 | api_key: Annotated[
198 | str | None,
199 | Field(
200 | description="NCI API key for searching NCI domains (nci_organization, nci_intervention, nci_biomarker, nci_disease). Required for NCI searches. Get a free key at: https://clinicaltrialsapi.cancer.gov/"
201 | ),
202 | ] = None,
203 | ) -> dict:
204 | """Search biomedical literature, clinical trials, genetic variants, genes, drugs, and diseases.
205 |
206 | ⚠️ IMPORTANT: Have you used the 'think' tool first? If not, STOP and use it NOW!
207 | The 'think' tool is REQUIRED for proper research planning and should be your FIRST step.
208 |
209 | This tool provides access to biomedical data from PubMed/PubTator3, ClinicalTrials.gov,
210 | MyVariant.info, and the BioThings suite (MyGene.info, MyChem.info, MyDisease.info).
211 | It supports two search modes:
212 |
213 | ## 1. UNIFIED QUERY LANGUAGE
214 | Use the 'query' parameter with field-based syntax for precise cross-domain searches.
215 |
216 | Syntax:
217 | - Basic: "gene:BRAF"
218 | - AND logic: "gene:BRAF AND disease:melanoma"
219 | - OR logic: "gene:PTEN AND (R173 OR Arg173 OR 'position 173')"
220 | - Domain-specific: "trials.condition:melanoma AND trials.phase:3"
221 |
222 | Common fields:
223 | - Cross-domain: gene, disease, variant, chemical/drug
224 | - Articles: pmid, title, abstract, journal, author
225 | - Trials: trials.condition, trials.intervention, trials.phase, trials.status
226 | - Variants: variants.hgvs, variants.rsid, variants.significance
227 |
228 | Example:
229 | ```
230 | await search(
231 | query="gene:BRAF AND disease:melanoma AND trials.phase:3",
232 | max_results_per_domain=20
233 | )
234 | ```
235 |
236 | ## 2. DOMAIN-SPECIFIC SEARCH
237 | Use the 'domain' parameter with specific filters for targeted searches.
238 |
239 | Domains:
240 | - "article": Search PubMed/PubTator3 for research articles and preprints ABOUT genes, variants, diseases, or chemicals
241 | - "trial": Search ClinicalTrials.gov for clinical studies
242 | - "variant": Search MyVariant.info for genetic variant DATABASE RECORDS (population frequency, clinical significance, etc.) - NOT for articles about variants!
243 | - "gene": Search MyGene.info for gene information (symbol, name, function, aliases)
244 | - "drug": Search MyChem.info for drug/chemical information (names, formulas, indications)
245 | - "disease": Search MyDisease.info for disease information (names, definitions, synonyms)
246 | - "nci_organization": Search NCI database for cancer centers, hospitals, and research sponsors (requires API key)
247 | - "nci_intervention": Search NCI database for drugs, devices, procedures used in cancer trials (requires API key)
248 | - "nci_biomarker": Search NCI database for biomarkers used in trial eligibility criteria (requires API key)
249 | - "nci_disease": Search NCI controlled vocabulary for cancer conditions and terms (requires API key)
250 |
251 | Example:
252 | ```
253 | await search(
254 | domain="article",
255 | genes=["BRAF", "NRAS"],
256 | diseases=["melanoma"],
257 | page_size=50
258 | )
259 | ```
260 |
261 | ## DOMAIN SELECTION EXAMPLES:
262 | - To find ARTICLES about BRAF V600E mutation: domain="article", genes=["BRAF"], variants=["V600E"]
263 | - To find VARIANT DATA for BRAF mutations: domain="variant", gene="BRAF"
264 | - To find articles about ERBB2 p.D277Y: domain="article", genes=["ERBB2"], variants=["p.D277Y"]
265 | - Common mistake: Using domain="variant" when you want articles about a variant
266 |
267 | ## IMPORTANT NOTES:
268 | - For complex research questions, use the separate 'think' tool for systematic analysis
269 | - The tool returns results in OpenAI MCP format: {"results": [{"id", "title", "text", "url"}, ...]}
270 | - Search results do NOT include metadata (per OpenAI MCP specification)
271 | - Use the fetch tool to get detailed metadata for specific records
272 | - Use get_schema=True to explore available search fields
273 | - Use explain_query=True to understand query parsing (unified mode)
274 | - Domain-specific searches use AND logic for multiple values
275 | - For OR logic, use the unified query language
276 | - NEW: Article search keywords support OR with pipe separator: "R173|Arg173|p.R173"
277 | - Remember: domain="article" finds LITERATURE, domain="variant" finds DATABASE RECORDS
278 |
279 | ## RETURN FORMAT:
280 | All search modes return results in this format:
281 | ```json
282 | {
283 | "results": [
284 | {
285 | "id": "unique_identifier",
286 | "title": "Human-readable title",
287 | "text": "Summary or snippet of content",
288 | "url": "Link to full resource"
289 | }
290 | ]
291 | }
292 | ```
293 | """
294 | logger.info(f"Search called with domain={domain}, query={query}")
295 |
296 | # Return schema if requested
297 | if get_schema:
298 | parser = QueryParser()
299 | return parser.get_schema()
300 |
301 | # Determine search mode
302 | if query and query.strip():
303 | # Check if this is a unified query (contains field syntax like "gene:" or "AND")
304 | is_unified_query = any(
305 | marker in query for marker in [":", " AND ", " OR "]
306 | )
307 |
308 | # Check if this is an NCI domain
309 | nci_domains = [
310 | "nci_biomarker",
311 | "nci_organization",
312 | "nci_intervention",
313 | "nci_disease",
314 | ]
315 | is_nci_domain = domain in nci_domains if domain else False
316 |
317 | if not domain or (domain and is_unified_query and not is_nci_domain):
318 | # Use unified query mode if:
319 | # 1. No domain specified, OR
320 | # 2. Domain specified but query has field syntax AND it's not an NCI domain
321 | logger.info(f"Using unified query mode: {query}")
322 | return await _unified_search(
323 | query=query,
324 | max_results_per_domain=max_results_per_domain
325 | or MAX_RESULTS_PER_DOMAIN_DEFAULT,
326 | domains=None,
327 | explain_query=explain_query,
328 | )
329 | elif domain:
330 | # Domain-specific search with query as keyword
331 | logger.info(
332 | f"Domain-specific search with query as keyword: domain={domain}, query={query}"
333 | )
334 | # Convert query to keywords parameter for domain-specific search
335 | keywords = [query]
336 |
337 | # Legacy domain-based search
338 | if not domain:
339 | raise InvalidParameterError(
340 | "query or domain", None, ERROR_DOMAIN_REQUIRED
341 | )
342 |
343 | # Validate pagination parameters
344 | try:
345 | page, page_size = ParameterParser.validate_page_params(page, page_size)
346 | except InvalidParameterError as e:
347 | logger.error(f"Invalid pagination parameters: {e}")
348 | raise
349 |
350 | # Parse parameters using ParameterParser
351 | genes = ParameterParser.parse_list_param(genes, "genes")
352 | diseases = ParameterParser.parse_list_param(diseases, "diseases")
353 | variants = ParameterParser.parse_list_param(variants, "variants")
354 | chemicals = ParameterParser.parse_list_param(chemicals, "chemicals")
355 | keywords = ParameterParser.parse_list_param(keywords, "keywords")
356 | conditions = ParameterParser.parse_list_param(conditions, "conditions")
357 | interventions = ParameterParser.parse_list_param(
358 | interventions, "interventions"
359 | )
360 |
361 | logger.debug(
362 | f"Parsed parameters for domain {domain}: "
363 | f"genes={genes}, diseases={diseases}, variants={variants}"
364 | )
365 |
366 | if domain == "article":
367 | from .router_handlers import handle_article_search
368 |
369 | items, total = await handle_article_search(
370 | genes=genes,
371 | diseases=diseases,
372 | variants=variants,
373 | chemicals=chemicals,
374 | keywords=keywords,
375 | page=page,
376 | page_size=page_size,
377 | )
378 |
379 | return format_results(
380 | items,
381 | domain="article",
382 | page=page,
383 | page_size=page_size,
384 | total=total,
385 | )
386 |
387 | elif domain == "trial":
388 | logger.info("Executing trial search")
389 | # Build the trial search parameters
390 | search_params: dict[str, Any] = {}
391 | if conditions:
392 | search_params["conditions"] = conditions
393 | if interventions:
394 | search_params["interventions"] = interventions
395 | if recruiting_status:
396 | search_params["recruiting_status"] = recruiting_status
397 | if phase:
398 | try:
399 | search_params["phase"] = ParameterParser.normalize_phase(phase)
400 | except InvalidParameterError:
401 | raise
402 | if keywords:
403 | search_params["keywords"] = keywords
404 | if lat is not None:
405 | search_params["lat"] = lat
406 | if long is not None:
407 | search_params["long"] = long
408 | if distance is not None:
409 | search_params["distance"] = distance
410 |
411 | try:
412 | from biomcp.trials.search import TrialQuery, search_trials
413 |
414 | # Convert search_params to TrialQuery
415 | trial_query = TrialQuery(**search_params, page_size=page_size)
416 | result_str = await search_trials(trial_query, output_json=True)
417 | except Exception as e:
418 | logger.error(f"Trial search failed: {e}")
419 | raise SearchExecutionError("trial", e) from e
420 |
421 | # Parse the JSON results
422 | try:
423 | results = json.loads(result_str)
424 | except (json.JSONDecodeError, TypeError) as e:
425 | logger.error(f"Failed to parse trial results: {e}")
426 | raise ResultParsingError("trial", e) from e
427 |
428 | # Handle different response formats from the trials API
429 | # The API can return either a dict with 'studies' key or a direct list
430 | if isinstance(results, dict):
431 | # ClinicalTrials.gov API v2 format with studies array
432 | if "studies" in results:
433 | items = results["studies"]
434 | total = len(items) # API doesn't provide total count
435 | # Legacy format or error
436 | elif "error" in results:
437 | logger.warning(
438 | f"Trial API returned error: {results.get('error')}"
439 | )
440 | return format_results(
441 | [], domain="trial", page=page, page_size=page_size, total=0
442 | )
443 | else:
444 | # Assume the dict itself is a single result
445 | items = [results]
446 | total = 1
447 | elif isinstance(results, list):
448 | # Direct list of results
449 | items = results
450 | total = len(items)
451 | else:
452 | items = []
453 | total = 0
454 |
455 | logger.info(f"Trial search returned {total} total results")
456 |
457 | return format_results(
458 | items, domain="trial", page=page, page_size=page_size, total=total
459 | )
460 |
461 | elif domain == "variant":
462 | logger.info("Executing variant search")
463 | # Build the variant search parameters
464 | # Note: variant searcher expects single gene, not list
465 | gene = genes[0] if genes else None
466 |
467 | # Collect keywords plus significance. NOTE(review): keyword_list is built here but never passed to VariantQuery below - confirm intent
468 | keyword_list = keywords or []
469 | if significance:
470 | keyword_list.append(significance)
471 |
472 | try:
473 | from biomcp.variants.search import VariantQuery, search_variants
474 |
475 | variant_query = VariantQuery(
476 | gene=gene,
477 | significance=significance,
478 | size=page_size,
479 | offset=(page - 1) * page_size,
480 | )
481 | result_str = await search_variants(variant_query, output_json=True)
482 | except Exception as e:
483 | logger.error(f"Variant search failed: {e}")
484 | raise SearchExecutionError("variant", e) from e
485 |
486 | # Parse the JSON results
487 | try:
488 | all_results = json.loads(result_str)
489 | except (json.JSONDecodeError, TypeError) as e:
490 | logger.error(f"Failed to parse variant results: {e}")
491 | raise ResultParsingError("variant", e) from e
492 |
493 | # For variants, the results are already paginated by the API
494 | # We need to estimate total based on whether we got a full page
495 | items = all_results if isinstance(all_results, list) else []
496 | # Rough estimate: if we got a full page, there might be more
497 | total = len(items) + (
498 | ESTIMATED_ADDITIONAL_RESULTS if len(items) == page_size else 0
499 | )
500 |
501 | logger.info(f"Variant search returned {len(items)} results")
502 |
503 | return format_results(
504 | items,
505 | domain="variant",
506 | page=page,
507 | page_size=page_size,
508 | total=total,
509 | )
510 |
511 | elif domain == "gene":
512 | logger.info("Executing gene search")
513 | # Build the gene search query
514 | query_str = keywords[0] if keywords else genes[0] if genes else ""
515 |
516 | if not query_str:
517 | raise InvalidParameterError(
518 | "keywords or genes", None, "a gene symbol or search term"
519 | )
520 |
521 | try:
522 | client = BioThingsClient()
523 | # For search, query by symbol/name
524 | results = await client._query_gene(query_str)
525 |
526 | if not results:
527 | items = []
528 | total = 0
529 | else:
530 | # Fetch full details for each result (limited by page_size)
531 | items = []
532 | for result in results[:page_size]:
533 | gene_id = result.get("_id")
534 | if gene_id:
535 | full_gene = await client._get_gene_by_id(gene_id)
536 | if full_gene:
537 | items.append(full_gene.model_dump())
538 |
539 | total = len(results)
540 |
541 | except Exception as e:
542 | logger.error(f"Gene search failed: {e}")
543 | raise SearchExecutionError("gene", e) from e
544 |
545 | logger.info(f"Gene search returned {len(items)} results")
546 |
547 | return format_results(
548 | items,
549 | domain="gene",
550 | page=page,
551 | page_size=page_size,
552 | total=total,
553 | )
554 |
555 | elif domain == "drug":
556 | logger.info("Executing drug search")
557 | # Build the drug search query
558 | query_str = (
559 | keywords[0] if keywords else chemicals[0] if chemicals else ""
560 | )
561 |
562 | if not query_str:
563 | raise InvalidParameterError(
564 | "keywords or chemicals", None, "a drug name or search term"
565 | )
566 |
567 | try:
568 | client = BioThingsClient()
569 | # For search, query by name
570 | results = await client._query_drug(query_str)
571 |
572 | if not results:
573 | items = []
574 | total = 0
575 | else:
576 | # Fetch full details for each result (limited by page_size)
577 | items = []
578 | for result in results[:page_size]:
579 | drug_id = result.get("_id")
580 | if drug_id:
581 | full_drug = await client._get_drug_by_id(drug_id)
582 | if full_drug:
583 | items.append(full_drug.model_dump(by_alias=True))
584 |
585 | total = len(results)
586 |
587 | except Exception as e:
588 | logger.error(f"Drug search failed: {e}")
589 | raise SearchExecutionError("drug", e) from e
590 |
591 | logger.info(f"Drug search returned {len(items)} results")
592 |
593 | return format_results(
594 | items,
595 | domain="drug",
596 | page=page,
597 | page_size=page_size,
598 | total=total,
599 | )
600 |
601 | elif domain == "disease":
602 | logger.info("Executing disease search")
603 | # Build the disease search query
604 | query_str = (
605 | keywords[0] if keywords else diseases[0] if diseases else ""
606 | )
607 |
608 | if not query_str:
609 | raise InvalidParameterError(
610 | "keywords or diseases", None, "a disease name or search term"
611 | )
612 |
613 | try:
614 | client = BioThingsClient()
615 | # For search, query by name
616 | results = await client._query_disease(query_str)
617 |
618 | if not results:
619 | items = []
620 | total = 0
621 | else:
622 | # Fetch full details for each result (limited by page_size)
623 | items = []
624 | for result in results[:page_size]:
625 | disease_id = result.get("_id")
626 | if disease_id:
627 | full_disease = await client._get_disease_by_id(
628 | disease_id
629 | )
630 | if full_disease:
631 | items.append(
632 | full_disease.model_dump(by_alias=True)
633 | )
634 |
635 | total = len(results)
636 |
637 | except Exception as e:
638 | logger.error(f"Disease search failed: {e}")
639 | raise SearchExecutionError("disease", e) from e
640 |
641 | logger.info(f"Disease search returned {len(items)} results")
642 |
643 | return format_results(
644 | items,
645 | domain="disease",
646 | page=page,
647 | page_size=page_size,
648 | total=total,
649 | )
650 |
651 | elif domain == "nci_organization":
652 | from .router_handlers import handle_nci_organization_search
653 |
654 | # Extract NCI-specific parameters
655 | organization_type = keywords[0] if keywords else None
656 | city = None
657 | state = None
658 | name = keywords[0] if keywords else None
659 |
660 | # Try to parse location from keywords
661 | if keywords and len(keywords) >= 2:
662 | # Assume last two keywords might be city, state
663 | city = keywords[-2]
664 | state = keywords[-1]
665 | if len(state) == 2 and state.isupper():
666 | # Likely a state code
667 | name = " ".join(keywords[:-2]) if len(keywords) > 2 else None
668 | else:
669 | # Not a state code, use all as name
670 | city = None
671 | state = None
672 | name = " ".join(keywords)
673 |
674 | items, total = await handle_nci_organization_search(
675 | name=name,
676 | organization_type=organization_type,
677 | city=city,
678 | state=state,
679 | api_key=api_key,
680 | page=page,
681 | page_size=page_size,
682 | )
683 |
684 | return format_results(
685 | items,
686 | domain="nci_organization",
687 | page=page,
688 | page_size=page_size,
689 | total=total,
690 | )
691 |
692 | elif domain == "nci_intervention":
693 | from .router_handlers import handle_nci_intervention_search
694 |
695 | # Extract parameters
696 | name = keywords[0] if keywords else None
697 | intervention_type = None # Could be parsed from additional params
698 |
699 | items, total = await handle_nci_intervention_search(
700 | name=name,
701 | intervention_type=intervention_type,
702 | synonyms=True,
703 | api_key=api_key,
704 | page=page,
705 | page_size=page_size,
706 | )
707 |
708 | return format_results(
709 | items,
710 | domain="nci_intervention",
711 | page=page,
712 | page_size=page_size,
713 | total=total,
714 | )
715 |
716 | elif domain == "nci_biomarker":
717 | from .router_handlers import handle_nci_biomarker_search
718 |
719 | # Extract parameters
720 | name = keywords[0] if keywords else None
721 | gene = genes[0] if genes else None
722 |
723 | items, total = await handle_nci_biomarker_search(
724 | name=name,
725 | gene=gene,
726 | biomarker_type=None,
727 | assay_type=None,
728 | api_key=api_key,
729 | page=page,
730 | page_size=page_size,
731 | )
732 |
733 | return format_results(
734 | items,
735 | domain="nci_biomarker",
736 | page=page,
737 | page_size=page_size,
738 | total=total,
739 | )
740 |
741 | elif domain == "nci_disease":
742 | from .router_handlers import handle_nci_disease_search
743 |
744 | # Extract parameters
745 | name = diseases[0] if diseases else keywords[0] if keywords else None
746 |
747 | items, total = await handle_nci_disease_search(
748 | name=name,
749 | include_synonyms=True,
750 | category=None,
751 | api_key=api_key,
752 | page=page,
753 | page_size=page_size,
754 | )
755 |
756 | return format_results(
757 | items,
758 | domain="nci_disease",
759 | page=page,
760 | page_size=page_size,
761 | total=total,
762 | )
763 |
764 | # OpenFDA domains
765 | elif domain == "fda_adverse":
766 | from biomcp.openfda import search_adverse_events
767 |
768 | drug_name = (
769 | chemicals[0] if chemicals else keywords[0] if keywords else None
770 | )
771 | skip = (page - 1) * page_size
772 | fda_result = await search_adverse_events(
773 | drug=drug_name,
774 | limit=page_size,
775 | skip=skip,
776 | api_key=api_key,
777 | )
778 | # The FDA result is not parsed into individual items here;
779 | # for simplicity it is returned whole as a single item
780 | return {"results": [{"content": fda_result}]}
781 |
782 | elif domain == "fda_label":
783 | from biomcp.openfda import search_drug_labels
784 |
785 | drug_name = (
786 | chemicals[0] if chemicals else keywords[0] if keywords else None
787 | )
788 | skip = (page - 1) * page_size
789 | fda_result = await search_drug_labels(
790 | name=drug_name,
791 | limit=page_size,
792 | skip=skip,
793 | api_key=api_key,
794 | )
795 | return {"results": [{"content": fda_result}]}
796 |
797 | elif domain == "fda_device":
798 | from biomcp.openfda import search_device_events
799 |
800 | device_name = keywords[0] if keywords else None
801 | skip = (page - 1) * page_size
802 | fda_result = await search_device_events(
803 | device=device_name,
804 | limit=page_size,
805 | skip=skip,
806 | api_key=api_key,
807 | )
808 | return {"results": [{"content": fda_result}]}
809 |
810 | elif domain == "fda_approval":
811 | from biomcp.openfda import search_drug_approvals
812 |
813 | drug_name = (
814 | chemicals[0] if chemicals else keywords[0] if keywords else None
815 | )
816 | skip = (page - 1) * page_size
817 | fda_result = await search_drug_approvals(
818 | drug=drug_name,
819 | limit=page_size,
820 | skip=skip,
821 | api_key=api_key,
822 | )
823 | return {"results": [{"content": fda_result}]}
824 |
825 | elif domain == "fda_recall":
826 | from biomcp.openfda import search_drug_recalls
827 |
828 | drug_name = (
829 | chemicals[0] if chemicals else keywords[0] if keywords else None
830 | )
831 | skip = (page - 1) * page_size
832 | fda_result = await search_drug_recalls(
833 | drug=drug_name,
834 | limit=page_size,
835 | skip=skip,
836 | api_key=api_key,
837 | )
838 | return {"results": [{"content": fda_result}]}
839 |
840 | elif domain == "fda_shortage":
841 | from biomcp.openfda import search_drug_shortages
842 |
843 | drug_name = (
844 | chemicals[0] if chemicals else keywords[0] if keywords else None
845 | )
846 | skip = (page - 1) * page_size
847 | fda_result = await search_drug_shortages(
848 | drug=drug_name,
849 | limit=page_size,
850 | skip=skip,
851 | api_key=api_key,
852 | )
853 | return {"results": [{"content": fda_result}]}
854 |
855 | else:
856 | raise InvalidDomainError(domain, VALID_DOMAINS)
857 |
858 |
859 | # ────────────────────────────
860 | # Unified FETCH tool
861 | # ────────────────────────────
862 | @mcp_app.tool()
863 | @track_performance("biomcp.fetch")
864 | async def fetch( # noqa: C901
865 | id: Annotated[ # noqa: A002
866 | str,
867 | "PMID / NCT ID / Variant ID / DOI / Gene ID / Drug ID / Disease ID / NCI Organization ID / NCI Intervention ID / NCI Disease ID / FDA Report ID / FDA Set ID / FDA MDR Key / FDA Application Number / FDA Recall Number",
868 | ],
869 | domain: Annotated[
870 | Literal[
871 | "article",
872 | "trial",
873 | "variant",
874 | "gene",
875 | "drug",
876 | "disease",
877 | "nci_organization",
878 | "nci_intervention",
879 | "nci_biomarker",
880 | "nci_disease",
881 | "fda_adverse",
882 | "fda_label",
883 | "fda_device",
884 | "fda_approval",
885 | "fda_recall",
886 | "fda_shortage",
887 | ]
888 | | None,
889 | Field(
890 | description="Domain of the record (auto-detected if not provided)"
891 | ),
892 | ] = None,
893 | call_benefit: Annotated[
894 | str | None,
895 | Field(
896 | description="Brief explanation of why this fetch is being performed and expected benefit. Helps provide context for analytics and improves result relevance."
897 | ),
898 | ] = None,
899 | detail: Annotated[
900 | Literal[
901 | "protocol", "locations", "outcomes", "references", "all", "full"
902 | ]
903 | | None,
904 | "Specific section to retrieve (trials) or 'full' (articles)",
905 | ] = None,
906 | api_key: Annotated[
907 | str | None,
908 | Field(
909 | description="NCI API key for fetching NCI records (nci_organization, nci_intervention, nci_disease). Required for NCI fetches. Get a free key at: https://clinicaltrialsapi.cancer.gov/"
910 | ),
911 | ] = None,
912 | ) -> dict:
913 | """Fetch comprehensive details for a specific biomedical record.
914 |
915 | This tool retrieves full information for articles, clinical trials, genetic variants,
916 | genes, drugs, or diseases using their unique identifiers. It returns data in a
917 | standardized format suitable for detailed analysis and research.
918 |
919 | ## IDENTIFIER FORMATS:
920 | - Articles: PMID (PubMed ID) - e.g., "35271234" OR DOI - e.g., "10.1101/2024.01.20.23288905"
921 | - Trials: NCT ID (ClinicalTrials.gov ID) - e.g., "NCT04280705"
922 | - Variants: HGVS notation or dbSNP ID - e.g., "chr7:g.140453136A>T" or "rs121913254"
923 | - Genes: Gene symbol or Entrez ID - e.g., "BRAF" or "673"
924 | - Drugs: Drug name or ID - e.g., "imatinib" or "DB00619"
925 | - Diseases: Disease name or ID - e.g., "melanoma" or "MONDO:0005105"
926 | - NCI Organizations: NCI organization ID - e.g., "NCI-2011-03337"
927 | - NCI Interventions: NCI intervention ID - e.g., "INT123456"
928 | - NCI Diseases: NCI disease ID - e.g., "C4872"
929 |
930 | The domain is automatically detected from the ID format if not provided:
931 | - NCT* → trial
932 | - Contains "/" with numeric prefix (DOI) → article
933 | - Pure numeric → article (PMID)
934 | - rs* or contains ':' or 'g.' → variant
935 | - For genes, drugs, diseases: manual specification recommended (IDs matching none of the patterns above default to article)
936 |
937 | ## DOMAIN-SPECIFIC OPTIONS:
938 |
939 | ### Articles (domain="article"):
940 | - Returns full article metadata, abstract, and full text when available
941 | - Supports both PubMed articles (via PMID) and Europe PMC preprints (via DOI)
942 | - Includes annotations for genes, diseases, chemicals, and variants (PubMed only)
943 | - detail="full" attempts to retrieve full text content (PubMed only)
944 |
945 | ### Clinical Trials (domain="trial"):
946 | - detail=None or "protocol": Core study information
947 | - detail="locations": Study sites and contact information
948 | - detail="outcomes": Primary/secondary outcomes and results
949 | - detail="references": Related publications and citations
950 | - detail="all": Complete trial record with all sections
951 |
952 | ### Variants (domain="variant"):
953 | - Returns comprehensive variant information including:
954 | - Clinical significance and interpretations
955 | - Population frequencies
956 | - Gene/protein effects
957 | - External database links
958 | - detail parameter is ignored (always returns full data)
959 |
960 | ### Genes (domain="gene"):
961 | - Returns gene information from MyGene.info including:
962 | - Gene symbol, name, and type
963 | - Entrez ID and Ensembl IDs
964 | - Gene summary and aliases
965 | - RefSeq information
966 | - detail parameter is ignored (always returns full data)
967 |
968 | ### Drugs (domain="drug"):
969 | - Returns drug/chemical information from MyChem.info including:
970 | - Drug name and trade names
971 | - Chemical formula and structure IDs
972 | - Clinical indications
973 | - Mechanism of action
974 | - External database links (DrugBank, PubChem, ChEMBL)
975 | - detail parameter is ignored (always returns full data)
976 |
977 | ### Diseases (domain="disease"):
978 | - Returns disease information from MyDisease.info including:
979 | - Disease name and definition
980 | - MONDO ontology ID
981 | - Disease synonyms
982 | - Cross-references to other databases
983 | - Associated phenotypes
984 | - detail parameter is ignored (always returns full data)
985 |
986 | ### NCI Organizations (domain="nci_organization"):
987 | - Returns organization information from NCI database including:
988 | - Organization name and type
989 | - Full address and contact information
990 | - Research focus areas
991 | - Associated clinical trials
992 | - Requires NCI API key
993 | - detail parameter is ignored (always returns full data)
994 |
995 | ### NCI Interventions (domain="nci_intervention"):
996 | - Returns intervention information from NCI database including:
997 | - Intervention name and type
998 | - Synonyms and alternative names
999 | - Mechanism of action (for drugs)
1000 | - FDA approval status
1001 | - Associated clinical trials
1002 | - Requires NCI API key
1003 | - detail parameter is ignored (always returns full data)
1004 |
1005 | ### NCI Diseases (domain="nci_disease"):
1006 | - Returns disease information from NCI controlled vocabulary including:
1007 | - Preferred disease name
1008 | - Disease category and classification
1009 | - All known synonyms
1010 | - Cross-reference codes (ICD, SNOMED)
1011 | - Requires NCI API key
1012 | - detail parameter is ignored (always returns full data)
1013 |
1014 | ## RETURN FORMAT:
1015 | All fetch operations return a standardized format:
1016 | ```json
1017 | {
1018 | "id": "unique_identifier",
1019 | "title": "Record title or name",
1020 | "text": "Full content or comprehensive description",
1021 | "url": "Link to original source",
1022 | "metadata": {
1023 | // Domain-specific additional fields
1024 | }
1025 | }
1026 | ```
1027 |
1028 | ## EXAMPLES:
1029 |
1030 | Fetch article by PMID (domain auto-detected):
1031 | ```
1032 | await fetch(id="35271234")
1033 | ```
1034 |
1035 | Fetch article by DOI (domain auto-detected):
1036 | ```
1037 | await fetch(id="10.1101/2024.01.20.23288905")
1038 | ```
1039 |
1040 | Fetch complete trial information (domain auto-detected):
1041 | ```
1042 | await fetch(
1043 | id="NCT04280705",
1044 | detail="all"
1045 | )
1046 | ```
1047 |
1048 | Fetch variant with clinical interpretations:
1049 | ```
1050 | await fetch(id="rs121913254")
1051 | ```
1052 |
1053 | Explicitly specify domain (optional):
1054 | ```
1055 | await fetch(
1056 | domain="variant",
1057 | id="chr7:g.140453136A>T"
1058 | )
1059 | ```
1060 | """
1061 | # Auto-detect domain if not provided
1062 | if domain is None:
1063 | # Try to infer domain from ID format
1064 | if id.upper().startswith("NCT"):
1065 | domain = "trial"
1066 | logger.info(f"Auto-detected domain 'trial' from NCT ID: {id}")
1067 | elif "/" in id and id.split("/")[0].replace(".", "").isdigit():
1068 | # DOI format (e.g., 10.1038/nature12373) - treat as article
1069 | domain = "article"
1070 | logger.info(f"Auto-detected domain 'article' from DOI: {id}")
1071 | elif id.isdigit():
1072 | # Numeric ID - likely PMID
1073 | domain = "article"
1074 | logger.info(
1075 | f"Auto-detected domain 'article' from numeric ID: {id}"
1076 | )
1077 | elif id.startswith("rs") or ":" in id or "g." in id:
1078 | # rsID or HGVS notation
1079 | domain = "variant"
1080 | logger.info(f"Auto-detected domain 'variant' from ID format: {id}")
1081 | else:
1082 | # Default to article if we can't determine
1083 | domain = "article"
1084 | logger.warning(
1085 | f"Could not auto-detect domain for ID '{id}', defaulting to 'article'"
1086 | )
1087 |
1088 | logger.info(f"Fetch called for {domain} with id={id}, detail={detail}")
1089 |
1090 | if domain == "article":
1091 | logger.debug("Fetching article details")
1092 | try:
1093 | from biomcp.articles.fetch import _article_details
1094 |
1095 | # The _article_details function handles both PMIDs and DOIs
1096 | result_str = await _article_details(
1097 | call_benefit=call_benefit
1098 | or "Fetching article details via MCP tool",
1099 | pmid=id,
1100 | )
1101 | except Exception as e:
1102 | logger.error(f"Article fetch failed: {e}")
1103 | raise SearchExecutionError("article", e) from e
1104 |
1105 | # Parse and return the first article
1106 | try:
1107 | articles = (
1108 | json.loads(result_str)
1109 | if isinstance(result_str, str)
1110 | else result_str
1111 | )
1112 | except (json.JSONDecodeError, TypeError) as e:
1113 | logger.error(f"Failed to parse article fetch results: {e}")
1114 | raise ResultParsingError("article", e) from e
1115 |
1116 | if not articles:
1117 | return {"error": "Article not found"}
1118 |
1119 | article = articles[0]
1120 |
1121 | # Check if the article is actually an error response
1122 | if "error" in article:
1123 | return {"error": article["error"]}
1124 |
1125 | # Format according to OpenAI MCP standard
1126 | full_text = article.get("full_text", "")
1127 | abstract = article.get("abstract", "")
1128 | text_content = full_text if full_text else abstract
1129 |
1130 | return {
1131 | "id": str(article.get("pmid", id)),
1132 | "title": article.get("title", DEFAULT_TITLE),
1133 | "text": text_content,
1134 | "url": article.get(
1135 | "url", f"https://pubmed.ncbi.nlm.nih.gov/{id}/"
1136 | ),
1137 | "metadata": {
1138 | "pmid": article.get("pmid"),
1139 | "journal": article.get("journal"),
1140 | "authors": article.get("authors"),
1141 | "year": article.get("year"),
1142 | "doi": article.get("doi"),
1143 | "annotations": article.get("annotations", {}),
1144 | "is_preprint": article.get("is_preprint", False),
1145 | "preprint_source": article.get("preprint_source"),
1146 | },
1147 | }
1148 |
1149 | elif domain == "trial":
1150 | logger.debug(f"Fetching trial details for section: {detail}")
1151 |
1152 | # Validate detail parameter
1153 | if detail is not None and detail not in TRIAL_DETAIL_SECTIONS:
1154 | raise InvalidParameterError(
1155 | "detail",
1156 | detail,
1157 | f"one of: {', '.join(TRIAL_DETAIL_SECTIONS)} or None",
1158 | )
1159 |
1160 | try:
1161 | # Always fetch protocol for basic info - get JSON format
1162 | protocol_json = await trial_getter.get_trial(
1163 | nct_id=id,
1164 | module=trial_getter.Module.PROTOCOL,
1165 | output_json=True,
1166 | )
1167 |
1168 | # Parse the JSON response
1169 | try:
1170 | protocol_data = json.loads(protocol_json)
1171 | except json.JSONDecodeError as e:
1172 | logger.error(f"Failed to parse protocol JSON for {id}: {e}")
1173 | return {
1174 | "id": id,
1175 | "title": f"Clinical Trial {id}",
1176 | "text": f"Error parsing trial data: {e}",
1177 | "url": f"https://clinicaltrials.gov/study/{id}",
1178 | "metadata": {
1179 | "nct_id": id,
1180 | "error": f"JSON parse error: {e}",
1181 | },
1182 | }
1183 |
1184 | # Check for errors in the response
1185 | if "error" in protocol_data:
1186 | return {
1187 | "id": id,
1188 | "title": f"Clinical Trial {id}",
1189 | "text": protocol_data.get(
1190 | "details",
1191 | protocol_data.get("error", "Trial not found"),
1192 | ),
1193 | "url": f"https://clinicaltrials.gov/study/{id}",
1194 | "metadata": {
1195 | "nct_id": id,
1196 | "error": protocol_data.get("error"),
1197 | },
1198 | }
1199 |
1200 | # Build comprehensive text description
1201 | text_parts = []
1202 |
1203 | # Extract protocol section data from the API response
1204 | protocol_section = protocol_data.get("protocolSection", {})
1205 |
1206 | # Extract basic info from the protocol section
1207 | id_module = protocol_section.get("identificationModule", {})
1208 | status_module = protocol_section.get("statusModule", {})
1209 | desc_module = protocol_section.get("descriptionModule", {})
1210 | conditions_module = protocol_section.get("conditionsModule", {})
1211 | design_module = protocol_section.get("designModule", {})
1212 | arms_module = protocol_section.get("armsInterventionsModule", {})
1213 |
1214 | # Add basic protocol info to text
1215 | title = id_module.get("briefTitle", f"Clinical Trial {id}")
1216 | text_parts.append(f"Study Title: {title}")
1217 |
1218 | # Conditions
1219 | conditions = conditions_module.get("conditions", [])
1220 | if conditions:
1221 | text_parts.append(f"\nConditions: {', '.join(conditions)}")
1222 |
1223 | # Interventions
1224 | interventions = []
1225 | for intervention in arms_module.get("interventions", []):
1226 | interventions.append(intervention.get("name", ""))
1227 | if interventions:
1228 | text_parts.append(f"Interventions: {', '.join(interventions)}")
1229 |
1230 | # Phase
1231 | phases = design_module.get("phases", [])
1232 | if phases:
1233 | text_parts.append(f"Phase: {', '.join(phases)}")
1234 |
1235 | # Status
1236 | overall_status = status_module.get("overallStatus", "N/A")
1237 | text_parts.append(f"Status: {overall_status}")
1238 |
1239 | # Summary
1240 | brief_summary = desc_module.get(
1241 | "briefSummary", "No summary available"
1242 | )
1243 | text_parts.append(f"\nSummary: {brief_summary}")
1244 |
1245 | # Prepare metadata
1246 | metadata = {"nct_id": id, "protocol": protocol_data}
1247 |
1248 | if detail in ("all", "locations", "outcomes", "references"):
1249 | # Fetch additional sections as needed
1250 | if detail == "all" or detail == "locations":
1251 | try:
1252 | locations_json = await trial_getter.get_trial(
1253 | nct_id=id,
1254 | module=trial_getter.Module.LOCATIONS,
1255 | output_json=True,
1256 | )
1257 | locations_data = json.loads(locations_json)
1258 | if "error" not in locations_data:
1259 | # Extract locations from the protocol section
1260 | locations_module = locations_data.get(
1261 | "protocolSection", {}
1262 | ).get("contactsLocationsModule", {})
1263 | locations_list = locations_module.get(
1264 | "locations", []
1265 | )
1266 | metadata["locations"] = locations_list
1267 | if locations_list:
1268 | text_parts.append(
1269 | f"\n\nLocations: {len(locations_list)} study sites"
1270 | )
1271 | except Exception as e:
1272 | logger.warning(
1273 | f"Failed to fetch locations for {id}: {e}"
1274 | )
1275 | metadata["locations"] = []
1276 |
1277 | if detail == "all" or detail == "outcomes":
1278 | try:
1279 | outcomes_json = await trial_getter.get_trial(
1280 | nct_id=id,
1281 | module=trial_getter.Module.OUTCOMES,
1282 | output_json=True,
1283 | )
1284 | outcomes_data = json.loads(outcomes_json)
1285 | if "error" not in outcomes_data:
1286 | # Extract outcomes from the protocol section
1287 | outcomes_module = outcomes_data.get(
1288 | "protocolSection", {}
1289 | ).get("outcomesModule", {})
1290 | primary_outcomes = outcomes_module.get(
1291 | "primaryOutcomes", []
1292 | )
1293 | secondary_outcomes = outcomes_module.get(
1294 | "secondaryOutcomes", []
1295 | )
1296 | metadata["outcomes"] = {
1297 | "primary_outcomes": primary_outcomes,
1298 | "secondary_outcomes": secondary_outcomes,
1299 | }
1300 | if primary_outcomes:
1301 | text_parts.append(
1302 | f"\n\nPrimary Outcomes: {len(primary_outcomes)} measures"
1303 | )
1304 | except Exception as e:
1305 | logger.warning(
1306 | f"Failed to fetch outcomes for {id}: {e}"
1307 | )
1308 | metadata["outcomes"] = {}
1309 |
1310 | if detail == "all" or detail == "references":
1311 | try:
1312 | references_json = await trial_getter.get_trial(
1313 | nct_id=id,
1314 | module=trial_getter.Module.REFERENCES,
1315 | output_json=True,
1316 | )
1317 | references_data = json.loads(references_json)
1318 | if "error" not in references_data:
1319 | # Extract references from the protocol section
1320 | references_module = references_data.get(
1321 | "protocolSection", {}
1322 | ).get("referencesModule", {})
1323 | references_list = references_module.get(
1324 | "references", []
1325 | )
1326 | metadata["references"] = references_list
1327 | if references_list:
1328 | text_parts.append(
1329 | f"\n\nReferences: {len(references_list)} publications"
1330 | )
1331 | except Exception as e:
1332 | logger.warning(
1333 | f"Failed to fetch references for {id}: {e}"
1334 | )
1335 | metadata["references"] = []
1336 |
1337 | # Return OpenAI MCP compliant format
1338 | return {
1339 | "id": id,
1340 | "title": title,
1341 | "text": "\n".join(text_parts),
1342 | "url": f"https://clinicaltrials.gov/study/{id}",
1343 | "metadata": metadata,
1344 | }
1345 |
1346 | except Exception as e:
1347 | logger.error(f"Trial fetch failed: {e}")
1348 | raise SearchExecutionError("trial", e) from e
1349 |
1350 | elif domain == "variant":
1351 | logger.debug("Fetching variant details")
1352 | try:
1353 | from biomcp.variants.getter import get_variant
1354 |
1355 | result_str = await get_variant(
1356 | variant_id=id,
1357 | output_json=True,
1358 | include_external=True,
1359 | )
1360 | except Exception as e:
1361 | logger.error(f"Variant fetch failed: {e}")
1362 | raise SearchExecutionError("variant", e) from e
1363 |
1364 | try:
1365 | variant_response = (
1366 | json.loads(result_str)
1367 | if isinstance(result_str, str)
1368 | else result_str
1369 | )
1370 | except (json.JSONDecodeError, TypeError) as e:
1371 | logger.error(f"Failed to parse variant fetch results: {e}")
1372 | raise ResultParsingError("variant", e) from e
1373 |
1374 | # get_variant returns a list, extract the first variant
1375 | if isinstance(variant_response, list) and variant_response:
1376 | variant_data = variant_response[0]
1377 | elif isinstance(variant_response, dict):
1378 | variant_data = variant_response
1379 | else:
1380 | return {"error": "Variant not found"}
1381 |
1382 | # Build comprehensive text description
1383 | text_parts = []
1384 |
1385 | # Basic variant info
1386 | text_parts.append(f"Variant: {variant_data.get('_id', id)}")
1387 |
1388 | # Gene information
1389 | if variant_data.get("gene"):
1390 | gene_info = variant_data["gene"]
1391 | text_parts.append(
1392 | f"\nGene: {gene_info.get('symbol', 'Unknown')} ({gene_info.get('name', '')})"
1393 | )
1394 |
1395 | # Clinical significance
1396 | if variant_data.get("clinvar"):
1397 | clinvar = variant_data["clinvar"]
1398 | if clinvar.get("clinical_significance"):
1399 | text_parts.append(
1400 | f"\nClinical Significance: {clinvar['clinical_significance']}"
1401 | )
1402 | if clinvar.get("review_status"):
1403 | text_parts.append(f"Review Status: {clinvar['review_status']}")
1404 |
1405 | # dbSNP info
1406 | if variant_data.get("dbsnp"):
1407 | dbsnp = variant_data["dbsnp"]
1408 | if dbsnp.get("rsid"):
1409 | text_parts.append(f"\ndbSNP: {dbsnp['rsid']}")
1410 |
1411 | # CADD scores
1412 | if variant_data.get("cadd"):
1413 | cadd = variant_data["cadd"]
1414 | if cadd.get("phred"):
1415 | text_parts.append(f"\nCADD Score: {cadd['phred']}")
1416 |
1417 | # Allele frequencies
1418 | if variant_data.get("gnomad_exome"):
1419 | gnomad = variant_data["gnomad_exome"]
1420 | if gnomad.get("af", {}).get("af"):
1421 | text_parts.append(
1422 | f"\nGnomAD Allele Frequency: {gnomad['af']['af']:.6f}"
1423 | )
1424 |
1425 | # External links
1426 | if variant_data.get("external_links"):
1427 | links = variant_data["external_links"]
1428 | text_parts.append(
1429 | f"\n\nExternal Resources: {len(links)} database links available"
1430 | )
1431 |
1432 | # Check for external data indicators
1433 | if variant_data.get("tcga"):
1434 | text_parts.append("\n\nTCGA Data: Available")
1435 | if variant_data.get("1000genomes"):
1436 | text_parts.append("\n1000 Genomes Data: Available")
1437 |
1438 | # Determine best URL
1439 | url = variant_data.get("url", "")
1440 | if not url and variant_data.get("dbsnp", {}).get("rsid"):
1441 | url = f"https://www.ncbi.nlm.nih.gov/snp/{variant_data['dbsnp']['rsid']}"
1442 | elif not url:
1443 | url = f"https://myvariant.info/v1/variant/{id}"
1444 |
1445 | # Return OpenAI MCP compliant format
1446 | return {
1447 | "id": variant_data.get("_id", id),
1448 | "title": f"Variant {variant_data.get('_id', id)}",
1449 | "text": "\n".join(text_parts),
1450 | "url": url,
1451 | "metadata": variant_data,
1452 | }
1453 |
1454 | elif domain == "gene":
1455 | logger.debug("Fetching gene details")
1456 | try:
1457 | client = BioThingsClient()
1458 | gene_info = await client.get_gene_info(id)
1459 |
1460 | if not gene_info:
1461 | return {"error": f"Gene {id} not found"}
1462 |
1463 | # Build comprehensive text description
1464 | text_parts = []
1465 | text_parts.append(f"Gene: {gene_info.symbol} ({gene_info.name})")
1466 |
1467 | if gene_info.entrezgene:
1468 | text_parts.append(f"\nEntrez ID: {gene_info.entrezgene}")
1469 |
1470 | if gene_info.type_of_gene:
1471 | text_parts.append(f"Type: {gene_info.type_of_gene}")
1472 |
1473 | if gene_info.summary:
1474 | text_parts.append(f"\nSummary: {gene_info.summary}")
1475 |
1476 | if gene_info.alias:
1477 | text_parts.append(f"\nAliases: {', '.join(gene_info.alias)}")
1478 |
1479 | # URL
1480 | url = (
1481 | f"https://www.genenames.org/data/gene-symbol-report/#!/symbol/{gene_info.symbol}"
1482 | if gene_info.symbol
1483 | else ""
1484 | )
1485 |
1486 | # Return OpenAI MCP compliant format
1487 | return {
1488 | "id": str(gene_info.gene_id),
1489 | "title": f"{gene_info.symbol}: {gene_info.name}"
1490 | if gene_info.symbol and gene_info.name
1491 | else gene_info.symbol or gene_info.name or DEFAULT_TITLE,
1492 | "text": "\n".join(text_parts),
1493 | "url": url,
1494 | "metadata": gene_info.model_dump(),
1495 | }
1496 |
1497 | except Exception as e:
1498 | logger.error(f"Gene fetch failed: {e}")
1499 | raise SearchExecutionError("gene", e) from e
1500 |
1501 | elif domain == "drug":
1502 | logger.debug("Fetching drug details")
1503 | try:
1504 | client = BioThingsClient()
1505 | drug_info = await client.get_drug_info(id)
1506 |
1507 | if not drug_info:
1508 | return {"error": f"Drug {id} not found"}
1509 |
1510 | # Build comprehensive text description
1511 | text_parts = []
1512 | text_parts.append(f"Drug: {drug_info.name}")
1513 |
1514 | if drug_info.drugbank_id:
1515 | text_parts.append(f"\nDrugBank ID: {drug_info.drugbank_id}")
1516 |
1517 | if drug_info.formula:
1518 | text_parts.append(f"Formula: {drug_info.formula}")
1519 |
1520 | if drug_info.tradename:
1521 | text_parts.append(
1522 | f"\nTrade Names: {', '.join(drug_info.tradename)}"
1523 | )
1524 |
1525 | if drug_info.description:
1526 | text_parts.append(f"\nDescription: {drug_info.description}")
1527 |
1528 | if drug_info.indication:
1529 | text_parts.append(f"\nIndication: {drug_info.indication}")
1530 |
1531 | if drug_info.mechanism_of_action:
1532 | text_parts.append(
1533 | f"\nMechanism of Action: {drug_info.mechanism_of_action}"
1534 | )
1535 |
1536 | # URL
1537 | url = ""
1538 | if drug_info.drugbank_id:
1539 | url = f"https://www.drugbank.ca/drugs/{drug_info.drugbank_id}"
1540 | elif drug_info.pubchem_cid:
1541 | url = f"https://pubchem.ncbi.nlm.nih.gov/compound/{drug_info.pubchem_cid}"
1542 |
1543 | # Return OpenAI MCP compliant format
1544 | return {
1545 | "id": drug_info.drug_id,
1546 | "title": drug_info.name or drug_info.drug_id or DEFAULT_TITLE,
1547 | "text": "\n".join(text_parts),
1548 | "url": url,
1549 | "metadata": drug_info.model_dump(),
1550 | }
1551 |
1552 | except Exception as e:
1553 | logger.error(f"Drug fetch failed: {e}")
1554 | raise SearchExecutionError("drug", e) from e
1555 |
1556 | elif domain == "disease":
1557 | logger.debug("Fetching disease details")
1558 | try:
1559 | client = BioThingsClient()
1560 | disease_info = await client.get_disease_info(id)
1561 |
1562 | if not disease_info:
1563 | return {"error": f"Disease {id} not found"}
1564 |
1565 | # Build comprehensive text description
1566 | text_parts = []
1567 | text_parts.append(f"Disease: {disease_info.name}")
1568 |
1569 | if disease_info.mondo and isinstance(disease_info.mondo, dict):
1570 | mondo_id = disease_info.mondo.get("id")
1571 | if mondo_id:
1572 | text_parts.append(f"\nMONDO ID: {mondo_id}")
1573 |
1574 | if disease_info.definition:
1575 | text_parts.append(f"\nDefinition: {disease_info.definition}")
1576 |
1577 | if disease_info.synonyms:
1578 | text_parts.append(
1579 | f"\nSynonyms: {', '.join(disease_info.synonyms[:5])}"
1580 | )
1581 | if len(disease_info.synonyms) > 5:
1582 | text_parts.append(
1583 | f" ... and {len(disease_info.synonyms) - 5} more"
1584 | )
1585 |
1586 | if disease_info.phenotypes:
1587 | text_parts.append(
1588 | f"\nAssociated Phenotypes: {len(disease_info.phenotypes)}"
1589 | )
1590 |
1591 | # URL
1592 | url = ""
1593 | if disease_info.mondo and isinstance(disease_info.mondo, dict):
1594 | mondo_id = disease_info.mondo.get("id")
1595 | if mondo_id:
1596 | url = f"https://monarchinitiative.org/disease/{mondo_id}"
1597 |
1598 | # Return OpenAI MCP compliant format
1599 | return {
1600 | "id": disease_info.disease_id,
1601 | "title": disease_info.name
1602 | or disease_info.disease_id
1603 | or DEFAULT_TITLE,
1604 | "text": "\n".join(text_parts),
1605 | "url": url,
1606 | "metadata": disease_info.model_dump(),
1607 | }
1608 |
1609 | except Exception as e:
1610 | logger.error(f"Disease fetch failed: {e}")
1611 | raise SearchExecutionError("disease", e) from e
1612 |
1613 | elif domain == "nci_organization":
1614 | logger.debug("Fetching NCI organization details")
1615 | try:
1616 | from biomcp.organizations import get_organization
1617 | from biomcp.organizations.getter import format_organization_details
1618 |
1619 | org_data = await get_organization(
1620 | org_id=id,
1621 | api_key=api_key,
1622 | )
1623 |
1624 | # Format the details
1625 | formatted_text = format_organization_details(org_data)
1626 |
1627 | # Return OpenAI MCP compliant format
1628 | return {
1629 | "id": id,
1630 | "title": org_data.get("name", "Unknown Organization"),
1631 | "text": formatted_text,
1632 | "url": "", # NCI doesn't provide direct URLs
1633 | "metadata": org_data,
1634 | }
1635 |
1636 | except Exception as e:
1637 | logger.error(f"NCI organization fetch failed: {e}")
1638 | raise SearchExecutionError("nci_organization", e) from e
1639 |
1640 | elif domain == "nci_intervention":
1641 | logger.debug("Fetching NCI intervention details")
1642 | try:
1643 | from biomcp.interventions import get_intervention
1644 | from biomcp.interventions.getter import format_intervention_details
1645 |
1646 | intervention_data = await get_intervention(
1647 | intervention_id=id,
1648 | api_key=api_key,
1649 | )
1650 |
1651 | # Format the details
1652 | formatted_text = format_intervention_details(intervention_data)
1653 |
1654 | # Return OpenAI MCP compliant format
1655 | return {
1656 | "id": id,
1657 | "title": intervention_data.get("name", "Unknown Intervention"),
1658 | "text": formatted_text,
1659 | "url": "", # NCI doesn't provide direct URLs
1660 | "metadata": intervention_data,
1661 | }
1662 |
1663 | except Exception as e:
1664 | logger.error(f"NCI intervention fetch failed: {e}")
1665 | raise SearchExecutionError("nci_intervention", e) from e
1666 |
1667 | elif domain == "nci_disease":
1668 | logger.debug("Fetching NCI disease details")
1669 | try:
1670 | from biomcp.diseases import get_disease_by_id
1671 |
1672 | disease_data = await get_disease_by_id(
1673 | disease_id=id,
1674 | api_key=api_key,
1675 | )
1676 |
1677 | # Build text description
1678 | text_parts = []
1679 | text_parts.append(
1680 | f"Disease: {disease_data.get('name', 'Unknown Disease')}"
1681 | )
1682 |
1683 | if disease_data.get("category"):
1684 | text_parts.append(f"\nCategory: {disease_data['category']}")
1685 |
1686 | if disease_data.get("synonyms"):
1687 | synonyms = disease_data["synonyms"]
1688 | if isinstance(synonyms, list) and synonyms:
1689 | text_parts.append(f"\nSynonyms: {', '.join(synonyms[:5])}")
1690 | if len(synonyms) > 5:
1691 | text_parts.append(
1692 | f" ... and {len(synonyms) - 5} more"
1693 | )
1694 |
1695 | if disease_data.get("codes"):
1696 | codes = disease_data["codes"]
1697 | if isinstance(codes, dict):
1698 | code_items = [
1699 | f"{system}: {code}" for system, code in codes.items()
1700 | ]
1701 | if code_items:
1702 | text_parts.append(f"\nCodes: {', '.join(code_items)}")
1703 |
1704 | # Return OpenAI MCP compliant format
1705 | return {
1706 | "id": id,
1707 | "title": disease_data.get(
1708 | "name",
1709 | disease_data.get("preferred_name", "Unknown Disease"),
1710 | ),
1711 | "text": "\n".join(text_parts),
1712 | "url": "", # NCI doesn't provide direct URLs
1713 | "metadata": disease_data,
1714 | }
1715 |
1716 | except Exception as e:
1717 | logger.error(f"NCI disease fetch failed: {e}")
1718 | raise SearchExecutionError("nci_disease", e) from e
1719 |
1720 | # Note: nci_biomarker doesn't support fetching by ID, only searching
1721 |
1722 | # OpenFDA domains
1723 | elif domain == "fda_adverse":
1724 | from biomcp.openfda import get_adverse_event
1725 |
1726 | result = await get_adverse_event(id, api_key=api_key)
1727 | return {
1728 | "title": f"FDA Adverse Event Report {id}",
1729 | "text": result,
1730 | "url": "",
1731 | "metadata": {"report_id": id, "domain": "fda_adverse"},
1732 | }
1733 |
1734 | elif domain == "fda_label":
1735 | from biomcp.openfda import get_drug_label
1736 |
1737 | result = await get_drug_label(id, api_key=api_key)
1738 | return {
1739 | "title": f"FDA Drug Label {id}",
1740 | "text": result,
1741 | "url": "",
1742 | "metadata": {"set_id": id, "domain": "fda_label"},
1743 | }
1744 |
1745 | elif domain == "fda_device":
1746 | from biomcp.openfda import get_device_event
1747 |
1748 | result = await get_device_event(id, api_key=api_key)
1749 | return {
1750 | "title": f"FDA Device Event {id}",
1751 | "text": result,
1752 | "url": "",
1753 | "metadata": {"mdr_report_key": id, "domain": "fda_device"},
1754 | }
1755 |
1756 | elif domain == "fda_approval":
1757 | from biomcp.openfda import get_drug_approval
1758 |
1759 | result = await get_drug_approval(id, api_key=api_key)
1760 | return {
1761 | "title": f"FDA Drug Approval {id}",
1762 | "text": result,
1763 | "url": "",
1764 | "metadata": {"application_number": id, "domain": "fda_approval"},
1765 | }
1766 |
1767 | elif domain == "fda_recall":
1768 | from biomcp.openfda import get_drug_recall
1769 |
1770 | result = await get_drug_recall(id, api_key=api_key)
1771 | return {
1772 | "title": f"FDA Drug Recall {id}",
1773 | "text": result,
1774 | "url": "",
1775 | "metadata": {"recall_number": id, "domain": "fda_recall"},
1776 | }
1777 |
1778 | elif domain == "fda_shortage":
1779 | from biomcp.openfda import get_drug_shortage
1780 |
1781 | result = await get_drug_shortage(id, api_key=api_key)
1782 | return {
1783 | "title": f"FDA Drug Shortage - {id}",
1784 | "text": result,
1785 | "url": "",
1786 | "metadata": {"drug": id, "domain": "fda_shortage"},
1787 | }
1788 |
1789 | # Invalid domain
1790 | raise InvalidDomainError(domain, VALID_DOMAINS)
1791 |
1792 |
1793 | # Internal function for unified search
async def _unified_search(  # noqa: C901
    query: str,
    max_results_per_domain: int = MAX_RESULTS_PER_DOMAIN_DEFAULT,
    domains: list[str] | None = None,
    explain_query: bool = False,
) -> dict:
    """Internal unified search implementation.

    Parses the unified query language and routes to appropriate domain tools.
    Supports field-based syntax like 'gene:BRAF AND trials.phase:3'.

    Args:
        query: Unified query string with field syntax
        max_results_per_domain: Limit results per domain
        domains: Optional list to filter which domains to search. Only the
            plural names "articles", "trials" and "variants" are matched
            against the planned tools; when a filter is given, tools for
            any other domain are dropped from the plan.
        explain_query: If True, return query parsing explanation

    Returns:
        When ``explain_query`` is True, a dictionary with the keys
        ``original_query``, ``parsed_structure``, ``routing_plan`` and
        ``schema`` (no search is executed). Otherwise ``{"results": [...]}``
        where every entry has ``id``, ``title``, ``text`` and ``url`` keys
        and results of all domains are collected into one flat list.

    Raises:
        QueryParsingError: If query cannot be parsed
        SearchExecutionError: If search execution fails
    """
    # Function-scope imports keep this block self-contained.
    import re
    from urllib.parse import quote

    logger.info(f"Unified search with query: {query}")

    # Parse the query
    try:
        parser = QueryParser()
        parsed = parser.parse(query)
    except Exception as e:
        logger.error(f"Failed to parse query: {e}")
        raise QueryParsingError(query, e) from e

    # Route to appropriate tools
    router = QueryRouter()
    plan = router.route(parsed)

    # Filter domains if specified: a tool is kept when its name mentions a
    # domain whose plural form was requested.
    if domains:
        plan.tools_to_call = [
            tool
            for tool in plan.tools_to_call
            if ("article" in tool and "articles" in domains)
            or ("trial" in tool and "trials" in domains)
            or ("variant" in tool and "variants" in domains)
        ]

    # Return explanation if requested (skips executing the plan entirely)
    if explain_query:
        return {
            "original_query": query,
            "parsed_structure": {
                "cross_domain_fields": parsed.cross_domain_fields,
                "domain_specific_fields": parsed.domain_specific_fields,
                "terms": [
                    {
                        "field": term.field,
                        "operator": term.operator.value,
                        "value": term.value,
                        "domain": term.domain,
                    }
                    for term in parsed.terms
                ],
            },
            "routing_plan": {
                "tools_to_call": plan.tools_to_call,
                "field_mappings": plan.field_mappings,
            },
            "schema": parser.get_schema(),
        }

    # Execute the search plan
    try:
        results = await execute_routing_plan(plan, output_json=True)
    except Exception as e:
        logger.error(f"Failed to execute search plan: {e}")
        raise SearchExecutionError("unified", e) from e

    # Format unified results - collect all results into a single array
    all_results = []

    for domain, result_str in results.items():
        # A per-domain failure is logged and skipped so that the other
        # domains can still contribute results.
        if isinstance(result_str, dict) and "error" in result_str:
            logger.warning(f"Error in domain {domain}: {result_str['error']}")
            continue

        try:
            data = (
                json.loads(result_str)
                if isinstance(result_str, str)
                else result_str
            )

            # Get the appropriate handler for formatting. Only the single
            # plural "s" is dropped ("articles" -> "article");
            # rstrip("s") would strip every trailing "s" character.
            handler_class = get_domain_handler(domain.removesuffix("s"))

            # Process and format each result
            # Handle both list format and dict format (for articles with cBioPortal data)
            items_to_process = []
            cbioportal_summary = None

            if isinstance(data, list):
                items_to_process = data[:max_results_per_domain]
            elif isinstance(data, dict):
                # Handle unified search format with cBioPortal data
                if "articles" in data:
                    items_to_process = data["articles"][
                        :max_results_per_domain
                    ]
                    cbioportal_summary = data.get("cbioportal_summary")
                else:
                    # Single item dict
                    items_to_process = [data]

            # Add cBioPortal summary as first result if available
            if cbioportal_summary and domain == "articles":
                try:
                    # Extract gene name from parsed query or summary
                    gene_name = parsed.cross_domain_fields.get("gene", "")
                    if not gene_name and "Summary for " in cbioportal_summary:
                        # Try to extract from summary title
                        match = re.search(
                            r"Summary for (\w+)", cbioportal_summary
                        )
                        if match:
                            gene_name = match.group(1)

                    # Percent-encode the gene so that characters such as
                    # spaces, "&" or "#" cannot break or alter the query
                    # string of the generated link.
                    if gene_name:
                        cbio_url = (
                            "https://www.cbioportal.org/results?gene_list="
                            f"{quote(str(gene_name), safe='')}"
                        )
                    else:
                        cbio_url = ""

                    cbio_result = {
                        "id": f"cbioportal_summary_{gene_name or 'gene'}",
                        "title": f"cBioPortal Summary for {gene_name or 'Gene'}",
                        "text": cbioportal_summary[:5000],  # Limit text length
                        "url": cbio_url,
                    }
                    all_results.append(cbio_result)
                except Exception as e:
                    logger.warning(f"Failed to format cBioPortal summary: {e}")

            for item in items_to_process:
                try:
                    formatted_result = handler_class.format_result(item)
                    # Ensure OpenAI MCP format; "snippet" is preferred over
                    # "text" when the handler provides both.
                    openai_result = {
                        "id": formatted_result.get("id", ""),
                        "title": formatted_result.get("title", DEFAULT_TITLE),
                        "text": formatted_result.get(
                            "snippet", formatted_result.get("text", "")
                        ),
                        "url": formatted_result.get("url", ""),
                    }
                    # Note: For unified search, we can optionally include domain in metadata
                    # This helps distinguish between result types
                    all_results.append(openai_result)
                except Exception as e:
                    # One malformed item must not discard the rest.
                    logger.warning(
                        f"Failed to format result in domain {domain}: {e}"
                    )
                    continue

        except (json.JSONDecodeError, TypeError, ValueError) as e:
            logger.warning(f"Failed to parse results for domain {domain}: {e}")
            continue

    logger.info(
        f"Unified search completed with {len(all_results)} total results"
    )

    # Return OpenAI MCP compliant format
    return {"results": all_results}
1970 |
```